/*
 * Based on arch/arm/mm/mmu.c
 *
 * Copyright (C) 1995-2005 Russell King
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/stop_machine.h>

#include <asm/cputype.h>
#include <asm/fixmap.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/tlb.h>
#include <asm/memblock.h>
#include <asm/mmu_context.h>

#include "mm.h"

/*
 * Empty_zero_page is a special page that is used for zero-initialized data
 * and COW.
 */
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);

struct cachepolicy {
	const char	policy[16];
	u64		mair;
	u64		tcr;
};

static struct cachepolicy cache_policies[] __initdata = {
	{
		.policy		= "uncached",
		.mair		= 0x44,			/* inner, outer non-cacheable */
		.tcr		= TCR_IRGN_NC | TCR_ORGN_NC,
	}, {
		.policy		= "writethrough",
		.mair		= 0xaa,			/* inner, outer write-through, read-allocate */
		.tcr		= TCR_IRGN_WT | TCR_ORGN_WT,
	}, {
		.policy		= "writeback",
		.mair		= 0xee,			/* inner, outer write-back, read-allocate */
		.tcr		= TCR_IRGN_WBnWA | TCR_ORGN_WBnWA,
	}
};

/*
 * These are useful for identifying cache coherency problems by allowing the
 * cache or the cache and writebuffer to be turned off. It changes the Normal
 * memory caching attributes in the MAIR_EL1 register.
 */
static int __init early_cachepolicy(char *p)
{
	int i;
	u64 tmp;

	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
		int len = strlen(cache_policies[i].policy);

		if (memcmp(p, cache_policies[i].policy, len) == 0)
			break;
	}
	if (i == ARRAY_SIZE(cache_policies)) {
		pr_err("ERROR: unknown or unsupported cache policy: %s\n", p);
		return 0;
	}

	flush_cache_all();

	/*
	 * Modify MT_NORMAL attributes in MAIR_EL1.
	 */
	asm volatile(
	"	mrs	%0, mair_el1\n"
	"	bfi	%0, %1, %2, #8\n"
	"	msr	mair_el1, %0\n"
	"	isb\n"
	: "=&r" (tmp)
	: "r" (cache_policies[i].mair), "i" (MT_NORMAL * 8));

	/*
	 * Modify TCR PTW cacheability attributes.
	 */
	asm volatile(
	"	mrs	%0, tcr_el1\n"
	"	bic	%0, %0, %2\n"
	"	orr	%0, %0, %1\n"
	"	msr	tcr_el1, %0\n"
	"	isb\n"
	: "=&r" (tmp)
	: "r" (cache_policies[i].tcr), "r" (TCR_IRGN_MASK | TCR_ORGN_MASK));

	flush_cache_all();

	return 0;
}
early_param("cachepolicy", early_cachepolicy);
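
/*
 * Example (illustrative): booting with "cachepolicy=writethrough" on the
 * kernel command line selects the second entry in cache_policies[] above,
 * rewriting the MT_NORMAL attribute byte of MAIR_EL1 to 0xaa and the
 * TCR_EL1 page table walk cacheability fields to write-through.
 */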

pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
			      unsigned long size, pgprot_t vma_prot)
{
	if (!pfn_valid(pfn))
		return pgprot_noncached(vma_prot);
	else if (file->f_flags & O_SYNC)
		return pgprot_writecombine(vma_prot);
	return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);
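
/*
 * For instance, an mmap() of a physical address with no struct page
 * (pfn_valid() fails) is forced to a non-cached mapping, a mapping of
 * ordinary RAM opened with O_SYNC is downgraded to write-combine, and
 * everything else keeps the protection the caller asked for.
 */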

static void __init *early_alloc(unsigned long sz)
{
	void *ptr = __va(memblock_alloc(sz, sz));
	BUG_ON(!ptr);
	memset(ptr, 0, sz);
	return ptr;
}

/*
 * remap a PMD into pages
 */
static void split_pmd(pmd_t *pmd, pte_t *pte)
{
	unsigned long pfn = pmd_pfn(*pmd);
	int i = 0;

	do {
		/*
		 * Need to have the least restrictive permissions available;
		 * permissions will be fixed up later.
		 */
		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
		pfn++;
	} while (pte++, i++, i < PTRS_PER_PTE);
}
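
/*
 * For example, with a 4K granule a single 2MB section PMD covering
 * pfn .. pfn + 511 is rewritten above as 512 contiguous PTEs with
 * PAGE_KERNEL_EXEC; stricter final permissions are applied later
 * (see mark_rodata_ro() and fixup_init() below).
 */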

static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
				  unsigned long end, unsigned long pfn,
				  pgprot_t prot,
				  void *(*alloc)(unsigned long size))
{
	pte_t *pte;

	if (pmd_none(*pmd) || pmd_bad(*pmd)) {
		pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
		if (pmd_sect(*pmd))
			split_pmd(pmd, pte);
		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
		flush_tlb_all();
	}

	pte = pte_offset_kernel(pmd, addr);
	do {
		set_pte(pte, pfn_pte(pfn, prot));
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void split_pud(pud_t *old_pud, pmd_t *pmd)
{
	unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT;
	pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr);
	int i = 0;

	do {
		set_pmd(pmd, __pmd(addr | prot));
		addr += PMD_SIZE;
	} while (pmd++, i++, i < PTRS_PER_PMD);
}

static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
				  unsigned long addr, unsigned long end,
				  phys_addr_t phys, pgprot_t prot,
				  void *(*alloc)(unsigned long size))
{
	pmd_t *pmd;
	unsigned long next;

	/*
	 * Check for initial section mappings in the pgd/pud and remove them.
	 */
	if (pud_none(*pud) || pud_bad(*pud)) {
		pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
		if (pud_sect(*pud)) {
			/*
			 * Need to keep the 1G of mappings present while the
			 * section is split into PMDs.
			 */
			split_pud(pud, pmd);
		}
		pud_populate(mm, pud, pmd);
		flush_tlb_all();
	}

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		/* try section mapping first */
		if (((addr | next | phys) & ~SECTION_MASK) == 0) {
			pmd_t old_pmd = *pmd;
			set_pmd(pmd, __pmd(phys |
					   pgprot_val(mk_sect_prot(prot))));
			/*
			 * Check for previous table entries created during
			 * boot (__create_page_tables) and flush them.
			 */
			if (!pmd_none(old_pmd))
				flush_tlb_all();
		} else {
			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
				       prot, alloc);
		}
		phys += next - addr;
	} while (pmd++, addr = next, addr != end);
}

static inline bool use_1G_block(unsigned long addr, unsigned long next,
			unsigned long phys)
{
	if (PAGE_SHIFT != 12)
		return false;

	if (((addr | next | phys) & ~PUD_MASK) != 0)
		return false;

	return true;
}
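
/*
 * Worked example: with 4K pages PUD_MASK covers 1GB, so a range is eligible
 * for a single 1GB block entry only when addr, next and phys are all
 * 1GB-aligned (e.g. a 1GB-aligned, 1GB-sized bank of RAM whose physical and
 * virtual addresses share the same offset within a 1GB region). Any other
 * granule (PAGE_SHIFT != 12) always falls back to PMD/PTE mappings.
 */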

static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
				  unsigned long addr, unsigned long end,
				  phys_addr_t phys, pgprot_t prot,
				  void *(*alloc)(unsigned long size))
{
	pud_t *pud;
	unsigned long next;

	if (pgd_none(*pgd)) {
		pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
		pgd_populate(mm, pgd, pud);
	}
	BUG_ON(pgd_bad(*pgd));

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);

		/*
		 * For the 4K granule only, attempt to put down a 1GB block.
		 */
		if (use_1G_block(addr, next, phys)) {
			pud_t old_pud = *pud;
			set_pud(pud, __pud(phys |
					   pgprot_val(mk_sect_prot(prot))));

			/*
			 * If we have an old value for a pud, it will
			 * be pointing to a pmd table that we no longer
			 * need (from swapper_pg_dir).
			 *
			 * Look up the old pmd table and free it.
			 */
			if (!pud_none(old_pud)) {
				phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
				memblock_free(table, PAGE_SIZE);
				flush_tlb_all();
			}
		} else {
			alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc);
		}
		phys += next - addr;
	} while (pud++, addr = next, addr != end);
}

/*
 * Create the page directory entries and any necessary page tables for the
 * mapping described by 'phys', 'virt', 'size' and 'prot'.
 */
static void __create_mapping(struct mm_struct *mm, pgd_t *pgd,
				    phys_addr_t phys, unsigned long virt,
				    phys_addr_t size, pgprot_t prot,
				    void *(*alloc)(unsigned long size))
{
	unsigned long addr, length, end, next;

	addr = virt & PAGE_MASK;
	length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));

	end = addr + length;
	do {
		next = pgd_addr_end(addr, end);
		alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc);
		phys += next - addr;
	} while (pgd++, addr = next, addr != end);
}

static void *late_alloc(unsigned long size)
{
	void *ptr;

	BUG_ON(size > PAGE_SIZE);
	ptr = (void *)__get_free_page(PGALLOC_GFP);
	BUG_ON(!ptr);
	return ptr;
}

static void __ref create_mapping(phys_addr_t phys, unsigned long virt,
				  phys_addr_t size, pgprot_t prot)
{
	if (virt < VMALLOC_START) {
		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
			&phys, virt);
		return;
	}
	__create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt,
			 size, prot, early_alloc);
}
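
/*
 * Illustrative call (hypothetical physical address), mirroring what
 * __map_memblock() below does for a 2MB bank of RAM:
 *
 *	create_mapping(0x80000000UL, __phys_to_virt(0x80000000UL),
 *		       SZ_2M, PAGE_KERNEL);
 *
 * Requests with a virtual address below VMALLOC_START are rejected by the
 * check above.
 */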

void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
			       unsigned long virt, phys_addr_t size,
			       pgprot_t prot)
{
	__create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
				early_alloc);
}

static void create_mapping_late(phys_addr_t phys, unsigned long virt,
				  phys_addr_t size, pgprot_t prot)
{
	if (virt < VMALLOC_START) {
		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
			&phys, virt);
		return;
	}

	return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK),
				phys, virt, size, prot, late_alloc);
}

#ifdef CONFIG_DEBUG_RODATA
static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
{
	/*
	 * Set up the executable regions using the existing section mappings
	 * for now. This will get more fine-grained later once all memory is
	 * mapped.
	 */
	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);

	if (end < kernel_x_start) {
		create_mapping(start, __phys_to_virt(start),
			end - start, PAGE_KERNEL);
	} else if (start >= kernel_x_end) {
		create_mapping(start, __phys_to_virt(start),
			end - start, PAGE_KERNEL);
	} else {
		if (start < kernel_x_start)
			create_mapping(start, __phys_to_virt(start),
				kernel_x_start - start,
				PAGE_KERNEL);
		create_mapping(kernel_x_start,
				__phys_to_virt(kernel_x_start),
				kernel_x_end - kernel_x_start,
				PAGE_KERNEL_EXEC);
		if (kernel_x_end < end)
			create_mapping(kernel_x_end,
				__phys_to_virt(kernel_x_end),
				end - kernel_x_end,
				PAGE_KERNEL);
	}
}
#else
static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
{
	create_mapping(start, __phys_to_virt(start), end - start,
			PAGE_KERNEL_EXEC);
}
#endif

static void __init map_mem(void)
{
	struct memblock_region *reg;
	phys_addr_t limit;

	/*
	 * Temporarily limit the memblock range. We need to do this as
	 * create_mapping requires puds, pmds and ptes to be allocated from
	 * memory addressable from the initial direct kernel mapping.
	 *
	 * The initial direct kernel mapping, located at swapper_pg_dir, gives
	 * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
	 * PHYS_OFFSET (which must be aligned to 2MB as per
	 * Documentation/arm64/booting.txt).
	 */
	if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
		limit = PHYS_OFFSET + PMD_SIZE;
	else
		limit = PHYS_OFFSET + PUD_SIZE;
	memblock_set_current_limit(limit);

	/* map all the memory banks */
	for_each_memblock(memory, reg) {
		phys_addr_t start = reg->base;
		phys_addr_t end = start + reg->size;

		if (start >= end)
			break;

#ifndef CONFIG_ARM64_64K_PAGES
		/*
		 * For the first memory bank align the start address and
		 * current memblock limit to prevent create_mapping() from
		 * allocating pte page tables from unmapped memory.
		 * When 64K pages are enabled, the pte page table for the
		 * first PGDIR_SIZE is already present in swapper_pg_dir.
		 */
		if (start < limit)
			start = ALIGN(start, PMD_SIZE);
		if (end < limit) {
			limit = end & PMD_MASK;
			memblock_set_current_limit(limit);
		}
#endif
		__map_memblock(start, end);
	}

	/* Limit no longer required. */
	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
}
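
/*
 * Worked example (assuming 4K pages and a hypothetical PHYS_OFFSET of
 * 0x40000000): the initial memblock limit is 0x40000000 + PUD_SIZE (1GB),
 * so all early page table allocations come from the first gigabyte that
 * swapper_pg_dir already maps; once every bank has been mapped the limit
 * is lifted again.
 */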

void __init fixup_executable(void)
{
#ifdef CONFIG_DEBUG_RODATA
	/*
	 * Now that we are actually fully mapped, make the start/end more
	 * fine-grained.
	 */
	if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
		unsigned long aligned_start = round_down(__pa(_stext),
							SECTION_SIZE);

		create_mapping(aligned_start, __phys_to_virt(aligned_start),
				__pa(_stext) - aligned_start,
				PAGE_KERNEL);
	}

	if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
		unsigned long aligned_end = round_up(__pa(__init_end),
							SECTION_SIZE);
		create_mapping(__pa(__init_end), (unsigned long)__init_end,
				aligned_end - __pa(__init_end),
				PAGE_KERNEL);
	}
#endif
}

#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void)
{
	create_mapping_late(__pa(_stext), (unsigned long)_stext,
				(unsigned long)_etext - (unsigned long)_stext,
				PAGE_KERNEL_EXEC | PTE_RDONLY);
}
#endif

void fixup_init(void)
{
	create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,
			(unsigned long)__init_end - (unsigned long)__init_begin,
			PAGE_KERNEL);
}

/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps and sets up the zero page.
 */
void __init paging_init(void)
{
	void *zero_page;

	map_mem();
	fixup_executable();

	/*
	 * Finally flush the caches and tlb to ensure that we're in a
	 * consistent state.
	 */
	flush_cache_all();
	flush_tlb_all();

	/* allocate the zero page. */
	zero_page = early_alloc(PAGE_SIZE);

	bootmem_init();

	empty_zero_page = virt_to_page(zero_page);

	/*
	 * TTBR0 is only used for the identity mapping at this stage. Make it
	 * point to zero page to avoid speculatively fetching new entries.
	 */
	cpu_set_reserved_ttbr0();
	flush_tlb_all();
}

/*
 * Enable the identity mapping to allow the MMU to be disabled.
 */
void setup_mm_for_reboot(void)
{
	cpu_switch_mm(idmap_pg_dir, &init_mm);
	flush_tlb_all();
}

/*
 * Check whether a kernel address is valid (derived from arch/x86/).
 */
int kern_addr_valid(unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if ((((long)addr) >> VA_BITS) != -1UL)
		return 0;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd))
		return 0;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud))
		return 0;

	if (pud_sect(*pud))
		return pfn_valid(pud_pfn(*pud));

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return 0;

	if (pmd_sect(*pmd))
		return pfn_valid(pmd_pfn(*pmd));

	pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte))
		return 0;

	return pfn_valid(pte_pfn(*pte));
}
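
/*
 * For example, kern_addr_valid((unsigned long)&init_mm) is non-zero because
 * kernel data is mapped and backed by a valid pfn, whereas a user-space
 * address fails the VA_BITS sign-extension check above and returns 0
 * immediately.
 */
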
#ifdef CONFIG_SPARSEMEM_VMEMMAP
#ifdef CONFIG_ARM64_64K_PAGES
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
	return vmemmap_populate_basepages(start, end, node);
}
#else	/* !CONFIG_ARM64_64K_PAGES */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
	unsigned long addr = start;
	unsigned long next;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	do {
		next = pmd_addr_end(addr, end);

		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;

		pud = vmemmap_pud_populate(pgd, addr, node);
		if (!pud)
			return -ENOMEM;

		pmd = pmd_offset(pud, addr);
		if (pmd_none(*pmd)) {
			void *p = NULL;

			p = vmemmap_alloc_block_buf(PMD_SIZE, node);
			if (!p)
				return -ENOMEM;

			set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
		} else
			vmemmap_verify((pte_t *)pmd, node, addr, next);
	} while (addr = next, addr != end);

	return 0;
}
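
/*
 * Sizing note (assuming 4K pages and a 64-byte struct page): each 2MB
 * section allocated above holds 2MB / 64 = 32768 struct pages, i.e. the
 * memmap for 32768 * 4KB = 128MB of physical memory per PMD entry.
 */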
#endif	/* CONFIG_ARM64_64K_PAGES */
void vmemmap_free(unsigned long start, unsigned long end)
{
}
#endif	/* CONFIG_SPARSEMEM_VMEMMAP */

static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
#if CONFIG_ARM64_PGTABLE_LEVELS > 2
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
#endif
#if CONFIG_ARM64_PGTABLE_LEVELS > 3
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
#endif

static inline pud_t *fixmap_pud(unsigned long addr)
{
	pgd_t *pgd = pgd_offset_k(addr);

	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));

	return pud_offset(pgd, addr);
}

static inline pmd_t *fixmap_pmd(unsigned long addr)
{
	pud_t *pud = fixmap_pud(addr);

	BUG_ON(pud_none(*pud) || pud_bad(*pud));

	return pmd_offset(pud, addr);
}

static inline pte_t *fixmap_pte(unsigned long addr)
{
	pmd_t *pmd = fixmap_pmd(addr);

	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));

	return pte_offset_kernel(pmd, addr);
}

void __init early_fixmap_init(void)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr = FIXADDR_START;

	pgd = pgd_offset_k(addr);
	pgd_populate(&init_mm, pgd, bm_pud);
	pud = pud_offset(pgd, addr);
	pud_populate(&init_mm, pud, bm_pmd);
	pmd = pmd_offset(pud, addr);
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));

	if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
	     || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		pr_warn("pmd %p != %p, %p\n",
			pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
			fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
	}
}

void __set_fixmap(enum fixed_addresses idx,
			       phys_addr_t phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}

	pte = fixmap_pte(addr);

	if (pgprot_val(flags)) {
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	} else {
		pte_clear(&init_mm, addr, pte);
		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
	}
}
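
/*
 * Usage sketch (illustrative): a boot-time caller maps one page of a
 * firmware table in RAM into a fixmap slot and later tears it down, e.g.
 *
 *	__set_fixmap(FIX_BTMAP_BEGIN, table_phys, PAGE_KERNEL);
 *	... parse the table via fix_to_virt(FIX_BTMAP_BEGIN) ...
 *	__set_fixmap(FIX_BTMAP_BEGIN, 0, __pgprot(0));
 *
 * Passing an empty pgprot takes the pte_clear() path above and flushes the
 * TLB for that page. "table_phys" is a placeholder physical address.
 */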