init_64.c revision edeed30589f5defe63ce6aaae56f2b7c855e4520
/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init() allocates all the fixmap pagetables contiguously
 * in physical space, so we can cache the location of the first one and
 * move around without checking the pgd every time.
 */

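/*
 * Dump a summary of memory usage: free areas, free swap and, for every
 * online node, counts of total, reserved, shared and swap-cached pages.
 * The NMI watchdog is touched periodically because the scan can take a
 * while on large machines.
 */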
void show_mem(void)
{
	long i, total = 0, reserved = 0;
	long shared = 0, cached = 0;
	pg_data_t *pgdat;
	struct page *page;

	printk(KERN_INFO "Mem-info:\n");
	show_free_areas();
	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

	for_each_online_pgdat(pgdat) {
		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
			/* this loop can take a while with 256 GB and 4k pages
			   so update the NMI watchdog */
			if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
				touch_nmi_watchdog();
			}
			if (!pfn_valid(pgdat->node_start_pfn + i))
				continue;
			page = pfn_to_page(pgdat->node_start_pfn + i);
			total++;
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (page_count(page))
				shared += page_count(page) - 1;
		}
	}
	printk(KERN_INFO "%ld pages of RAM\n", total);
	printk(KERN_INFO "%ld reserved pages\n", reserved);
	printk(KERN_INFO "%ld pages shared\n", shared);
	printk(KERN_INFO "%ld pages swap cached\n", cached);
}

int after_bootmem;

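/*
 * Allocate a zeroed page for building kernel page tables.  Uses the
 * bootmem allocator early in boot and the page allocator afterwards.
 */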
static __init void *spp_getpage(void)
{
	void *ptr;
	if (after_bootmem)
		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
	else
		ptr = alloc_bootmem_pages(PAGE_SIZE);
	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
		panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");

	Dprintk("spp_getpage %p\n", ptr);
	return ptr;
}

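/*
 * Install a single kernel mapping of vaddr to phys with the given
 * protection, allocating any missing intermediate page tables.
 * Used by __set_fixmap() below.
 */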
static __init void set_pte_phys(unsigned long vaddr,
			 unsigned long phys, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte, new_pte;

	Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

	pgd = pgd_offset_k(vaddr);
	if (pgd_none(*pgd)) {
		printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		pmd = (pmd_t *) spp_getpage();
		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
		if (pmd != pmd_offset(pud, 0)) {
			printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0));
			return;
		}
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		pte = (pte_t *) spp_getpage();
		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
		if (pte != pte_offset_kernel(pmd, 0)) {
			printk("PAGETABLE BUG #02!\n");
			return;
		}
	}
	new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

	pte = pte_offset_kernel(pmd, vaddr);
	if (!pte_none(*pte) &&
	    pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
		pte_ERROR(*pte);
	set_pte(pte, new_pte);

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		printk("Invalid __set_fixmap\n");
		return;
	}
	set_pte_phys(address, phys, prot);
}

static unsigned long __initdata table_start;
static unsigned long __meminitdata table_end;

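/*
 * Allocate a zeroed page for the early direct-mapping page tables.
 * Before bootmem is available the page comes from the area located by
 * find_early_table_space() and is temporarily mapped with early_ioremap();
 * afterwards the normal page allocator is used.
 */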
static __meminit void *alloc_low_page(unsigned long *phys)
{
	unsigned long pfn = table_end++;
	void *adr;

	if (after_bootmem) {
		adr = (void *)get_zeroed_page(GFP_ATOMIC);
		*phys = __pa(adr);
		return adr;
	}

	if (pfn >= end_pfn)
		panic("alloc_low_page: ran out of memory");

	adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
	memset(adr, 0, PAGE_SIZE);
	*phys = pfn * PAGE_SIZE;
	return adr;
}

static __meminit void unmap_low_page(void *adr)
{
	if (after_bootmem)
		return;

	early_iounmap(adr, PAGE_SIZE);
}

/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
	unsigned long vaddr;
	pmd_t *pmd, *last_pmd;
	int i, pmds;

	pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
	vaddr = __START_KERNEL_map;
	pmd = level2_kernel_pgt;
	last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
	for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
		for (i = 0; i < pmds; i++) {
			if (pmd_present(pmd[i]))
				goto next;
		}
		vaddr += addr & ~PMD_MASK;
		addr &= PMD_MASK;
		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
			set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
		__flush_tlb_all();
		return (void *)vaddr;
	next:
		;
	}
	printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
	return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
	unsigned long vaddr;
	pmd_t *pmd;
	int i, pmds;

	vaddr = (unsigned long)addr;
	pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
	pmd = level2_kernel_pgt + pmd_index(vaddr);
	for (i = 0; i < pmds; i++)
		pmd_clear(pmd + i);
	__flush_tlb_all();
}

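/*
 * Fill in 2MB PMD entries for the direct mapping from address up to end.
 * Entries that are already present are left alone; during early boot,
 * entries beyond 'end' are cleared.
 */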
static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
	int i = pmd_index(address);

	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
		unsigned long entry;
		pmd_t *pmd = pmd_page + pmd_index(address);

		if (address >= end) {
			if (!after_bootmem)
				for (; i < PTRS_PER_PMD; i++, pmd++)
					set_pmd(pmd, __pmd(0));
			break;
		}

		if (pmd_val(*pmd))
			continue;

		entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
		entry &= __supported_pte_mask;
		set_pmd(pmd, __pmd(entry));
	}
}

static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
	pmd_t *pmd = pmd_offset(pud, 0);
	spin_lock(&init_mm.page_table_lock);
	phys_pmd_init(pmd, address, end);
	spin_unlock(&init_mm.page_table_lock);
	__flush_tlb_all();
}

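/*
 * Populate the PUD entries covering [addr, end), allocating PMD pages as
 * needed and filling them via phys_pmd_init().  During early boot, ranges
 * with no memory in the e820 map are left unmapped.
 */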
static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
	int i = pud_index(addr);

	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
		unsigned long pmd_phys;
		pud_t *pud = pud_page + pud_index(addr);
		pmd_t *pmd;

		if (addr >= end)
			break;

		if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
			set_pud(pud, __pud(0));
			continue;
		}

		if (pud_val(*pud)) {
			phys_pmd_update(pud, addr, end);
			continue;
		}

		pmd = alloc_low_page(&pmd_phys);
		spin_lock(&init_mm.page_table_lock);
		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
		phys_pmd_init(pmd, addr, end);
		spin_unlock(&init_mm.page_table_lock);
		unmap_low_page(pmd);
	}
	__flush_tlb_all();
}

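/*
 * Estimate how much space the kernel direct-mapping page tables need for
 * memory up to 'end' and locate a free e820 area for them, searching
 * upwards from the low 32KB.
 */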
static void __init find_early_table_space(unsigned long end)
{
	unsigned long puds, pmds, tables, start;

	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
		 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

	/* RED-PEN putting page tables only on node 0 could
	   cause a hotspot and fill up ZONE_DMA. The page tables
	   need roughly 0.5KB per GB. */
	start = 0x8000;
	table_start = find_e820_area(start, end, tables);
	if (table_start == -1UL)
		panic("Cannot find space for the kernel page tables");

	table_start >>= PAGE_SHIFT;
	table_end = table_start;

	early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
		end, table_start << PAGE_SHIFT,
		(table_start << PAGE_SHIFT) + tables);
}

/* Set up the direct mapping of physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from
   physical memory. To access them they are temporarily mapped. */
void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
{
	unsigned long next;

	Dprintk("init_memory_mapping\n");

	/*
	 * Find space for the kernel direct mapping tables.
	 * Later we should allocate these tables in the local node of the memory
	 * mapped.  Unfortunately this is done currently before the nodes are
	 * discovered.
	 */
	if (!after_bootmem)
		find_early_table_space(end);

	start = (unsigned long)__va(start);
	end = (unsigned long)__va(end);

	for (; start < end; start = next) {
		unsigned long pud_phys;
		pgd_t *pgd = pgd_offset_k(start);
		pud_t *pud;

		if (after_bootmem)
			pud = pud_offset(pgd, start & PGDIR_MASK);
		else
			pud = alloc_low_page(&pud_phys);

		next = start + PGDIR_SIZE;
		if (next > end)
			next = end;
		phys_pud_init(pud, __pa(start), __pa(next));
		if (!after_bootmem)
			set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
		unmap_low_page(pud);
	}

	if (!after_bootmem)
		mmu_cr4_features = read_cr4();
	__flush_tlb_all();

	reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
}

#ifndef CONFIG_NUMA
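/* Non-NUMA zone setup: size the DMA, DMA32 and NORMAL zones and init sparsemem. */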
void __init paging_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES];
	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
	max_zone_pfns[ZONE_NORMAL] = end_pfn;

	memory_present(0, 0, end_pfn);
	sparse_init();
	free_area_init_nodes(max_zone_pfns);
}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
	unsigned long end = address + size;

	BUG_ON(address & ~LARGE_PAGE_MASK);
	BUG_ON(size & ~LARGE_PAGE_MASK);

	for (; address < end; address += LARGE_PAGE_SIZE) {
		pgd_t *pgd = pgd_offset_k(address);
		pud_t *pud;
		pmd_t *pmd;
		if (pgd_none(*pgd))
			continue;
		pud = pud_offset(pgd, address);
		if (pud_none(*pud))
			continue;
		pmd = pmd_offset(pud, address);
		if (!pmd || pmd_none(*pmd))
			continue;
		if (!(pmd_val(*pmd) & _PAGE_PSE)) {
			/* Could handle this, but it should not happen currently. */
			printk(KERN_ERR
	       "clear_kernel_mapping: mapping has been split. will leak memory\n");
			pmd_ERROR(*pmd);
		}
		set_pmd(pmd, __pmd(0));
	}
	__flush_tlb_all();
}

/*
 * Memory hotplug specific functions
 */
void online_page(struct page *page)
{
	ClearPageReserved(page);
	init_page_count(page);
	__free_page(page);
	totalram_pages++;
	num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is always added to the NORMAL zone.  This means you will never
 * get additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
	struct pglist_data *pgdat = NODE_DATA(nid);
	struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	int ret;

	init_memory_mapping(start, (start + size - 1));

	ret = __add_pages(zone, start_pfn, nr_pages);
	if (ret)
		goto error;

	return ret;
error:
	printk("%s: Problem encountered in __add_pages!\n", __func__);
	return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
	return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
			 kcore_vsyscall;

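/*
 * Late memory setup: hand all bootmem pages to the page allocator,
 * register the /proc/kcore regions and print the final memory statistics.
 */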
void __init mem_init(void)
{
	long codesize, reservedpages, datasize, initsize;

	pci_iommu_alloc();

	/* clear_bss() has already cleared empty_zero_page */

	/* temporary debugging - double check it's true: */
	{
		int i;

		for (i = 0; i < 1024; i++)
			WARN_ON_ONCE(empty_zero_page[i]);
	}

	reservedpages = 0;

	/* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
	totalram_pages = numa_free_all_bootmem();
#else
	totalram_pages = free_all_bootmem();
#endif
	reservedpages = end_pfn - totalram_pages -
					absent_pages_in_range(0, end_pfn);

	after_bootmem = 1;

	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

	/* Register memory areas for /proc/kcore */
	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
		   VMALLOC_END-VMALLOC_START);
	kclist_add(&kcore_kernel, &_stext, _end - _stext);
	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
				 VSYSCALL_END - VSYSCALL_START);

	printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		end_pfn << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10);
}

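/*
 * Free and poison the pages in [begin, end) so that stale references to
 * init memory are easier to catch.
 */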
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
	unsigned long addr;

	if (begin >= end)
		return;

	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
	for (addr = begin; addr < end; addr += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(addr));
		init_page_count(virt_to_page(addr));
		memset((void *)(addr & ~(PAGE_SIZE-1)),
			POISON_FREE_INITMEM, PAGE_SIZE);
		free_page(addr);
		totalram_pages++;
	}
#ifdef CONFIG_DEBUG_RODATA
	/*
	 * This will make the __init pages not present and
	 * not executable, so that any attempt to use a
	 * __init function from now on will fault immediately
	 * rather than spuriously later when memory gets reused.
	 *
	 * We only do this for DEBUG_RODATA to not break up the
	 * 2MB kernel mapping just for this debug feature.
	 */
	if (begin >= __START_KERNEL_map) {
		set_memory_rw(begin, (end - begin)/PAGE_SIZE);
		set_memory_np(begin, (end - begin)/PAGE_SIZE);
		set_memory_nx(begin, (end - begin)/PAGE_SIZE);
		rodata_test();
	}
#endif
}

void free_initmem(void)
{
	free_init_pages("unused kernel memory",
			(unsigned long)(&__init_begin),
			(unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);

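/*
 * Write-protect the kernel's read-only data (and the text where possible).
 * The text is left writable when SMP alternatives or kprobes may still
 * need to patch it.
 */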
void mark_rodata_ro(void)
{
	unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
	/* It must still be possible to apply SMP alternatives. */
	if (num_possible_cpus() > 1)
		start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
	start = (unsigned long)__start_rodata;
#endif

	end = (unsigned long)__end_rodata;
	start = (start + PAGE_SIZE - 1) & PAGE_MASK;
	end &= PAGE_MASK;
	if (end <= start)
		return;

	set_memory_ro(start, (end - start) >> PAGE_SHIFT);

	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
	       (end - start) >> 10);

#ifdef CONFIG_CPA_DEBUG
	printk("Testing CPA: undo %lx-%lx\n", start, end);
	set_memory_rw(start, (end-start) >> PAGE_SHIFT);

	printk("Testing CPA: again\n");
	set_memory_ro(start, (end-start) >> PAGE_SHIFT);
#endif
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
	free_init_pages("initrd memory", start, end);
}
#endif

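/*
 * Reserve a physical range with the bootmem allocator, using the right
 * node on NUMA and accounting for pages reserved below MAX_DMA_PFN.
 */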
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
	int nid = phys_to_nid(phys);
#endif
	unsigned long pfn = phys >> PAGE_SHIFT;
	if (pfn >= end_pfn) {
		/* This can happen with kdump kernels when accessing firmware
		   tables. */
		if (pfn < end_pfn_map)
			return;
		printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
				phys, len);
		return;
	}

	/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
	reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
	reserve_bootmem(phys, len);
#endif
	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
		dma_reserve += len / PAGE_SIZE;
		set_dma_reserve(dma_reserve);
	}
}

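/*
 * Check whether a kernel virtual address is canonical and backed by a
 * present mapping, walking the page tables and handling 2MB large pages.
 */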
int kern_addr_valid(unsigned long addr)
{
	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if (above != 0 && above != -1UL)
		return 0;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd))
		return 0;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return 0;
	if (pmd_large(*pmd))
		return pfn_valid(pmd_pfn(*pmd));

	pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte))
		return 0;
	return pfn_valid(pte_pfn(*pte));
}

/* A pseudo VMA to allow ptrace access to the vsyscall page.  This only
   covers the 64-bit vsyscall page now.  32-bit has a real VMA and no
   longer needs special handling. */

static struct vm_area_struct gate_vma = {
	.vm_start = VSYSCALL_START,
	.vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
	.vm_page_prot = PAGE_READONLY_EXEC,
	.vm_flags = VM_READ | VM_EXEC
};

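/* The vsyscall gate VMA only exists for 64-bit tasks; ia32-emulation tasks get NULL. */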
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
	if (test_tsk_thread_flag(tsk, TIF_IA32))
		return NULL;
#endif
	return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
	struct vm_area_struct *vma = get_gate_vma(task);
	if (!vma)
		return 0;
	return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context.  It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

const char *arch_vma_name(struct vm_area_struct *vma)
{
	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
		return "[vdso]";
	if (vma == &gate_vma)
		return "[vsyscall]";
	return NULL;
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
 */
int __meminit vmemmap_populate(struct page *start_page,
						unsigned long size, int node)
{
	unsigned long addr = (unsigned long)start_page;
	unsigned long end = (unsigned long)(start_page + size);
	unsigned long next;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	for (; addr < end; addr = next) {
		next = pmd_addr_end(addr, end);

		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;
		pud = vmemmap_pud_populate(pgd, addr, node);
		if (!pud)
			return -ENOMEM;

		pmd = pmd_offset(pud, addr);
		if (pmd_none(*pmd)) {
			pte_t entry;
			void *p = vmemmap_alloc_block(PMD_SIZE, node);
			if (!p)
				return -ENOMEM;

			entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE);
			set_pmd(pmd, __pmd(pte_val(entry)));

			printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
				addr, addr + PMD_SIZE - 1, p, node);
		} else
			vmemmap_verify((pte_t *)pmd, node, addr, next);
	}

	return 0;
}
#endif