init_64.c revision f62d0f008e889915c93631c04d4c7d871f05bea7
/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously
 * in physical space, so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */

void show_mem(void)
{
        long i, total = 0, reserved = 0;
        long shared = 0, cached = 0;
        pg_data_t *pgdat;
        struct page *page;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

        for_each_online_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        /*
                         * This loop can take a while with 256 GB and
                         * 4k pages, so update the NMI watchdog:
                         */
                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
                                touch_nmi_watchdog();

                        if (!pfn_valid(pgdat->node_start_pfn + i))
                                continue;
                        page = pfn_to_page(pgdat->node_start_pfn + i);
                        total++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
        }
        printk(KERN_INFO "%lu pages of RAM\n", total);
        printk(KERN_INFO "%lu reserved pages\n", reserved);
        printk(KERN_INFO "%lu pages shared\n", shared);
        printk(KERN_INFO "%lu pages swap cached\n", cached);
}

int after_bootmem;

static __init void *spp_getpage(void)
{
        void *ptr;

        if (after_bootmem)
                ptr = (void *) get_zeroed_page(GFP_ATOMIC);
        else
                ptr = alloc_bootmem_pages(PAGE_SIZE);

        if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
                panic("set_pte_phys: cannot allocate page data %s\n",
                        after_bootmem ? "after bootmem" : "");

        Dprintk("spp_getpage %p\n", ptr);
        return ptr;
}
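/*
 * set_pte_phys() installs a single 4k kernel mapping, walking (and, if
 * necessary, allocating via spp_getpage()) the pgd -> pud -> pmd -> pte
 * hierarchy by hand. Its only caller is __set_fixmap() below, which
 * uses it to wire up the fixmap entries.
 */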
static __init void set_pte_phys(unsigned long vaddr,
                        unsigned long phys, pgprot_t prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, new_pte;

        Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

        pgd = pgd_offset_k(vaddr);
        if (pgd_none(*pgd)) {
                printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                pmd = (pmd_t *) spp_getpage();
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
                if (pmd != pmd_offset(pud, 0)) {
                        printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0));
                        return;
                }
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
        new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

        pte = pte_offset_kernel(pmd, vaddr);
        if (!pte_none(*pte) &&
            pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
                pte_ERROR(*pte);
        set_pte(pte, new_pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }
        set_pte_phys(address, phys, prot);
}

static unsigned long __initdata table_start;
static unsigned long __meminitdata table_end;

static __meminit void *alloc_low_page(unsigned long *phys)
{
        unsigned long pfn = table_end++;
        void *adr;

        if (after_bootmem) {
                adr = (void *)get_zeroed_page(GFP_ATOMIC);
                *phys = __pa(adr);
                return adr;
        }

        if (pfn >= end_pfn)
                panic("alloc_low_page: ran out of memory");

        adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
        memset(adr, 0, PAGE_SIZE);
        *phys = pfn * PAGE_SIZE;
        return adr;
}

static __meminit void unmap_low_page(void *adr)
{
        if (after_bootmem)
                return;

        early_iounmap(adr, PAGE_SIZE);
}

/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd, *last_pmd;
        int i, pmds;

        /* Number of 2MB pmd slots needed to cover [addr, addr+size) */
        pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        vaddr = __START_KERNEL_map;
        pmd = level2_kernel_pgt;
        last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
        for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
                for (i = 0; i < pmds; i++) {
                        if (pmd_present(pmd[i]))
                                goto next;
                }
                vaddr += addr & ~PMD_MASK;
                addr &= PMD_MASK;
                for (i = 0; i < pmds; i++, addr += PMD_SIZE)
                        set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
                __flush_tlb_all();
                return (void *)vaddr;
        next:
                ;
        }
        printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
        return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd;
        int i, pmds;

        vaddr = (unsigned long)addr;
        pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        pmd = level2_kernel_pgt + pmd_index(vaddr);
        for (i = 0; i < pmds; i++)
                pmd_clear(pmd + i);
        __flush_tlb_all();
}
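/*
 * The direct (identity) mapping is built out of 2MB PSE pages:
 * phys_pmd_init() fills one pmd page with large-page entries for a
 * given physical range, and phys_pud_init() walks the pud level,
 * allocating the pmd pages it needs. No pte level exists for the
 * direct mapping.
 */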
static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
        int i = pmd_index(address);

        for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
                unsigned long entry;
                pmd_t *pmd = pmd_page + pmd_index(address);

                if (address >= end) {
                        if (!after_bootmem)
                                for (; i < PTRS_PER_PMD; i++, pmd++)
                                        set_pmd(pmd, __pmd(0));
                        break;
                }

                if (pmd_val(*pmd))
                        continue;

                entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
                entry &= __supported_pte_mask;
                set_pmd(pmd, __pmd(entry));
        }
}

static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
        pmd_t *pmd = pmd_offset(pud, 0);

        spin_lock(&init_mm.page_table_lock);
        phys_pmd_init(pmd, address, end);
        spin_unlock(&init_mm.page_table_lock);
        __flush_tlb_all();
}

static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
        int i = pud_index(addr);

        for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
                unsigned long pmd_phys;
                pud_t *pud = pud_page + pud_index(addr);
                pmd_t *pmd;

                if (addr >= end)
                        break;

                if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
                        set_pud(pud, __pud(0));
                        continue;
                }

                if (pud_val(*pud)) {
                        phys_pmd_update(pud, addr, end);
                        continue;
                }

                pmd = alloc_low_page(&pmd_phys);
                spin_lock(&init_mm.page_table_lock);
                set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
                phys_pmd_init(pmd, addr, end);
                spin_unlock(&init_mm.page_table_lock);
                unmap_low_page(pmd);
        }
        __flush_tlb_all();
}

static void __init find_early_table_space(unsigned long end)
{
        unsigned long puds, pmds, tables, start;

        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
                 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

        /*
         * RED-PEN putting page tables only on node 0 could
         * cause a hotspot and fill up ZONE_DMA. The page tables
         * need roughly 0.5KB per GB.
         */
        start = 0x8000;
        table_start = find_e820_area(start, end, tables);
        if (table_start == -1UL)
                panic("Cannot find space for the kernel page tables");

        table_start >>= PAGE_SHIFT;
        table_end = table_start;

        early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
                end, table_start << PAGE_SHIFT,
                (table_start << PAGE_SHIFT) + tables);
}
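/*
 * A worked example of the sizing above: for end = 4GB, puds = 4 and
 * pmds = 2048, so with 8 bytes per entry the tables take
 * round_up(32, PAGE_SIZE) + round_up(16384, PAGE_SIZE) = 4k + 16k
 * = 20k of early reserved space.
 */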
/*
 * Set up the direct mapping of the physical memory at PAGE_OFFSET.
 * This runs before bootmem is initialized and gets pages directly from
 * the physical memory. To access them, they are temporarily mapped.
 */
void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
{
        unsigned long next;

        Dprintk("init_memory_mapping\n");

        /*
         * Find space for the kernel direct mapping tables.
         * Later we should allocate these tables in the local node of the
         * memory mapped. Unfortunately this is done currently before the
         * nodes are discovered.
         */
        if (!after_bootmem)
                find_early_table_space(end);

        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);

        for (; start < end; start = next) {
                unsigned long pud_phys;
                pgd_t *pgd = pgd_offset_k(start);
                pud_t *pud;

                if (after_bootmem)
                        pud = pud_offset(pgd, start & PGDIR_MASK);
                else
                        pud = alloc_low_page(&pud_phys);

                next = start + PGDIR_SIZE;
                if (next > end)
                        next = end;
                phys_pud_init(pud, __pa(start), __pa(next));
                if (!after_bootmem)
                        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
                unmap_low_page(pud);
        }

        if (!after_bootmem)
                mmu_cr4_features = read_cr4();
        __flush_tlb_all();

        reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
}

#ifndef CONFIG_NUMA
void __init paging_init(void)
{
        unsigned long max_zone_pfns[MAX_NR_ZONES];

        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
        max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
        max_zone_pfns[ZONE_NORMAL] = end_pfn;

        memory_present(0, 0, end_pfn);
        sparse_init();
        free_area_init_nodes(max_zone_pfns);
}
#endif

/*
 * Unmap a kernel mapping if it exists. This is useful to avoid
 * prefetches from the CPU leading to inconsistent cache lines.
 * address and size must be aligned to 2MB boundaries.
 * Does nothing when the mapping doesn't exist.
 */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
        unsigned long end = address + size;

        BUG_ON(address & ~LARGE_PAGE_MASK);
        BUG_ON(size & ~LARGE_PAGE_MASK);

        for (; address < end; address += LARGE_PAGE_SIZE) {
                pgd_t *pgd = pgd_offset_k(address);
                pud_t *pud;
                pmd_t *pmd;

                if (pgd_none(*pgd))
                        continue;
                pud = pud_offset(pgd, address);
                if (pud_none(*pud))
                        continue;
                pmd = pmd_offset(pud, address);
                if (!pmd || pmd_none(*pmd))
                        continue;
                if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
                        /* Could handle this, but it should not happen currently. */
                        printk(KERN_ERR "clear_kernel_mapping: mapping has been split. will leak memory\n");
                        pmd_ERROR(*pmd);
                }
                set_pmd(pmd, __pmd(0));
        }
        __flush_tlb_all();
}

/*
 * Memory hotplug specific functions
 */
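/*
 * Hot-added memory gets a direct mapping via init_memory_mapping() in
 * arch_add_memory() below and is then onlined page by page; the
 * hotplug core calls online_page() for each page to hand it to the
 * allocator.
 */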
void online_page(struct page *page)
{
        ClearPageReserved(page);
        init_page_count(page);
        __free_page(page);
        totalram_pages++;
        num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is always added to the NORMAL zone. This means you will never
 * get additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
        struct pglist_data *pgdat = NODE_DATA(nid);
        struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;

        init_memory_mapping(start, (start + size - 1));

        ret = __add_pages(zone, start_pfn, nr_pages);
        if (ret)
                goto error;

        return ret;
error:
        printk("%s: Problem encountered in __add_pages!\n", __func__);
        return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
        return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
                         kcore_modules, kcore_vsyscall;

void __init mem_init(void)
{
        long codesize, reservedpages, datasize, initsize;

        pci_iommu_alloc();

        /* clear_bss() already cleared the empty_zero_page */

        /* temporary debugging - double check it's true: */
        {
                int i;

                for (i = 0; i < 1024; i++)
                        WARN_ON_ONCE(empty_zero_page[i]);
        }

        reservedpages = 0;

        /* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
        totalram_pages = numa_free_all_bootmem();
#else
        totalram_pages = free_all_bootmem();
#endif
        reservedpages = end_pfn - totalram_pages -
                                        absent_pages_in_range(0, end_pfn);

        after_bootmem = 1;

        codesize = (unsigned long) &_etext - (unsigned long) &_text;
        datasize = (unsigned long) &_edata - (unsigned long) &_etext;
        initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

        /* Register memory areas for /proc/kcore */
        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END-VMALLOC_START);
        kclist_add(&kcore_kernel, &_stext, _end - _stext);
        kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
        kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
                   VSYSCALL_END - VSYSCALL_START);

        printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
                end_pfn << (PAGE_SHIFT-10),
                codesize >> 10,
                reservedpages << (PAGE_SHIFT-10),
                datasize >> 10,
                initsize >> 10);
}
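/*
 * free_init_pages() gives a range of __init memory back to the page
 * allocator, first filling it with POISON_FREE_INITMEM so that a stale
 * reference to freed init code or data shows up as a recognizable
 * pattern rather than silently reusing whatever the allocator put
 * there.
 */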
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
        unsigned long addr;

        if (begin >= end)
                return;

        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
                memset((void *)(addr & ~(PAGE_SIZE-1)),
                        POISON_FREE_INITMEM, PAGE_SIZE);
                free_page(addr);
                totalram_pages++;
        }
#ifdef CONFIG_DEBUG_RODATA
        /*
         * This will make the __init pages not present and
         * not executable, so that any attempt to use a
         * __init function from now on will fault immediately
         * rather than spuriously later when memory gets reused.
         *
         * We only do this for DEBUG_RODATA to not break up the
         * 2MB kernel mapping just for this debug feature.
         */
        if (begin >= __START_KERNEL_map) {
                set_memory_np(begin, (end - begin)/PAGE_SIZE);
                set_memory_nx(begin, (end - begin)/PAGE_SIZE);
        }
#endif
}

void free_initmem(void)
{
        free_init_pages("unused kernel memory",
                        (unsigned long)(&__init_begin),
                        (unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA

void mark_rodata_ro(void)
{
        unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
        /* It must still be possible to apply SMP alternatives. */
        if (num_possible_cpus() > 1)
                start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
        start = (unsigned long)__start_rodata;
#endif

        end = (unsigned long)__end_rodata;
        start = (start + PAGE_SIZE - 1) & PAGE_MASK;
        end &= PAGE_MASK;
        if (end <= start)
                return;

        set_memory_ro(start, (end - start) >> PAGE_SHIFT);

        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
               (end - start) >> 10);

        /*
         * set_memory_*() requires a global_flush_tlb() call after it.
         * We do this after the printk so that if something went wrong in the
         * change, the printk gets out at least to give a better debug hint
         * of who is the culprit.
         */
        global_flush_tlb();

#ifdef CONFIG_CPA_DEBUG
        printk("Testing CPA: undo %lx-%lx\n", start, end);
        set_memory_rw(start, (end-start) >> PAGE_SHIFT);
        global_flush_tlb();

        printk("Testing CPA: again\n");
        set_memory_ro(start, (end-start) >> PAGE_SHIFT);
        global_flush_tlb();
#endif
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        free_init_pages("initrd memory", start, end);
}
#endif

void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
        int nid = phys_to_nid(phys);
#endif
        unsigned long pfn = phys >> PAGE_SHIFT;

        if (pfn >= end_pfn) {
                /*
                 * This can happen with kdump kernels when accessing
                 * firmware tables:
                 */
                if (pfn < end_pfn_map)
                        return;
                printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
                                phys, len);
                return;
        }

        /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
        reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
        reserve_bootmem(phys, len);
#endif
        if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
                dma_reserve += len / PAGE_SIZE;
                set_dma_reserve(dma_reserve);
        }
}
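/*
 * kern_addr_valid() reports whether a kernel virtual address is backed
 * by a present page. The 'above' check rejects non-canonical
 * addresses: on x86-64, all bits above __VIRTUAL_MASK_SHIFT must be a
 * sign extension of the highest implemented bit, i.e. all zeroes or
 * all ones.
 */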
int kern_addr_valid(unsigned long addr)
{
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (above != 0 && above != -1UL)
                return 0;

        pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd))
                return 0;

        pud = pud_offset(pgd, addr);
        if (pud_none(*pud))
                return 0;

        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return 0;
        if (pmd_large(*pmd))
                return pfn_valid(pmd_pfn(*pmd));

        pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte))
                return 0;
        return pfn_valid(pte_pfn(*pte));
}

/*
 * A pseudo VMA to allow ptrace access for the vsyscall page. This only
 * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
 * not need special handling anymore.
 */
static struct vm_area_struct gate_vma = {
        .vm_start = VSYSCALL_START,
        .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
        .vm_page_prot = PAGE_READONLY_EXEC,
        .vm_flags = VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
        if (test_tsk_thread_flag(tsk, TIF_IA32))
                return NULL;
#endif
        return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
        struct vm_area_struct *vma = get_gate_vma(task);

        if (!vma)
                return 0;
        return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/*
 * Use this when you have no reliable task/vma, typically from interrupt
 * context. It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
        return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

const char *arch_vma_name(struct vm_area_struct *vma)
{
        if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
                return "[vdso]";
        if (vma == &gate_vma)
                return "[vsyscall]";
        return NULL;
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
 * Each PMD_SIZE block allocated here backs the struct page array for
 * one PMD_SIZE-aligned stretch of the vmemmap virtual range.
 */
int __meminit vmemmap_populate(struct page *start_page,
                               unsigned long size, int node)
{
        unsigned long addr = (unsigned long)start_page;
        unsigned long end = (unsigned long)(start_page + size);
        unsigned long next;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        for (; addr < end; addr = next) {
                next = pmd_addr_end(addr, end);

                pgd = vmemmap_pgd_populate(addr, node);
                if (!pgd)
                        return -ENOMEM;

                pud = vmemmap_pud_populate(pgd, addr, node);
                if (!pud)
                        return -ENOMEM;

                pmd = pmd_offset(pud, addr);
                if (pmd_none(*pmd)) {
                        pte_t entry;
                        void *p = vmemmap_alloc_block(PMD_SIZE, node);

                        if (!p)
                                return -ENOMEM;

                        entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE);
                        set_pmd(pmd, __pmd(pte_val(entry)));

                        printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
                                addr, addr + PMD_SIZE - 1, p, node);
                } else
                        vmemmap_verify((pte_t *)pmd, node, addr, next);
        }

        return 0;
}
#endif