/* bootmem.c revision 4cc278b721d5bf3569dfc5f1100253042e097bc3 */
1/* 2 * bootmem - A boot-time physical memory allocator and configurator 3 * 4 * Copyright (C) 1999 Ingo Molnar 5 * 1999 Kanoj Sarcar, SGI 6 * 2008 Johannes Weiner 7 * 8 * Access to this subsystem has to be serialized externally (which is true 9 * for the boot process anyway). 10 */ 11#include <linux/init.h> 12#include <linux/pfn.h> 13#include <linux/bootmem.h> 14#include <linux/module.h> 15 16#include <asm/bug.h> 17#include <asm/io.h> 18#include <asm/processor.h> 19 20#include "internal.h" 21 22unsigned long max_low_pfn; 23unsigned long min_low_pfn; 24unsigned long max_pfn; 25 26#ifdef CONFIG_CRASH_DUMP 27/* 28 * If we have booted due to a crash, max_pfn will be a very low value. We need 29 * to know the amount of memory that the previous kernel used. 30 */ 31unsigned long saved_max_pfn; 32#endif 33 34bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; 35 36static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); 37 38static int bootmem_debug; 39 40static int __init bootmem_debug_setup(char *buf) 41{ 42 bootmem_debug = 1; 43 return 0; 44} 45early_param("bootmem_debug", bootmem_debug_setup); 46 47#define bdebug(fmt, args...) 
({ \ 48 if (unlikely(bootmem_debug)) \ 49 printk(KERN_INFO \ 50 "bootmem::%s " fmt, \ 51 __FUNCTION__, ## args); \ 52}) 53 54static unsigned long __init bootmap_bytes(unsigned long pages) 55{ 56 unsigned long bytes = (pages + 7) / 8; 57 58 return ALIGN(bytes, sizeof(long)); 59} 60 61/** 62 * bootmem_bootmap_pages - calculate bitmap size in pages 63 * @pages: number of pages the bitmap has to represent 64 */ 65unsigned long __init bootmem_bootmap_pages(unsigned long pages) 66{ 67 unsigned long bytes = bootmap_bytes(pages); 68 69 return PAGE_ALIGN(bytes) >> PAGE_SHIFT; 70} 71 72/* 73 * link bdata in order 74 */ 75static void __init link_bootmem(bootmem_data_t *bdata) 76{ 77 struct list_head *iter; 78 79 list_for_each(iter, &bdata_list) { 80 bootmem_data_t *ent; 81 82 ent = list_entry(iter, bootmem_data_t, list); 83 if (bdata->node_boot_start < ent->node_boot_start) 84 break; 85 } 86 list_add_tail(&bdata->list, iter); 87} 88 89/* 90 * Called once to set up the allocator itself. 91 */ 92static unsigned long __init init_bootmem_core(bootmem_data_t *bdata, 93 unsigned long mapstart, unsigned long start, unsigned long end) 94{ 95 unsigned long mapsize; 96 97 mminit_validate_memmodel_limits(&start, &end); 98 bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart)); 99 bdata->node_boot_start = PFN_PHYS(start); 100 bdata->node_low_pfn = end; 101 link_bootmem(bdata); 102 103 /* 104 * Initially all pages are reserved - setup_arch() has to 105 * register free RAM areas explicitly. 
106 */ 107 mapsize = bootmap_bytes(end - start); 108 memset(bdata->node_bootmem_map, 0xff, mapsize); 109 110 bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n", 111 bdata - bootmem_node_data, start, mapstart, end, mapsize); 112 113 return mapsize; 114} 115 116/** 117 * init_bootmem_node - register a node as boot memory 118 * @pgdat: node to register 119 * @freepfn: pfn where the bitmap for this node is to be placed 120 * @startpfn: first pfn on the node 121 * @endpfn: first pfn after the node 122 * 123 * Returns the number of bytes needed to hold the bitmap for this node. 124 */ 125unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, 126 unsigned long startpfn, unsigned long endpfn) 127{ 128 return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn); 129} 130 131/** 132 * init_bootmem - register boot memory 133 * @start: pfn where the bitmap is to be placed 134 * @pages: number of available physical pages 135 * 136 * Returns the number of bytes needed to hold the bitmap. 137 */ 138unsigned long __init init_bootmem(unsigned long start, unsigned long pages) 139{ 140 max_low_pfn = pages; 141 min_low_pfn = start; 142 return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); 143} 144 145static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) 146{ 147 int aligned; 148 struct page *page; 149 unsigned long start, end, pages, count = 0; 150 151 if (!bdata->node_bootmem_map) 152 return 0; 153 154 start = PFN_DOWN(bdata->node_boot_start); 155 end = bdata->node_low_pfn; 156 157 /* 158 * If the start is aligned to the machines wordsize, we might 159 * be able to free pages in bulks of that order. 
160 */ 161 aligned = !(start & (BITS_PER_LONG - 1)); 162 163 bdebug("nid=%td start=%lx end=%lx aligned=%d\n", 164 bdata - bootmem_node_data, start, end, aligned); 165 166 while (start < end) { 167 unsigned long *map, idx, vec; 168 169 map = bdata->node_bootmem_map; 170 idx = start - PFN_DOWN(bdata->node_boot_start); 171 vec = ~map[idx / BITS_PER_LONG]; 172 173 if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) { 174 int order = ilog2(BITS_PER_LONG); 175 176 __free_pages_bootmem(pfn_to_page(start), order); 177 count += BITS_PER_LONG; 178 } else { 179 unsigned long off = 0; 180 181 while (vec && off < BITS_PER_LONG) { 182 if (vec & 1) { 183 page = pfn_to_page(start + off); 184 __free_pages_bootmem(page, 0); 185 count++; 186 } 187 vec >>= 1; 188 off++; 189 } 190 } 191 start += BITS_PER_LONG; 192 } 193 194 page = virt_to_page(bdata->node_bootmem_map); 195 pages = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); 196 pages = bootmem_bootmap_pages(pages); 197 count += pages; 198 while (pages--) 199 __free_pages_bootmem(page++, 0); 200 201 bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count); 202 203 return count; 204} 205 206/** 207 * free_all_bootmem_node - release a node's free pages to the buddy allocator 208 * @pgdat: node to be released 209 * 210 * Returns the number of pages actually released. 211 */ 212unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) 213{ 214 register_page_bootmem_info_node(pgdat); 215 return free_all_bootmem_core(pgdat->bdata); 216} 217 218/** 219 * free_all_bootmem - release free pages to the buddy allocator 220 * 221 * Returns the number of pages actually released. 
222 */ 223unsigned long __init free_all_bootmem(void) 224{ 225 return free_all_bootmem_core(NODE_DATA(0)->bdata); 226} 227 228static void __init __free(bootmem_data_t *bdata, 229 unsigned long sidx, unsigned long eidx) 230{ 231 unsigned long idx; 232 233 bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data, 234 sidx + PFN_DOWN(bdata->node_boot_start), 235 eidx + PFN_DOWN(bdata->node_boot_start)); 236 237 if (bdata->hint_idx > sidx) 238 bdata->hint_idx = sidx; 239 240 for (idx = sidx; idx < eidx; idx++) 241 if (!test_and_clear_bit(idx, bdata->node_bootmem_map)) 242 BUG(); 243} 244 245static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx, 246 unsigned long eidx, int flags) 247{ 248 unsigned long idx; 249 int exclusive = flags & BOOTMEM_EXCLUSIVE; 250 251 bdebug("nid=%td start=%lx end=%lx flags=%x\n", 252 bdata - bootmem_node_data, 253 sidx + PFN_DOWN(bdata->node_boot_start), 254 eidx + PFN_DOWN(bdata->node_boot_start), 255 flags); 256 257 for (idx = sidx; idx < eidx; idx++) 258 if (test_and_set_bit(idx, bdata->node_bootmem_map)) { 259 if (exclusive) { 260 __free(bdata, sidx, idx); 261 return -EBUSY; 262 } 263 bdebug("silent double reserve of PFN %lx\n", 264 idx + PFN_DOWN(bdata->node_boot_start)); 265 } 266 return 0; 267} 268 269static int __init mark_bootmem_node(bootmem_data_t *bdata, 270 unsigned long start, unsigned long end, 271 int reserve, int flags) 272{ 273 unsigned long sidx, eidx; 274 275 bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n", 276 bdata - bootmem_node_data, start, end, reserve, flags); 277 278 BUG_ON(start < PFN_DOWN(bdata->node_boot_start)); 279 BUG_ON(end > bdata->node_low_pfn); 280 281 sidx = start - PFN_DOWN(bdata->node_boot_start); 282 eidx = end - PFN_DOWN(bdata->node_boot_start); 283 284 if (reserve) 285 return __reserve(bdata, sidx, eidx, flags); 286 else 287 __free(bdata, sidx, eidx); 288 return 0; 289} 290 291static int __init mark_bootmem(unsigned long start, unsigned long end, 292 int reserve, int 
flags) 293{ 294 unsigned long pos; 295 bootmem_data_t *bdata; 296 297 pos = start; 298 list_for_each_entry(bdata, &bdata_list, list) { 299 int err; 300 unsigned long max; 301 302 if (pos < PFN_DOWN(bdata->node_boot_start)) { 303 BUG_ON(pos != start); 304 continue; 305 } 306 307 max = min(bdata->node_low_pfn, end); 308 309 err = mark_bootmem_node(bdata, pos, max, reserve, flags); 310 if (reserve && err) { 311 mark_bootmem(start, pos, 0, 0); 312 return err; 313 } 314 315 if (max == end) 316 return 0; 317 pos = bdata->node_low_pfn; 318 } 319 BUG(); 320} 321 322/** 323 * free_bootmem_node - mark a page range as usable 324 * @pgdat: node the range resides on 325 * @physaddr: starting address of the range 326 * @size: size of the range in bytes 327 * 328 * Partial pages will be considered reserved and left as they are. 329 * 330 * The range must reside completely on the specified node. 331 */ 332void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 333 unsigned long size) 334{ 335 unsigned long start, end; 336 337 start = PFN_UP(physaddr); 338 end = PFN_DOWN(physaddr + size); 339 340 mark_bootmem_node(pgdat->bdata, start, end, 0, 0); 341} 342 343/** 344 * free_bootmem - mark a page range as usable 345 * @addr: starting address of the range 346 * @size: size of the range in bytes 347 * 348 * Partial pages will be considered reserved and left as they are. 349 * 350 * The range must be contiguous but may span node boundaries. 351 */ 352void __init free_bootmem(unsigned long addr, unsigned long size) 353{ 354 unsigned long start, end; 355 356 start = PFN_UP(addr); 357 end = PFN_DOWN(addr + size); 358 359 mark_bootmem(start, end, 0, 0); 360} 361 362/** 363 * reserve_bootmem_node - mark a page range as reserved 364 * @pgdat: node the range resides on 365 * @physaddr: starting address of the range 366 * @size: size of the range in bytes 367 * @flags: reservation flags (see linux/bootmem.h) 368 * 369 * Partial pages will be reserved. 
370 * 371 * The range must reside completely on the specified node. 372 */ 373int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 374 unsigned long size, int flags) 375{ 376 unsigned long start, end; 377 378 start = PFN_DOWN(physaddr); 379 end = PFN_UP(physaddr + size); 380 381 return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); 382} 383 384#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE 385/** 386 * reserve_bootmem - mark a page range as usable 387 * @addr: starting address of the range 388 * @size: size of the range in bytes 389 * @flags: reservation flags (see linux/bootmem.h) 390 * 391 * Partial pages will be reserved. 392 * 393 * The range must be contiguous but may span node boundaries. 394 */ 395int __init reserve_bootmem(unsigned long addr, unsigned long size, 396 int flags) 397{ 398 unsigned long start, end; 399 400 start = PFN_DOWN(addr); 401 end = PFN_UP(addr + size); 402 403 return mark_bootmem(start, end, 1, flags); 404} 405#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ 406 407static void * __init alloc_bootmem_core(struct bootmem_data *bdata, 408 unsigned long size, unsigned long align, 409 unsigned long goal, unsigned long limit) 410{ 411 unsigned long fallback = 0; 412 unsigned long min, max, start, sidx, midx, step; 413 414 BUG_ON(!size); 415 BUG_ON(align & (align - 1)); 416 BUG_ON(limit && goal + size > limit); 417 418 if (!bdata->node_bootmem_map) 419 return NULL; 420 421 bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n", 422 bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, 423 align, goal, limit); 424 425 min = PFN_DOWN(bdata->node_boot_start); 426 max = bdata->node_low_pfn; 427 428 goal >>= PAGE_SHIFT; 429 limit >>= PAGE_SHIFT; 430 431 if (limit && max > limit) 432 max = limit; 433 if (max <= min) 434 return NULL; 435 436 step = max(align >> PAGE_SHIFT, 1UL); 437 438 if (goal && min < goal && goal < max) 439 start = ALIGN(goal, step); 440 else 441 start = ALIGN(min, step); 442 443 sidx = 
start - PFN_DOWN(bdata->node_boot_start); 444 midx = max - PFN_DOWN(bdata->node_boot_start); 445 446 if (bdata->hint_idx > sidx) { 447 /* 448 * Handle the valid case of sidx being zero and still 449 * catch the fallback below. 450 */ 451 fallback = sidx + 1; 452 sidx = ALIGN(bdata->hint_idx, step); 453 } 454 455 while (1) { 456 int merge; 457 void *region; 458 unsigned long eidx, i, start_off, end_off; 459find_block: 460 sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx); 461 sidx = ALIGN(sidx, step); 462 eidx = sidx + PFN_UP(size); 463 464 if (sidx >= midx || eidx > midx) 465 break; 466 467 for (i = sidx; i < eidx; i++) 468 if (test_bit(i, bdata->node_bootmem_map)) { 469 sidx = ALIGN(i, step); 470 if (sidx == i) 471 sidx += step; 472 goto find_block; 473 } 474 475 if (bdata->last_end_off && 476 PFN_DOWN(bdata->last_end_off) + 1 == sidx) 477 start_off = ALIGN(bdata->last_end_off, align); 478 else 479 start_off = PFN_PHYS(sidx); 480 481 merge = PFN_DOWN(start_off) < sidx; 482 end_off = start_off + size; 483 484 bdata->last_end_off = end_off; 485 bdata->hint_idx = PFN_UP(end_off); 486 487 /* 488 * Reserve the area now: 489 */ 490 if (__reserve(bdata, PFN_DOWN(start_off) + merge, 491 PFN_UP(end_off), BOOTMEM_EXCLUSIVE)) 492 BUG(); 493 494 region = phys_to_virt(bdata->node_boot_start + start_off); 495 memset(region, 0, size); 496 return region; 497 } 498 499 if (fallback) { 500 sidx = ALIGN(fallback - 1, step); 501 fallback = 0; 502 goto find_block; 503 } 504 505 return NULL; 506} 507 508static void * __init ___alloc_bootmem_nopanic(unsigned long size, 509 unsigned long align, 510 unsigned long goal, 511 unsigned long limit) 512{ 513 bootmem_data_t *bdata; 514 515restart: 516 list_for_each_entry(bdata, &bdata_list, list) { 517 void *region; 518 519 if (goal && bdata->node_low_pfn <= PFN_DOWN(goal)) 520 continue; 521 if (limit && bdata->node_boot_start >= limit) 522 break; 523 524 region = alloc_bootmem_core(bdata, size, align, goal, limit); 525 if (region) 
526 return region; 527 } 528 529 if (goal) { 530 goal = 0; 531 goto restart; 532 } 533 534 return NULL; 535} 536 537/** 538 * __alloc_bootmem_nopanic - allocate boot memory without panicking 539 * @size: size of the request in bytes 540 * @align: alignment of the region 541 * @goal: preferred starting address of the region 542 * 543 * The goal is dropped if it can not be satisfied and the allocation will 544 * fall back to memory below @goal. 545 * 546 * Allocation may happen on any node in the system. 547 * 548 * Returns NULL on failure. 549 */ 550void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, 551 unsigned long goal) 552{ 553 return ___alloc_bootmem_nopanic(size, align, goal, 0); 554} 555 556static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, 557 unsigned long goal, unsigned long limit) 558{ 559 void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit); 560 561 if (mem) 562 return mem; 563 /* 564 * Whoops, we cannot satisfy the allocation request. 565 */ 566 printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size); 567 panic("Out of memory"); 568 return NULL; 569} 570 571/** 572 * __alloc_bootmem - allocate boot memory 573 * @size: size of the request in bytes 574 * @align: alignment of the region 575 * @goal: preferred starting address of the region 576 * 577 * The goal is dropped if it can not be satisfied and the allocation will 578 * fall back to memory below @goal. 579 * 580 * Allocation may happen on any node in the system. 581 * 582 * The function panics if the request can not be satisfied. 
583 */ 584void * __init __alloc_bootmem(unsigned long size, unsigned long align, 585 unsigned long goal) 586{ 587 return ___alloc_bootmem(size, align, goal, 0); 588} 589 590static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, 591 unsigned long size, unsigned long align, 592 unsigned long goal, unsigned long limit) 593{ 594 void *ptr; 595 596 ptr = alloc_bootmem_core(bdata, size, align, goal, limit); 597 if (ptr) 598 return ptr; 599 600 return ___alloc_bootmem(size, align, goal, limit); 601} 602 603/** 604 * __alloc_bootmem_node - allocate boot memory from a specific node 605 * @pgdat: node to allocate from 606 * @size: size of the request in bytes 607 * @align: alignment of the region 608 * @goal: preferred starting address of the region 609 * 610 * The goal is dropped if it can not be satisfied and the allocation will 611 * fall back to memory below @goal. 612 * 613 * Allocation may fall back to any node in the system if the specified node 614 * can not hold the requested memory. 615 * 616 * The function panics if the request can not be satisfied. 617 */ 618void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, 619 unsigned long align, unsigned long goal) 620{ 621 return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); 622} 623 624#ifdef CONFIG_SPARSEMEM 625/** 626 * alloc_bootmem_section - allocate boot memory from a specific section 627 * @size: size of the request in bytes 628 * @section_nr: sparse map section to allocate from 629 * 630 * Return NULL on failure. 
631 */ 632void * __init alloc_bootmem_section(unsigned long size, 633 unsigned long section_nr) 634{ 635 void *ptr; 636 unsigned long limit, goal, start_nr, end_nr, pfn; 637 struct pglist_data *pgdat; 638 639 pfn = section_nr_to_pfn(section_nr); 640 goal = PFN_PHYS(pfn); 641 limit = PFN_PHYS(section_nr_to_pfn(section_nr + 1)) - 1; 642 pgdat = NODE_DATA(early_pfn_to_nid(pfn)); 643 ptr = alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal, 644 limit); 645 646 if (!ptr) 647 return NULL; 648 649 start_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr))); 650 end_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr) + size)); 651 if (start_nr != section_nr || end_nr != section_nr) { 652 printk(KERN_WARNING "alloc_bootmem failed on section %ld.\n", 653 section_nr); 654 free_bootmem_node(pgdat, __pa(ptr), size); 655 ptr = NULL; 656 } 657 658 return ptr; 659} 660#endif 661 662void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, 663 unsigned long align, unsigned long goal) 664{ 665 void *ptr; 666 667 ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); 668 if (ptr) 669 return ptr; 670 671 return __alloc_bootmem_nopanic(size, align, goal); 672} 673 674#ifndef ARCH_LOW_ADDRESS_LIMIT 675#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL 676#endif 677 678/** 679 * __alloc_bootmem_low - allocate low boot memory 680 * @size: size of the request in bytes 681 * @align: alignment of the region 682 * @goal: preferred starting address of the region 683 * 684 * The goal is dropped if it can not be satisfied and the allocation will 685 * fall back to memory below @goal. 686 * 687 * Allocation may happen on any node in the system. 688 * 689 * The function panics if the request can not be satisfied. 
690 */ 691void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, 692 unsigned long goal) 693{ 694 return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT); 695} 696 697/** 698 * __alloc_bootmem_low_node - allocate low boot memory from a specific node 699 * @pgdat: node to allocate from 700 * @size: size of the request in bytes 701 * @align: alignment of the region 702 * @goal: preferred starting address of the region 703 * 704 * The goal is dropped if it can not be satisfied and the allocation will 705 * fall back to memory below @goal. 706 * 707 * Allocation may fall back to any node in the system if the specified node 708 * can not hold the requested memory. 709 * 710 * The function panics if the request can not be satisfied. 711 */ 712void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, 713 unsigned long align, unsigned long goal) 714{ 715 return ___alloc_bootmem_node(pgdat->bdata, size, align, 716 goal, ARCH_LOW_ADDRESS_LIMIT); 717} 718