/* e820.c revision 41c094fd3ca54f1a71233049cf136ff94c91f4ae */
1/* 2 * Handle the memory map. 3 * The functions here do the job until bootmem takes over. 4 * 5 * Getting sanitize_e820_map() in sync with i386 version by applying change: 6 * - Provisions for empty E820 memory regions (reported by certain BIOSes). 7 * Alex Achenbach <xela@slit.de>, December 2002. 8 * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> 9 * 10 */ 11#include <linux/kernel.h> 12#include <linux/types.h> 13#include <linux/init.h> 14#include <linux/bootmem.h> 15#include <linux/ioport.h> 16#include <linux/string.h> 17#include <linux/kexec.h> 18#include <linux/module.h> 19#include <linux/mm.h> 20#include <linux/pfn.h> 21#include <linux/suspend.h> 22 23#include <asm/pgtable.h> 24#include <asm/page.h> 25#include <asm/e820.h> 26#include <asm/proto.h> 27#include <asm/setup.h> 28#include <asm/trampoline.h> 29 30struct e820map e820; 31 32/* For PCI or other memory-mapped resources */ 33unsigned long pci_mem_start = 0xaeedbabe; 34#ifdef CONFIG_PCI 35EXPORT_SYMBOL(pci_mem_start); 36#endif 37 38/* 39 * This function checks if any part of the range <start,end> is mapped 40 * with type. 41 */ 42int 43e820_any_mapped(u64 start, u64 end, unsigned type) 44{ 45 int i; 46 47 for (i = 0; i < e820.nr_map; i++) { 48 struct e820entry *ei = &e820.map[i]; 49 50 if (type && ei->type != type) 51 continue; 52 if (ei->addr >= end || ei->addr + ei->size <= start) 53 continue; 54 return 1; 55 } 56 return 0; 57} 58EXPORT_SYMBOL_GPL(e820_any_mapped); 59 60/* 61 * This function checks if the entire range <start,end> is mapped with type. 
 *
 * Note: this function only works correct if the e820 table is sorted and
 * not-overlapping, which is the case
 */
int __init e820_all_mapped(u64 start, u64 end, unsigned type)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];

		/* type == 0 acts as a wildcard and matches any entry type */
		if (type && ei->type != type)
			continue;
		/* is the region (part) in overlap with the current region ?*/
		if (ei->addr >= end || ei->addr + ei->size <= start)
			continue;

		/* if the region is at the beginning of <start,end> we move
		 * start to the end of the region since it's ok until there
		 */
		if (ei->addr <= start)
			start = ei->addr + ei->size;
		/*
		 * if start is now at or beyond end, we're done, full
		 * coverage
		 */
		if (start >= end)
			return 1;
	}
	return 0;
}

/*
 * Add a memory region to the kernel e820 map.
 *
 * Note: once the map is full the region is dropped with only a console
 * message; the caller is not informed of the failure.
 */
void __init e820_add_region(u64 start, u64 size, int type)
{
	int x = e820.nr_map;

	if (x == ARRAY_SIZE(e820.map)) {
		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
		return;
	}

	e820.map[x].addr = start;
	e820.map[x].size = size;
	e820.map[x].type = type;
	e820.nr_map++;
}

/*
 * Print the current e820 map to the console, one line per entry,
 * each prefixed with 'who' (e.g. "modified", "user", "extended").
 */
void __init e820_print_map(char *who)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
		       (unsigned long long) e820.map[i].addr,
		       (unsigned long long)
		       (e820.map[i].addr + e820.map[i].size));
		switch (e820.map[i].type) {
		case E820_RAM:
			printk(KERN_CONT "(usable)\n");
			break;
		case E820_RESERVED:
			printk(KERN_CONT "(reserved)\n");
			break;
		case E820_ACPI:
			printk(KERN_CONT "(ACPI data)\n");
			break;
		case E820_NVS:
			printk(KERN_CONT "(ACPI NVS)\n");
			break;
		default:
			printk(KERN_CONT "type %u\n", e820.map[i].type);
			break;
		}
	}
}

/*
 * Sanitize the BIOS e820 map.
 *
 * Some e820 responses include overlapping entries.
 * The following
 * replaces the original e820 map with a new one, removing overlaps,
 * and resolving conflicting memory types in favor of highest
 * numbered type.
 *
 * The input parameter biosmap points to an array of 'struct
 * e820entry' which on entry has elements in the range [0, *pnr_map)
 * valid, and which has space for up to max_nr_map entries.
 * On return, the resulting sanitized e820 map entries will be in
 * overwritten in the same location, starting at biosmap.
 *
 * The integer pointed to by pnr_map must be valid on entry (the
 * current number of valid entries located at biosmap) and will
 * be updated on return, with the new number of valid entries
 * (something no more than max_nr_map.)
 *
 * The return value from sanitize_e820_map() is zero if it
 * successfully 'sanitized' the map entries passed in, and is -1
 * if it did nothing, which can happen if either of (1) it was
 * only passed one map entry, or (2) any of the input map entries
 * were invalid (start + size < start, meaning that the size was
 * so big the described memory range wrapped around through zero.)
 *
 * Visually we're performing the following
 * (1,2,3,4 = memory types)...
 *
 * Sample memory map (w/overlaps):
 *	____22__________________
 *	______________________4_
 *	____1111________________
 *	_44_____________________
 *	11111111________________
 *	____________________33__
 *	___________44___________
 *	__________33333_________
 *	______________22________
 *	___________________2222_
 *	_________111111111______
 *	_____________________11_
 *	_________________4______
 *
 * Sanitized equivalent (no overlap):
 *	1_______________________
 *	_44_____________________
 *	___1____________________
 *	____22__________________
 *	______11________________
 *	_________1______________
 *	__________3_____________
 *	___________44___________
 *	_____________33_________
 *	_______________2________
 *	________________1_______
 *	_________________4______
 *	___________________2____
 *	____________________33__
 *	______________________4_
 */

int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
			     int *pnr_map)
{
	/* One change point per region boundary (start or end address). */
	struct change_member {
		struct e820entry *pbios; /* pointer to original bios entry */
		unsigned long long addr; /* address for this change point */
	};
	/* static __initdata: these arrays are too large for an early stack */
static struct change_member change_point_list[2*E820_X_MAX] __initdata;
static struct change_member *change_point[2*E820_X_MAX] __initdata;
static struct e820entry *overlap_list[E820_X_MAX] __initdata;
static struct e820entry new_bios[E820_X_MAX] __initdata;
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;
	BUG_ON(old_nr > max_nr_map);

	/* bail out if we find any unreasonable addresses in bios map */
	for (i = 0; i < old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i = 0; i < 2 * old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i = 0; i < old_nr; i++) {
		if (biosmap[i].size != 0) {
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			change_point[chgidx]->addr = biosmap[i].addr +
				biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx;

	/* sort change-point list by memory addresses (low -> high) */
	/* NOTE: simple exchange sort -- chg_nr is small, so this is fine */
	still_changing = 1;
	while (still_changing) {
		still_changing = 0;
		for (i = 1; i < chg_nr; i++) {
			unsigned long long curaddr, lastaddr;
			unsigned long long curpbaddr, lastpbaddr;

			curaddr = change_point[i]->addr;
			lastaddr = change_point[i - 1]->addr;
			curpbaddr = change_point[i]->pbios->addr;
			lastpbaddr = change_point[i - 1]->pbios->addr;

			/*
			 * swap entries, when:
			 *
			 * curaddr > lastaddr or
			 * curaddr == lastaddr and curaddr == curpbaddr and
			 * lastaddr != lastpbaddr
			 */
			if (curaddr < lastaddr ||
			    (curaddr == lastaddr && curaddr == curpbaddr &&
			     lastaddr != lastpbaddr)) {
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing = 1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries = 0;	 /* number of entries in the overlap table */
	new_bios_entry = 0;	 /* index for creating new bios map entries */
	last_type = 0;		 /* start with undefined memory type */
	last_addr = 0;		 /* start with 0 as last starting address */

	/* loop through change-points, determining affect on the new bios map */
	for (chgidx = 0; chgidx < chg_nr; chgidx++) {
		/* keep track of all overlapping bios entries */
		if (change_point[chgidx]->addr ==
		    change_point[chgidx]->pbios->addr) {
			/*
			 * add map entry to overlap list (> 1 entry
			 * implies an overlap)
			 */
			overlap_list[overlap_entries++] =
				change_point[chgidx]->pbios;
		} else {
			/*
			 * remove entry from list (order independent,
			 * so swap with last)
			 */
			for (i = 0; i < overlap_entries; i++) {
				if (overlap_list[i] ==
				    change_point[chgidx]->pbios)
					overlap_list[i] =
						overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/*
		 * if there are overlapping entries, decide which
		 * "type" to use (larger value takes precedence --
		 * 1=usable, 2,3,4,4+=unusable)
		 */
		current_type = 0;
		for (i = 0; i < overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/*
		 * continue building up new bios map based on this
		 * information
		 */
		if (current_type != last_type) {
			if (last_type != 0) {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/*
				 * move forward only if the new size
				 * was non-zero
				 */
				if (new_bios[new_bios_entry].size != 0)
					/*
					 * no more space left for new
					 * bios entries ?
					 */
					if (++new_bios_entry >= max_nr_map)
						break;
			}
			if (current_type != 0) {
				new_bios[new_bios_entry].addr =
					change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr = change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	/* retain count for new bios entries */
	new_nr = new_bios_entry;

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}

/*
 * Append each entry of 'biosmap' to the kernel e820 map via
 * e820_add_region().  Returns -1 (and stops) as soon as an entry
 * wraps around the 64-bit address space, 0 otherwise.
 */
static int __init __copy_e820_map(struct e820entry *biosmap, int nr_map)
{
	while (nr_map) {
		u64 start = biosmap->addr;
		u64 size = biosmap->size;
		u64 end = start + size;
		u32 type = biosmap->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (start > end)
			return -1;

		e820_add_region(start, size, type);

		biosmap++;
		nr_map--;
	}
	return 0;
}

/*
 * Copy the BIOS e820 map into a safe place.
 *
 * Sanity-check it while we're at it..
 *
 * If we're lucky and live on a modern system, the setup code
 * will have given us a memory map that we can use to properly
 * set up memory.  If we aren't, we'll fake a memory map.
 */
int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
{
	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;

	return __copy_e820_map(biosmap, nr_map);
}

/*
 * Change the type of e820 entries of type 'old_type' intersecting
 * [start, start+size) to 'new_type'.  Fully covered entries are
 * retyped in place; for partial overlaps a new region of the new type
 * is appended instead.
 *
 * NOTE(review): in the partial case the original entry keeps its old
 * type over its whole extent, so the map temporarily overlaps;
 * presumably a later sanitize_e820_map() pass resolves this in favor
 * of the higher-numbered type -- verify against callers.
 *
 * Returns the number of bytes whose type was actually updated.
 */
u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
			     unsigned new_type)
{
	int i;
	u64 real_updated_size = 0;

	BUG_ON(old_type == new_type);

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		u64 final_start, final_end;
		if (ei->type != old_type)
			continue;
		/* totally covered? */
		if (ei->addr >= start &&
		    (ei->addr + ei->size) <= (start + size)) {
			ei->type = new_type;
			real_updated_size += ei->size;
			continue;
		}
		/* partially covered */
		final_start = max(start, ei->addr);
		final_end = min(start + size, ei->addr + ei->size);
		if (final_start >= final_end)
			continue;
		e820_add_region(final_start, final_end - final_start,
				new_type);
		real_updated_size += final_end - final_start;
	}
	return real_updated_size;
}

/*
 * Re-sanitize the in-kernel e820 map and print the result.  Leaves
 * the map untouched if sanitize_e820_map() reports it did nothing.
 */
void __init update_e820(void)
{
	int nr_map;

	nr_map = e820.nr_map;
	if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
		return;
	e820.nr_map = nr_map;
	printk(KERN_INFO "modified physical RAM map:\n");
	e820_print_map("modified");
}

/*
 * Search for the biggest gap in the low 32 bits of the e820
 * memory space.  We pass this space to PCI to assign MMIO resources
 * for hotplug or unconfigured devices in.
 * Hopefully the BIOS let enough space left.
449 */ 450__init void e820_setup_gap(void) 451{ 452 unsigned long gapstart, gapsize, round; 453 unsigned long long last; 454 int i; 455 int found = 0; 456 457 last = 0x100000000ull; 458 gapstart = 0x10000000; 459 gapsize = 0x400000; 460 i = e820.nr_map; 461 while (--i >= 0) { 462 unsigned long long start = e820.map[i].addr; 463 unsigned long long end = start + e820.map[i].size; 464 465 /* 466 * Since "last" is at most 4GB, we know we'll 467 * fit in 32 bits if this condition is true 468 */ 469 if (last > end) { 470 unsigned long gap = last - end; 471 472 if (gap > gapsize) { 473 gapsize = gap; 474 gapstart = end; 475 found = 1; 476 } 477 } 478 if (start < last) 479 last = start; 480 } 481 482#ifdef CONFIG_X86_64 483 if (!found) { 484 gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; 485 printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " 486 "address range\n" 487 KERN_ERR "PCI: Unassigned devices with 32bit resource " 488 "registers may break!\n"); 489 } 490#endif 491 492 /* 493 * See how much we want to round up: start off with 494 * rounding to the next 1MB area. 495 */ 496 round = 0x100000; 497 while ((gapsize >> 4) > round) 498 round += round; 499 /* Fun with two's complement */ 500 pci_mem_start = (gapstart + round) & -round; 501 502 printk(KERN_INFO 503 "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", 504 pci_mem_start, gapstart, gapsize); 505} 506 507/** 508 * Because of the size limitation of struct boot_params, only first 509 * 128 E820 memory entries are passed to kernel via 510 * boot_params.e820_map, others are passed via SETUP_E820_EXT node of 511 * linked list of struct setup_data, which is parsed here. 
 */
void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
{
	u32 map_len;
	int entries;
	struct e820entry *extmap;

	entries = sdata->len / sizeof(struct e820entry);
	map_len = sdata->len + sizeof(struct setup_data);
	/*
	 * NOTE(review): presumably only one page of the setup_data node
	 * is mapped on entry, hence the remap when the extended map
	 * spills past a page -- confirm against the caller.
	 */
	if (map_len > PAGE_SIZE)
		sdata = early_ioremap(pa_data, map_len);
	extmap = (struct e820entry *)(sdata->data);
	__copy_e820_map(extmap, entries);
	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
	if (map_len > PAGE_SIZE)
		early_iounmap(sdata, map_len);
	printk(KERN_INFO "extended physical RAM map:\n");
	e820_print_map("extended");
}

#if defined(CONFIG_X86_64) || \
	(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
/**
 * Find the ranges of physical addresses that do not correspond to
 * e820 RAM areas and mark the corresponding pages as nosave for
 * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
 *
 * This function requires the e820 map to be sorted and without any
 * overlapping entries and assumes the first e820 area to be RAM.
 */
void __init e820_mark_nosave_regions(unsigned long limit_pfn)
{
	int i;
	unsigned long pfn;

	/* Start just past the first entry (assumed to be RAM). */
	pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
	for (i = 1; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];

		/* Hole between the previous entry's end and this start. */
		if (pfn < PFN_UP(ei->addr))
			register_nosave_region(pfn, PFN_UP(ei->addr));

		pfn = PFN_DOWN(ei->addr + ei->size);
		/* Non-RAM entries themselves are not saveable either. */
		if (ei->type != E820_RAM)
			register_nosave_region(PFN_UP(ei->addr), pfn);

		if (pfn >= limit_pfn)
			break;
	}
}
#endif

/*
 * Early reserved memory areas.
 */
#define MAX_EARLY_RES 20

struct early_res {
	u64 start, end;
	char name[16];
};
/*
 * Packed table of ranges that must not be handed out before bootmem
 * is up; an entry with end == 0 terminates the list.
 */
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
	{ 0, PAGE_SIZE, "BIOS data page" },		/* BIOS data page */
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
#endif
#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
	/*
	 * But first pinch a few for the stack/trampoline stuff
	 * FIXME: Don't need the extra page at 4K, but need to fix
	 * trampoline before removing it. (see the GDT stuff)
	 */
	{ PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
	/*
	 * Has to be in very low memory so we can execute
	 * real-mode AP code.
	 */
	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
#endif
	{}
};

/*
 * Return the index of the first early_res entry overlapping
 * [start, end), or the index of the first unused slot if nothing
 * overlaps.  May return MAX_EARLY_RES when the table is full.
 */
static int __init find_overlapped_early(u64 start, u64 end)
{
	int i;
	struct early_res *r;

	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		r = &early_res[i];
		if (end > r->start && start < r->end)
			break;
	}

	return i;
}

/*
 * Record [start, end) as reserved under 'name'.  Panics if the table
 * is full or the range overlaps an existing reservation.
 */
void __init reserve_early(u64 start, u64 end, char *name)
{
	int i;
	struct early_res *r;

	i = find_overlapped_early(start, end);
	if (i >= MAX_EARLY_RES)
		panic("Too many early reservations");
	r = &early_res[i];
	if (r->end)
		panic("Overlapping early reservations "
		      "%llx-%llx %s to %llx-%llx %s\n",
		      start, end - 1, name?name:"", r->start,
		      r->end - 1, r->name);
	r->start = start;
	r->end = end;
	if (name)
		strncpy(r->name, name, sizeof(r->name) - 1);
}

/*
 * Drop an exact, previously-made reservation and close the gap by
 * shifting the following entries down one slot.
 */
void __init free_early(u64 start, u64 end)
{
	struct early_res *r;
	int i, j;

	i = find_overlapped_early(start, end);
	r = &early_res[i];
	if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
		panic("free_early on not reserved area: %llx-%llx!",
		      start, end - 1);

	/* j ends up one past the last used slot */
	for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
		;

	memmove(&early_res[i], &early_res[i + 1],
		(j - 1 - i) * sizeof(struct early_res));

	early_res[j - 1].end = 0;
}

/*
 * Hand every early reservation intersecting [start, end) over to
 * the bootmem allocator.
 */
void __init early_res_to_bootmem(u64 start, u64 end)
{
	int i;
	u64 final_start, final_end;
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		struct early_res *r = &early_res[i];
		final_start = max(start, r->start);
		final_end = min(end, r->end);
		if (final_start >= final_end)
			continue;
		printk(KERN_INFO "  early res: %d [%llx-%llx] %s\n", i,
		       final_start, final_end - 1, r->name);
		reserve_bootmem_generic(final_start, final_end - final_start,
					BOOTMEM_DEFAULT);
	}
}

/* Check for already reserved areas */
/*
 * If [addr, addr+size) collides with an early reservation, bump
 * *addrp up past the reservation (aligned) and return 1; return 0
 * when the range is clear.
 */
static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
{
	int i;
	u64 addr = *addrp;
	int changed = 0;
	struct early_res *r;
again:
	i = find_overlapped_early(addr, addr + size);
	r = &early_res[i];
	if (i < MAX_EARLY_RES && r->end) {
		*addrp = addr = round_up(r->end, align);
		changed = 1;
		goto again;
	}
	return changed;
}

/* Check for already reserved areas */
static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
{
	int i;
	u64 addr = *addrp, last;
	u64 size = *sizep;
	int changed = 0;
again:
	last = addr + size;
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		struct early_res *r = &early_res[i];
		/* A reservation begins inside the range: clip the size. */
		if (last > r->start && addr < r->start) {
			size = r->start - addr;
			changed = 1;
			goto again;
		}
		/* A reservation ends inside the range: move addr past it. */
		if (last > r->end && addr < r->end) {
			addr = round_up(r->end, align);
			size = last - addr;
			changed = 1;
			goto again;
		}
		/*
		 * Range lies entirely inside a reservation.
		 * NOTE(review): incrementing *sizep here looks odd for a
		 * fully-covered range -- presumably the caller retries
		 * with the grown size; confirm this is intentional.
		 */
		if (last <= r->end && addr >= r->start) {
			(*sizep)++;
			return 0;
		}
	}
	if (changed) {
		*addrp = addr;
		*sizep = size;
	}
	return changed;
}

/*
 * Find a free area with specified alignment in a specific
 * range.
 */
u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		u64 addr, last;
		u64 ei_last;

		/* Only plain RAM entries can be handed out. */
		if (ei->type != E820_RAM)
			continue;
		addr = round_up(ei->addr, align);
		ei_last = ei->addr + ei->size;
		if (addr < start)
			addr = round_up(start, align);
		if (addr >= ei_last)
			continue;
		/* Step over early reservations inside this entry. */
		while (bad_addr(&addr, size, align) && addr+size <= ei_last)
			;
		last = addr + size;
		if (last > ei_last)
			continue;
		if (last > end)
			continue;
		return addr;
	}
	/* Nothing found. */
	return -1ULL;
}

/*
 * Find next free range after *start
 */
u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		u64 addr, last;
		u64 ei_last;

		if (ei->type != E820_RAM)
			continue;
		addr = round_up(ei->addr, align);
		ei_last = ei->addr + ei->size;
		if (addr < start)
			addr = round_up(start, align);
		if (addr >= ei_last)
			continue;
		/* Largest run available in this entry; shrunk by bad_addr_size. */
		*sizep = ei_last - addr;
		while (bad_addr_size(&addr, sizep, align) &&
		       addr + *sizep <= ei_last)
			;
		last = addr + *sizep;
		if (last > ei_last)
			continue;
		return addr;
	}
	/* Nothing found. */
	return -1UL;

}

/*
 * pre allocated 4k and reserved it in e820
 */
u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
{
	u64 size = 0;
	u64 addr;
	u64 start;

	start = startt;
	/*
	 * NOTE(review): this loop depends on find_e820_area_size()
	 * eventually returning with size >= sizet or running out of
	 * map; verify it cannot spin forever on a pathological map.
	 */
	while (size < sizet)
		start = find_e820_area_size(start, &size, align);

	if (size < sizet)
		return 0;

	/* Take the tail end of the found range and mark it reserved. */
	addr = round_down(start + size - sizet, align);
	e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
	printk(KERN_INFO "update e820 for early_reserve_e820\n");
	update_e820();

	return addr;
}

/* Highest pfn the architecture can address (36-bit with PAE on 32-bit). */
#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_PAE
#  define MAX_ARCH_PFN		(1ULL<<(36-PAGE_SHIFT))
# else
#  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
# endif
#else /* CONFIG_X86_32 */
# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
#endif

/*
 * Last pfn which the user wants to use.
 */
unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;

/*
 * Find the highest page frame number we have available
 */
unsigned long __init e820_end_of_ram(void)
{
	unsigned long last_pfn;
	unsigned long max_arch_pfn = MAX_ARCH_PFN;

	last_pfn = find_max_pfn_with_active_regions();

	/* Clamp to what the architecture can address... */
	if (last_pfn > max_arch_pfn)
		last_pfn = max_arch_pfn;
	/* ...and to any user-imposed "mem="/"memmap=" limit. */
	if (last_pfn > end_user_pfn)
		last_pfn = end_user_pfn;

	printk(KERN_INFO "last_pfn = %lu max_arch_pfn = %lu\n",
			 last_pfn, max_arch_pfn);
	return last_pfn;
}

/*
 * Finds an active region in the address range from start_pfn to last_pfn and
 * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
 * Returns 1 when a usable sub-range was produced, 0 otherwise.
 */
int __init e820_find_active_region(const struct e820entry *ei,
				  unsigned long start_pfn,
				  unsigned long last_pfn,
				  unsigned long *ei_startpfn,
				  unsigned long *ei_endpfn)
{
	u64 align = PAGE_SIZE;

	*ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
	*ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;

	/* Skip map entries smaller than a page */
	if (*ei_startpfn >= *ei_endpfn)
		return 0;

	/* Skip if map is outside the node */
	if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
				    *ei_startpfn >= last_pfn)
		return 0;

	/* Check for overlaps */
	if (*ei_startpfn < start_pfn)
		*ei_startpfn = start_pfn;
	if (*ei_endpfn > last_pfn)
		*ei_endpfn = last_pfn;

	/* Obey end_user_pfn to save on memmap */
	if (*ei_startpfn >= end_user_pfn)
		return 0;
	if (*ei_endpfn > end_user_pfn)
		*ei_endpfn = end_user_pfn;

	return 1;
}

/* Walk the e820 map and register active regions within a node */
void __init e820_register_active_regions(int nid, unsigned long start_pfn,
					 unsigned long last_pfn)
{
	unsigned long ei_startpfn;
	unsigned long ei_endpfn;
	int i;

	for (i = 0; i < e820.nr_map; i++)
		if (e820_find_active_region(&e820.map[i],
					    start_pfn, last_pfn,
					    &ei_startpfn, &ei_endpfn))
			add_active_range(nid, ei_startpfn, ei_endpfn);
}

/*
 * Find the hole size (in bytes) in the memory range.
 * @start: starting address of the memory range to scan
 * @end: ending address of the memory range to scan
 */
u64 __init e820_hole_size(u64 start, u64 end)
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long last_pfn = end >> PAGE_SHIFT;
	unsigned long ei_startpfn, ei_endpfn, ram = 0;
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		if (e820_find_active_region(&e820.map[i],
					    start_pfn, last_pfn,
					    &ei_startpfn, &ei_endpfn))
			ram += ei_endpfn - ei_startpfn;
	}
	return end - start - ((u64)ram << PAGE_SHIFT);
}

/*
 * Print to the early console, then die.
 * NOTE(review): msg is used as a printk format string -- callers must
 * never pass text containing '%' conversions.
 */
static void early_panic(char *msg)
{
	early_printk(msg);
	panic(msg);
}

/* "mem=nopentium" disables the 4MB page tables. */
static int __init parse_memopt(char *p)
{
	u64 mem_size;

	if (!p)
		return -EINVAL;

#ifdef CONFIG_X86_32
	if (!strcmp(p, "nopentium")) {
		setup_clear_cpu_cap(X86_FEATURE_PSE);
		return 0;
	}
#endif

	mem_size = memparse(p, &p);
	end_user_pfn = mem_size>>PAGE_SHIFT;
	return 0;
}
early_param("mem", parse_memopt);

/* Set when the user supplied a map via "memmap=". */
static int userdef __initdata;

/*
 * Parse one "memmap=" option:
 *   memmap=exactmap  -- drop the BIOS map and build from scratch
 *   memmap=nn@ss     -- add a RAM region
 *   memmap=nn#ss     -- add an ACPI data region
 *   memmap=nn$ss     -- add a reserved region
 *   memmap=nn        -- limit end of memory, like "mem="
 */
static int __init parse_memmap_opt(char *p)
{
	char *oldp;
	u64 start_at, mem_size;

	if (!strcmp(p, "exactmap")) {
#ifdef CONFIG_CRASH_DUMP
		/*
		 * If we are doing a crash dump, we still need to know
		 * the real mem size before original memory map is
		 * reset.
		 */
		e820_register_active_regions(0, 0, -1UL);
		saved_max_pfn = e820_end_of_ram();
		remove_all_active_ranges();
#endif
		e820.nr_map = 0;
		userdef = 1;
		return 0;
	}

	oldp = p;
	mem_size = memparse(p, &p);
	if (p == oldp)
		return -EINVAL;

	userdef = 1;
	if (*p == '@') {
		start_at = memparse(p+1, &p);
		e820_add_region(start_at, mem_size, E820_RAM);
	} else if (*p == '#') {
		start_at = memparse(p+1, &p);
		e820_add_region(start_at, mem_size, E820_ACPI);
	} else if (*p == '$') {
		start_at = memparse(p+1, &p);
		e820_add_region(start_at, mem_size, E820_RESERVED);
	} else {
		end_user_pfn = (mem_size >> PAGE_SHIFT);
	}
	return *p == '\0' ? 0 : -EINVAL;
}
early_param("memmap", parse_memmap_opt);

/*
 * After all early parameters have been parsed, sanitize and print
 * any user-supplied memory map.
 */
void __init finish_e820_parsing(void)
{
	if (userdef) {
		int nr = e820.nr_map;

		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
			early_panic("Invalid user supplied memory map");
		e820.nr_map = nr;

		printk(KERN_INFO "user-defined physical RAM map:\n");
		e820_print_map("user");
	}
}

/*
 * Mark e820 reserved areas as busy for the resource manager.
 */
void __init e820_reserve_resources(void)
{
	int i;
	struct resource *res;

	/* One struct resource per e820 entry, allocated as one block. */
	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
	for (i = 0; i < e820.nr_map; i++) {
		switch (e820.map[i].type) {
		case E820_RAM:	res->name = "System RAM"; break;
		case E820_ACPI:	res->name = "ACPI Tables"; break;
		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
		default:	res->name = "reserved";
		}
		res->start = e820.map[i].addr;
		res->end = res->start + e820.map[i].size - 1;
#ifndef CONFIG_RESOURCES_64BIT
		/*
		 * Without 64-bit resource support, entries ending above
		 * 4GB cannot be represented: skip them (the slot in the
		 * allocated array is simply left unused).
		 */
		if (res->end > 0x100000000ULL) {
			res++;
			continue;
		}
#endif
		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
		insert_resource(&iomem_resource, res);
		res++;
	}
}