e820.c revision dc8e8120ad291074a5fb93cfb0418466c62f6019
/*
 * Handle the memory map.
 * The functions here do the job until bootmem takes over.
 *
 * Getting sanitize_e820_map() in sync with i386 version by applying change:
 * - Provisions for empty E820 memory regions (reported by certain BIOSes).
 *   Alex Achenbach <xela@slit.de>, December 2002.
 * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
#include <linux/string.h>
#include <linux/kexec.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/pfn.h>
#include <linux/suspend.h>

#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/setup.h>
#include <asm/trampoline.h>

struct e820map e820;

/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0xaeedbabe;
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif

/*
 * This function checks if any part of the range <start,end> is mapped
 * with type.
 */
int
e820_any_mapped(u64 start, u64 end, unsigned type)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];

		if (type && ei->type != type)
			continue;
		if (ei->addr >= end || ei->addr + ei->size <= start)
			continue;
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(e820_any_mapped);

/*
 * This function checks if the entire range <start,end> is mapped with type.
 *
 * Note: this function only works correctly if the e820 table is sorted and
 * non-overlapping, which is the case.
 */
int __init e820_all_mapped(u64 start, u64 end, unsigned type)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];

		if (type && ei->type != type)
			continue;
		/* is the region (part) in overlap with the current region? */
		if (ei->addr >= end || ei->addr + ei->size <= start)
			continue;

		/*
		 * if the region is at the beginning of <start,end> we move
		 * start to the end of the region since it's ok until there
		 */
		if (ei->addr <= start)
			start = ei->addr + ei->size;
		/*
		 * if start is now at or beyond end, we're done, full
		 * coverage
		 */
		if (start >= end)
			return 1;
	}
	return 0;
}
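
/*
 * Illustrative sketch (not part of the original file) of the difference
 * between the two predicates above; the addresses are hypothetical:
 *
 *	if (e820_any_mapped(0xa0000, 0xc0000, E820_RESERVED))
 *		...;	// true if *any* byte of the range is E820_RESERVED
 *
 *	if (e820_all_mapped(0, 0x9f000, E820_RAM))
 *		...;	// true only if *every* byte of the range is E820_RAM
 *
 * Passing type == 0 makes either function match entries of any type.
 */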

/*
 * Add a memory region to the kernel e820 map.
 */
void __init e820_add_region(u64 start, u64 size, int type)
{
	int x = e820.nr_map;

	if (x == ARRAY_SIZE(e820.map)) {
		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
		return;
	}

	e820.map[x].addr = start;
	e820.map[x].size = size;
	e820.map[x].type = type;
	e820.nr_map++;
}

void __init e820_print_map(char *who)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
		       (unsigned long long) e820.map[i].addr,
		       (unsigned long long)
		       (e820.map[i].addr + e820.map[i].size));
		switch (e820.map[i].type) {
		case E820_RAM:
		case E820_RESERVED_KERN:
			printk(KERN_CONT "(usable)\n");
			break;
		case E820_RESERVED:
			printk(KERN_CONT "(reserved)\n");
			break;
		case E820_ACPI:
			printk(KERN_CONT "(ACPI data)\n");
			break;
		case E820_NVS:
			printk(KERN_CONT "(ACPI NVS)\n");
			break;
		default:
			printk(KERN_CONT "type %u\n", e820.map[i].type);
			break;
		}
	}
}

/*
 * Sanitize the BIOS e820 map.
 *
 * Some e820 responses include overlapping entries. The following
 * replaces the original e820 map with a new one, removing overlaps,
 * and resolving conflicting memory types in favor of the highest
 * numbered type.
 *
 * The input parameter biosmap points to an array of 'struct
 * e820entry' which on entry has elements in the range [0, *pnr_map)
 * valid, and which has space for up to max_nr_map entries.
 * On return, the resulting sanitized e820 map entries will be
 * overwritten in the same location, starting at biosmap.
 *
 * The integer pointed to by pnr_map must be valid on entry (the
 * current number of valid entries located at biosmap) and will
 * be updated on return, with the new number of valid entries
 * (something no more than max_nr_map.)
 *
 * The return value from sanitize_e820_map() is zero if it
 * successfully 'sanitized' the map entries passed in, and is -1
 * if it did nothing, which can happen if either (1) it was
 * only passed one map entry, or (2) any of the input map entries
 * were invalid (start + size < start, meaning that the size was
 * so big the described memory range wrapped around through zero.)
 *
 * Visually we're performing the following
 * (1,2,3,4 = memory types)...
 *
 * Sample memory map (w/overlaps):
 *	____22__________________
 *	______________________4_
 *	____1111________________
 *	_44_____________________
 *	11111111________________
 *	____________________33__
 *	___________44___________
 *	__________33333_________
 *	______________22________
 *	___________________2222_
 *	_________111111111______
 *	_____________________11_
 *	_________________4______
 *
 * Sanitized equivalent (no overlap):
 *	1_______________________
 *	_44_____________________
 *	___1____________________
 *	____22__________________
 *	______11________________
 *	_________1______________
 *	__________3_____________
 *	___________44___________
 *	_____________33_________
 *	_______________2________
 *	________________1_______
 *	_________________4______
 *	___________________2____
 *	____________________33__
 *	______________________4_
 */

int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
			     int *pnr_map)
{
	struct change_member {
		struct e820entry *pbios; /* pointer to original bios entry */
		unsigned long long addr; /* address for this change point */
	};
	static struct change_member change_point_list[2*E820_X_MAX] __initdata;
	static struct change_member *change_point[2*E820_X_MAX] __initdata;
	static struct e820entry *overlap_list[E820_X_MAX] __initdata;
	static struct e820entry new_bios[E820_X_MAX] __initdata;
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;
	BUG_ON(old_nr > max_nr_map);

	/* bail out if we find any unreasonable addresses in bios map */
	for (i = 0; i < old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i = 0; i < 2 * old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i = 0; i < old_nr; i++) {
		if (biosmap[i].size != 0) {
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			change_point[chgidx]->addr = biosmap[i].addr +
				biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx;

	/* sort change-point list by memory addresses (low -> high) */
	still_changing = 1;
	while (still_changing) {
		still_changing = 0;
		for (i = 1; i < chg_nr; i++) {
			unsigned long long curaddr, lastaddr;
			unsigned long long curpbaddr, lastpbaddr;

			curaddr = change_point[i]->addr;
			lastaddr = change_point[i - 1]->addr;
			curpbaddr = change_point[i]->pbios->addr;
			lastpbaddr = change_point[i - 1]->pbios->addr;

			/*
			 * swap entries, when:
			 *
			 * curaddr < lastaddr, or
			 * curaddr == lastaddr and curaddr == curpbaddr and
			 * lastaddr != lastpbaddr
			 */
			if (curaddr < lastaddr ||
			    (curaddr == lastaddr && curaddr == curpbaddr &&
			     lastaddr != lastpbaddr)) {
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing = 1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries = 0;	/* number of entries in the overlap table */
	new_bios_entry = 0;	/* index for creating new bios map entries */
	last_type = 0;		/* start with undefined memory type */
	last_addr = 0;		/* start with 0 as last starting address */

	/* loop through change-points, determining effect on the new bios map */
	for (chgidx = 0; chgidx < chg_nr; chgidx++) {
		/* keep track of all overlapping bios entries */
		if (change_point[chgidx]->addr ==
		    change_point[chgidx]->pbios->addr) {
			/*
			 * add map entry to overlap list (> 1 entry
			 * implies an overlap)
			 */
			overlap_list[overlap_entries++] =
				change_point[chgidx]->pbios;
		} else {
			/*
			 * remove entry from list (order independent,
			 * so swap with last)
			 */
			for (i = 0; i < overlap_entries; i++) {
				if (overlap_list[i] ==
				    change_point[chgidx]->pbios)
					overlap_list[i] =
						overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/*
		 * if there are overlapping entries, decide which
		 * "type" to use (larger value takes precedence --
		 * 1=usable, 2,3,4,4+=unusable)
		 */
		current_type = 0;
		for (i = 0; i < overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/*
		 * continue building up new bios map based on this
		 * information
		 */
		if (current_type != last_type) {
			if (last_type != 0) {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/*
				 * move forward only if the new size
				 * was non-zero
				 */
				if (new_bios[new_bios_entry].size != 0)
					/*
					 * no more space left for new
					 * bios entries?
					 */
					if (++new_bios_entry >= max_nr_map)
						break;
			}
			if (current_type != 0) {
				new_bios[new_bios_entry].addr =
					change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr = change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	/* retain count for new bios entries */
	new_nr = new_bios_entry;

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}
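
/*
 * A concrete (hypothetical) example of what sanitize_e820_map() does.
 * Given two overlapping input entries:
 *
 *	[0x00000000, 0x000a0000) type 1 (RAM)
 *	[0x0009f000, 0x000a0000) type 2 (reserved)
 *
 * the overlap is resolved in favor of the higher-numbered type, and the
 * output becomes:
 *
 *	[0x00000000, 0x0009f000) type 1 (RAM)
 *	[0x0009f000, 0x000a0000) type 2 (reserved)
 */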

static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
{
	while (nr_map) {
		u64 start = biosmap->addr;
		u64 size = biosmap->size;
		u64 end = start + size;
		u32 type = biosmap->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (start > end)
			return -1;

		e820_add_region(start, size, type);

		biosmap++;
		nr_map--;
	}
	return 0;
}

/*
 * Copy the BIOS e820 map into a safe place.
 *
 * Sanity-check it while we're at it..
 *
 * If we're lucky and live on a modern system, the setup code
 * will have given us a memory map that we can use to properly
 * set up memory. If we aren't, we'll fake a memory map.
 */
static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
{
	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;

	return __append_e820_map(biosmap, nr_map);
}

u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
			     unsigned new_type)
{
	int i;
	u64 real_updated_size = 0;

	BUG_ON(old_type == new_type);

	if (size > (ULLONG_MAX - start))
		size = ULLONG_MAX - start;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		u64 final_start, final_end;
		if (ei->type != old_type)
			continue;
		/* totally covered? */
		if (ei->addr >= start &&
		    (ei->addr + ei->size) <= (start + size)) {
			ei->type = new_type;
			real_updated_size += ei->size;
			continue;
		}
		/* partially covered */
		final_start = max(start, ei->addr);
		final_end = min(start + size, ei->addr + ei->size);
		if (final_start >= final_end)
			continue;
		e820_add_region(final_start, final_end - final_start,
				new_type);
		real_updated_size += final_end - final_start;

		ei->size -= final_end - final_start;
		if (ei->addr < final_start)
			continue;
		ei->addr = final_end;
	}
	return real_updated_size;
}
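
/*
 * Illustrative sketch (hypothetical limit): clipping RAM above a
 * user-imposed boundary, as the "mem=" handling later in this file does.
 * Every E820_RAM byte at or above 512MB is retyped E820_RESERVED; the
 * number of bytes actually retyped is returned.
 *
 *	u64 limit = 512ULL << 20;
 *	u64 changed = e820_update_range(limit, ULLONG_MAX - limit,
 *					E820_RAM, E820_RESERVED);
 */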

/* make e820 not cover the range */
u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
			     int checktype)
{
	int i;
	u64 real_removed_size = 0;

	if (size > (ULLONG_MAX - start))
		size = ULLONG_MAX - start;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		u64 final_start, final_end;

		if (checktype && ei->type != old_type)
			continue;
		/* totally covered? */
		if (ei->addr >= start &&
		    (ei->addr + ei->size) <= (start + size)) {
			real_removed_size += ei->size;
			memset(ei, 0, sizeof(struct e820entry));
			continue;
		}
		/* partially covered */
		final_start = max(start, ei->addr);
		final_end = min(start + size, ei->addr + ei->size);
		if (final_start >= final_end)
			continue;
		real_removed_size += final_end - final_start;

		ei->size -= final_end - final_start;
		if (ei->addr < final_start)
			continue;
		ei->addr = final_end;
	}
	return real_removed_size;
}

void __init update_e820(void)
{
	int nr_map;

	nr_map = e820.nr_map;
	if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
		return;
	e820.nr_map = nr_map;
	printk(KERN_INFO "modified physical RAM map:\n");
	e820_print_map("modified");
}

#define MAX_GAP_END 0x100000000ull
/*
 * Search for a gap in the e820 memory space from start_addr to end_addr.
 */
__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
			   unsigned long start_addr,
			   unsigned long long end_addr)
{
	unsigned long long last;
	int i = e820.nr_map;
	int found = 0;

	last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;

	while (--i >= 0) {
		unsigned long long start = e820.map[i].addr;
		unsigned long long end = start + e820.map[i].size;

		if (end < start_addr)
			continue;

		/*
		 * Since "last" is at most 4GB, we know we'll
		 * fit in 32 bits if this condition is true
		 */
		if (last > end) {
			unsigned long gap = last - end;

			if (gap >= *gapsize) {
				*gapsize = gap;
				*gapstart = end;
				found = 1;
			}
		}
		if (start < last)
			last = start;
	}
	return found;
}

/*
 * Search for the biggest gap in the low 32 bits of the e820
 * memory space. We pass this space to PCI to assign MMIO resources
 * for hotplug or unconfigured devices in.
 * Hopefully the BIOS left enough space for it.
 */
__init void e820_setup_gap(void)
{
	unsigned long gapstart, gapsize, round;
	int found;

	gapstart = 0x10000000;
	gapsize = 0x400000;
	found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);

#ifdef CONFIG_X86_64
	if (!found) {
		gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
		       "address range\n"
		       KERN_ERR "PCI: Unassigned devices with 32bit resource "
		       "registers may break!\n");
	}
#endif

	/*
	 * See how much we want to round up: start off with
	 * rounding to the next 1MB area.
	 */
	round = 0x100000;
	while ((gapsize >> 4) > round)
		round += round;
	/* Fun with two's complement */
	pci_mem_start = (gapstart + round) & -round;

	printk(KERN_INFO
	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
	       pci_mem_start, gapstart, gapsize);
}
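
/*
 * Worked example (hypothetical numbers) for the rounding above: with a
 * gap starting at 0xd0000000 and a gap size of 0x20000000 (512MB), the
 * loop doubles "round" while gapsize/16 exceeds it, stopping at
 * 0x2000000 (32MB).  Then (gapstart + round) & -round, where -round is
 * ~(round - 1) in two's complement, rounds the start up to the next
 * 32MB boundary:
 *
 *	(0xd0000000 + 0x2000000) & -0x2000000 = 0xd2000000
 *
 * so pci_mem_start lands on an alignment proportional to the gap size.
 */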

/**
 * Because of the size limitation of struct boot_params, only the first
 * 128 E820 memory entries are passed to the kernel via
 * boot_params.e820_map; the rest are passed via the SETUP_E820_EXT node
 * of the linked list of struct setup_data, which is parsed here.
 */
void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
{
	u32 map_len;
	int entries;
	struct e820entry *extmap;

	entries = sdata->len / sizeof(struct e820entry);
	map_len = sdata->len + sizeof(struct setup_data);
	if (map_len > PAGE_SIZE)
		sdata = early_ioremap(pa_data, map_len);
	extmap = (struct e820entry *)(sdata->data);
	__append_e820_map(extmap, entries);
	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
	if (map_len > PAGE_SIZE)
		early_iounmap(sdata, map_len);
	printk(KERN_INFO "extended physical RAM map:\n");
	e820_print_map("extended");
}

#if defined(CONFIG_X86_64) || \
	(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
/**
 * Find the ranges of physical addresses that do not correspond to
 * e820 RAM areas and mark the corresponding pages as nosave for
 * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
 *
 * This function requires the e820 map to be sorted and without any
 * overlapping entries and assumes the first e820 area to be RAM.
 */
void __init e820_mark_nosave_regions(unsigned long limit_pfn)
{
	int i;
	unsigned long pfn;

	pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
	for (i = 1; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];

		if (pfn < PFN_UP(ei->addr))
			register_nosave_region(pfn, PFN_UP(ei->addr));

		pfn = PFN_DOWN(ei->addr + ei->size);
		if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
			register_nosave_region(PFN_UP(ei->addr), pfn);

		if (pfn >= limit_pfn)
			break;
	}
}
#endif
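
/*
 * Illustrative walk-through (hypothetical map): given the sorted entries
 *
 *	[0x00000000, 0x0009f000) E820_RAM
 *	[0x000f0000, 0x00100000) E820_RESERVED
 *	[0x00100000, 0x40000000) E820_RAM
 *
 * the loop above registers the hole [0x9f000, 0xf0000) and the reserved
 * range [0xf0000, 0x100000) as nosave, so hibernation never tries to
 * restore pages that are not ordinary RAM.
 */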

/*
 * Early reserved memory areas.
 */
#define MAX_EARLY_RES 20

struct early_res {
	u64 start, end;
	char name[16];
	char overlap_ok;
};
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
	{ 0, PAGE_SIZE, "BIOS data page" },	/* BIOS data page */
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
#endif
#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
	/*
	 * But first pinch a few for the stack/trampoline stuff
	 * FIXME: Don't need the extra page at 4K, but need to fix
	 * trampoline before removing it. (see the GDT stuff)
	 */
	{ PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
	/*
	 * Has to be in very low memory so we can execute
	 * real-mode AP code.
	 */
	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
#endif
	{}
};

static int __init find_overlapped_early(u64 start, u64 end)
{
	int i;
	struct early_res *r;

	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		r = &early_res[i];
		if (end > r->start && start < r->end)
			break;
	}

	return i;
}

/*
 * Drop the i-th range from the early reservation map,
 * by copying any higher ranges down one over it, and
 * clearing what had been the last slot.
 */
static void __init drop_range(int i)
{
	int j;

	for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
		;

	memmove(&early_res[i], &early_res[i + 1],
		(j - 1 - i) * sizeof(struct early_res));

	early_res[j - 1].end = 0;
}

/*
 * Split any existing ranges that:
 *  1) are marked 'overlap_ok', and
 *  2) overlap with the stated range [start, end)
 * into whatever portion (if any) of the existing range is entirely
 * below or entirely above the stated range. Drop the portion
 * of the existing range that overlaps with the stated range,
 * which will allow the caller of this routine to then add that
 * stated range without conflicting with any existing range.
 */
static void __init drop_overlaps_that_are_ok(u64 start, u64 end)
{
	int i;
	struct early_res *r;
	u64 lower_start, lower_end;
	u64 upper_start, upper_end;
	char name[16];

	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		r = &early_res[i];

		/* Continue past non-overlapping ranges */
		if (end <= r->start || start >= r->end)
			continue;

		/*
		 * Leave non-ok overlaps as is; let caller
		 * panic "Overlapping early reservations"
		 * when it hits this overlap.
		 */
		if (!r->overlap_ok)
			return;

		/*
		 * We have an ok overlap. We will drop it from the early
		 * reservation map, and add back in any non-overlapping
		 * portions (lower or upper) as separate, overlap_ok,
		 * non-overlapping ranges.
		 */

		/* 1. Note any non-overlapping (lower or upper) ranges. */
		strncpy(name, r->name, sizeof(name) - 1);

		lower_start = lower_end = 0;
		upper_start = upper_end = 0;
		if (r->start < start) {
			lower_start = r->start;
			lower_end = start;
		}
		if (r->end > end) {
			upper_start = end;
			upper_end = r->end;
		}

		/* 2. Drop the original ok overlapping range */
		drop_range(i);

		i--;		/* resume for-loop on copied down entry */

		/* 3. Add back in any non-overlapping ranges. */
		if (lower_end)
			reserve_early_overlap_ok(lower_start, lower_end, name);
		if (upper_end)
			reserve_early_overlap_ok(upper_start, upper_end, name);
	}
}

static void __init __reserve_early(u64 start, u64 end, char *name,
				   int overlap_ok)
{
	int i;
	struct early_res *r;

	i = find_overlapped_early(start, end);
	if (i >= MAX_EARLY_RES)
		panic("Too many early reservations");
	r = &early_res[i];
	if (r->end)
		panic("Overlapping early reservations "
		      "%llx-%llx %s to %llx-%llx %s\n",
		      start, end - 1, name ? name : "", r->start,
		      r->end - 1, r->name);
	r->start = start;
	r->end = end;
	r->overlap_ok = overlap_ok;
	if (name)
		strncpy(r->name, name, sizeof(r->name) - 1);
}

/*
 * A few early reservations come here.
 *
 * The 'overlap_ok' in the name of this routine does -not- mean it
 * is ok for these reservations to overlap an earlier reservation.
 * Rather it means that it is ok for subsequent reservations to
 * overlap this one.
 *
 * Use this entry point to reserve early ranges when you are doing
 * so out of "Paranoia", reserving perhaps more memory than you need,
 * just in case, and don't mind a subsequent overlapping reservation
 * that is known to be needed.
 *
 * The drop_overlaps_that_are_ok() call here isn't really needed.
 * It would be needed if we had two colliding 'overlap_ok'
 * reservations, so that the second such would not panic on the
 * overlap with the first. We don't have any such as of this
 * writing, but might as well tolerate such if it happens in
 * the future.
 */
void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
{
	drop_overlaps_that_are_ok(start, end);
	__reserve_early(start, end, name, 1);
}

/*
 * Most early reservations come here.
 *
 * We first have drop_overlaps_that_are_ok() drop any pre-existing
 * 'overlap_ok' ranges, so that we can then reserve this memory
 * range without risk of panicking on an overlapping overlap_ok
 * early reservation.
 */
void __init reserve_early(u64 start, u64 end, char *name)
{
	drop_overlaps_that_are_ok(start, end);
	__reserve_early(start, end, name, 0);
}
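
/*
 * Illustrative usage (hypothetical range and name): callers elsewhere in
 * early boot protect a range like this, and release it once the data has
 * been consumed:
 *
 *	reserve_early(table_phys, table_phys + table_len, "MYTABLE");
 *	...
 *	free_early(table_phys, table_phys + table_len);
 *
 * free_early(), below, requires the exact [start, end) that was
 * reserved; partial frees are not supported and will panic.
 */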

void __init free_early(u64 start, u64 end)
{
	struct early_res *r;
	int i;

	i = find_overlapped_early(start, end);
	r = &early_res[i];
	if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
		panic("free_early on not reserved area: %llx-%llx!",
		      start, end - 1);

	drop_range(i);
}

void __init early_res_to_bootmem(u64 start, u64 end)
{
	int i, count;
	u64 final_start, final_end;

	count = 0;
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
		count++;

	printk(KERN_INFO "(%d early reservations) ==> bootmem\n", count);
	for (i = 0; i < count; i++) {
		struct early_res *r = &early_res[i];
		printk(KERN_INFO " #%d [ %010llx - %010llx ] %16s", i,
		       r->start, r->end, r->name);
		final_start = max(start, r->start);
		final_end = min(end, r->end);
		if (final_start >= final_end) {
			printk(KERN_CONT "\n");
			continue;
		}
		printk(KERN_CONT " ===> [ %010llx - %010llx ]\n",
		       final_start, final_end);
		reserve_bootmem_generic(final_start, final_end - final_start,
					BOOTMEM_DEFAULT);
	}
}

/* Check for already reserved areas */
static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
{
	int i;
	u64 addr = *addrp;
	int changed = 0;
	struct early_res *r;
again:
	i = find_overlapped_early(addr, addr + size);
	r = &early_res[i];
	if (i < MAX_EARLY_RES && r->end) {
		*addrp = addr = round_up(r->end, align);
		changed = 1;
		goto again;
	}
	return changed;
}

/* Check for already reserved areas */
static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
{
	int i;
	u64 addr = *addrp, last;
	u64 size = *sizep;
	int changed = 0;
again:
	last = addr + size;
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		struct early_res *r = &early_res[i];
		if (last > r->start && addr < r->start) {
			size = r->start - addr;
			changed = 1;
			goto again;
		}
		if (last > r->end && addr < r->end) {
			addr = round_up(r->end, align);
			size = last - addr;
			changed = 1;
			goto again;
		}
		/* the whole [addr, last) range sits inside a reservation */
		if (last <= r->end && addr >= r->start) {
			(*sizep)++;
			return 0;
		}
	}
	if (changed) {
		*addrp = addr;
		*sizep = size;
	}
	return changed;
}

/*
 * Find a free area with specified alignment in a specific range.
 */
u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		u64 addr, last;
		u64 ei_last;

		if (ei->type != E820_RAM)
			continue;
		addr = round_up(ei->addr, align);
		ei_last = ei->addr + ei->size;
		if (addr < start)
			addr = round_up(start, align);
		if (addr >= ei_last)
			continue;
		while (bad_addr(&addr, size, align) && addr+size <= ei_last)
			;
		last = addr + size;
		if (last > ei_last)
			continue;
		if (last > end)
			continue;
		return addr;
	}
	return -1ULL;
}

/*
 * Find next free range after *start
 */
u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		u64 addr, last;
		u64 ei_last;

		if (ei->type != E820_RAM)
			continue;
		addr = round_up(ei->addr, align);
		ei_last = ei->addr + ei->size;
		if (addr < start)
			addr = round_up(start, align);
		if (addr >= ei_last)
			continue;
		*sizep = ei_last - addr;
		while (bad_addr_size(&addr, sizep, align) &&
		       addr + *sizep <= ei_last)
			;
		last = addr + *sizep;
		if (last > ei_last)
			continue;
		return addr;
	}
	return -1ULL;
}
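
/*
 * Illustrative usage (hypothetical sizes): carve out 16KB of page-aligned
 * RAM below 1MB, then protect it from later early allocations:
 *
 *	u64 addr = find_e820_area(0, 1 << 20, 4 * PAGE_SIZE, PAGE_SIZE);
 *	if (addr == -1ULL)
 *		panic("cannot allocate low pages");
 *	reserve_early(addr, addr + 4 * PAGE_SIZE, "LOWPAGES");
 *
 * find_e820_area() only returns E820_RAM space that does not collide
 * with existing early reservations; -1ULL signals failure.
 */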

/*
 * Pre-allocate a chunk of RAM and mark it reserved in the e820 map.
 */
u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
{
	u64 size = 0;
	u64 addr;
	u64 start;

	start = startt;
	while (size < sizet)
		start = find_e820_area_size(start, &size, align);

	if (size < sizet)
		return 0;

	addr = round_down(start + size - sizet, align);
	e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
	printk(KERN_INFO "update e820 for early_reserve_e820\n");
	update_e820();

	return addr;
}

#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_PAE
#  define MAX_ARCH_PFN		(1ULL<<(36-PAGE_SHIFT))
# else
#  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
# endif
#else /* CONFIG_X86_32 */
# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
#endif

/*
 * Last pfn which the user wants to use.
 */
unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;

/*
 * Find the highest page frame number we have available
 */
unsigned long __init e820_end_of_ram(void)
{
	unsigned long last_pfn;
	unsigned long max_arch_pfn = MAX_ARCH_PFN;

	last_pfn = find_max_pfn_with_active_regions();

	if (last_pfn > max_arch_pfn)
		last_pfn = max_arch_pfn;
	if (last_pfn > end_user_pfn)
		last_pfn = end_user_pfn;

	printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
	       last_pfn, max_arch_pfn);
	return last_pfn;
}
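
/*
 * Worked numbers for MAX_ARCH_PFN above, assuming the usual 4KB pages
 * (PAGE_SHIFT == 12):
 *
 *	32-bit, no PAE:  1ULL << (32 - 12) = 0x100000 pfns  =  4GB
 *	32-bit with PAE: 1ULL << (36 - 12) = 0x1000000 pfns = 64GB
 *
 * On 64-bit the limit comes from MAXMEM, the top of the direct mapping.
 */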

/*
 * Finds an active region in the address range from start_pfn to last_pfn and
 * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
 */
int __init e820_find_active_region(const struct e820entry *ei,
				   unsigned long start_pfn,
				   unsigned long last_pfn,
				   unsigned long *ei_startpfn,
				   unsigned long *ei_endpfn)
{
	u64 align = PAGE_SIZE;

	*ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
	*ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;

	/* Skip map entries smaller than a page */
	if (*ei_startpfn >= *ei_endpfn)
		return 0;

	/* Skip if map is outside the node */
	if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
	    *ei_startpfn >= last_pfn)
		return 0;

	/* Check for overlaps */
	if (*ei_startpfn < start_pfn)
		*ei_startpfn = start_pfn;
	if (*ei_endpfn > last_pfn)
		*ei_endpfn = last_pfn;

	/* Obey end_user_pfn to save on memmap */
	if (*ei_startpfn >= end_user_pfn)
		return 0;
	if (*ei_endpfn > end_user_pfn)
		*ei_endpfn = end_user_pfn;

	return 1;
}

/* Walk the e820 map and register active regions within a node */
void __init e820_register_active_regions(int nid, unsigned long start_pfn,
					 unsigned long last_pfn)
{
	unsigned long ei_startpfn;
	unsigned long ei_endpfn;
	int i;

	for (i = 0; i < e820.nr_map; i++)
		if (e820_find_active_region(&e820.map[i],
					    start_pfn, last_pfn,
					    &ei_startpfn, &ei_endpfn))
			add_active_range(nid, ei_startpfn, ei_endpfn);
}

/*
 * Find the hole size (in bytes) in the memory range.
 * @start: starting address of the memory range to scan
 * @end: ending address of the memory range to scan
 */
u64 __init e820_hole_size(u64 start, u64 end)
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long last_pfn = end >> PAGE_SHIFT;
	unsigned long ei_startpfn, ei_endpfn, ram = 0;
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		if (e820_find_active_region(&e820.map[i],
					    start_pfn, last_pfn,
					    &ei_startpfn, &ei_endpfn))
			ram += ei_endpfn - ei_startpfn;
	}
	return end - start - ((u64)ram << PAGE_SHIFT);
}

static void early_panic(char *msg)
{
	early_printk(msg);
	panic(msg);
}

/* "mem=nopentium" disables the 4MB page tables. */
static int __init parse_memopt(char *p)
{
	u64 mem_size;

	if (!p)
		return -EINVAL;

#ifdef CONFIG_X86_32
	if (!strcmp(p, "nopentium")) {
		setup_clear_cpu_cap(X86_FEATURE_PSE);
		return 0;
	}
#endif

	mem_size = memparse(p, &p);
	end_user_pfn = mem_size>>PAGE_SHIFT;
	e820_update_range(mem_size, ULLONG_MAX - mem_size,
			  E820_RAM, E820_RESERVED);

	return 0;
}
early_param("mem", parse_memopt);
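
/*
 * Illustrative command lines (hypothetical sizes) for the "mem=" option
 * above and the "memmap=" option handled below:
 *
 *	mem=512M		cap usable RAM at 512MB
 *	memmap=64M@16M		add [16MB, 80MB) as E820_RAM
 *	memmap=4M#0xfee00000	add the range as E820_ACPI data
 *	memmap=16M$0x10000000	mark the range E820_RESERVED
 *	memmap=exactmap		discard the BIOS map and trust only
 *				subsequently given memmap= regions
 */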

static int userdef __initdata;

static int __init parse_memmap_opt(char *p)
{
	char *oldp;
	u64 start_at, mem_size;

	if (!strcmp(p, "exactmap")) {
#ifdef CONFIG_CRASH_DUMP
		/*
		 * If we are doing a crash dump, we still need to know
		 * the real memory size before the original memory map
		 * is reset.
		 */
		e820_register_active_regions(0, 0, -1UL);
		saved_max_pfn = e820_end_of_ram();
		remove_all_active_ranges();
#endif
		e820.nr_map = 0;
		userdef = 1;
		return 0;
	}

	oldp = p;
	mem_size = memparse(p, &p);
	if (p == oldp)
		return -EINVAL;

	userdef = 1;
	if (*p == '@') {
		start_at = memparse(p+1, &p);
		e820_add_region(start_at, mem_size, E820_RAM);
	} else if (*p == '#') {
		start_at = memparse(p+1, &p);
		e820_add_region(start_at, mem_size, E820_ACPI);
	} else if (*p == '$') {
		start_at = memparse(p+1, &p);
		e820_add_region(start_at, mem_size, E820_RESERVED);
	} else {
		end_user_pfn = (mem_size >> PAGE_SHIFT);
		e820_update_range(mem_size, ULLONG_MAX - mem_size,
				  E820_RAM, E820_RESERVED);
	}
	return *p == '\0' ? 0 : -EINVAL;
}
early_param("memmap", parse_memmap_opt);

void __init finish_e820_parsing(void)
{
	if (userdef) {
		int nr = e820.nr_map;

		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
			early_panic("Invalid user supplied memory map");
		e820.nr_map = nr;

		printk(KERN_INFO "user-defined physical RAM map:\n");
		e820_print_map("user");
	}
}

/*
 * Mark e820 reserved areas as busy for the resource manager.
 */
void __init e820_reserve_resources(void)
{
	int i;
	struct resource *res;
	u64 end;

	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
	for (i = 0; i < e820.nr_map; i++) {
		switch (e820.map[i].type) {
		case E820_RESERVED_KERN:
		case E820_RAM:	res->name = "System RAM"; break;
		case E820_ACPI:	res->name = "ACPI Tables"; break;
		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
		default:	res->name = "reserved";
		}
		end = e820.map[i].addr + e820.map[i].size - 1;
#ifndef CONFIG_RESOURCES_64BIT
		if (end > 0x100000000ULL) {
			res++;
			continue;
		}
#endif
		res->start = e820.map[i].addr;
		res->end = end;

		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
		insert_resource(&iomem_resource, res);
		res++;
	}
}
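
/*
 * The resources inserted above are what later shows up in /proc/iomem.
 * A sketch of typical lines (addresses are hypothetical):
 *
 *	00000000-0009fbff : System RAM
 *	000f0000-000fffff : reserved
 *	00100000-3fffffff : System RAM
 *	7ff00000-7fffffff : ACPI Tables
 *	fec00000-fec0ffff : reserved
 */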

char *__init default_machine_specific_memory_setup(void)
{
	char *who = "BIOS-e820";
	int new_nr;
	/*
	 * Try to copy the BIOS-supplied E820-map.
	 *
	 * Otherwise fake a memory map; one section from 0k->640k,
	 * the next section from 1mb->appropriate_mem_k
	 */
	new_nr = boot_params.e820_entries;
	sanitize_e820_map(boot_params.e820_map,
			  ARRAY_SIZE(boot_params.e820_map),
			  &new_nr);
	boot_params.e820_entries = new_nr;
	if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
	    < 0) {
		u64 mem_size;

		/* compare results from other methods and take the greater */
		if (boot_params.alt_mem_k
		    < boot_params.screen_info.ext_mem_k) {
			mem_size = boot_params.screen_info.ext_mem_k;
			who = "BIOS-88";
		} else {
			mem_size = boot_params.alt_mem_k;
			who = "BIOS-e801";
		}

		e820.nr_map = 0;
		e820_add_region(0, LOWMEMSIZE(), E820_RAM);
		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
	}

	/* In case someone cares... */
	return who;
}

char *__init __attribute__((weak)) machine_specific_memory_setup(void)
{
	return default_machine_specific_memory_setup();
}

/* Overridden in paravirt.c if CONFIG_PARAVIRT */
char * __init __attribute__((weak)) memory_setup(void)
{
	return machine_specific_memory_setup();
}

void __init setup_memory_map(void)
{
	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
	e820_print_map(memory_setup());
}

#ifdef CONFIG_X86_64
int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
{
	int i;

	if (slot < 0 || slot >= e820.nr_map)
		return -1;
	for (i = slot; i < e820.nr_map; i++) {
		if (e820.map[i].type != E820_RAM)
			continue;
		break;
	}
	if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
		return -1;
	*addr = e820.map[i].addr;
	*size = min_t(u64, e820.map[i].size + e820.map[i].addr,
		      max_pfn << PAGE_SHIFT) - *addr;
	return i + 1;
}
#endif