e820.c revision a678c2be75773e112f6d656a22a7f1645c4dbd6c
1/*
2 * Handle the memory map.
3 * The functions here do the job until bootmem takes over.
4 *
5 *  Getting sanitize_e820_map() in sync with i386 version by applying change:
6 *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
7 *     Alex Achenbach <xela@slit.de>, December 2002.
8 *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
9 *
10 */
11#include <linux/kernel.h>
12#include <linux/types.h>
13#include <linux/init.h>
14#include <linux/bootmem.h>
15#include <linux/pfn.h>
16#include <linux/suspend.h>
17#include <linux/firmware-map.h>
18
19#include <asm/e820.h>
20#include <asm/early_res.h>
21#include <asm/proto.h>
22#include <asm/setup.h>
23
24/*
25 * The e820 map is the map that gets modified e.g. with command line parameters
26 * and that is also registered with modifications in the kernel resource tree
27 * with the iomem_resource as parent.
28 *
29 * The e820_saved is directly saved after the BIOS-provided memory map is
30 * copied. It doesn't get modified afterwards. It's registered for the
31 * /sys/firmware/memmap interface.
32 *
33 * That memory map is not modified and is used as base for kexec. The kexec'd
34 * kernel should get the same memory map as the firmware provides. Then the
35 * user can e.g. boot the original kernel with mem=1G while still booting the
36 * next kernel with full memory.
37 */
/* Working memory map; the helpers in this file add to and edit it in place. */
struct e820map e820;
/*
 * Copy of e820 taken in setup_memory_map(); its entries are handed to
 * firmware_map_add_early() by e820_reserve_resources().
 */
struct e820map e820_saved;
40
/*
 * For PCI or other memory-mapped resources.  The initializer is only a
 * placeholder: e820_setup_gap() overwrites it with the start of the gap
 * it selected.
 */
unsigned long pci_mem_start = 0xaeedbabe;
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif
46
47/*
48 * This function checks if any part of the range <start,end> is mapped
49 * with type.
50 */
51int
52e820_any_mapped(u64 start, u64 end, unsigned type)
53{
54	int i;
55
56	for (i = 0; i < e820.nr_map; i++) {
57		struct e820entry *ei = &e820.map[i];
58
59		if (type && ei->type != type)
60			continue;
61		if (ei->addr >= end || ei->addr + ei->size <= start)
62			continue;
63		return 1;
64	}
65	return 0;
66}
67EXPORT_SYMBOL_GPL(e820_any_mapped);
68
69/*
70 * This function checks if the entire range <start,end> is mapped with type.
71 *
72 * Note: this function only works correct if the e820 table is sorted and
73 * not-overlapping, which is the case
74 */
75int __init e820_all_mapped(u64 start, u64 end, unsigned type)
76{
77	int i;
78
79	for (i = 0; i < e820.nr_map; i++) {
80		struct e820entry *ei = &e820.map[i];
81
82		if (type && ei->type != type)
83			continue;
84		/* is the region (part) in overlap with the current region ?*/
85		if (ei->addr >= end || ei->addr + ei->size <= start)
86			continue;
87
88		/* if the region is at the beginning of <start,end> we move
89		 * start to the end of the region since it's ok until there
90		 */
91		if (ei->addr <= start)
92			start = ei->addr + ei->size;
93		/*
94		 * if start is now at or beyond end, we're done, full
95		 * coverage
96		 */
97		if (start >= end)
98			return 1;
99	}
100	return 0;
101}
102
103/*
104 * Add a memory region to the kernel e820 map.
105 */
106static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
107					 int type)
108{
109	int x = e820x->nr_map;
110
111	if (x >= ARRAY_SIZE(e820x->map)) {
112		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
113		return;
114	}
115
116	e820x->map[x].addr = start;
117	e820x->map[x].size = size;
118	e820x->map[x].type = type;
119	e820x->nr_map++;
120}
121
122void __init e820_add_region(u64 start, u64 size, int type)
123{
124	__e820_add_region(&e820, start, size, type);
125}
126
127static void __init e820_print_type(u32 type)
128{
129	switch (type) {
130	case E820_RAM:
131	case E820_RESERVED_KERN:
132		printk(KERN_CONT "(usable)");
133		break;
134	case E820_RESERVED:
135		printk(KERN_CONT "(reserved)");
136		break;
137	case E820_ACPI:
138		printk(KERN_CONT "(ACPI data)");
139		break;
140	case E820_NVS:
141		printk(KERN_CONT "(ACPI NVS)");
142		break;
143	case E820_UNUSABLE:
144		printk(KERN_CONT "(unusable)");
145		break;
146	default:
147		printk(KERN_CONT "type %u", type);
148		break;
149	}
150}
151
152void __init e820_print_map(char *who)
153{
154	int i;
155
156	for (i = 0; i < e820.nr_map; i++) {
157		printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
158		       (unsigned long long) e820.map[i].addr,
159		       (unsigned long long)
160		       (e820.map[i].addr + e820.map[i].size));
161		e820_print_type(e820.map[i].type);
162		printk(KERN_CONT "\n");
163	}
164}
165
166/*
167 * Sanitize the BIOS e820 map.
168 *
169 * Some e820 responses include overlapping entries. The following
170 * replaces the original e820 map with a new one, removing overlaps,
171 * and resolving conflicting memory types in favor of highest
172 * numbered type.
173 *
174 * The input parameter biosmap points to an array of 'struct
175 * e820entry' which on entry has elements in the range [0, *pnr_map)
176 * valid, and which has space for up to max_nr_map entries.
 * On return, the resulting sanitized e820 map entries will have been
 * written back to the same location, starting at biosmap.
179 *
180 * The integer pointed to by pnr_map must be valid on entry (the
181 * current number of valid entries located at biosmap) and will
182 * be updated on return, with the new number of valid entries
183 * (something no more than max_nr_map.)
184 *
185 * The return value from sanitize_e820_map() is zero if it
186 * successfully 'sanitized' the map entries passed in, and is -1
187 * if it did nothing, which can happen if either of (1) it was
188 * only passed one map entry, or (2) any of the input map entries
189 * were invalid (start + size < start, meaning that the size was
190 * so big the described memory range wrapped around through zero.)
191 *
192 *	Visually we're performing the following
193 *	(1,2,3,4 = memory types)...
194 *
195 *	Sample memory map (w/overlaps):
196 *	   ____22__________________
197 *	   ______________________4_
198 *	   ____1111________________
199 *	   _44_____________________
200 *	   11111111________________
201 *	   ____________________33__
202 *	   ___________44___________
203 *	   __________33333_________
204 *	   ______________22________
205 *	   ___________________2222_
206 *	   _________111111111______
207 *	   _____________________11_
208 *	   _________________4______
209 *
210 *	Sanitized equivalent (no overlap):
211 *	   1_______________________
212 *	   _44_____________________
213 *	   ___1____________________
214 *	   ____22__________________
215 *	   ______11________________
216 *	   _________1______________
217 *	   __________3_____________
218 *	   ___________44___________
219 *	   _____________33_________
220 *	   _______________2________
221 *	   ________________1_______
222 *	   _________________4______
223 *	   ___________________2____
224 *	   ____________________33__
225 *	   ______________________4_
226 */
227
int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
			     u32 *pnr_map)
{
	/* one boundary (start or end address) of one input entry */
	struct change_member {
		struct e820entry *pbios; /* pointer to original bios entry */
		unsigned long long addr; /* address for this change point */
	};
	/*
	 * Static __initdata scratch storage, all sized from E820_X_MAX:
	 * two change points per entry, the set of entries currently "open"
	 * during the sweep, and the map being built.
	 */
	static struct change_member change_point_list[2*E820_X_MAX] __initdata;
	static struct change_member *change_point[2*E820_X_MAX] __initdata;
	static struct e820entry *overlap_list[E820_X_MAX] __initdata;
	static struct e820entry new_bios[E820_X_MAX] __initdata;
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;
	BUG_ON(old_nr > max_nr_map);

	/* bail out if we find any unreasonable addresses in bios map */
	for (i = 0; i < old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i = 0; i < 2 * old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i = 0; i < old_nr; i++)	{
		if (biosmap[i].size != 0) {
			/* first the start address, then the end address */
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			change_point[chgidx]->addr = biosmap[i].addr +
				biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx;

	/*
	 * sort change-point list by memory addresses (low -> high);
	 * adjacent elements are swapped until a full pass makes no change
	 */
	still_changing = 1;
	while (still_changing)	{
		still_changing = 0;
		for (i = 1; i < chg_nr; i++)  {
			unsigned long long curaddr, lastaddr;
			unsigned long long curpbaddr, lastpbaddr;

			curaddr = change_point[i]->addr;
			lastaddr = change_point[i - 1]->addr;
			curpbaddr = change_point[i]->pbios->addr;
			lastpbaddr = change_point[i - 1]->pbios->addr;

			/*
			 * swap entries, when:
			 *
			 * curaddr < lastaddr or
			 * curaddr == lastaddr and curaddr == curpbaddr and
			 * lastaddr != lastpbaddr
			 *
			 * i.e. at equal addresses a start point is moved in
			 * front of an end point.
			 */
			if (curaddr < lastaddr ||
			    (curaddr == lastaddr && curaddr == curpbaddr &&
			     lastaddr != lastpbaddr)) {
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing = 1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries = 0;	 /* number of entries in the overlap table */
	new_bios_entry = 0;	 /* index for creating new bios map entries */
	last_type = 0;		 /* start with undefined memory type */
	last_addr = 0;		 /* start with 0 as last starting address */

	/* loop through change-points, determining effect on the new bios map */
	for (chgidx = 0; chgidx < chg_nr; chgidx++) {
		/*
		 * keep track of all overlapping bios entries: a change point
		 * located at its entry's own addr is that entry's start,
		 * anything else is its end
		 */
		if (change_point[chgidx]->addr ==
		    change_point[chgidx]->pbios->addr) {
			/*
			 * add map entry to overlap list (> 1 entry
			 * implies an overlap)
			 */
			overlap_list[overlap_entries++] =
				change_point[chgidx]->pbios;
		} else {
			/*
			 * remove entry from list (order independent,
			 * so swap with last)
			 */
			for (i = 0; i < overlap_entries; i++) {
				if (overlap_list[i] ==
				    change_point[chgidx]->pbios)
					overlap_list[i] =
						overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/*
		 * if there are overlapping entries, decide which
		 * "type" to use (larger value takes precedence --
		 * 1=usable, 2,3,4,4+=unusable)
		 */
		current_type = 0;
		for (i = 0; i < overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/*
		 * continue building up new bios map based on this
		 * information
		 */
		if (current_type != last_type)	{
			/* close the output entry that was open so far */
			if (last_type != 0)	 {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/*
				 * move forward only if the new size
				 * was non-zero
				 */
				if (new_bios[new_bios_entry].size != 0)
					/*
					 * no more space left for new
					 * bios entries ?
					 */
					if (++new_bios_entry >= max_nr_map)
						break;
			}
			/* open a new output entry at this change point */
			if (current_type != 0)	{
				new_bios[new_bios_entry].addr =
					change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr = change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	/* retain count for new bios entries */
	new_nr = new_bios_entry;

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}
386
387static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
388{
389	while (nr_map) {
390		u64 start = biosmap->addr;
391		u64 size = biosmap->size;
392		u64 end = start + size;
393		u32 type = biosmap->type;
394
395		/* Overflow in 64 bits? Ignore the memory map. */
396		if (start > end)
397			return -1;
398
399		e820_add_region(start, size, type);
400
401		biosmap++;
402		nr_map--;
403	}
404	return 0;
405}
406
407/*
408 * Copy the BIOS e820 map into a safe place.
409 *
410 * Sanity-check it while we're at it..
411 *
412 * If we're lucky and live on a modern system, the setup code
413 * will have given us a memory map that we can use to properly
414 * set up memory.  If we aren't, we'll fake a memory map.
415 */
416static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
417{
418	/* Only one memory region (or negative)? Ignore it */
419	if (nr_map < 2)
420		return -1;
421
422	return __append_e820_map(biosmap, nr_map);
423}
424
425static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
426					u64 size, unsigned old_type,
427					unsigned new_type)
428{
429	u64 end;
430	unsigned int i;
431	u64 real_updated_size = 0;
432
433	BUG_ON(old_type == new_type);
434
435	if (size > (ULLONG_MAX - start))
436		size = ULLONG_MAX - start;
437
438	end = start + size;
439	printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ",
440		       (unsigned long long) start,
441		       (unsigned long long) end);
442	e820_print_type(old_type);
443	printk(KERN_CONT " ==> ");
444	e820_print_type(new_type);
445	printk(KERN_CONT "\n");
446
447	for (i = 0; i < e820x->nr_map; i++) {
448		struct e820entry *ei = &e820x->map[i];
449		u64 final_start, final_end;
450		u64 ei_end;
451
452		if (ei->type != old_type)
453			continue;
454
455		ei_end = ei->addr + ei->size;
456		/* totally covered by new range? */
457		if (ei->addr >= start && ei_end <= end) {
458			ei->type = new_type;
459			real_updated_size += ei->size;
460			continue;
461		}
462
463		/* new range is totally covered? */
464		if (ei->addr < start && ei_end > end) {
465			__e820_add_region(e820x, start, size, new_type);
466			__e820_add_region(e820x, end, ei_end - end, ei->type);
467			ei->size = start - ei->addr;
468			real_updated_size += size;
469			continue;
470		}
471
472		/* partially covered */
473		final_start = max(start, ei->addr);
474		final_end = min(end, ei_end);
475		if (final_start >= final_end)
476			continue;
477
478		__e820_add_region(e820x, final_start, final_end - final_start,
479				  new_type);
480
481		real_updated_size += final_end - final_start;
482
483		/*
484		 * left range could be head or tail, so need to update
485		 * size at first.
486		 */
487		ei->size -= final_end - final_start;
488		if (ei->addr < final_start)
489			continue;
490		ei->addr = final_end;
491	}
492	return real_updated_size;
493}
494
495u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
496			     unsigned new_type)
497{
498	return __e820_update_range(&e820, start, size, old_type, new_type);
499}
500
501static u64 __init e820_update_range_saved(u64 start, u64 size,
502					  unsigned old_type, unsigned new_type)
503{
504	return __e820_update_range(&e820_saved, start, size, old_type,
505				     new_type);
506}
507
508/* make e820 not cover the range */
509u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
510			     int checktype)
511{
512	int i;
513	u64 end;
514	u64 real_removed_size = 0;
515
516	if (size > (ULLONG_MAX - start))
517		size = ULLONG_MAX - start;
518
519	end = start + size;
520	printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
521		       (unsigned long long) start,
522		       (unsigned long long) end);
523	e820_print_type(old_type);
524	printk(KERN_CONT "\n");
525
526	for (i = 0; i < e820.nr_map; i++) {
527		struct e820entry *ei = &e820.map[i];
528		u64 final_start, final_end;
529
530		if (checktype && ei->type != old_type)
531			continue;
532		/* totally covered? */
533		if (ei->addr >= start &&
534		    (ei->addr + ei->size) <= (start + size)) {
535			real_removed_size += ei->size;
536			memset(ei, 0, sizeof(struct e820entry));
537			continue;
538		}
539		/* partially covered */
540		final_start = max(start, ei->addr);
541		final_end = min(start + size, ei->addr + ei->size);
542		if (final_start >= final_end)
543			continue;
544		real_removed_size += final_end - final_start;
545
546		ei->size -= final_end - final_start;
547		if (ei->addr < final_start)
548			continue;
549		ei->addr = final_end;
550	}
551	return real_removed_size;
552}
553
554void __init update_e820(void)
555{
556	u32 nr_map;
557
558	nr_map = e820.nr_map;
559	if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
560		return;
561	e820.nr_map = nr_map;
562	printk(KERN_INFO "modified physical RAM map:\n");
563	e820_print_map("modified");
564}
565static void __init update_e820_saved(void)
566{
567	u32 nr_map;
568
569	nr_map = e820_saved.nr_map;
570	if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
571		return;
572	e820_saved.nr_map = nr_map;
573}
574#define MAX_GAP_END 0x100000000ull
575/*
576 * Search for a gap in the e820 memory space from start_addr to end_addr.
577 */
578__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
579		unsigned long start_addr, unsigned long long end_addr)
580{
581	unsigned long long last;
582	int i = e820.nr_map;
583	int found = 0;
584
585	last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
586
587	while (--i >= 0) {
588		unsigned long long start = e820.map[i].addr;
589		unsigned long long end = start + e820.map[i].size;
590
591		if (end < start_addr)
592			continue;
593
594		/*
595		 * Since "last" is at most 4GB, we know we'll
596		 * fit in 32 bits if this condition is true
597		 */
598		if (last > end) {
599			unsigned long gap = last - end;
600
601			if (gap >= *gapsize) {
602				*gapsize = gap;
603				*gapstart = end;
604				found = 1;
605			}
606		}
607		if (start < last)
608			last = start;
609	}
610	return found;
611}
612
613/*
614 * Search for the biggest gap in the low 32 bits of the e820
615 * memory space.  We pass this space to PCI to assign MMIO resources
616 * for hotplug or unconfigured devices in.
617 * Hopefully the BIOS let enough space left.
618 */
619__init void e820_setup_gap(void)
620{
621	unsigned long gapstart, gapsize;
622	int found;
623
624	gapstart = 0x10000000;
625	gapsize = 0x400000;
626	found  = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);
627
628#ifdef CONFIG_X86_64
629	if (!found) {
630		gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
631		printk(KERN_ERR
632	"PCI: Warning: Cannot find a gap in the 32bit address range\n"
633	"PCI: Unassigned devices with 32bit resource registers may break!\n");
634	}
635#endif
636
637	/*
638	 * e820_reserve_resources_late protect stolen RAM already
639	 */
640	pci_mem_start = gapstart;
641
642	printk(KERN_INFO
643	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
644	       pci_mem_start, gapstart, gapsize);
645}
646
647/**
648 * Because of the size limitation of struct boot_params, only first
649 * 128 E820 memory entries are passed to kernel via
650 * boot_params.e820_map, others are passed via SETUP_E820_EXT node of
651 * linked list of struct setup_data, which is parsed here.
652 */
653void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
654{
655	u32 map_len;
656	int entries;
657	struct e820entry *extmap;
658
659	entries = sdata->len / sizeof(struct e820entry);
660	map_len = sdata->len + sizeof(struct setup_data);
661	if (map_len > PAGE_SIZE)
662		sdata = early_ioremap(pa_data, map_len);
663	extmap = (struct e820entry *)(sdata->data);
664	__append_e820_map(extmap, entries);
665	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
666	if (map_len > PAGE_SIZE)
667		early_iounmap(sdata, map_len);
668	printk(KERN_INFO "extended physical RAM map:\n");
669	e820_print_map("extended");
670}
671
#if defined(CONFIG_X86_64) || \
	(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
/**
 * Find the ranges of physical addresses that do not correspond to
 * e820 RAM areas and mark the corresponding pages as nosave for
 * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
 *
 * Both the holes between consecutive entries and the entries that are
 * neither E820_RAM nor E820_RESERVED_KERN are registered.  The walk
 * stops once an entry ends at or beyond limit_pfn.
 *
 * This function requires the e820 map to be sorted and without any
 * overlapping entries and assumes the first e820 area to be RAM.
 */
void __init e820_mark_nosave_regions(unsigned long limit_pfn)
{
	unsigned long prev_end_pfn;
	int idx;

	prev_end_pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);

	for (idx = 1; idx < e820.nr_map; idx++) {
		struct e820entry *entry = &e820.map[idx];
		u64 entry_start_pfn = PFN_UP(entry->addr);
		int is_ram = entry->type == E820_RAM ||
			     entry->type == E820_RESERVED_KERN;

		/* hole between the previous entry and this one */
		if (prev_end_pfn < entry_start_pfn)
			register_nosave_region(prev_end_pfn, entry_start_pfn);

		prev_end_pfn = PFN_DOWN(entry->addr + entry->size);

		/* the entry itself, unless it is RAM */
		if (!is_ram)
			register_nosave_region(entry_start_pfn, prev_end_pfn);

		if (prev_end_pfn >= limit_pfn)
			break;
	}
}
#endif
703
#ifdef CONFIG_HIBERNATION
/**
 * Register every E820_NVS (ACPI NVS) region of the e820 map with
 * hibernate_nvs_register(), so that it can be saved and restored across
 * hibernation and the subsequent resume.
 *
 * Runs as a core_initcall and always returns 0.
 */
static int __init e820_mark_nvs_memory(void)
{
	int idx = 0;

	while (idx < e820.nr_map) {
		struct e820entry *entry = &e820.map[idx++];

		if (entry->type != E820_NVS)
			continue;

		hibernate_nvs_register(entry->addr, entry->size);
	}

	return 0;
}
core_initcall(e820_mark_nvs_memory);
#endif
724
725/*
726 * pre allocated 4k and reserved it in e820
727 */
728u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
729{
730	u64 size = 0;
731	u64 addr;
732	u64 start;
733
734	for (start = startt; ; start += size) {
735		start = find_e820_area_size(start, &size, align);
736		if (!(start + 1))
737			return 0;
738		if (size >= sizet)
739			break;
740	}
741
742#ifdef CONFIG_X86_32
743	if (start >= MAXMEM)
744		return 0;
745	if (start + size > MAXMEM)
746		size = MAXMEM - start;
747#endif
748
749	addr = round_down(start + size - sizet, align);
750	if (addr < start)
751		return 0;
752	e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
753	e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
754	printk(KERN_INFO "update e820 for early_reserve_e820\n");
755	update_e820();
756	update_e820_saved();
757
758	return addr;
759}
760
/*
 * MAX_ARCH_PFN: number of page frames the architecture can address.
 * Every variant is fully parenthesized so that the macro can be used
 * inside a larger expression without operator-precedence surprises.
 */
#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_PAE
#  define MAX_ARCH_PFN		(1ULL<<(36-PAGE_SHIFT))
# else
#  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
# endif
#else /* CONFIG_X86_32 */
# define MAX_ARCH_PFN		((MAXMEM) >> PAGE_SHIFT)
#endif
770
771/*
772 * Find the highest page frame number we have available
773 */
774static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
775{
776	int i;
777	unsigned long last_pfn = 0;
778	unsigned long max_arch_pfn = MAX_ARCH_PFN;
779
780	for (i = 0; i < e820.nr_map; i++) {
781		struct e820entry *ei = &e820.map[i];
782		unsigned long start_pfn;
783		unsigned long end_pfn;
784
785		if (ei->type != type)
786			continue;
787
788		start_pfn = ei->addr >> PAGE_SHIFT;
789		end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
790
791		if (start_pfn >= limit_pfn)
792			continue;
793		if (end_pfn > limit_pfn) {
794			last_pfn = limit_pfn;
795			break;
796		}
797		if (end_pfn > last_pfn)
798			last_pfn = end_pfn;
799	}
800
801	if (last_pfn > max_arch_pfn)
802		last_pfn = max_arch_pfn;
803
804	printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
805			 last_pfn, max_arch_pfn);
806	return last_pfn;
807}
808unsigned long __init e820_end_of_ram_pfn(void)
809{
810	return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
811}
812
813unsigned long __init e820_end_of_low_ram_pfn(void)
814{
815	return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
816}
817/*
818 * Finds an active region in the address range from start_pfn to last_pfn and
819 * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
820 */
821int __init e820_find_active_region(const struct e820entry *ei,
822				  unsigned long start_pfn,
823				  unsigned long last_pfn,
824				  unsigned long *ei_startpfn,
825				  unsigned long *ei_endpfn)
826{
827	u64 align = PAGE_SIZE;
828
829	*ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
830	*ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
831
832	/* Skip map entries smaller than a page */
833	if (*ei_startpfn >= *ei_endpfn)
834		return 0;
835
836	/* Skip if map is outside the node */
837	if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
838				    *ei_startpfn >= last_pfn)
839		return 0;
840
841	/* Check for overlaps */
842	if (*ei_startpfn < start_pfn)
843		*ei_startpfn = start_pfn;
844	if (*ei_endpfn > last_pfn)
845		*ei_endpfn = last_pfn;
846
847	return 1;
848}
849
850/* Walk the e820 map and register active regions within a node */
851void __init e820_register_active_regions(int nid, unsigned long start_pfn,
852					 unsigned long last_pfn)
853{
854	unsigned long ei_startpfn;
855	unsigned long ei_endpfn;
856	int i;
857
858	for (i = 0; i < e820.nr_map; i++)
859		if (e820_find_active_region(&e820.map[i],
860					    start_pfn, last_pfn,
861					    &ei_startpfn, &ei_endpfn))
862			add_active_range(nid, ei_startpfn, ei_endpfn);
863}
864
865/*
866 * Find the hole size (in bytes) in the memory range.
867 * @start: starting address of the memory range to scan
868 * @end: ending address of the memory range to scan
869 */
870u64 __init e820_hole_size(u64 start, u64 end)
871{
872	unsigned long start_pfn = start >> PAGE_SHIFT;
873	unsigned long last_pfn = end >> PAGE_SHIFT;
874	unsigned long ei_startpfn, ei_endpfn, ram = 0;
875	int i;
876
877	for (i = 0; i < e820.nr_map; i++) {
878		if (e820_find_active_region(&e820.map[i],
879					    start_pfn, last_pfn,
880					    &ei_startpfn, &ei_endpfn))
881			ram += ei_endpfn - ei_startpfn;
882	}
883	return end - start - ((u64)ram << PAGE_SHIFT);
884}
885
/*
 * Print msg through early_printk() and then panic with the same text.
 *
 * msg is passed as an argument to a "%s" format instead of being used as
 * the format string itself, so a '%' inside it is printed literally and
 * never interpreted as a conversion.
 */
static void early_panic(char *msg)
{
	early_printk("%s", msg);
	panic("%s", msg);
}
891
892static int userdef __initdata;
893
894/* "mem=nopentium" disables the 4MB page tables. */
895static int __init parse_memopt(char *p)
896{
897	u64 mem_size;
898
899	if (!p)
900		return -EINVAL;
901
902#ifdef CONFIG_X86_32
903	if (!strcmp(p, "nopentium")) {
904		setup_clear_cpu_cap(X86_FEATURE_PSE);
905		return 0;
906	}
907#endif
908
909	userdef = 1;
910	mem_size = memparse(p, &p);
911	e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
912
913	return 0;
914}
915early_param("mem", parse_memopt);
916
917static int __init parse_memmap_opt(char *p)
918{
919	char *oldp;
920	u64 start_at, mem_size;
921
922	if (!p)
923		return -EINVAL;
924
925	if (!strncmp(p, "exactmap", 8)) {
926#ifdef CONFIG_CRASH_DUMP
927		/*
928		 * If we are doing a crash dump, we still need to know
929		 * the real mem size before original memory map is
930		 * reset.
931		 */
932		saved_max_pfn = e820_end_of_ram_pfn();
933#endif
934		e820.nr_map = 0;
935		userdef = 1;
936		return 0;
937	}
938
939	oldp = p;
940	mem_size = memparse(p, &p);
941	if (p == oldp)
942		return -EINVAL;
943
944	userdef = 1;
945	if (*p == '@') {
946		start_at = memparse(p+1, &p);
947		e820_add_region(start_at, mem_size, E820_RAM);
948	} else if (*p == '#') {
949		start_at = memparse(p+1, &p);
950		e820_add_region(start_at, mem_size, E820_ACPI);
951	} else if (*p == '$') {
952		start_at = memparse(p+1, &p);
953		e820_add_region(start_at, mem_size, E820_RESERVED);
954	} else
955		e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
956
957	return *p == '\0' ? 0 : -EINVAL;
958}
959early_param("memmap", parse_memmap_opt);
960
961void __init finish_e820_parsing(void)
962{
963	if (userdef) {
964		u32 nr = e820.nr_map;
965
966		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
967			early_panic("Invalid user supplied memory map");
968		e820.nr_map = nr;
969
970		printk(KERN_INFO "user-defined physical RAM map:\n");
971		e820_print_map("user");
972	}
973}
974
975static inline const char *e820_type_to_string(int e820_type)
976{
977	switch (e820_type) {
978	case E820_RESERVED_KERN:
979	case E820_RAM:	return "System RAM";
980	case E820_ACPI:	return "ACPI Tables";
981	case E820_NVS:	return "ACPI Non-volatile Storage";
982	case E820_UNUSABLE:	return "Unusable memory";
983	default:	return "reserved";
984	}
985}
986
987/*
988 * Mark e820 reserved areas as busy for the resource manager.
989 */
990static struct resource __initdata *e820_res;
991void __init e820_reserve_resources(void)
992{
993	int i;
994	struct resource *res;
995	u64 end;
996
997	res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
998	e820_res = res;
999	for (i = 0; i < e820.nr_map; i++) {
1000		end = e820.map[i].addr + e820.map[i].size - 1;
1001		if (end != (resource_size_t)end) {
1002			res++;
1003			continue;
1004		}
1005		res->name = e820_type_to_string(e820.map[i].type);
1006		res->start = e820.map[i].addr;
1007		res->end = end;
1008
1009		res->flags = IORESOURCE_MEM;
1010
1011		/*
1012		 * don't register the region that could be conflicted with
1013		 * pci device BAR resource and insert them later in
1014		 * pcibios_resource_survey()
1015		 */
1016		if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
1017			res->flags |= IORESOURCE_BUSY;
1018			insert_resource(&iomem_resource, res);
1019		}
1020		res++;
1021	}
1022
1023	for (i = 0; i < e820_saved.nr_map; i++) {
1024		struct e820entry *entry = &e820_saved.map[i];
1025		firmware_map_add_early(entry->addr,
1026			entry->addr + entry->size - 1,
1027			e820_type_to_string(entry->type));
1028	}
1029}
1030
1031/* How much should we pad RAM ending depending on where it is? */
1032static unsigned long ram_alignment(resource_size_t pos)
1033{
1034	unsigned long mb = pos >> 20;
1035
1036	/* To 64kB in the first megabyte */
1037	if (!mb)
1038		return 64*1024;
1039
1040	/* To 1MB in the first 16MB */
1041	if (mb < 16)
1042		return 1024*1024;
1043
1044	/* To 64MB for anything above that */
1045	return 64*1024*1024;
1046}
1047
1048#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
1049
1050void __init e820_reserve_resources_late(void)
1051{
1052	int i;
1053	struct resource *res;
1054
1055	res = e820_res;
1056	for (i = 0; i < e820.nr_map; i++) {
1057		if (!res->parent && res->end)
1058			insert_resource_expand_to_fit(&iomem_resource, res);
1059		res++;
1060	}
1061
1062	/*
1063	 * Try to bump up RAM regions to reasonable boundaries to
1064	 * avoid stolen RAM:
1065	 */
1066	for (i = 0; i < e820.nr_map; i++) {
1067		struct e820entry *entry = &e820.map[i];
1068		u64 start, end;
1069
1070		if (entry->type != E820_RAM)
1071			continue;
1072		start = entry->addr + entry->size;
1073		end = round_up(start, ram_alignment(start)) - 1;
1074		if (end > MAX_RESOURCE_SIZE)
1075			end = MAX_RESOURCE_SIZE;
1076		if (start >= end)
1077			continue;
1078		printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
1079			       start, end);
1080		reserve_region_with_split(&iomem_resource, start, end,
1081					  "RAM buffer");
1082	}
1083}
1084
1085char *__init default_machine_specific_memory_setup(void)
1086{
1087	char *who = "BIOS-e820";
1088	u32 new_nr;
1089	/*
1090	 * Try to copy the BIOS-supplied E820-map.
1091	 *
1092	 * Otherwise fake a memory map; one section from 0k->640k,
1093	 * the next section from 1mb->appropriate_mem_k
1094	 */
1095	new_nr = boot_params.e820_entries;
1096	sanitize_e820_map(boot_params.e820_map,
1097			ARRAY_SIZE(boot_params.e820_map),
1098			&new_nr);
1099	boot_params.e820_entries = new_nr;
1100	if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
1101	  < 0) {
1102		u64 mem_size;
1103
1104		/* compare results from other methods and take the greater */
1105		if (boot_params.alt_mem_k
1106		    < boot_params.screen_info.ext_mem_k) {
1107			mem_size = boot_params.screen_info.ext_mem_k;
1108			who = "BIOS-88";
1109		} else {
1110			mem_size = boot_params.alt_mem_k;
1111			who = "BIOS-e801";
1112		}
1113
1114		e820.nr_map = 0;
1115		e820_add_region(0, LOWMEMSIZE(), E820_RAM);
1116		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
1117	}
1118
1119	/* In case someone cares... */
1120	return who;
1121}
1122
1123void __init setup_memory_map(void)
1124{
1125	char *who;
1126
1127	who = x86_init.resources.memory_setup();
1128	memcpy(&e820_saved, &e820, sizeof(struct e820map));
1129	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1130	e820_print_map(who);
1131}
1132