/* e820.c — revision 77eed821accf5dd962b1f13bed0680e217e49112 */
1/*
2 * Handle the memory map.
3 * The functions here do the job until bootmem takes over.
4 *
5 *  Getting sanitize_e820_map() in sync with i386 version by applying change:
6 *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
7 *     Alex Achenbach <xela@slit.de>, December 2002.
8 *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
9 *
10 */
11#include <linux/kernel.h>
12#include <linux/types.h>
13#include <linux/init.h>
14#include <linux/bootmem.h>
15#include <linux/pfn.h>
16#include <linux/suspend.h>
17#include <linux/acpi.h>
18#include <linux/firmware-map.h>
19#include <linux/memblock.h>
20
21#include <asm/e820.h>
22#include <asm/proto.h>
23#include <asm/setup.h>
24
25/*
26 * The e820 map is the map that gets modified e.g. with command line parameters
27 * and that is also registered with modifications in the kernel resource tree
28 * with the iomem_resource as parent.
29 *
30 * The e820_saved is directly saved after the BIOS-provided memory map is
31 * copied. It doesn't get modified afterwards. It's registered for the
32 * /sys/firmware/memmap interface.
33 *
34 * That memory map is not modified and is used as base for kexec. The kexec'd
35 * kernel should get the same memory map as the firmware provides. Then the
36 * user can e.g. boot the original kernel with mem=1G while still booting the
37 * next kernel with full memory.
38 */
39struct e820map e820;
40struct e820map e820_saved;
41
42/* For PCI or other memory-mapped resources */
43unsigned long pci_mem_start = 0xaeedbabe;
44#ifdef CONFIG_PCI
45EXPORT_SYMBOL(pci_mem_start);
46#endif
47
48/*
49 * This function checks if any part of the range <start,end> is mapped
50 * with type.
51 */
52int
53e820_any_mapped(u64 start, u64 end, unsigned type)
54{
55	int i;
56
57	for (i = 0; i < e820.nr_map; i++) {
58		struct e820entry *ei = &e820.map[i];
59
60		if (type && ei->type != type)
61			continue;
62		if (ei->addr >= end || ei->addr + ei->size <= start)
63			continue;
64		return 1;
65	}
66	return 0;
67}
68EXPORT_SYMBOL_GPL(e820_any_mapped);
69
70/*
71 * This function checks if the entire range <start,end> is mapped with type.
72 *
73 * Note: this function only works correct if the e820 table is sorted and
74 * not-overlapping, which is the case
75 */
76int __init e820_all_mapped(u64 start, u64 end, unsigned type)
77{
78	int i;
79
80	for (i = 0; i < e820.nr_map; i++) {
81		struct e820entry *ei = &e820.map[i];
82
83		if (type && ei->type != type)
84			continue;
85		/* is the region (part) in overlap with the current region ?*/
86		if (ei->addr >= end || ei->addr + ei->size <= start)
87			continue;
88
89		/* if the region is at the beginning of <start,end> we move
90		 * start to the end of the region since it's ok until there
91		 */
92		if (ei->addr <= start)
93			start = ei->addr + ei->size;
94		/*
95		 * if start is now at or beyond end, we're done, full
96		 * coverage
97		 */
98		if (start >= end)
99			return 1;
100	}
101	return 0;
102}
103
104/*
105 * Add a memory region to the kernel e820 map.
106 */
107static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
108					 int type)
109{
110	int x = e820x->nr_map;
111
112	if (x >= ARRAY_SIZE(e820x->map)) {
113		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
114		return;
115	}
116
117	e820x->map[x].addr = start;
118	e820x->map[x].size = size;
119	e820x->map[x].type = type;
120	e820x->nr_map++;
121}
122
123void __init e820_add_region(u64 start, u64 size, int type)
124{
125	__e820_add_region(&e820, start, size, type);
126}
127
128static void __init e820_print_type(u32 type)
129{
130	switch (type) {
131	case E820_RAM:
132	case E820_RESERVED_KERN:
133		printk(KERN_CONT "(usable)");
134		break;
135	case E820_RESERVED:
136		printk(KERN_CONT "(reserved)");
137		break;
138	case E820_ACPI:
139		printk(KERN_CONT "(ACPI data)");
140		break;
141	case E820_NVS:
142		printk(KERN_CONT "(ACPI NVS)");
143		break;
144	case E820_UNUSABLE:
145		printk(KERN_CONT "(unusable)");
146		break;
147	default:
148		printk(KERN_CONT "type %u", type);
149		break;
150	}
151}
152
153void __init e820_print_map(char *who)
154{
155	int i;
156
157	for (i = 0; i < e820.nr_map; i++) {
158		printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
159		       (unsigned long long) e820.map[i].addr,
160		       (unsigned long long)
161		       (e820.map[i].addr + e820.map[i].size));
162		e820_print_type(e820.map[i].type);
163		printk(KERN_CONT "\n");
164	}
165}
166
167/*
168 * Sanitize the BIOS e820 map.
169 *
170 * Some e820 responses include overlapping entries. The following
171 * replaces the original e820 map with a new one, removing overlaps,
172 * and resolving conflicting memory types in favor of highest
173 * numbered type.
174 *
175 * The input parameter biosmap points to an array of 'struct
176 * e820entry' which on entry has elements in the range [0, *pnr_map)
177 * valid, and which has space for up to max_nr_map entries.
178 * On return, the resulting sanitized e820 map entries will be in
179 * overwritten in the same location, starting at biosmap.
180 *
181 * The integer pointed to by pnr_map must be valid on entry (the
182 * current number of valid entries located at biosmap) and will
183 * be updated on return, with the new number of valid entries
184 * (something no more than max_nr_map.)
185 *
186 * The return value from sanitize_e820_map() is zero if it
187 * successfully 'sanitized' the map entries passed in, and is -1
188 * if it did nothing, which can happen if either of (1) it was
189 * only passed one map entry, or (2) any of the input map entries
190 * were invalid (start + size < start, meaning that the size was
191 * so big the described memory range wrapped around through zero.)
192 *
193 *	Visually we're performing the following
194 *	(1,2,3,4 = memory types)...
195 *
196 *	Sample memory map (w/overlaps):
197 *	   ____22__________________
198 *	   ______________________4_
199 *	   ____1111________________
200 *	   _44_____________________
201 *	   11111111________________
202 *	   ____________________33__
203 *	   ___________44___________
204 *	   __________33333_________
205 *	   ______________22________
206 *	   ___________________2222_
207 *	   _________111111111______
208 *	   _____________________11_
209 *	   _________________4______
210 *
211 *	Sanitized equivalent (no overlap):
212 *	   1_______________________
213 *	   _44_____________________
214 *	   ___1____________________
215 *	   ____22__________________
216 *	   ______11________________
217 *	   _________1______________
218 *	   __________3_____________
219 *	   ___________44___________
220 *	   _____________33_________
221 *	   _______________2________
222 *	   ________________1_______
223 *	   _________________4______
224 *	   ___________________2____
225 *	   ____________________33__
226 *	   ______________________4_
227 */
228
229int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
230			     u32 *pnr_map)
231{
232	struct change_member {
233		struct e820entry *pbios; /* pointer to original bios entry */
234		unsigned long long addr; /* address for this change point */
235	};
236	static struct change_member change_point_list[2*E820_X_MAX] __initdata;
237	static struct change_member *change_point[2*E820_X_MAX] __initdata;
238	static struct e820entry *overlap_list[E820_X_MAX] __initdata;
239	static struct e820entry new_bios[E820_X_MAX] __initdata;
240	struct change_member *change_tmp;
241	unsigned long current_type, last_type;
242	unsigned long long last_addr;
243	int chgidx, still_changing;
244	int overlap_entries;
245	int new_bios_entry;
246	int old_nr, new_nr, chg_nr;
247	int i;
248
249	/* if there's only one memory region, don't bother */
250	if (*pnr_map < 2)
251		return -1;
252
253	old_nr = *pnr_map;
254	BUG_ON(old_nr > max_nr_map);
255
256	/* bail out if we find any unreasonable addresses in bios map */
257	for (i = 0; i < old_nr; i++)
258		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
259			return -1;
260
261	/* create pointers for initial change-point information (for sorting) */
262	for (i = 0; i < 2 * old_nr; i++)
263		change_point[i] = &change_point_list[i];
264
265	/* record all known change-points (starting and ending addresses),
266	   omitting those that are for empty memory regions */
267	chgidx = 0;
268	for (i = 0; i < old_nr; i++)	{
269		if (biosmap[i].size != 0) {
270			change_point[chgidx]->addr = biosmap[i].addr;
271			change_point[chgidx++]->pbios = &biosmap[i];
272			change_point[chgidx]->addr = biosmap[i].addr +
273				biosmap[i].size;
274			change_point[chgidx++]->pbios = &biosmap[i];
275		}
276	}
277	chg_nr = chgidx;
278
279	/* sort change-point list by memory addresses (low -> high) */
280	still_changing = 1;
281	while (still_changing)	{
282		still_changing = 0;
283		for (i = 1; i < chg_nr; i++)  {
284			unsigned long long curaddr, lastaddr;
285			unsigned long long curpbaddr, lastpbaddr;
286
287			curaddr = change_point[i]->addr;
288			lastaddr = change_point[i - 1]->addr;
289			curpbaddr = change_point[i]->pbios->addr;
290			lastpbaddr = change_point[i - 1]->pbios->addr;
291
292			/*
293			 * swap entries, when:
294			 *
295			 * curaddr > lastaddr or
296			 * curaddr == lastaddr and curaddr == curpbaddr and
297			 * lastaddr != lastpbaddr
298			 */
299			if (curaddr < lastaddr ||
300			    (curaddr == lastaddr && curaddr == curpbaddr &&
301			     lastaddr != lastpbaddr)) {
302				change_tmp = change_point[i];
303				change_point[i] = change_point[i-1];
304				change_point[i-1] = change_tmp;
305				still_changing = 1;
306			}
307		}
308	}
309
310	/* create a new bios memory map, removing overlaps */
311	overlap_entries = 0;	 /* number of entries in the overlap table */
312	new_bios_entry = 0;	 /* index for creating new bios map entries */
313	last_type = 0;		 /* start with undefined memory type */
314	last_addr = 0;		 /* start with 0 as last starting address */
315
316	/* loop through change-points, determining affect on the new bios map */
317	for (chgidx = 0; chgidx < chg_nr; chgidx++) {
318		/* keep track of all overlapping bios entries */
319		if (change_point[chgidx]->addr ==
320		    change_point[chgidx]->pbios->addr) {
321			/*
322			 * add map entry to overlap list (> 1 entry
323			 * implies an overlap)
324			 */
325			overlap_list[overlap_entries++] =
326				change_point[chgidx]->pbios;
327		} else {
328			/*
329			 * remove entry from list (order independent,
330			 * so swap with last)
331			 */
332			for (i = 0; i < overlap_entries; i++) {
333				if (overlap_list[i] ==
334				    change_point[chgidx]->pbios)
335					overlap_list[i] =
336						overlap_list[overlap_entries-1];
337			}
338			overlap_entries--;
339		}
340		/*
341		 * if there are overlapping entries, decide which
342		 * "type" to use (larger value takes precedence --
343		 * 1=usable, 2,3,4,4+=unusable)
344		 */
345		current_type = 0;
346		for (i = 0; i < overlap_entries; i++)
347			if (overlap_list[i]->type > current_type)
348				current_type = overlap_list[i]->type;
349		/*
350		 * continue building up new bios map based on this
351		 * information
352		 */
353		if (current_type != last_type)	{
354			if (last_type != 0)	 {
355				new_bios[new_bios_entry].size =
356					change_point[chgidx]->addr - last_addr;
357				/*
358				 * move forward only if the new size
359				 * was non-zero
360				 */
361				if (new_bios[new_bios_entry].size != 0)
362					/*
363					 * no more space left for new
364					 * bios entries ?
365					 */
366					if (++new_bios_entry >= max_nr_map)
367						break;
368			}
369			if (current_type != 0)	{
370				new_bios[new_bios_entry].addr =
371					change_point[chgidx]->addr;
372				new_bios[new_bios_entry].type = current_type;
373				last_addr = change_point[chgidx]->addr;
374			}
375			last_type = current_type;
376		}
377	}
378	/* retain count for new bios entries */
379	new_nr = new_bios_entry;
380
381	/* copy new bios mapping into original location */
382	memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
383	*pnr_map = new_nr;
384
385	return 0;
386}
387
388static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
389{
390	while (nr_map) {
391		u64 start = biosmap->addr;
392		u64 size = biosmap->size;
393		u64 end = start + size;
394		u32 type = biosmap->type;
395
396		/* Overflow in 64 bits? Ignore the memory map. */
397		if (start > end)
398			return -1;
399
400		e820_add_region(start, size, type);
401
402		biosmap++;
403		nr_map--;
404	}
405	return 0;
406}
407
408/*
409 * Copy the BIOS e820 map into a safe place.
410 *
411 * Sanity-check it while we're at it..
412 *
413 * If we're lucky and live on a modern system, the setup code
414 * will have given us a memory map that we can use to properly
415 * set up memory.  If we aren't, we'll fake a memory map.
416 */
417static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
418{
419	/* Only one memory region (or negative)? Ignore it */
420	if (nr_map < 2)
421		return -1;
422
423	return __append_e820_map(biosmap, nr_map);
424}
425
426static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
427					u64 size, unsigned old_type,
428					unsigned new_type)
429{
430	u64 end;
431	unsigned int i;
432	u64 real_updated_size = 0;
433
434	BUG_ON(old_type == new_type);
435
436	if (size > (ULLONG_MAX - start))
437		size = ULLONG_MAX - start;
438
439	end = start + size;
440	printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ",
441		       (unsigned long long) start,
442		       (unsigned long long) end);
443	e820_print_type(old_type);
444	printk(KERN_CONT " ==> ");
445	e820_print_type(new_type);
446	printk(KERN_CONT "\n");
447
448	for (i = 0; i < e820x->nr_map; i++) {
449		struct e820entry *ei = &e820x->map[i];
450		u64 final_start, final_end;
451		u64 ei_end;
452
453		if (ei->type != old_type)
454			continue;
455
456		ei_end = ei->addr + ei->size;
457		/* totally covered by new range? */
458		if (ei->addr >= start && ei_end <= end) {
459			ei->type = new_type;
460			real_updated_size += ei->size;
461			continue;
462		}
463
464		/* new range is totally covered? */
465		if (ei->addr < start && ei_end > end) {
466			__e820_add_region(e820x, start, size, new_type);
467			__e820_add_region(e820x, end, ei_end - end, ei->type);
468			ei->size = start - ei->addr;
469			real_updated_size += size;
470			continue;
471		}
472
473		/* partially covered */
474		final_start = max(start, ei->addr);
475		final_end = min(end, ei_end);
476		if (final_start >= final_end)
477			continue;
478
479		__e820_add_region(e820x, final_start, final_end - final_start,
480				  new_type);
481
482		real_updated_size += final_end - final_start;
483
484		/*
485		 * left range could be head or tail, so need to update
486		 * size at first.
487		 */
488		ei->size -= final_end - final_start;
489		if (ei->addr < final_start)
490			continue;
491		ei->addr = final_end;
492	}
493	return real_updated_size;
494}
495
496u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
497			     unsigned new_type)
498{
499	return __e820_update_range(&e820, start, size, old_type, new_type);
500}
501
502static u64 __init e820_update_range_saved(u64 start, u64 size,
503					  unsigned old_type, unsigned new_type)
504{
505	return __e820_update_range(&e820_saved, start, size, old_type,
506				     new_type);
507}
508
509/* make e820 not cover the range */
510u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
511			     int checktype)
512{
513	int i;
514	u64 end;
515	u64 real_removed_size = 0;
516
517	if (size > (ULLONG_MAX - start))
518		size = ULLONG_MAX - start;
519
520	end = start + size;
521	printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
522		       (unsigned long long) start,
523		       (unsigned long long) end);
524	if (checktype)
525		e820_print_type(old_type);
526	printk(KERN_CONT "\n");
527
528	for (i = 0; i < e820.nr_map; i++) {
529		struct e820entry *ei = &e820.map[i];
530		u64 final_start, final_end;
531		u64 ei_end;
532
533		if (checktype && ei->type != old_type)
534			continue;
535
536		ei_end = ei->addr + ei->size;
537		/* totally covered? */
538		if (ei->addr >= start && ei_end <= end) {
539			real_removed_size += ei->size;
540			memset(ei, 0, sizeof(struct e820entry));
541			continue;
542		}
543
544		/* new range is totally covered? */
545		if (ei->addr < start && ei_end > end) {
546			e820_add_region(end, ei_end - end, ei->type);
547			ei->size = start - ei->addr;
548			real_removed_size += size;
549			continue;
550		}
551
552		/* partially covered */
553		final_start = max(start, ei->addr);
554		final_end = min(end, ei_end);
555		if (final_start >= final_end)
556			continue;
557		real_removed_size += final_end - final_start;
558
559		/*
560		 * left range could be head or tail, so need to update
561		 * size at first.
562		 */
563		ei->size -= final_end - final_start;
564		if (ei->addr < final_start)
565			continue;
566		ei->addr = final_end;
567	}
568	return real_removed_size;
569}
570
571void __init update_e820(void)
572{
573	u32 nr_map;
574
575	nr_map = e820.nr_map;
576	if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
577		return;
578	e820.nr_map = nr_map;
579	printk(KERN_INFO "modified physical RAM map:\n");
580	e820_print_map("modified");
581}
582static void __init update_e820_saved(void)
583{
584	u32 nr_map;
585
586	nr_map = e820_saved.nr_map;
587	if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
588		return;
589	e820_saved.nr_map = nr_map;
590}
591#define MAX_GAP_END 0x100000000ull
592/*
593 * Search for a gap in the e820 memory space from start_addr to end_addr.
594 */
595__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
596		unsigned long start_addr, unsigned long long end_addr)
597{
598	unsigned long long last;
599	int i = e820.nr_map;
600	int found = 0;
601
602	last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
603
604	while (--i >= 0) {
605		unsigned long long start = e820.map[i].addr;
606		unsigned long long end = start + e820.map[i].size;
607
608		if (end < start_addr)
609			continue;
610
611		/*
612		 * Since "last" is at most 4GB, we know we'll
613		 * fit in 32 bits if this condition is true
614		 */
615		if (last > end) {
616			unsigned long gap = last - end;
617
618			if (gap >= *gapsize) {
619				*gapsize = gap;
620				*gapstart = end;
621				found = 1;
622			}
623		}
624		if (start < last)
625			last = start;
626	}
627	return found;
628}
629
630/*
631 * Search for the biggest gap in the low 32 bits of the e820
632 * memory space.  We pass this space to PCI to assign MMIO resources
633 * for hotplug or unconfigured devices in.
634 * Hopefully the BIOS let enough space left.
635 */
636__init void e820_setup_gap(void)
637{
638	unsigned long gapstart, gapsize;
639	int found;
640
641	gapstart = 0x10000000;
642	gapsize = 0x400000;
643	found  = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);
644
645#ifdef CONFIG_X86_64
646	if (!found) {
647		gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
648		printk(KERN_ERR
649	"PCI: Warning: Cannot find a gap in the 32bit address range\n"
650	"PCI: Unassigned devices with 32bit resource registers may break!\n");
651	}
652#endif
653
654	/*
655	 * e820_reserve_resources_late protect stolen RAM already
656	 */
657	pci_mem_start = gapstart;
658
659	printk(KERN_INFO
660	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
661	       pci_mem_start, gapstart, gapsize);
662}
663
664/**
665 * Because of the size limitation of struct boot_params, only first
666 * 128 E820 memory entries are passed to kernel via
667 * boot_params.e820_map, others are passed via SETUP_E820_EXT node of
668 * linked list of struct setup_data, which is parsed here.
669 */
670void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
671{
672	u32 map_len;
673	int entries;
674	struct e820entry *extmap;
675
676	entries = sdata->len / sizeof(struct e820entry);
677	map_len = sdata->len + sizeof(struct setup_data);
678	if (map_len > PAGE_SIZE)
679		sdata = early_ioremap(pa_data, map_len);
680	extmap = (struct e820entry *)(sdata->data);
681	__append_e820_map(extmap, entries);
682	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
683	if (map_len > PAGE_SIZE)
684		early_iounmap(sdata, map_len);
685	printk(KERN_INFO "extended physical RAM map:\n");
686	e820_print_map("extended");
687}
688
689#if defined(CONFIG_X86_64) || \
690	(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
691/**
692 * Find the ranges of physical addresses that do not correspond to
693 * e820 RAM areas and mark the corresponding pages as nosave for
694 * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
695 *
696 * This function requires the e820 map to be sorted and without any
697 * overlapping entries and assumes the first e820 area to be RAM.
698 */
699void __init e820_mark_nosave_regions(unsigned long limit_pfn)
700{
701	int i;
702	unsigned long pfn;
703
704	pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
705	for (i = 1; i < e820.nr_map; i++) {
706		struct e820entry *ei = &e820.map[i];
707
708		if (pfn < PFN_UP(ei->addr))
709			register_nosave_region(pfn, PFN_UP(ei->addr));
710
711		pfn = PFN_DOWN(ei->addr + ei->size);
712		if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
713			register_nosave_region(PFN_UP(ei->addr), pfn);
714
715		if (pfn >= limit_pfn)
716			break;
717	}
718}
719#endif
720
721#ifdef CONFIG_HIBERNATION
722/**
723 * Mark ACPI NVS memory region, so that we can save/restore it during
724 * hibernation and the subsequent resume.
725 */
726static int __init e820_mark_nvs_memory(void)
727{
728	int i;
729
730	for (i = 0; i < e820.nr_map; i++) {
731		struct e820entry *ei = &e820.map[i];
732
733		if (ei->type == E820_NVS)
734			suspend_nvs_register(ei->addr, ei->size);
735	}
736
737	return 0;
738}
739core_initcall(e820_mark_nvs_memory);
740#endif
741
742/*
743 * pre allocated 4k and reserved it in memblock and e820_saved
744 */
745u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
746{
747	u64 size = 0;
748	u64 addr;
749	u64 start;
750
751	for (start = startt; ; start += size) {
752		start = memblock_x86_find_in_range_size(start, &size, align);
753		if (start == MEMBLOCK_ERROR)
754			return 0;
755		if (size >= sizet)
756			break;
757	}
758
759#ifdef CONFIG_X86_32
760	if (start >= MAXMEM)
761		return 0;
762	if (start + size > MAXMEM)
763		size = MAXMEM - start;
764#endif
765
766	addr = round_down(start + size - sizet, align);
767	if (addr < start)
768		return 0;
769	memblock_x86_reserve_range(addr, addr + sizet, "new next");
770	e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
771	printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
772	update_e820_saved();
773
774	return addr;
775}
776
777#ifdef CONFIG_X86_32
778# ifdef CONFIG_X86_PAE
779#  define MAX_ARCH_PFN		(1ULL<<(36-PAGE_SHIFT))
780# else
781#  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
782# endif
783#else /* CONFIG_X86_32 */
784# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
785#endif
786
787/*
788 * Find the highest page frame number we have available
789 */
790static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
791{
792	int i;
793	unsigned long last_pfn = 0;
794	unsigned long max_arch_pfn = MAX_ARCH_PFN;
795
796	for (i = 0; i < e820.nr_map; i++) {
797		struct e820entry *ei = &e820.map[i];
798		unsigned long start_pfn;
799		unsigned long end_pfn;
800
801		if (ei->type != type)
802			continue;
803
804		start_pfn = ei->addr >> PAGE_SHIFT;
805		end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
806
807		if (start_pfn >= limit_pfn)
808			continue;
809		if (end_pfn > limit_pfn) {
810			last_pfn = limit_pfn;
811			break;
812		}
813		if (end_pfn > last_pfn)
814			last_pfn = end_pfn;
815	}
816
817	if (last_pfn > max_arch_pfn)
818		last_pfn = max_arch_pfn;
819
820	printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
821			 last_pfn, max_arch_pfn);
822	return last_pfn;
823}
824unsigned long __init e820_end_of_ram_pfn(void)
825{
826	return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
827}
828
829unsigned long __init e820_end_of_low_ram_pfn(void)
830{
831	return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
832}
833
834static void early_panic(char *msg)
835{
836	early_printk(msg);
837	panic(msg);
838}
839
840static int userdef __initdata;
841
842/* "mem=nopentium" disables the 4MB page tables. */
843static int __init parse_memopt(char *p)
844{
845	u64 mem_size;
846
847	if (!p)
848		return -EINVAL;
849
850#ifdef CONFIG_X86_32
851	if (!strcmp(p, "nopentium")) {
852		setup_clear_cpu_cap(X86_FEATURE_PSE);
853		return 0;
854	}
855#endif
856
857	userdef = 1;
858	mem_size = memparse(p, &p);
859	/* don't remove all of memory when handling "mem={invalid}" param */
860	if (mem_size == 0)
861		return -EINVAL;
862	e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
863
864	return 0;
865}
866early_param("mem", parse_memopt);
867
868static int __init parse_memmap_opt(char *p)
869{
870	char *oldp;
871	u64 start_at, mem_size;
872
873	if (!p)
874		return -EINVAL;
875
876	if (!strncmp(p, "exactmap", 8)) {
877#ifdef CONFIG_CRASH_DUMP
878		/*
879		 * If we are doing a crash dump, we still need to know
880		 * the real mem size before original memory map is
881		 * reset.
882		 */
883		saved_max_pfn = e820_end_of_ram_pfn();
884#endif
885		e820.nr_map = 0;
886		userdef = 1;
887		return 0;
888	}
889
890	oldp = p;
891	mem_size = memparse(p, &p);
892	if (p == oldp)
893		return -EINVAL;
894
895	userdef = 1;
896	if (*p == '@') {
897		start_at = memparse(p+1, &p);
898		e820_add_region(start_at, mem_size, E820_RAM);
899	} else if (*p == '#') {
900		start_at = memparse(p+1, &p);
901		e820_add_region(start_at, mem_size, E820_ACPI);
902	} else if (*p == '$') {
903		start_at = memparse(p+1, &p);
904		e820_add_region(start_at, mem_size, E820_RESERVED);
905	} else
906		e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
907
908	return *p == '\0' ? 0 : -EINVAL;
909}
910early_param("memmap", parse_memmap_opt);
911
912void __init finish_e820_parsing(void)
913{
914	if (userdef) {
915		u32 nr = e820.nr_map;
916
917		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
918			early_panic("Invalid user supplied memory map");
919		e820.nr_map = nr;
920
921		printk(KERN_INFO "user-defined physical RAM map:\n");
922		e820_print_map("user");
923	}
924}
925
926static inline const char *e820_type_to_string(int e820_type)
927{
928	switch (e820_type) {
929	case E820_RESERVED_KERN:
930	case E820_RAM:	return "System RAM";
931	case E820_ACPI:	return "ACPI Tables";
932	case E820_NVS:	return "ACPI Non-volatile Storage";
933	case E820_UNUSABLE:	return "Unusable memory";
934	default:	return "reserved";
935	}
936}
937
938/*
939 * Mark e820 reserved areas as busy for the resource manager.
940 */
941static struct resource __initdata *e820_res;
942void __init e820_reserve_resources(void)
943{
944	int i;
945	struct resource *res;
946	u64 end;
947
948	res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
949	e820_res = res;
950	for (i = 0; i < e820.nr_map; i++) {
951		end = e820.map[i].addr + e820.map[i].size - 1;
952		if (end != (resource_size_t)end) {
953			res++;
954			continue;
955		}
956		res->name = e820_type_to_string(e820.map[i].type);
957		res->start = e820.map[i].addr;
958		res->end = end;
959
960		res->flags = IORESOURCE_MEM;
961
962		/*
963		 * don't register the region that could be conflicted with
964		 * pci device BAR resource and insert them later in
965		 * pcibios_resource_survey()
966		 */
967		if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
968			res->flags |= IORESOURCE_BUSY;
969			insert_resource(&iomem_resource, res);
970		}
971		res++;
972	}
973
974	for (i = 0; i < e820_saved.nr_map; i++) {
975		struct e820entry *entry = &e820_saved.map[i];
976		firmware_map_add_early(entry->addr,
977			entry->addr + entry->size - 1,
978			e820_type_to_string(entry->type));
979	}
980}
981
982/* How much should we pad RAM ending depending on where it is? */
983static unsigned long ram_alignment(resource_size_t pos)
984{
985	unsigned long mb = pos >> 20;
986
987	/* To 64kB in the first megabyte */
988	if (!mb)
989		return 64*1024;
990
991	/* To 1MB in the first 16MB */
992	if (mb < 16)
993		return 1024*1024;
994
995	/* To 64MB for anything above that */
996	return 64*1024*1024;
997}
998
999#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
1000
1001void __init e820_reserve_resources_late(void)
1002{
1003	int i;
1004	struct resource *res;
1005
1006	res = e820_res;
1007	for (i = 0; i < e820.nr_map; i++) {
1008		if (!res->parent && res->end)
1009			insert_resource_expand_to_fit(&iomem_resource, res);
1010		res++;
1011	}
1012
1013	/*
1014	 * Try to bump up RAM regions to reasonable boundaries to
1015	 * avoid stolen RAM:
1016	 */
1017	for (i = 0; i < e820.nr_map; i++) {
1018		struct e820entry *entry = &e820.map[i];
1019		u64 start, end;
1020
1021		if (entry->type != E820_RAM)
1022			continue;
1023		start = entry->addr + entry->size;
1024		end = round_up(start, ram_alignment(start)) - 1;
1025		if (end > MAX_RESOURCE_SIZE)
1026			end = MAX_RESOURCE_SIZE;
1027		if (start >= end)
1028			continue;
1029		printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
1030			       start, end);
1031		reserve_region_with_split(&iomem_resource, start, end,
1032					  "RAM buffer");
1033	}
1034}
1035
1036char *__init default_machine_specific_memory_setup(void)
1037{
1038	char *who = "BIOS-e820";
1039	u32 new_nr;
1040	/*
1041	 * Try to copy the BIOS-supplied E820-map.
1042	 *
1043	 * Otherwise fake a memory map; one section from 0k->640k,
1044	 * the next section from 1mb->appropriate_mem_k
1045	 */
1046	new_nr = boot_params.e820_entries;
1047	sanitize_e820_map(boot_params.e820_map,
1048			ARRAY_SIZE(boot_params.e820_map),
1049			&new_nr);
1050	boot_params.e820_entries = new_nr;
1051	if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
1052	  < 0) {
1053		u64 mem_size;
1054
1055		/* compare results from other methods and take the greater */
1056		if (boot_params.alt_mem_k
1057		    < boot_params.screen_info.ext_mem_k) {
1058			mem_size = boot_params.screen_info.ext_mem_k;
1059			who = "BIOS-88";
1060		} else {
1061			mem_size = boot_params.alt_mem_k;
1062			who = "BIOS-e801";
1063		}
1064
1065		e820.nr_map = 0;
1066		e820_add_region(0, LOWMEMSIZE(), E820_RAM);
1067		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
1068	}
1069
1070	/* In case someone cares... */
1071	return who;
1072}
1073
1074void __init setup_memory_map(void)
1075{
1076	char *who;
1077
1078	who = x86_init.resources.memory_setup();
1079	memcpy(&e820_saved, &e820, sizeof(struct e820map));
1080	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1081	e820_print_map(who);
1082}
1083
1084void __init memblock_x86_fill(void)
1085{
1086	int i;
1087	u64 end;
1088
1089	/*
1090	 * EFI may have more than 128 entries
1091	 * We are safe to enable resizing, beause memblock_x86_fill()
1092	 * is rather later for x86
1093	 */
1094	memblock_can_resize = 1;
1095
1096	for (i = 0; i < e820.nr_map; i++) {
1097		struct e820entry *ei = &e820.map[i];
1098
1099		end = ei->addr + ei->size;
1100		if (end != (resource_size_t)end)
1101			continue;
1102
1103		if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
1104			continue;
1105
1106		memblock_add(ei->addr, ei->size);
1107	}
1108
1109	memblock_analyze();
1110	memblock_dump_all();
1111}
1112
1113void __init memblock_find_dma_reserve(void)
1114{
1115#ifdef CONFIG_X86_64
1116	u64 free_size_pfn;
1117	u64 mem_size_pfn;
1118	/*
1119	 * need to find out used area below MAX_DMA_PFN
1120	 * need to use memblock to get free size in [0, MAX_DMA_PFN]
1121	 * at first, and assume boot_mem will not take below MAX_DMA_PFN
1122	 */
1123	mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
1124	free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
1125	set_dma_reserve(mem_size_pfn - free_size_pfn);
1126#endif
1127}
1128