/* e820.c — revision ab5d140b9eafae402aa3e673a63c5ef6164a9dd2 */
/*
 * Handle the memory map.
 * The functions here do the job until bootmem takes over.
 *
 *  Getting sanitize_e820_map() in sync with i386 version by applying change:
 *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
 *     Alex Achenbach <xela@slit.de>, December 2002.
 *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *
 */
11#include <linux/kernel.h>
12#include <linux/types.h>
13#include <linux/init.h>
14#include <linux/crash_dump.h>
15#include <linux/bootmem.h>
16#include <linux/pfn.h>
17#include <linux/suspend.h>
18#include <linux/acpi.h>
19#include <linux/firmware-map.h>
20#include <linux/memblock.h>
21
22#include <asm/e820.h>
23#include <asm/proto.h>
24#include <asm/setup.h>
25
26/*
27 * The e820 map is the map that gets modified e.g. with command line parameters
28 * and that is also registered with modifications in the kernel resource tree
29 * with the iomem_resource as parent.
30 *
31 * The e820_saved is directly saved after the BIOS-provided memory map is
32 * copied. It doesn't get modified afterwards. It's registered for the
33 * /sys/firmware/memmap interface.
34 *
35 * That memory map is not modified and is used as base for kexec. The kexec'd
36 * kernel should get the same memory map as the firmware provides. Then the
37 * user can e.g. boot the original kernel with mem=1G while still booting the
38 * next kernel with full memory.
39 */
struct e820map e820;		/* working map; modified by mem=/memmap= etc. */
struct e820map e820_saved;	/* pristine copy taken in setup_memory_map() */

/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0xaeedbabe;	/* poison; set in e820_setup_gap() */
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif
48
49/*
50 * This function checks if any part of the range <start,end> is mapped
51 * with type.
52 */
53int
54e820_any_mapped(u64 start, u64 end, unsigned type)
55{
56	int i;
57
58	for (i = 0; i < e820.nr_map; i++) {
59		struct e820entry *ei = &e820.map[i];
60
61		if (type && ei->type != type)
62			continue;
63		if (ei->addr >= end || ei->addr + ei->size <= start)
64			continue;
65		return 1;
66	}
67	return 0;
68}
69EXPORT_SYMBOL_GPL(e820_any_mapped);
70
71/*
72 * This function checks if the entire range <start,end> is mapped with type.
73 *
74 * Note: this function only works correct if the e820 table is sorted and
75 * not-overlapping, which is the case
76 */
77int __init e820_all_mapped(u64 start, u64 end, unsigned type)
78{
79	int i;
80
81	for (i = 0; i < e820.nr_map; i++) {
82		struct e820entry *ei = &e820.map[i];
83
84		if (type && ei->type != type)
85			continue;
86		/* is the region (part) in overlap with the current region ?*/
87		if (ei->addr >= end || ei->addr + ei->size <= start)
88			continue;
89
90		/* if the region is at the beginning of <start,end> we move
91		 * start to the end of the region since it's ok until there
92		 */
93		if (ei->addr <= start)
94			start = ei->addr + ei->size;
95		/*
96		 * if start is now at or beyond end, we're done, full
97		 * coverage
98		 */
99		if (start >= end)
100			return 1;
101	}
102	return 0;
103}
104
105/*
106 * Add a memory region to the kernel e820 map.
107 */
108static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
109					 int type)
110{
111	int x = e820x->nr_map;
112
113	if (x >= ARRAY_SIZE(e820x->map)) {
114		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
115		return;
116	}
117
118	e820x->map[x].addr = start;
119	e820x->map[x].size = size;
120	e820x->map[x].type = type;
121	e820x->nr_map++;
122}
123
/* Append a region to the kernel's main e820 map. */
void __init e820_add_region(u64 start, u64 size, int type)
{
	__e820_add_region(&e820, start, size, type);
}
128
129static void __init e820_print_type(u32 type)
130{
131	switch (type) {
132	case E820_RAM:
133	case E820_RESERVED_KERN:
134		printk(KERN_CONT "(usable)");
135		break;
136	case E820_RESERVED:
137		printk(KERN_CONT "(reserved)");
138		break;
139	case E820_ACPI:
140		printk(KERN_CONT "(ACPI data)");
141		break;
142	case E820_NVS:
143		printk(KERN_CONT "(ACPI NVS)");
144		break;
145	case E820_UNUSABLE:
146		printk(KERN_CONT "(unusable)");
147		break;
148	default:
149		printk(KERN_CONT "type %u", type);
150		break;
151	}
152}
153
154void __init e820_print_map(char *who)
155{
156	int i;
157
158	for (i = 0; i < e820.nr_map; i++) {
159		printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
160		       (unsigned long long) e820.map[i].addr,
161		       (unsigned long long)
162		       (e820.map[i].addr + e820.map[i].size));
163		e820_print_type(e820.map[i].type);
164		printk(KERN_CONT "\n");
165	}
166}
167
168/*
169 * Sanitize the BIOS e820 map.
170 *
171 * Some e820 responses include overlapping entries. The following
172 * replaces the original e820 map with a new one, removing overlaps,
173 * and resolving conflicting memory types in favor of highest
174 * numbered type.
175 *
176 * The input parameter biosmap points to an array of 'struct
177 * e820entry' which on entry has elements in the range [0, *pnr_map)
178 * valid, and which has space for up to max_nr_map entries.
179 * On return, the resulting sanitized e820 map entries will be in
180 * overwritten in the same location, starting at biosmap.
181 *
182 * The integer pointed to by pnr_map must be valid on entry (the
183 * current number of valid entries located at biosmap) and will
184 * be updated on return, with the new number of valid entries
185 * (something no more than max_nr_map.)
186 *
187 * The return value from sanitize_e820_map() is zero if it
188 * successfully 'sanitized' the map entries passed in, and is -1
189 * if it did nothing, which can happen if either of (1) it was
190 * only passed one map entry, or (2) any of the input map entries
191 * were invalid (start + size < start, meaning that the size was
192 * so big the described memory range wrapped around through zero.)
193 *
194 *	Visually we're performing the following
195 *	(1,2,3,4 = memory types)...
196 *
197 *	Sample memory map (w/overlaps):
198 *	   ____22__________________
199 *	   ______________________4_
200 *	   ____1111________________
201 *	   _44_____________________
202 *	   11111111________________
203 *	   ____________________33__
204 *	   ___________44___________
205 *	   __________33333_________
206 *	   ______________22________
207 *	   ___________________2222_
208 *	   _________111111111______
209 *	   _____________________11_
210 *	   _________________4______
211 *
212 *	Sanitized equivalent (no overlap):
213 *	   1_______________________
214 *	   _44_____________________
215 *	   ___1____________________
216 *	   ____22__________________
217 *	   ______11________________
218 *	   _________1______________
219 *	   __________3_____________
220 *	   ___________44___________
221 *	   _____________33_________
222 *	   _______________2________
223 *	   ________________1_______
224 *	   _________________4______
225 *	   ___________________2____
226 *	   ____________________33__
227 *	   ______________________4_
228 */
229
int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
			     u32 *pnr_map)
{
	struct change_member {
		struct e820entry *pbios; /* pointer to original bios entry */
		unsigned long long addr; /* address for this change point */
	};
	/* __initdata scratch space; discarded after boot */
	static struct change_member change_point_list[2*E820_X_MAX] __initdata;
	static struct change_member *change_point[2*E820_X_MAX] __initdata;
	static struct e820entry *overlap_list[E820_X_MAX] __initdata;
	static struct e820entry new_bios[E820_X_MAX] __initdata;
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;
	BUG_ON(old_nr > max_nr_map);

	/* bail out if we find any unreasonable addresses in bios map */
	for (i = 0; i < old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i = 0; i < 2 * old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i = 0; i < old_nr; i++)	{
		if (biosmap[i].size != 0) {
			/* each non-empty entry contributes a start point
			 * and an end point */
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			change_point[chgidx]->addr = biosmap[i].addr +
				biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx;

	/* sort change-point list by memory addresses (low -> high);
	   a simple bubble sort is fine for this small, boot-time list */
	still_changing = 1;
	while (still_changing)	{
		still_changing = 0;
		for (i = 1; i < chg_nr; i++)  {
			unsigned long long curaddr, lastaddr;
			unsigned long long curpbaddr, lastpbaddr;

			curaddr = change_point[i]->addr;
			lastaddr = change_point[i - 1]->addr;
			curpbaddr = change_point[i]->pbios->addr;
			lastpbaddr = change_point[i - 1]->pbios->addr;

			/*
			 * swap entries, when:
			 *
			 * curaddr > lastaddr or
			 * curaddr == lastaddr and curaddr == curpbaddr and
			 * lastaddr != lastpbaddr
			 *
			 * (the tie-break orders end points before coincident
			 * start points)
			 */
			if (curaddr < lastaddr ||
			    (curaddr == lastaddr && curaddr == curpbaddr &&
			     lastaddr != lastpbaddr)) {
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing = 1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries = 0;	 /* number of entries in the overlap table */
	new_bios_entry = 0;	 /* index for creating new bios map entries */
	last_type = 0;		 /* start with undefined memory type */
	last_addr = 0;		 /* start with 0 as last starting address */

	/* loop through change-points, determining affect on the new bios map */
	for (chgidx = 0; chgidx < chg_nr; chgidx++) {
		/* keep track of all overlapping bios entries */
		if (change_point[chgidx]->addr ==
		    change_point[chgidx]->pbios->addr) {
			/*
			 * add map entry to overlap list (> 1 entry
			 * implies an overlap)
			 */
			overlap_list[overlap_entries++] =
				change_point[chgidx]->pbios;
		} else {
			/*
			 * remove entry from list (order independent,
			 * so swap with last)
			 */
			for (i = 0; i < overlap_entries; i++) {
				if (overlap_list[i] ==
				    change_point[chgidx]->pbios)
					overlap_list[i] =
						overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/*
		 * if there are overlapping entries, decide which
		 * "type" to use (larger value takes precedence --
		 * 1=usable, 2,3,4,4+=unusable)
		 */
		current_type = 0;
		for (i = 0; i < overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/*
		 * continue building up new bios map based on this
		 * information; an entry is emitted each time the
		 * effective type changes
		 */
		if (current_type != last_type)	{
			if (last_type != 0)	 {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/*
				 * move forward only if the new size
				 * was non-zero
				 */
				if (new_bios[new_bios_entry].size != 0)
					/*
					 * no more space left for new
					 * bios entries ?
					 */
					if (++new_bios_entry >= max_nr_map)
						break;
			}
			if (current_type != 0)	{
				new_bios[new_bios_entry].addr =
					change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr = change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	/* retain count for new bios entries */
	new_nr = new_bios_entry;

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}
388
389static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
390{
391	while (nr_map) {
392		u64 start = biosmap->addr;
393		u64 size = biosmap->size;
394		u64 end = start + size;
395		u32 type = biosmap->type;
396
397		/* Overflow in 64 bits? Ignore the memory map. */
398		if (start > end)
399			return -1;
400
401		e820_add_region(start, size, type);
402
403		biosmap++;
404		nr_map--;
405	}
406	return 0;
407}
408
/*
 * Copy the BIOS e820 map into a safe place.
 *
 * Sanity-check it while we're at it..
 *
 * If we're lucky and live on a modern system, the setup code
 * will have given us a memory map that we can use to properly
 * set up memory.  If we aren't, we'll fake a memory map.
 *
 * Returns 0 on success, -1 when the map was rejected.
 */
static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
{
	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;

	return __append_e820_map(biosmap, nr_map);
}
426
/*
 * Change the type of memory in [start, start+size) from old_type to
 * new_type in the given map, splitting entries where necessary.
 * Returns the number of bytes whose type was actually changed.
 */
static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
					u64 size, unsigned old_type,
					unsigned new_type)
{
	u64 end;
	unsigned int i;
	u64 real_updated_size = 0;

	BUG_ON(old_type == new_type);

	/* clamp so that start + size cannot wrap past 2^64 */
	if (size > (ULLONG_MAX - start))
		size = ULLONG_MAX - start;

	end = start + size;
	printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ",
		       (unsigned long long) start,
		       (unsigned long long) end);
	e820_print_type(old_type);
	printk(KERN_CONT " ==> ");
	e820_print_type(new_type);
	printk(KERN_CONT "\n");

	for (i = 0; i < e820x->nr_map; i++) {
		struct e820entry *ei = &e820x->map[i];
		u64 final_start, final_end;
		u64 ei_end;

		if (ei->type != old_type)
			continue;

		ei_end = ei->addr + ei->size;
		/* totally covered by new range? retype in place */
		if (ei->addr >= start && ei_end <= end) {
			ei->type = new_type;
			real_updated_size += ei->size;
			continue;
		}

		/* new range is totally covered? split into head (old type),
		 * middle (new type) and tail (old type) */
		if (ei->addr < start && ei_end > end) {
			__e820_add_region(e820x, start, size, new_type);
			__e820_add_region(e820x, end, ei_end - end, ei->type);
			ei->size = start - ei->addr;
			real_updated_size += size;
			continue;
		}

		/* partially covered */
		final_start = max(start, ei->addr);
		final_end = min(end, ei_end);
		if (final_start >= final_end)
			continue;

		__e820_add_region(e820x, final_start, final_end - final_start,
				  new_type);

		real_updated_size += final_end - final_start;

		/*
		 * left range could be head or tail, so need to update
		 * size at first.
		 */
		ei->size -= final_end - final_start;
		if (ei->addr < final_start)
			continue;
		ei->addr = final_end;
	}
	return real_updated_size;
}
496
/* Retype [start, start+size) in the kernel map; returns bytes retyped. */
u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
			     unsigned new_type)
{
	return __e820_update_range(&e820, start, size, old_type, new_type);
}
502
/* Same as e820_update_range(), but on the saved firmware map. */
static u64 __init e820_update_range_saved(u64 start, u64 size,
					  unsigned old_type, unsigned new_type)
{
	return __e820_update_range(&e820_saved, start, size, old_type,
				     new_type);
}
509
/*
 * make e820 not cover the range
 *
 * Removes [start, start+size) from the kernel e820 map.  When checktype
 * is set only entries of old_type are affected; otherwise any overlapping
 * entry is trimmed.  Returns the number of bytes actually removed.
 */
u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
			     int checktype)
{
	int i;
	u64 end;
	u64 real_removed_size = 0;

	/* clamp so that start + size cannot wrap past 2^64 */
	if (size > (ULLONG_MAX - start))
		size = ULLONG_MAX - start;

	end = start + size;
	printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
		       (unsigned long long) start,
		       (unsigned long long) end);
	if (checktype)
		e820_print_type(old_type);
	printk(KERN_CONT "\n");

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		u64 final_start, final_end;
		u64 ei_end;

		if (checktype && ei->type != old_type)
			continue;

		ei_end = ei->addr + ei->size;
		/* totally covered? wipe the entry; the zero-sized hole is
		 * dropped by a later sanitize_e820_map() pass */
		if (ei->addr >= start && ei_end <= end) {
			real_removed_size += ei->size;
			memset(ei, 0, sizeof(struct e820entry));
			continue;
		}

		/* new range is totally covered? punch a hole: keep the
		 * head in place and append the tail as a new entry */
		if (ei->addr < start && ei_end > end) {
			e820_add_region(end, ei_end - end, ei->type);
			ei->size = start - ei->addr;
			real_removed_size += size;
			continue;
		}

		/* partially covered */
		final_start = max(start, ei->addr);
		final_end = min(end, ei_end);
		if (final_start >= final_end)
			continue;
		real_removed_size += final_end - final_start;

		/*
		 * left range could be head or tail, so need to update
		 * size at first.
		 */
		ei->size -= final_end - final_start;
		if (ei->addr < final_start)
			continue;
		ei->addr = final_end;
	}
	return real_removed_size;
}
571
572void __init update_e820(void)
573{
574	u32 nr_map;
575
576	nr_map = e820.nr_map;
577	if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
578		return;
579	e820.nr_map = nr_map;
580	printk(KERN_INFO "modified physical RAM map:\n");
581	e820_print_map("modified");
582}
583static void __init update_e820_saved(void)
584{
585	u32 nr_map;
586
587	nr_map = e820_saved.nr_map;
588	if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
589		return;
590	e820_saved.nr_map = nr_map;
591}
592#define MAX_GAP_END 0x100000000ull
593/*
594 * Search for a gap in the e820 memory space from start_addr to end_addr.
595 */
596__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
597		unsigned long start_addr, unsigned long long end_addr)
598{
599	unsigned long long last;
600	int i = e820.nr_map;
601	int found = 0;
602
603	last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
604
605	while (--i >= 0) {
606		unsigned long long start = e820.map[i].addr;
607		unsigned long long end = start + e820.map[i].size;
608
609		if (end < start_addr)
610			continue;
611
612		/*
613		 * Since "last" is at most 4GB, we know we'll
614		 * fit in 32 bits if this condition is true
615		 */
616		if (last > end) {
617			unsigned long gap = last - end;
618
619			if (gap >= *gapsize) {
620				*gapsize = gap;
621				*gapstart = end;
622				found = 1;
623			}
624		}
625		if (start < last)
626			last = start;
627	}
628	return found;
629}
630
/*
 * Search for the biggest gap in the low 32 bits of the e820
 * memory space.  We pass this space to PCI to assign MMIO resources
 * for hotplug or unconfigured devices in.
 * Hopefully the BIOS let enough space left.
 */
__init void e820_setup_gap(void)
{
	unsigned long gapstart, gapsize;
	int found;

	/* defaults: start at 256MB, require at least a 4MB gap */
	gapstart = 0x10000000;
	gapsize = 0x400000;
	found  = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);

#ifdef CONFIG_X86_64
	if (!found) {
		/* no 32-bit gap: place the window 1MB past end of RAM */
		gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
		printk(KERN_ERR
	"PCI: Warning: Cannot find a gap in the 32bit address range\n"
	"PCI: Unassigned devices with 32bit resource registers may break!\n");
	}
#endif

	/*
	 * e820_reserve_resources_late protect stolen RAM already
	 */
	pci_mem_start = gapstart;

	printk(KERN_INFO
	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
	       pci_mem_start, gapstart, gapsize);
}
664
/**
 * Because of the size limitation of struct boot_params, only first
 * 128 E820 memory entries are passed to kernel via
 * boot_params.e820_map, others are passed via SETUP_E820_EXT node of
 * linked list of struct setup_data, which is parsed here.
 */
void __init parse_e820_ext(struct setup_data *sdata)
{
	int entries;
	struct e820entry *extmap;

	/* sdata->len is the payload size in bytes; any trailing partial
	 * entry is discarded by the integer division */
	entries = sdata->len / sizeof(struct e820entry);
	extmap = (struct e820entry *)(sdata->data);
	__append_e820_map(extmap, entries);
	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
	printk(KERN_INFO "extended physical RAM map:\n");
	e820_print_map("extended");
}
683
684#if defined(CONFIG_X86_64) || \
685	(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
/**
 * Find the ranges of physical addresses that do not correspond to
 * e820 RAM areas and mark the corresponding pages as nosave for
 * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
 *
 * This function requires the e820 map to be sorted and without any
 * overlapping entries and assumes the first e820 area to be RAM.
 */
void __init e820_mark_nosave_regions(unsigned long limit_pfn)
{
	int i;
	unsigned long pfn;

	/* pfn tracks the end of the previously processed area */
	pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
	for (i = 1; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];

		/* the gap between the previous area and this one */
		if (pfn < PFN_UP(ei->addr))
			register_nosave_region(pfn, PFN_UP(ei->addr));

		pfn = PFN_DOWN(ei->addr + ei->size);
		/* non-RAM entries themselves are not saveable either */
		if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
			register_nosave_region(PFN_UP(ei->addr), pfn);

		if (pfn >= limit_pfn)
			break;
	}
}
714#endif
715
716#ifdef CONFIG_HIBERNATION
717/**
718 * Mark ACPI NVS memory region, so that we can save/restore it during
719 * hibernation and the subsequent resume.
720 */
721static int __init e820_mark_nvs_memory(void)
722{
723	int i;
724
725	for (i = 0; i < e820.nr_map; i++) {
726		struct e820entry *ei = &e820.map[i];
727
728		if (ei->type == E820_NVS)
729			suspend_nvs_register(ei->addr, ei->size);
730	}
731
732	return 0;
733}
734core_initcall(e820_mark_nvs_memory);
735#endif
736
/*
 * Pre-allocate 'size' bytes ('align'-aligned) from memblock and mark
 * the range E820_RESERVED in e820_saved as well.  Returns the physical
 * address, or 0 when the allocation failed.
 */
u64 __init early_reserve_e820(u64 size, u64 align)
{
	u64 addr;

	addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
	if (addr) {
		e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED);
		printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
		update_e820_saved();
	}

	return addr;
}
753
754#ifdef CONFIG_X86_32
755# ifdef CONFIG_X86_PAE
756#  define MAX_ARCH_PFN		(1ULL<<(36-PAGE_SHIFT))
757# else
758#  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
759# endif
760#else /* CONFIG_X86_32 */
761# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
762#endif
763
/*
 * Find the highest page frame number we have available
 * (of the given e820 type, below limit_pfn).
 */
static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
{
	int i;
	unsigned long last_pfn = 0;
	unsigned long max_arch_pfn = MAX_ARCH_PFN;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		unsigned long start_pfn;
		unsigned long end_pfn;

		if (ei->type != type)
			continue;

		/* both bounds are truncated to whole page frames */
		start_pfn = ei->addr >> PAGE_SHIFT;
		end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;

		if (start_pfn >= limit_pfn)
			continue;
		if (end_pfn > limit_pfn) {
			/* entry straddles the limit: clamp and stop
			 * (relies on the map being sorted) */
			last_pfn = limit_pfn;
			break;
		}
		if (end_pfn > last_pfn)
			last_pfn = end_pfn;
	}

	/* never report more than the architecture can address */
	if (last_pfn > max_arch_pfn)
		last_pfn = max_arch_pfn;

	printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
			 last_pfn, max_arch_pfn);
	return last_pfn;
}
/* Highest usable RAM pfn, bounded only by what the arch can address. */
unsigned long __init e820_end_of_ram_pfn(void)
{
	return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
}
805
/* Highest usable RAM pfn below 4GB. */
unsigned long __init e820_end_of_low_ram_pfn(void)
{
	return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
}
810
/*
 * Report a fatal early-boot error on the early console and panic.
 * Use a "%s" format so a '%' inside msg cannot be misinterpreted as a
 * conversion specification by the printf-style early_printk()/panic().
 */
static void early_panic(char *msg)
{
	early_printk("%s", msg);
	panic("%s", msg);
}
816
/* Set when the user overrode the firmware map via mem= or memmap= */
static int userdef __initdata;
818
/*
 * "mem=nopentium" disables the 4MB page tables;
 * "mem=nn[KMG]" trims all RAM at or above the given boundary off the map.
 */
static int __init parse_memopt(char *p)
{
	u64 mem_size;

	if (!p)
		return -EINVAL;

	if (!strcmp(p, "nopentium")) {
#ifdef CONFIG_X86_32
		setup_clear_cpu_cap(X86_FEATURE_PSE);
		return 0;
#else
		printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n");
		return -EINVAL;
#endif
	}

	/* record that the user changed the map (see finish_e820_parsing) */
	userdef = 1;
	mem_size = memparse(p, &p);
	/* don't remove all of memory when handling "mem={invalid}" param */
	if (mem_size == 0)
		return -EINVAL;
	e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);

	return 0;
}
early_param("mem", parse_memopt);
847
/*
 * memmap=exactmap        throw away the firmware map and start empty
 * memmap=nn[KMG]@ss[KMG] add a RAM region of size nn at address ss
 * memmap=nn[KMG]#ss[KMG] add an ACPI-data region
 * memmap=nn[KMG]$ss[KMG] add a reserved region
 * memmap=nn[KMG]         trim RAM above nn (same as mem=nn[KMG])
 */
static int __init parse_memmap_opt(char *p)
{
	char *oldp;
	u64 start_at, mem_size;

	if (!p)
		return -EINVAL;

	if (!strncmp(p, "exactmap", 8)) {
#ifdef CONFIG_CRASH_DUMP
		/*
		 * If we are doing a crash dump, we still need to know
		 * the real mem size before original memory map is
		 * reset.
		 */
		saved_max_pfn = e820_end_of_ram_pfn();
#endif
		e820.nr_map = 0;
		userdef = 1;
		return 0;
	}

	oldp = p;
	mem_size = memparse(p, &p);
	/* memparse leaves p unchanged when no number was consumed */
	if (p == oldp)
		return -EINVAL;

	userdef = 1;
	if (*p == '@') {
		start_at = memparse(p+1, &p);
		e820_add_region(start_at, mem_size, E820_RAM);
	} else if (*p == '#') {
		start_at = memparse(p+1, &p);
		e820_add_region(start_at, mem_size, E820_ACPI);
	} else if (*p == '$') {
		start_at = memparse(p+1, &p);
		e820_add_region(start_at, mem_size, E820_RESERVED);
	} else
		e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);

	/* reject trailing garbage after the parsed specification */
	return *p == '\0' ? 0 : -EINVAL;
}
early_param("memmap", parse_memmap_opt);
891
892void __init finish_e820_parsing(void)
893{
894	if (userdef) {
895		u32 nr = e820.nr_map;
896
897		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
898			early_panic("Invalid user supplied memory map");
899		e820.nr_map = nr;
900
901		printk(KERN_INFO "user-defined physical RAM map:\n");
902		e820_print_map("user");
903	}
904}
905
906static inline const char *e820_type_to_string(int e820_type)
907{
908	switch (e820_type) {
909	case E820_RESERVED_KERN:
910	case E820_RAM:	return "System RAM";
911	case E820_ACPI:	return "ACPI Tables";
912	case E820_NVS:	return "ACPI Non-volatile Storage";
913	case E820_UNUSABLE:	return "Unusable memory";
914	default:	return "reserved";
915	}
916}
917
/*
 * Mark e820 reserved areas as busy for the resource manager.
 */
/* one struct resource per e820 entry, kept for the late pass */
static struct resource __initdata *e820_res;
void __init e820_reserve_resources(void)
{
	int i;
	struct resource *res;
	u64 end;

	res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
	e820_res = res;
	for (i = 0; i < e820.nr_map; i++) {
		end = e820.map[i].addr + e820.map[i].size - 1;
		if (end != (resource_size_t)end) {
			/* end doesn't fit in resource_size_t: skip; the slot
			 * stays zero-filled (alloc_bootmem memory) so the
			 * late pass ignores it via the res->end check */
			res++;
			continue;
		}
		res->name = e820_type_to_string(e820.map[i].type);
		res->start = e820.map[i].addr;
		res->end = end;

		res->flags = IORESOURCE_MEM;

		/*
		 * don't register the region that could be conflicted with
		 * pci device BAR resource and insert them later in
		 * pcibios_resource_survey()
		 */
		if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
			res->flags |= IORESOURCE_BUSY;
			insert_resource(&iomem_resource, res);
		}
		res++;
	}

	/* expose the unmodified firmware map via /sys/firmware/memmap */
	for (i = 0; i < e820_saved.nr_map; i++) {
		struct e820entry *entry = &e820_saved.map[i];
		firmware_map_add_early(entry->addr,
			entry->addr + entry->size - 1,
			e820_type_to_string(entry->type));
	}
}
961
962/* How much should we pad RAM ending depending on where it is? */
963static unsigned long ram_alignment(resource_size_t pos)
964{
965	unsigned long mb = pos >> 20;
966
967	/* To 64kB in the first megabyte */
968	if (!mb)
969		return 64*1024;
970
971	/* To 1MB in the first 16MB */
972	if (mb < 16)
973		return 1024*1024;
974
975	/* To 64MB for anything above that */
976	return 64*1024*1024;
977}
978
979#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
980
981void __init e820_reserve_resources_late(void)
982{
983	int i;
984	struct resource *res;
985
986	res = e820_res;
987	for (i = 0; i < e820.nr_map; i++) {
988		if (!res->parent && res->end)
989			insert_resource_expand_to_fit(&iomem_resource, res);
990		res++;
991	}
992
993	/*
994	 * Try to bump up RAM regions to reasonable boundaries to
995	 * avoid stolen RAM:
996	 */
997	for (i = 0; i < e820.nr_map; i++) {
998		struct e820entry *entry = &e820.map[i];
999		u64 start, end;
1000
1001		if (entry->type != E820_RAM)
1002			continue;
1003		start = entry->addr + entry->size;
1004		end = round_up(start, ram_alignment(start)) - 1;
1005		if (end > MAX_RESOURCE_SIZE)
1006			end = MAX_RESOURCE_SIZE;
1007		if (start >= end)
1008			continue;
1009		printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
1010			       start, end);
1011		reserve_region_with_split(&iomem_resource, start, end,
1012					  "RAM buffer");
1013	}
1014}
1015
/* Returns the name of the firmware interface the map came from. */
char *__init default_machine_specific_memory_setup(void)
{
	char *who = "BIOS-e820";
	u32 new_nr;
	/*
	 * Try to copy the BIOS-supplied E820-map.
	 *
	 * Otherwise fake a memory map; one section from 0k->640k,
	 * the next section from 1mb->appropriate_mem_k
	 */
	new_nr = boot_params.e820_entries;
	sanitize_e820_map(boot_params.e820_map,
			ARRAY_SIZE(boot_params.e820_map),
			&new_nr);
	boot_params.e820_entries = new_nr;
	if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
	  < 0) {
		u64 mem_size;

		/* compare results from other methods and take the greater */
		if (boot_params.alt_mem_k
		    < boot_params.screen_info.ext_mem_k) {
			mem_size = boot_params.screen_info.ext_mem_k;
			who = "BIOS-88";
		} else {
			mem_size = boot_params.alt_mem_k;
			who = "BIOS-e801";
		}

		/* mem_size is in kilobytes, hence the << 10 */
		e820.nr_map = 0;
		e820_add_region(0, LOWMEMSIZE(), E820_RAM);
		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
	}

	/* In case someone cares... */
	return who;
}
1053
1054void __init setup_memory_map(void)
1055{
1056	char *who;
1057
1058	who = x86_init.resources.memory_setup();
1059	memcpy(&e820_saved, &e820, sizeof(struct e820map));
1060	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1061	e820_print_map(who);
1062}
1063
/* Populate memblock with the usable-RAM entries of the e820 map. */
void __init memblock_x86_fill(void)
{
	int i;
	u64 end;

	/*
	 * EFI may have more than 128 entries
	 * We are safe to enable resizing, because memblock_x86_fill()
	 * is rather late for x86
	 */
	memblock_can_resize = 1;

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];

		end = ei->addr + ei->size;
		/* skip entries whose end doesn't fit in resource_size_t */
		if (end != (resource_size_t)end)
			continue;

		/* only usable memory goes into memblock */
		if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
			continue;

		memblock_add(ei->addr, ei->size);
	}

	memblock_analyze();
	memblock_dump_all();
}
1092
/* Tell the page allocator how many pages below MAX_DMA_PFN are not free. */
void __init memblock_find_dma_reserve(void)
{
#ifdef CONFIG_X86_64
	u64 free_size_pfn;
	u64 mem_size_pfn;
	/*
	 * need to find out used area below MAX_DMA_PFN
	 * need to use memblock to get free size in [0, MAX_DMA_PFN]
	 * at first, and assume boot_mem will not take below MAX_DMA_PFN
	 */
	mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
	free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
	/* reserved = total - free pages below the DMA limit */
	set_dma_reserve(mem_size_pfn - free_size_pfn);
#endif
}
1108