e820.c revision 41c094fd3ca54f1a71233049cf136ff94c91f4ae
1/*
2 * Handle the memory map.
3 * The functions here do the job until bootmem takes over.
4 *
5 *  Getting sanitize_e820_map() in sync with i386 version by applying change:
6 *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
7 *     Alex Achenbach <xela@slit.de>, December 2002.
8 *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
9 *
10 */
11#include <linux/kernel.h>
12#include <linux/types.h>
13#include <linux/init.h>
14#include <linux/bootmem.h>
15#include <linux/ioport.h>
16#include <linux/string.h>
17#include <linux/kexec.h>
18#include <linux/module.h>
19#include <linux/mm.h>
20#include <linux/pfn.h>
21#include <linux/suspend.h>
22
23#include <asm/pgtable.h>
24#include <asm/page.h>
25#include <asm/e820.h>
26#include <asm/proto.h>
27#include <asm/setup.h>
28#include <asm/trampoline.h>
29
/* The kernel's working copy of the physical memory map. */
struct e820map e820;

/*
 * For PCI or other memory-mapped resources.  The initializer is only a
 * placeholder; e820_setup_gap() stores the real value.
 */
unsigned long pci_mem_start = 0xaeedbabe;
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif
37
38/*
39 * This function checks if any part of the range <start,end> is mapped
40 * with type.
41 */
42int
43e820_any_mapped(u64 start, u64 end, unsigned type)
44{
45	int i;
46
47	for (i = 0; i < e820.nr_map; i++) {
48		struct e820entry *ei = &e820.map[i];
49
50		if (type && ei->type != type)
51			continue;
52		if (ei->addr >= end || ei->addr + ei->size <= start)
53			continue;
54		return 1;
55	}
56	return 0;
57}
58EXPORT_SYMBOL_GPL(e820_any_mapped);
59
60/*
61 * This function checks if the entire range <start,end> is mapped with type.
62 *
63 * Note: this function only works correct if the e820 table is sorted and
64 * not-overlapping, which is the case
65 */
66int __init e820_all_mapped(u64 start, u64 end, unsigned type)
67{
68	int i;
69
70	for (i = 0; i < e820.nr_map; i++) {
71		struct e820entry *ei = &e820.map[i];
72
73		if (type && ei->type != type)
74			continue;
75		/* is the region (part) in overlap with the current region ?*/
76		if (ei->addr >= end || ei->addr + ei->size <= start)
77			continue;
78
79		/* if the region is at the beginning of <start,end> we move
80		 * start to the end of the region since it's ok until there
81		 */
82		if (ei->addr <= start)
83			start = ei->addr + ei->size;
84		/*
85		 * if start is now at or beyond end, we're done, full
86		 * coverage
87		 */
88		if (start >= end)
89			return 1;
90	}
91	return 0;
92}
93
94/*
95 * Add a memory region to the kernel e820 map.
96 */
97void __init e820_add_region(u64 start, u64 size, int type)
98{
99	int x = e820.nr_map;
100
101	if (x == ARRAY_SIZE(e820.map)) {
102		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
103		return;
104	}
105
106	e820.map[x].addr = start;
107	e820.map[x].size = size;
108	e820.map[x].type = type;
109	e820.nr_map++;
110}
111
112void __init e820_print_map(char *who)
113{
114	int i;
115
116	for (i = 0; i < e820.nr_map; i++) {
117		printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
118		       (unsigned long long) e820.map[i].addr,
119		       (unsigned long long)
120		       (e820.map[i].addr + e820.map[i].size));
121		switch (e820.map[i].type) {
122		case E820_RAM:
123			printk(KERN_CONT "(usable)\n");
124			break;
125		case E820_RESERVED:
126			printk(KERN_CONT "(reserved)\n");
127			break;
128		case E820_ACPI:
129			printk(KERN_CONT "(ACPI data)\n");
130			break;
131		case E820_NVS:
132			printk(KERN_CONT "(ACPI NVS)\n");
133			break;
134		default:
135			printk(KERN_CONT "type %u\n", e820.map[i].type);
136			break;
137		}
138	}
139}
140
141/*
142 * Sanitize the BIOS e820 map.
143 *
144 * Some e820 responses include overlapping entries. The following
145 * replaces the original e820 map with a new one, removing overlaps,
146 * and resolving conflicting memory types in favor of highest
147 * numbered type.
148 *
149 * The input parameter biosmap points to an array of 'struct
150 * e820entry' which on entry has elements in the range [0, *pnr_map)
151 * valid, and which has space for up to max_nr_map entries.
 * On return, the resulting sanitized e820 map entries will have been
 * written back to the same location, starting at biosmap.
154 *
155 * The integer pointed to by pnr_map must be valid on entry (the
156 * current number of valid entries located at biosmap) and will
157 * be updated on return, with the new number of valid entries
158 * (something no more than max_nr_map.)
159 *
160 * The return value from sanitize_e820_map() is zero if it
161 * successfully 'sanitized' the map entries passed in, and is -1
162 * if it did nothing, which can happen if either of (1) it was
163 * only passed one map entry, or (2) any of the input map entries
164 * were invalid (start + size < start, meaning that the size was
165 * so big the described memory range wrapped around through zero.)
166 *
167 *	Visually we're performing the following
168 *	(1,2,3,4 = memory types)...
169 *
170 *	Sample memory map (w/overlaps):
171 *	   ____22__________________
172 *	   ______________________4_
173 *	   ____1111________________
174 *	   _44_____________________
175 *	   11111111________________
176 *	   ____________________33__
177 *	   ___________44___________
178 *	   __________33333_________
179 *	   ______________22________
180 *	   ___________________2222_
181 *	   _________111111111______
182 *	   _____________________11_
183 *	   _________________4______
184 *
185 *	Sanitized equivalent (no overlap):
186 *	   1_______________________
187 *	   _44_____________________
188 *	   ___1____________________
189 *	   ____22__________________
190 *	   ______11________________
191 *	   _________1______________
192 *	   __________3_____________
193 *	   ___________44___________
194 *	   _____________33_________
195 *	   _______________2________
196 *	   ________________1_______
197 *	   _________________4______
198 *	   ___________________2____
199 *	   ____________________33__
200 *	   ______________________4_
201 */
202
/*
 * Rewrite biosmap[0 .. *pnr_map) in place as a sorted map without
 * overlaps; where inputs overlap, the numerically largest type wins.
 * Returns 0 on success (with *pnr_map updated) and -1 when nothing was
 * done: fewer than two entries, or an entry whose addr + size wraps.
 */
int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
				int *pnr_map)
{
	/* one start or end address of an input entry */
	struct change_member {
		struct e820entry *pbios; /* pointer to original bios entry */
		unsigned long long addr; /* address for this change point */
	};
/*
 * Scratch tables, sized from E820_X_MAX.  NOTE(review): presumably
 * static so that these arrays stay off the stack -- confirm.
 */
static struct change_member change_point_list[2*E820_X_MAX] __initdata;
static struct change_member *change_point[2*E820_X_MAX] __initdata;
static struct e820entry *overlap_list[E820_X_MAX] __initdata;
static struct e820entry new_bios[E820_X_MAX] __initdata;
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;
	BUG_ON(old_nr > max_nr_map);

	/* bail out if we find any unreasonable addresses in bios map */
	for (i = 0; i < old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i = 0; i < 2 * old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i = 0; i < old_nr; i++)	{
		if (biosmap[i].size != 0) {
			/* start of the region */
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			/* end of the region (first address past it) */
			change_point[chgidx]->addr = biosmap[i].addr +
				biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx;

	/*
	 * sort change-point list by memory addresses (low -> high);
	 * adjacent entries are swapped until a full pass changes nothing
	 */
	still_changing = 1;
	while (still_changing)	{
		still_changing = 0;
		for (i = 1; i < chg_nr; i++)  {
			unsigned long long curaddr, lastaddr;
			unsigned long long curpbaddr, lastpbaddr;

			curaddr = change_point[i]->addr;
			lastaddr = change_point[i - 1]->addr;
			curpbaddr = change_point[i]->pbios->addr;
			lastpbaddr = change_point[i - 1]->pbios->addr;

			/*
			 * swap entries, when:
			 *
			 * curaddr < lastaddr or
			 * curaddr == lastaddr and curaddr == curpbaddr and
			 * lastaddr != lastpbaddr
			 *
			 * i.e. at equal addresses a region start is moved
			 * in front of a region end.
			 */
			if (curaddr < lastaddr ||
			    (curaddr == lastaddr && curaddr == curpbaddr &&
			     lastaddr != lastpbaddr)) {
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing = 1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries = 0;	 /* number of entries in the overlap table */
	new_bios_entry = 0;	 /* index for creating new bios map entries */
	last_type = 0;		 /* start with undefined memory type */
	last_addr = 0;		 /* start with 0 as last starting address */

	/* loop through change-points, determining effect on the new bios map */
	for (chgidx = 0; chgidx < chg_nr; chgidx++) {
		/*
		 * keep track of all overlapping bios entries; a change
		 * point whose address equals its entry's start address is
		 * a region start, anything else is a region end
		 */
		if (change_point[chgidx]->addr ==
		    change_point[chgidx]->pbios->addr) {
			/*
			 * add map entry to overlap list (> 1 entry
			 * implies an overlap)
			 */
			overlap_list[overlap_entries++] =
				change_point[chgidx]->pbios;
		} else {
			/*
			 * remove entry from list (order independent,
			 * so swap with last)
			 */
			for (i = 0; i < overlap_entries; i++) {
				if (overlap_list[i] ==
				    change_point[chgidx]->pbios)
					overlap_list[i] =
						overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/*
		 * if there are overlapping entries, decide which
		 * "type" to use (larger value takes precedence --
		 * 1=usable, 2,3,4,4+=unusable)
		 */
		current_type = 0;
		for (i = 0; i < overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/*
		 * continue building up new bios map based on this
		 * information
		 */
		if (current_type != last_type)	{
			/* the type changed here: close the open output entry */
			if (last_type != 0)	 {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/*
				 * move forward only if the new size
				 * was non-zero
				 */
				if (new_bios[new_bios_entry].size != 0)
					/*
					 * no more space left for new
					 * bios entries ?
					 */
					if (++new_bios_entry >= max_nr_map)
						break;
			}
			/* ... and open a new one unless we entered a hole */
			if (current_type != 0)	{
				new_bios[new_bios_entry].addr =
					change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr = change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	/* retain count for new bios entries */
	new_nr = new_bios_entry;

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}
361
362static int __init __copy_e820_map(struct e820entry *biosmap, int nr_map)
363{
364	while (nr_map) {
365		u64 start = biosmap->addr;
366		u64 size = biosmap->size;
367		u64 end = start + size;
368		u32 type = biosmap->type;
369
370		/* Overflow in 64 bits? Ignore the memory map. */
371		if (start > end)
372			return -1;
373
374		e820_add_region(start, size, type);
375
376		biosmap++;
377		nr_map--;
378	}
379	return 0;
380}
381
382/*
383 * Copy the BIOS e820 map into a safe place.
384 *
385 * Sanity-check it while we're at it..
386 *
387 * If we're lucky and live on a modern system, the setup code
388 * will have given us a memory map that we can use to properly
389 * set up memory.  If we aren't, we'll fake a memory map.
390 */
391int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
392{
393	/* Only one memory region (or negative)? Ignore it */
394	if (nr_map < 2)
395		return -1;
396
397	return __copy_e820_map(biosmap, nr_map);
398}
399
400u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
401				unsigned new_type)
402{
403	int i;
404	u64 real_updated_size = 0;
405
406	BUG_ON(old_type == new_type);
407
408	for (i = 0; i < e820.nr_map; i++) {
409		struct e820entry *ei = &e820.map[i];
410		u64 final_start, final_end;
411		if (ei->type != old_type)
412			continue;
413		/* totally covered? */
414		if (ei->addr >= start &&
415		    (ei->addr + ei->size) <= (start + size)) {
416			ei->type = new_type;
417			real_updated_size += ei->size;
418			continue;
419		}
420		/* partially covered */
421		final_start = max(start, ei->addr);
422		final_end = min(start + size, ei->addr + ei->size);
423		if (final_start >= final_end)
424			continue;
425		e820_add_region(final_start, final_end - final_start,
426					 new_type);
427		real_updated_size += final_end - final_start;
428	}
429	return real_updated_size;
430}
431
432void __init update_e820(void)
433{
434	int nr_map;
435
436	nr_map = e820.nr_map;
437	if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
438		return;
439	e820.nr_map = nr_map;
440	printk(KERN_INFO "modified physical RAM map:\n");
441	e820_print_map("modified");
442}
443
444/*
445 * Search for the biggest gap in the low 32 bits of the e820
446 * memory space.  We pass this space to PCI to assign MMIO resources
447 * for hotplug or unconfigured devices in.
448 * Hopefully the BIOS let enough space left.
449 */
450__init void e820_setup_gap(void)
451{
452	unsigned long gapstart, gapsize, round;
453	unsigned long long last;
454	int i;
455	int found = 0;
456
457	last = 0x100000000ull;
458	gapstart = 0x10000000;
459	gapsize = 0x400000;
460	i = e820.nr_map;
461	while (--i >= 0) {
462		unsigned long long start = e820.map[i].addr;
463		unsigned long long end = start + e820.map[i].size;
464
465		/*
466		 * Since "last" is at most 4GB, we know we'll
467		 * fit in 32 bits if this condition is true
468		 */
469		if (last > end) {
470			unsigned long gap = last - end;
471
472			if (gap > gapsize) {
473				gapsize = gap;
474				gapstart = end;
475				found = 1;
476			}
477		}
478		if (start < last)
479			last = start;
480	}
481
482#ifdef CONFIG_X86_64
483	if (!found) {
484		gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
485		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
486		       "address range\n"
487		       KERN_ERR "PCI: Unassigned devices with 32bit resource "
488		       "registers may break!\n");
489	}
490#endif
491
492	/*
493	 * See how much we want to round up: start off with
494	 * rounding to the next 1MB area.
495	 */
496	round = 0x100000;
497	while ((gapsize >> 4) > round)
498		round += round;
499	/* Fun with two's complement */
500	pci_mem_start = (gapstart + round) & -round;
501
502	printk(KERN_INFO
503	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
504	       pci_mem_start, gapstart, gapsize);
505}
506
507/**
508 * Because of the size limitation of struct boot_params, only first
509 * 128 E820 memory entries are passed to kernel via
510 * boot_params.e820_map, others are passed via SETUP_E820_EXT node of
511 * linked list of struct setup_data, which is parsed here.
512 */
513void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
514{
515	u32 map_len;
516	int entries;
517	struct e820entry *extmap;
518
519	entries = sdata->len / sizeof(struct e820entry);
520	map_len = sdata->len + sizeof(struct setup_data);
521	if (map_len > PAGE_SIZE)
522		sdata = early_ioremap(pa_data, map_len);
523	extmap = (struct e820entry *)(sdata->data);
524	__copy_e820_map(extmap, entries);
525	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
526	if (map_len > PAGE_SIZE)
527		early_iounmap(sdata, map_len);
528	printk(KERN_INFO "extended physical RAM map:\n");
529	e820_print_map("extended");
530}
531
#if defined(CONFIG_X86_64) || \
	(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
/*
 * Register as nosave (for hibernation on 32 bit, software suspend and
 * suspend to RAM on 64 bit) every page frame range that is not e820
 * RAM: both the holes between consecutive entries and the entries
 * whose type is not E820_RAM.  The walk stops once an entry ends at or
 * beyond limit_pfn.
 *
 * The e820 map must be sorted and free of overlaps, and its first
 * entry is assumed to be RAM (it is never registered itself).
 */
void __init e820_mark_nosave_regions(unsigned long limit_pfn)
{
	unsigned long prev_end_pfn;
	int idx;

	prev_end_pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);

	for (idx = 1; idx < e820.nr_map; idx++) {
		struct e820entry *entry = &e820.map[idx];

		/* hole between the previous entry and this one */
		if (prev_end_pfn < PFN_UP(entry->addr))
			register_nosave_region(prev_end_pfn,
					       PFN_UP(entry->addr));

		prev_end_pfn = PFN_DOWN(entry->addr + entry->size);

		/* the entry itself, when it is not RAM */
		if (entry->type != E820_RAM)
			register_nosave_region(PFN_UP(entry->addr),
					       prev_end_pfn);

		if (prev_end_pfn >= limit_pfn)
			break;
	}
}
#endif
563
/*
 * Early reserved memory areas.
 *
 * Fixed-size table of address ranges that the early allocators in this
 * file must not hand out.  Used slots are packed at the front; the
 * first slot whose .end is zero terminates the list.
 */
#define MAX_EARLY_RES 20

struct early_res {
	u64 start, end;		/* reserved range; overlap tests treat end as exclusive */
	char name[16];		/* label shown in log and panic messages */
};
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
	{ 0, PAGE_SIZE, "BIOS data page" },	/* BIOS data page */
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
#endif
#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
	/*
	 * But first pinch a few for the stack/trampoline stuff
	 * FIXME: Don't need the extra page at 4K, but need to fix
	 * trampoline before removing it. (see the GDT stuff)
	 */
	{ PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
	/*
	 * Has to be in very low memory so we can execute
	 * real-mode AP code.
	 */
	{ TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
#endif
	{}	/* terminator: .end == 0 */
};
593
594static int __init find_overlapped_early(u64 start, u64 end)
595{
596	int i;
597	struct early_res *r;
598
599	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
600		r = &early_res[i];
601		if (end > r->start && start < r->end)
602			break;
603	}
604
605	return i;
606}
607
608void __init reserve_early(u64 start, u64 end, char *name)
609{
610	int i;
611	struct early_res *r;
612
613	i = find_overlapped_early(start, end);
614	if (i >= MAX_EARLY_RES)
615		panic("Too many early reservations");
616	r = &early_res[i];
617	if (r->end)
618		panic("Overlapping early reservations "
619		      "%llx-%llx %s to %llx-%llx %s\n",
620		      start, end - 1, name?name:"", r->start,
621		      r->end - 1, r->name);
622	r->start = start;
623	r->end = end;
624	if (name)
625		strncpy(r->name, name, sizeof(r->name) - 1);
626}
627
628void __init free_early(u64 start, u64 end)
629{
630	struct early_res *r;
631	int i, j;
632
633	i = find_overlapped_early(start, end);
634	r = &early_res[i];
635	if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
636		panic("free_early on not reserved area: %llx-%llx!",
637			 start, end - 1);
638
639	for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
640		;
641
642	memmove(&early_res[i], &early_res[i + 1],
643	       (j - 1 - i) * sizeof(struct early_res));
644
645	early_res[j - 1].end = 0;
646}
647
648void __init early_res_to_bootmem(u64 start, u64 end)
649{
650	int i;
651	u64 final_start, final_end;
652	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
653		struct early_res *r = &early_res[i];
654		final_start = max(start, r->start);
655		final_end = min(end, r->end);
656		if (final_start >= final_end)
657			continue;
658		printk(KERN_INFO "  early res: %d [%llx-%llx] %s\n", i,
659			final_start, final_end - 1, r->name);
660		reserve_bootmem_generic(final_start, final_end - final_start,
661				BOOTMEM_DEFAULT);
662	}
663}
664
665/* Check for already reserved areas */
666static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
667{
668	int i;
669	u64 addr = *addrp;
670	int changed = 0;
671	struct early_res *r;
672again:
673	i = find_overlapped_early(addr, addr + size);
674	r = &early_res[i];
675	if (i < MAX_EARLY_RES && r->end) {
676		*addrp = addr = round_up(r->end, align);
677		changed = 1;
678		goto again;
679	}
680	return changed;
681}
682
/*
 * Check for already reserved areas.
 *
 * Trim the candidate range <*addrp, *addrp + *sizep> so that it no
 * longer overlaps any early reservation.  After every adjustment the
 * reservation table is rescanned from its start.
 *
 * Returns 1 when the range was adjusted, in which case *addrp and
 * *sizep hold the new range.  Returns 0 when the range was not
 * adjusted, and also when the (possibly already adjusted) range lies
 * completely inside a reservation; in that second case *sizep is
 * incremented by one and *addrp is left untouched.
 */
static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
{
	int i;
	u64 addr = *addrp, last;
	u64 size = *sizep;
	int changed = 0;
again:
	last = addr + size;
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
		struct early_res *r = &early_res[i];
		/* range runs into a reservation: cut it off at its start */
		if (last > r->start && addr < r->start) {
			size = r->start - addr;
			changed = 1;
			goto again;
		}
		/* range begins inside a reservation: start after its end */
		if (last > r->end && addr < r->end) {
			addr = round_up(r->end, align);
			size = last - addr;
			changed = 1;
			goto again;
		}
		/*
		 * range is swallowed whole by a reservation.
		 * NOTE(review): bumping *sizep looks like a way to make
		 * find_e820_area_size() see addr + *sizep beyond the end
		 * of the e820 entry and skip it -- confirm.
		 */
		if (last <= r->end && addr >= r->start) {
			(*sizep)++;
			return 0;
		}
	}
	if (changed) {
		*addrp = addr;
		*sizep = size;
	}
	return changed;
}
716
717/*
718 * Find a free area with specified alignment in a specific range.
719 */
720u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
721{
722	int i;
723
724	for (i = 0; i < e820.nr_map; i++) {
725		struct e820entry *ei = &e820.map[i];
726		u64 addr, last;
727		u64 ei_last;
728
729		if (ei->type != E820_RAM)
730			continue;
731		addr = round_up(ei->addr, align);
732		ei_last = ei->addr + ei->size;
733		if (addr < start)
734			addr = round_up(start, align);
735		if (addr >= ei_last)
736			continue;
737		while (bad_addr(&addr, size, align) && addr+size <= ei_last)
738			;
739		last = addr + size;
740		if (last > ei_last)
741			continue;
742		if (last > end)
743			continue;
744		return addr;
745	}
746	return -1ULL;
747}
748
749/*
750 * Find next free range after *start
751 */
752u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
753{
754	int i;
755
756	for (i = 0; i < e820.nr_map; i++) {
757		struct e820entry *ei = &e820.map[i];
758		u64 addr, last;
759		u64 ei_last;
760
761		if (ei->type != E820_RAM)
762			continue;
763		addr = round_up(ei->addr, align);
764		ei_last = ei->addr + ei->size;
765		if (addr < start)
766			addr = round_up(start, align);
767		if (addr >= ei_last)
768			continue;
769		*sizep = ei_last - addr;
770		while (bad_addr_size(&addr, sizep, align) &&
771			addr + *sizep <= ei_last)
772			;
773		last = addr + *sizep;
774		if (last > ei_last)
775			continue;
776		return addr;
777	}
778	return -1UL;
779
780}
781
782/*
783 * pre allocated 4k and reserved it in e820
784 */
785u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
786{
787	u64 size = 0;
788	u64 addr;
789	u64 start;
790
791	start = startt;
792	while (size < sizet)
793		start = find_e820_area_size(start, &size, align);
794
795	if (size < sizet)
796		return 0;
797
798	addr = round_down(start + size - sizet, align);
799	e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
800	printk(KERN_INFO "update e820 for early_reserve_e820\n");
801	update_e820();
802
803	return addr;
804}
805
806#ifdef CONFIG_X86_32
807# ifdef CONFIG_X86_PAE
808#  define MAX_ARCH_PFN		(1ULL<<(36-PAGE_SHIFT))
809# else
810#  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
811# endif
812#else /* CONFIG_X86_32 */
813# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
814#endif
815
816/*
817 * Last pfn which the user wants to use.
818 */
819unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;
820
821/*
822 * Find the highest page frame number we have available
823 */
824unsigned long __init e820_end_of_ram(void)
825{
826	unsigned long last_pfn;
827	unsigned long max_arch_pfn = MAX_ARCH_PFN;
828
829	last_pfn = find_max_pfn_with_active_regions();
830
831	if (last_pfn > max_arch_pfn)
832		last_pfn = max_arch_pfn;
833	if (last_pfn > end_user_pfn)
834		last_pfn = end_user_pfn;
835
836	printk(KERN_INFO "last_pfn = %lu max_arch_pfn = %lu\n",
837			 last_pfn, max_arch_pfn);
838	return last_pfn;
839}
840
841/*
842 * Finds an active region in the address range from start_pfn to last_pfn and
843 * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
844 */
845int __init e820_find_active_region(const struct e820entry *ei,
846				  unsigned long start_pfn,
847				  unsigned long last_pfn,
848				  unsigned long *ei_startpfn,
849				  unsigned long *ei_endpfn)
850{
851	u64 align = PAGE_SIZE;
852
853	*ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
854	*ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
855
856	/* Skip map entries smaller than a page */
857	if (*ei_startpfn >= *ei_endpfn)
858		return 0;
859
860	/* Skip if map is outside the node */
861	if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
862				    *ei_startpfn >= last_pfn)
863		return 0;
864
865	/* Check for overlaps */
866	if (*ei_startpfn < start_pfn)
867		*ei_startpfn = start_pfn;
868	if (*ei_endpfn > last_pfn)
869		*ei_endpfn = last_pfn;
870
871	/* Obey end_user_pfn to save on memmap */
872	if (*ei_startpfn >= end_user_pfn)
873		return 0;
874	if (*ei_endpfn > end_user_pfn)
875		*ei_endpfn = end_user_pfn;
876
877	return 1;
878}
879
880/* Walk the e820 map and register active regions within a node */
881void __init e820_register_active_regions(int nid, unsigned long start_pfn,
882					 unsigned long last_pfn)
883{
884	unsigned long ei_startpfn;
885	unsigned long ei_endpfn;
886	int i;
887
888	for (i = 0; i < e820.nr_map; i++)
889		if (e820_find_active_region(&e820.map[i],
890					    start_pfn, last_pfn,
891					    &ei_startpfn, &ei_endpfn))
892			add_active_range(nid, ei_startpfn, ei_endpfn);
893}
894
895/*
896 * Find the hole size (in bytes) in the memory range.
897 * @start: starting address of the memory range to scan
898 * @end: ending address of the memory range to scan
899 */
900u64 __init e820_hole_size(u64 start, u64 end)
901{
902	unsigned long start_pfn = start >> PAGE_SHIFT;
903	unsigned long last_pfn = end >> PAGE_SHIFT;
904	unsigned long ei_startpfn, ei_endpfn, ram = 0;
905	int i;
906
907	for (i = 0; i < e820.nr_map; i++) {
908		if (e820_find_active_region(&e820.map[i],
909					    start_pfn, last_pfn,
910					    &ei_startpfn, &ei_endpfn))
911			ram += ei_endpfn - ei_startpfn;
912	}
913	return end - start - ((u64)ram << PAGE_SHIFT);
914}
915
/*
 * Report a fatal boot-time error through early_printk() and then
 * panic().  msg is passed as the argument of a "%s" format so that it
 * is never itself interpreted as a format string.
 */
static void early_panic(char *msg)
{
	early_printk("%s", msg);
	panic("%s", msg);
}
921
922/* "mem=nopentium" disables the 4MB page tables. */
923static int __init parse_memopt(char *p)
924{
925	u64 mem_size;
926
927	if (!p)
928		return -EINVAL;
929
930#ifdef CONFIG_X86_32
931	if (!strcmp(p, "nopentium")) {
932		setup_clear_cpu_cap(X86_FEATURE_PSE);
933		return 0;
934	}
935#endif
936
937	mem_size = memparse(p, &p);
938	end_user_pfn = mem_size>>PAGE_SHIFT;
939	return 0;
940}
941early_param("mem", parse_memopt);
942
943static int userdef __initdata;
944
945static int __init parse_memmap_opt(char *p)
946{
947	char *oldp;
948	u64 start_at, mem_size;
949
950	if (!strcmp(p, "exactmap")) {
951#ifdef CONFIG_CRASH_DUMP
952		/*
953		 * If we are doing a crash dump, we still need to know
954		 * the real mem size before original memory map is
955		 * reset.
956		 */
957		e820_register_active_regions(0, 0, -1UL);
958		saved_max_pfn = e820_end_of_ram();
959		remove_all_active_ranges();
960#endif
961		e820.nr_map = 0;
962		userdef = 1;
963		return 0;
964	}
965
966	oldp = p;
967	mem_size = memparse(p, &p);
968	if (p == oldp)
969		return -EINVAL;
970
971	userdef = 1;
972	if (*p == '@') {
973		start_at = memparse(p+1, &p);
974		e820_add_region(start_at, mem_size, E820_RAM);
975	} else if (*p == '#') {
976		start_at = memparse(p+1, &p);
977		e820_add_region(start_at, mem_size, E820_ACPI);
978	} else if (*p == '$') {
979		start_at = memparse(p+1, &p);
980		e820_add_region(start_at, mem_size, E820_RESERVED);
981	} else {
982		end_user_pfn = (mem_size >> PAGE_SHIFT);
983	}
984	return *p == '\0' ? 0 : -EINVAL;
985}
986early_param("memmap", parse_memmap_opt);
987
988void __init finish_e820_parsing(void)
989{
990	if (userdef) {
991		int nr = e820.nr_map;
992
993		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
994			early_panic("Invalid user supplied memory map");
995		e820.nr_map = nr;
996
997		printk(KERN_INFO "user-defined physical RAM map:\n");
998		e820_print_map("user");
999	}
1000}
1001
1002/*
1003 * Mark e820 reserved areas as busy for the resource manager.
1004 */
1005void __init e820_reserve_resources(void)
1006{
1007	int i;
1008	struct resource *res;
1009
1010	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
1011	for (i = 0; i < e820.nr_map; i++) {
1012		switch (e820.map[i].type) {
1013		case E820_RAM:	res->name = "System RAM"; break;
1014		case E820_ACPI:	res->name = "ACPI Tables"; break;
1015		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
1016		default:	res->name = "reserved";
1017		}
1018		res->start = e820.map[i].addr;
1019		res->end = res->start + e820.map[i].size - 1;
1020#ifndef CONFIG_RESOURCES_64BIT
1021		if (res->end > 0x100000000ULL) {
1022			res++;
1023			continue;
1024		}
1025#endif
1026		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1027		insert_resource(&iomem_resource, res);
1028		res++;
1029	}
1030}
1031
1032
1033