sparse.c revision 4edd7ceff0662afde195da6f6c43e7cbe1ed2dc4
1d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft/*
2d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft * sparse memory mappings.
3d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft */
4d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft#include <linux/mm.h>
55a0e3ad6af8660be21ca98a971cd00f331318c05Tejun Heo#include <linux/slab.h>
6d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft#include <linux/mmzone.h>
7d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft#include <linux/bootmem.h>
80b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen#include <linux/highmem.h>
9b95f1b31b75588306e32b2afd32166cad48f670bPaul Gortmaker#include <linux/export.h>
1028ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen#include <linux/spinlock.h>
110b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen#include <linux/vmalloc.h>
120c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto#include "internal.h"
13d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft#include <asm/dma.h>
148f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter#include <asm/pgalloc.h>
158f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter#include <asm/pgtable.h>
16d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft
17d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft/*
18d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft * Permanent SPARSEMEM data:
19d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft *
20d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft * 1) mem_section	- memory sections, mem_map's for valid memory
21d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft */
223e347261a80b57df792ab9464b5f0ed59add53a8Bob Picco#ifdef CONFIG_SPARSEMEM_EXTREME
23802f192e4a600f7ef84ca25c8b818c8830acef5aBob Piccostruct mem_section *mem_section[NR_SECTION_ROOTS]
2422fc6eccbf4ce4eb6265e6ada7b50a7b9cc57d05Ravikiran G Thirumalai	____cacheline_internodealigned_in_smp;
253e347261a80b57df792ab9464b5f0ed59add53a8Bob Picco#else
263e347261a80b57df792ab9464b5f0ed59add53a8Bob Piccostruct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
2722fc6eccbf4ce4eb6265e6ada7b50a7b9cc57d05Ravikiran G Thirumalai	____cacheline_internodealigned_in_smp;
283e347261a80b57df792ab9464b5f0ed59add53a8Bob Picco#endif
293e347261a80b57df792ab9464b5f0ed59add53a8Bob PiccoEXPORT_SYMBOL(mem_section);
303e347261a80b57df792ab9464b5f0ed59add53a8Bob Picco
3189689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter#ifdef NODE_NOT_IN_PAGE_FLAGS
3289689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter/*
3389689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter * If we did not store the node number in the page then we have to
3489689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter * do a lookup in the section_to_node_table in order to find which
3589689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter * node the page belongs to.
3689689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter */
3789689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter#if MAX_NUMNODES <= 256
3889689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameterstatic u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
3989689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter#else
4089689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameterstatic u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
4189689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter#endif
4289689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter
4333dd4e0ec91138c3d80e790c08a3db47426c81f2Ian Campbellint page_to_nid(const struct page *page)
4489689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter{
4589689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter	return section_to_node_table[page_to_section(page)];
4689689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter}
4789689ae7f95995723fbcd5c116c47933a3bb8b13Christoph LameterEXPORT_SYMBOL(page_to_nid);
4885770ffe4f0cdd4396b17f14762adc25a571a348Andy Whitcroft
4985770ffe4f0cdd4396b17f14762adc25a571a348Andy Whitcroftstatic void set_section_nid(unsigned long section_nr, int nid)
5085770ffe4f0cdd4396b17f14762adc25a571a348Andy Whitcroft{
5185770ffe4f0cdd4396b17f14762adc25a571a348Andy Whitcroft	section_to_node_table[section_nr] = nid;
5285770ffe4f0cdd4396b17f14762adc25a571a348Andy Whitcroft}
5385770ffe4f0cdd4396b17f14762adc25a571a348Andy Whitcroft#else /* !NODE_NOT_IN_PAGE_FLAGS */
/*
 * No side table exists in this configuration (NODE_NOT_IN_PAGE_FLAGS is
 * not defined), so there is nothing to record.
 */
static inline void set_section_nid(unsigned long section_nr, int nid)
{
	/* intentionally empty */
}
5789689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter#endif
5889689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter
593e347261a80b57df792ab9464b5f0ed59add53a8Bob Picco#ifdef CONFIG_SPARSEMEM_EXTREME
60577a32f620271416d05f852477151fb51c790bc6Sam Ravnborgstatic struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
6128ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen{
6228ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	struct mem_section *section = NULL;
6328ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	unsigned long array_size = SECTIONS_PER_ROOT *
6428ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen				   sizeof(struct mem_section);
6528ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen
66f52407ce2deac76c87abc8211a63ea152ba72d54Shaohua Li	if (slab_is_available()) {
67f52407ce2deac76c87abc8211a63ea152ba72d54Shaohua Li		if (node_state(nid, N_HIGH_MEMORY))
685b760e64a64c8940cdccd0ba6fce19a9bd010d20Gavin Shan			section = kzalloc_node(array_size, GFP_KERNEL, nid);
69f52407ce2deac76c87abc8211a63ea152ba72d54Shaohua Li		else
705b760e64a64c8940cdccd0ba6fce19a9bd010d20Gavin Shan			section = kzalloc(array_size, GFP_KERNEL);
715b760e64a64c8940cdccd0ba6fce19a9bd010d20Gavin Shan	} else {
7246a66eecdf7bc12562ecb492297447ed0e1ecf59Mike Kravetz		section = alloc_bootmem_node(NODE_DATA(nid), array_size);
735b760e64a64c8940cdccd0ba6fce19a9bd010d20Gavin Shan	}
7428ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen
7528ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	return section;
763e347261a80b57df792ab9464b5f0ed59add53a8Bob Picco}
77802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco
78a3142c8e1dd57ff48040bdb3478cff9312543dc3Yasunori Gotostatic int __meminit sparse_index_init(unsigned long section_nr, int nid)
79802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco{
8028ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
8128ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	struct mem_section *section;
8228ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	int ret = 0;
83802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco
84802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco	if (mem_section[root])
8528ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen		return -EEXIST;
863e347261a80b57df792ab9464b5f0ed59add53a8Bob Picco
8728ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	section = sparse_index_alloc(nid);
88af0cd5a7c3cded50c25e98acd94912d17a0eb914WANG Cong	if (!section)
89af0cd5a7c3cded50c25e98acd94912d17a0eb914WANG Cong		return -ENOMEM;
9028ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen
9128ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	mem_section[root] = section;
92c1c9518331969f97ea403bac66f0fd4a85d204d5Gavin Shan
9328ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	return ret;
9428ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen}
9528ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen#else /* !SPARSEMEM_EXTREME */
/*
 * Without SPARSEMEM_EXTREME the section array is defined statically, so
 * there is nothing to allocate: always report success.
 */
static inline int sparse_index_init(unsigned long section_nr, int nid)
{
	(void)section_nr;
	(void)nid;

	return 0;
}
10028ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen#endif
10128ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen
1024ca644d970bf2542623228a4624af356d20ca267Dave Hansen/*
1034ca644d970bf2542623228a4624af356d20ca267Dave Hansen * Although written for the SPARSEMEM_EXTREME case, this happens
104cd881a6b22902b356cacf8fd2e4e895871068eecAndy Whitcroft * to also work for the flat array case because
1054ca644d970bf2542623228a4624af356d20ca267Dave Hansen * NR_SECTION_ROOTS==NR_MEM_SECTIONS.
1064ca644d970bf2542623228a4624af356d20ca267Dave Hansen */
1074ca644d970bf2542623228a4624af356d20ca267Dave Hansenint __section_nr(struct mem_section* ms)
1084ca644d970bf2542623228a4624af356d20ca267Dave Hansen{
1094ca644d970bf2542623228a4624af356d20ca267Dave Hansen	unsigned long root_nr;
1104ca644d970bf2542623228a4624af356d20ca267Dave Hansen	struct mem_section* root;
1114ca644d970bf2542623228a4624af356d20ca267Dave Hansen
11212783b002db1f02c29353c8f698a85514420b9f4Mike Kravetz	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
11312783b002db1f02c29353c8f698a85514420b9f4Mike Kravetz		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
1144ca644d970bf2542623228a4624af356d20ca267Dave Hansen		if (!root)
1154ca644d970bf2542623228a4624af356d20ca267Dave Hansen			continue;
1164ca644d970bf2542623228a4624af356d20ca267Dave Hansen
1174ca644d970bf2542623228a4624af356d20ca267Dave Hansen		if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
1184ca644d970bf2542623228a4624af356d20ca267Dave Hansen		     break;
1194ca644d970bf2542623228a4624af356d20ca267Dave Hansen	}
1204ca644d970bf2542623228a4624af356d20ca267Dave Hansen
121db36a46113e101a8aa2d6ede41e78f2eaabed3f1Gavin Shan	VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
122db36a46113e101a8aa2d6ede41e78f2eaabed3f1Gavin Shan
1234ca644d970bf2542623228a4624af356d20ca267Dave Hansen	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
1244ca644d970bf2542623228a4624af356d20ca267Dave Hansen}
1254ca644d970bf2542623228a4624af356d20ca267Dave Hansen
12630c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft/*
12730c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft * During early boot, before section_mem_map is used for an actual
12830c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft * mem_map, we use section_mem_map to store the section's NUMA
12930c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft * node.  This keeps us from having to use another data structure.  The
13030c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft * node information is cleared just before we store the real mem_map.
13130c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft */
13230c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroftstatic inline unsigned long sparse_encode_early_nid(int nid)
13330c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft{
13430c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft	return (nid << SECTION_NID_SHIFT);
13530c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft}
13630c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft
13730c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroftstatic inline int sparse_early_nid(struct mem_section *section)
13830c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft{
13930c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft	return (section->section_mem_map >> SECTION_NID_SHIFT);
14030c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft}
14130c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft
1422dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman/* Validate the physical addressing limitations of the model */
1432dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gormanvoid __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
1442dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman						unsigned long *end_pfn)
145d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft{
1462dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
147d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft
148bead9a3abd15710b0bdfd418daef606722d86282Ingo Molnar	/*
149bead9a3abd15710b0bdfd418daef606722d86282Ingo Molnar	 * Sanity checks - do not allow an architecture to pass
150bead9a3abd15710b0bdfd418daef606722d86282Ingo Molnar	 * in larger pfns than the maximum scope of sparsemem:
151bead9a3abd15710b0bdfd418daef606722d86282Ingo Molnar	 */
1522dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman	if (*start_pfn > max_sparsemem_pfn) {
1532dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
1542dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
1552dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman			*start_pfn, *end_pfn, max_sparsemem_pfn);
1562dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman		WARN_ON_ONCE(1);
1572dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman		*start_pfn = max_sparsemem_pfn;
1582dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman		*end_pfn = max_sparsemem_pfn;
159ef161a9863b045909142daea9490b067997f3dc5Cyrill Gorcunov	} else if (*end_pfn > max_sparsemem_pfn) {
1602dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
1612dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
1622dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman			*start_pfn, *end_pfn, max_sparsemem_pfn);
1632dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman		WARN_ON_ONCE(1);
1642dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman		*end_pfn = max_sparsemem_pfn;
1652dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman	}
1662dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman}
1672dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman
1682dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman/* Record a memory area against a node. */
1692dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gormanvoid __init memory_present(int nid, unsigned long start, unsigned long end)
1702dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman{
1712dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman	unsigned long pfn;
172bead9a3abd15710b0bdfd418daef606722d86282Ingo Molnar
173d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	start &= PAGE_SECTION_MASK;
1742dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman	mminit_validate_memmodel_limits(&start, &end);
175d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
176d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft		unsigned long section = pfn_to_section_nr(pfn);
177802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco		struct mem_section *ms;
178802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco
179802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco		sparse_index_init(section, nid);
18085770ffe4f0cdd4396b17f14762adc25a571a348Andy Whitcroft		set_section_nid(section, nid);
181802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco
182802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco		ms = __nr_to_section(section);
183802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco		if (!ms->section_mem_map)
18430c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft			ms->section_mem_map = sparse_encode_early_nid(nid) |
18530c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft							SECTION_MARKED_PRESENT;
186d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	}
187d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft}
188d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft
189d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft/*
190d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft * Only used by the i386 NUMA architecures, but relatively
191d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft * generic code.
192d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft */
193d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroftunsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
194d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft						     unsigned long end_pfn)
195d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft{
196d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	unsigned long pfn;
197d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	unsigned long nr_pages = 0;
198d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft
1992dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman	mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
200d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
201d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft		if (nid != early_pfn_to_nid(pfn))
202d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft			continue;
203d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft
204540557b9439ec19668553830c90222f9fb0c2e95Andy Whitcroft		if (pfn_present(pfn))
205d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft			nr_pages += PAGES_PER_SECTION;
206d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	}
207d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft
208d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	return nr_pages * sizeof(struct page);
209d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft}
210d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft
211d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft/*
21229751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft * Subtle, we encode the real pfn into the mem_map such that
21329751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft * the identity pfn - section_mem_map will return the actual
21429751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft * physical page frame number.
21529751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft */
21629751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroftstatic unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
21729751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft{
21829751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
21929751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft}
22029751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
22129751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft/*
222ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty * Decode mem_map from the coded memmap
22329751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft */
22429751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroftstruct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
22529751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft{
226ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty	/* mask off the extra low bits of information */
227ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty	coded_mem_map &= SECTION_MAP_MASK;
22829751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
22929751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft}
23029751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
231a3142c8e1dd57ff48040bdb3478cff9312543dc3Yasunori Gotostatic int __meminit sparse_init_one_section(struct mem_section *ms,
2325c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman		unsigned long pnum, struct page *mem_map,
2335c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman		unsigned long *pageblock_bitmap)
23429751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft{
235540557b9439ec19668553830c90222f9fb0c2e95Andy Whitcroft	if (!present_section(ms))
23629751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft		return -EINVAL;
23729751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
23830c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft	ms->section_mem_map &= ~SECTION_MAP_MASK;
239540557b9439ec19668553830c90222f9fb0c2e95Andy Whitcroft	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
240540557b9439ec19668553830c90222f9fb0c2e95Andy Whitcroft							SECTION_HAS_MEM_MAP;
2415c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman 	ms->pageblock_flags = pageblock_bitmap;
24229751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
24329751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	return 1;
24429751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft}
24529751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
24604753278769f3b6c3b79a080edb52f21d83bf6e2Yasunori Gotounsigned long usemap_size(void)
2475c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman{
2485c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	unsigned long size_bytes;
2495c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;
2505c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	size_bytes = roundup(size_bytes, sizeof(unsigned long));
2515c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	return size_bytes;
2525c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman}
2535c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman
2545c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman#ifdef CONFIG_MEMORY_HOTPLUG
2555c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gormanstatic unsigned long *__kmalloc_section_usemap(void)
2565c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman{
2575c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	return kmalloc(usemap_size(), GFP_KERNEL);
2585c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman}
2595c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman#endif /* CONFIG_MEMORY_HOTPLUG */
2605c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman
26148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto#ifdef CONFIG_MEMORY_HOTREMOVE
26248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Gotostatic unsigned long * __init
263a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lusparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
264238305bb4d418c95977162ba13c11880685fc731Johannes Weiner					 unsigned long size)
26548c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto{
26699ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	unsigned long goal, limit;
26799ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	unsigned long *p;
26899ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	int nid;
26948c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	/*
27048c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * A page may contain usemaps for other sections preventing the
27148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * page being freed and making a section unremovable while
27248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * other sections referencing the usemap retmain active. Similarly,
27348c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * a pgdat can prevent a section being removed. If section A
27448c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * contains a pgdat and section B contains the usemap, both
27548c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * sections become inter-dependent. This allocates usemaps
27648c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * from the same section as the pgdat where possible to avoid
27748c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * this problem.
27848c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 */
27907b4e2bc9c35ea88cbd36d806fcd5e3bcbf022beYinghai Lu	goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
28099ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	limit = goal + (1UL << PA_SECTION_SHIFT);
28199ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
28299ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Luagain:
28399ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	p = ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
28499ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu					  SMP_CACHE_BYTES, goal, limit);
28599ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	if (!p && limit) {
28699ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu		limit = 0;
28799ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu		goto again;
28899ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	}
28999ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	return p;
29048c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto}
29148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
29248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Gotostatic void __init check_usemap_section_nr(int nid, unsigned long *usemap)
29348c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto{
29448c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	unsigned long usemap_snr, pgdat_snr;
29548c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
29648c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
29748c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	struct pglist_data *pgdat = NODE_DATA(nid);
29848c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	int usemap_nid;
29948c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
30048c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
30148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
30248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	if (usemap_snr == pgdat_snr)
30348c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto		return;
30448c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
30548c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
30648c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto		/* skip redundant message */
30748c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto		return;
30848c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
30948c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	old_usemap_snr = usemap_snr;
31048c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	old_pgdat_snr = pgdat_snr;
31148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
31248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
31348c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	if (usemap_nid != nid) {
31448c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto		printk(KERN_INFO
31548c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto		       "node %d must be removed before remove section %ld\n",
31648c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto		       nid, usemap_snr);
31748c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto		return;
31848c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	}
31948c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	/*
32048c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * There is a circular dependency.
32148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * Some platforms allow un-removable section because they will just
32248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * gather other removable sections for dynamic partitioning.
32348c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * Just notify un-removable section's number here.
32448c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 */
32548c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	printk(KERN_INFO "Section %ld and %ld (node %d)", usemap_snr,
32648c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	       pgdat_snr, nid);
32748c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	printk(KERN_CONT
32848c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	       " have a circular dependency on usemap and pgdat allocations\n");
32948c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto}
33048c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto#else
33148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Gotostatic unsigned long * __init
332a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lusparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
333238305bb4d418c95977162ba13c11880685fc731Johannes Weiner					 unsigned long size)
33448c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto{
335238305bb4d418c95977162ba13c11880685fc731Johannes Weiner	return alloc_bootmem_node_nopanic(pgdat, size);
33648c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto}
33748c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
33848c906823f3927b981db9f0b03c2e2499977ee93Yasunori Gotostatic void __init check_usemap_section_nr(int nid, unsigned long *usemap)
33948c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto{
34048c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto}
34148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto#endif /* CONFIG_MEMORY_HOTREMOVE */
34248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
343a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lustatic void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map,
344a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu				 unsigned long pnum_begin,
345a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu				 unsigned long pnum_end,
346a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu				 unsigned long usemap_count, int nodeid)
3475c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman{
348a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	void *usemap;
349a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	unsigned long pnum;
350a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	int size = usemap_size();
3515c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman
352a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
353238305bb4d418c95977162ba13c11880685fc731Johannes Weiner							  size * usemap_count);
354f5bf18fa22f8c41a13eb8762c7373eb3a93a7333Nishanth Aravamudan	if (!usemap) {
355238305bb4d418c95977162ba13c11880685fc731Johannes Weiner		printk(KERN_WARNING "%s: allocation failed\n", __func__);
356238305bb4d418c95977162ba13c11880685fc731Johannes Weiner		return;
35748c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	}
35848c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
359f5bf18fa22f8c41a13eb8762c7373eb3a93a7333Nishanth Aravamudan	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
360f5bf18fa22f8c41a13eb8762c7373eb3a93a7333Nishanth Aravamudan		if (!present_section_nr(pnum))
361f5bf18fa22f8c41a13eb8762c7373eb3a93a7333Nishanth Aravamudan			continue;
362f5bf18fa22f8c41a13eb8762c7373eb3a93a7333Nishanth Aravamudan		usemap_map[pnum] = usemap;
363f5bf18fa22f8c41a13eb8762c7373eb3a93a7333Nishanth Aravamudan		usemap += size;
364f5bf18fa22f8c41a13eb8762c7373eb3a93a7333Nishanth Aravamudan		check_usemap_section_nr(nodeid, usemap_map[pnum]);
365a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	}
3665c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman}
3675c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman
3688f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter#ifndef CONFIG_SPARSEMEM_VMEMMAP
36998f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Gotostruct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
37029751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft{
37129751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	struct page *map;
372e48e67e08c340def3d0349c2910d23c7985fb6faYinghai Lu	unsigned long size;
37329751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
37429751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
37529751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	if (map)
37629751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft		return map;
37729751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
378e48e67e08c340def3d0349c2910d23c7985fb6faYinghai Lu	size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
379e48e67e08c340def3d0349c2910d23c7985fb6faYinghai Lu	map = __alloc_bootmem_node_high(NODE_DATA(nid), size,
380e48e67e08c340def3d0349c2910d23c7985fb6faYinghai Lu					 PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
3818f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter	return map;
3828f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter}
3839bdac914240759457175ac0d6529a37d2820bc4dYinghai Luvoid __init sparse_mem_maps_populate_node(struct page **map_map,
3849bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu					  unsigned long pnum_begin,
3859bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu					  unsigned long pnum_end,
3869bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu					  unsigned long map_count, int nodeid)
3879bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu{
3889bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	void *map;
3899bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	unsigned long pnum;
3909bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
3919bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu
3929bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	map = alloc_remap(nodeid, size * map_count);
3939bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	if (map) {
3949bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
3959bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			if (!present_section_nr(pnum))
3969bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu				continue;
3979bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			map_map[pnum] = map;
3989bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			map += size;
3999bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		}
4009bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		return;
4019bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	}
4029bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu
4039bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	size = PAGE_ALIGN(size);
404e48e67e08c340def3d0349c2910d23c7985fb6faYinghai Lu	map = __alloc_bootmem_node_high(NODE_DATA(nodeid), size * map_count,
405e48e67e08c340def3d0349c2910d23c7985fb6faYinghai Lu					 PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
4069bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	if (map) {
4079bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
4089bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			if (!present_section_nr(pnum))
4099bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu				continue;
4109bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			map_map[pnum] = map;
4119bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			map += size;
4129bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		}
4139bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		return;
4149bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	}
4159bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu
4169bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	/* fallback */
4179bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
4189bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		struct mem_section *ms;
4199bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu
4209bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		if (!present_section_nr(pnum))
4219bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			continue;
4229bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
4239bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		if (map_map[pnum])
4249bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			continue;
4259bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		ms = __nr_to_section(pnum);
4269bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		printk(KERN_ERR "%s: sparsemem memory map backing failed "
4279bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			"some memory will not be available.\n", __func__);
4289bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		ms->section_mem_map = 0;
4299bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	}
4309bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu}
4318f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
4328f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter
43381d0d950e5037a26b71e568ff235ff9e998f4ab3Yinghai Lu#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
4349bdac914240759457175ac0d6529a37d2820bc4dYinghai Lustatic void __init sparse_early_mem_maps_alloc_node(struct page **map_map,
4359bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu				 unsigned long pnum_begin,
4369bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu				 unsigned long pnum_end,
4379bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu				 unsigned long map_count, int nodeid)
4389bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu{
4399bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
4409bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu					 map_count, nodeid);
4419bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu}
44281d0d950e5037a26b71e568ff235ff9e998f4ab3Yinghai Lu#else
4439e5c6da71e89fa25ced6e88182225a99941bec90Adrian Bunkstatic struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
4448f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter{
4458f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter	struct page *map;
4468f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter	struct mem_section *ms = __nr_to_section(pnum);
4478f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter	int nid = sparse_early_nid(ms);
4488f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter
44998f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto	map = sparse_mem_map_populate(pnum, nid);
45029751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	if (map)
45129751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft		return map;
45229751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
4538f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter	printk(KERN_ERR "%s: sparsemem memory map backing failed "
454d40cee245ff6ad05d3448401d7320be82c1c5af1Harvey Harrison			"some memory will not be available.\n", __func__);
455802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco	ms->section_mem_map = 0;
45629751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	return NULL;
45729751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft}
4589bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#endif
45929751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
460c2b91e2eec9678dbda274e906cc32ea8f711da3bYinghai Luvoid __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
461c2b91e2eec9678dbda274e906cc32ea8f711da3bYinghai Lu{
462c2b91e2eec9678dbda274e906cc32ea8f711da3bYinghai Lu}
463a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu
464193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell/*
465193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell * Allocate the accumulated non-linear sections, allocate a mem_map
466193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell * for each and record the physical to section mapping.
467193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell */
468193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwellvoid __init sparse_init(void)
469193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell{
470193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell	unsigned long pnum;
471193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell	struct page *map;
4725c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	unsigned long *usemap;
473e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	unsigned long **usemap_map;
47481d0d950e5037a26b71e568ff235ff9e998f4ab3Yinghai Lu	int size;
475a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	int nodeid_begin = 0;
476a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	unsigned long pnum_begin = 0;
477a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	unsigned long usemap_count;
47881d0d950e5037a26b71e568ff235ff9e998f4ab3Yinghai Lu#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
4799bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	unsigned long map_count;
48081d0d950e5037a26b71e568ff235ff9e998f4ab3Yinghai Lu	int size2;
48181d0d950e5037a26b71e568ff235ff9e998f4ab3Yinghai Lu	struct page **map_map;
48281d0d950e5037a26b71e568ff235ff9e998f4ab3Yinghai Lu#endif
483e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu
484ca57df79d4f64e1a4886606af4289d40636189c5Xishi Qiu	/* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
485ca57df79d4f64e1a4886606af4289d40636189c5Xishi Qiu	set_pageblock_order();
486ca57df79d4f64e1a4886606af4289d40636189c5Xishi Qiu
487e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	/*
488e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 * map is using big page (aka 2M in x86 64 bit)
489e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 * usemap is less one page (aka 24 bytes)
490e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 * so alloc 2M (with 2M align) and 24 bytes in turn will
491e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 * make next 2M slip to one more 2M later.
492e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 * then in big system, the memory will have a lot of holes...
49325985edcedea6396277003854657b5f3cb31a628Lucas De Marchi	 * here try to allocate 2M pages continuously.
494e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 *
495e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 * powerpc need to call sparse_init_one_section right after each
496e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 * sparse_early_mem_map_alloc, so allocate usemap_map at first.
497e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 */
498e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
499e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	usemap_map = alloc_bootmem(size);
500e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	if (!usemap_map)
501e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu		panic("can not allocate usemap_map\n");
502193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell
503193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
504a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		struct mem_section *ms;
505a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu
506540557b9439ec19668553830c90222f9fb0c2e95Andy Whitcroft		if (!present_section_nr(pnum))
507193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell			continue;
508a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		ms = __nr_to_section(pnum);
509a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		nodeid_begin = sparse_early_nid(ms);
510a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		pnum_begin = pnum;
511a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		break;
512a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	}
513a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	usemap_count = 1;
514a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
515a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		struct mem_section *ms;
516a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		int nodeid;
517a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu
518a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		if (!present_section_nr(pnum))
519a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu			continue;
520a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		ms = __nr_to_section(pnum);
521a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		nodeid = sparse_early_nid(ms);
522a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		if (nodeid == nodeid_begin) {
523a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu			usemap_count++;
524a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu			continue;
525a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		}
526a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		/* ok, we need to take cake of from pnum_begin to pnum - 1*/
527a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum,
528a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu						 usemap_count, nodeid_begin);
529a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		/* new start, update count etc*/
530a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		nodeid_begin = nodeid;
531a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		pnum_begin = pnum;
532a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu		usemap_count = 1;
533e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	}
534a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	/* ok, last chunk */
535a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS,
536a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu					 usemap_count, nodeid_begin);
537193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell
5389bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
5399bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
5409bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	map_map = alloc_bootmem(size2);
5419bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	if (!map_map)
5429bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		panic("can not allocate map_map\n");
5439bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu
5449bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
5459bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		struct mem_section *ms;
5469bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu
5479bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		if (!present_section_nr(pnum))
5489bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			continue;
5499bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		ms = __nr_to_section(pnum);
5509bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		nodeid_begin = sparse_early_nid(ms);
5519bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		pnum_begin = pnum;
5529bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		break;
5539bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	}
5549bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	map_count = 1;
5559bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
5569bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		struct mem_section *ms;
5579bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		int nodeid;
5589bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu
5599bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		if (!present_section_nr(pnum))
5609bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			continue;
5619bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		ms = __nr_to_section(pnum);
5629bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		nodeid = sparse_early_nid(ms);
5639bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		if (nodeid == nodeid_begin) {
5649bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			map_count++;
5659bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			continue;
5669bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		}
5679bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		/* ok, we need to take cake of from pnum_begin to pnum - 1*/
5689bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum,
5699bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu						 map_count, nodeid_begin);
5709bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		/* new start, update count etc*/
5719bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		nodeid_begin = nodeid;
5729bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		pnum_begin = pnum;
5739bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		map_count = 1;
5749bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	}
5759bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	/* ok, last chunk */
5769bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS,
5779bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu					 map_count, nodeid_begin);
5789bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#endif
5799bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu
580e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
581e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu		if (!present_section_nr(pnum))
582193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell			continue;
5835c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman
584e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu		usemap = usemap_map[pnum];
5855c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman		if (!usemap)
5865c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman			continue;
5875c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman
5889bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
5899bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		map = map_map[pnum];
5909bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#else
591e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu		map = sparse_early_mem_map_alloc(pnum);
5929bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#endif
593e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu		if (!map)
594e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu			continue;
595e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu
5965c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman		sparse_init_one_section(__nr_to_section(pnum), pnum, map,
5975c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman								usemap);
598193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell	}
599e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu
600c2b91e2eec9678dbda274e906cc32ea8f711da3bYinghai Lu	vmemmap_populate_print_last();
601c2b91e2eec9678dbda274e906cc32ea8f711da3bYinghai Lu
6029bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
6039bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	free_bootmem(__pa(map_map), size2);
6049bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#endif
605e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	free_bootmem(__pa(usemap_map), size);
606193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell}
607193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell
608193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell#ifdef CONFIG_MEMORY_HOTPLUG
60998f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * CONFIG_SPARSEMEM_VMEMMAP variant: obtain the mem_map for section @pnum on
 * node @nid from sparse_mem_map_populate().  @nr_pages is not used here.
 */
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
						 unsigned long nr_pages)
{
	struct page *memmap;

	/* This will make the necessary allocations eventually. */
	memmap = sparse_mem_map_populate(pnum, nid);

	return memmap;
}
61698f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Gotostatic void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
61798f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto{
6180aad818b2de455f1bfd7ef87c28cdbbaaed9a699Johannes Weiner	unsigned long start = (unsigned long)memmap;
6190aad818b2de455f1bfd7ef87c28cdbbaaed9a699Johannes Weiner	unsigned long end = (unsigned long)(memmap + nr_pages);
6200aad818b2de455f1bfd7ef87c28cdbbaaed9a699Johannes Weiner
6210aad818b2de455f1bfd7ef87c28cdbbaaed9a699Johannes Weiner	vmemmap_free(start, end);
62298f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto}
6234edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes#ifdef CONFIG_MEMORY_HOTREMOVE
624ae64ffcac35de0db628ba9631edf8ff34c5cd7acJianguo Wustatic void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
6250c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto{
6260aad818b2de455f1bfd7ef87c28cdbbaaed9a699Johannes Weiner	unsigned long start = (unsigned long)memmap;
6270aad818b2de455f1bfd7ef87c28cdbbaaed9a699Johannes Weiner	unsigned long end = (unsigned long)(memmap + nr_pages);
6280aad818b2de455f1bfd7ef87c28cdbbaaed9a699Johannes Weiner
6290aad818b2de455f1bfd7ef87c28cdbbaaed9a699Johannes Weiner	vmemmap_free(start, end);
6300c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto}
6314edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes#endif /* CONFIG_MEMORY_HOTREMOVE */
63298f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto#else
6330b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansenstatic struct page *__kmalloc_section_memmap(unsigned long nr_pages)
6340b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen{
6350b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	struct page *page, *ret;
6360b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	unsigned long memmap_size = sizeof(struct page) * nr_pages;
6370b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
638f2d0aa5bf8d4f7ae4cb1a7feebf5b1afddd0b9b0Yasunori Goto	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
6390b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	if (page)
6400b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen		goto got_map_page;
6410b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
6420b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	ret = vmalloc(memmap_size);
6430b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	if (ret)
6440b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen		goto got_map_ptr;
6450b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
6460b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	return NULL;
6470b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansengot_map_page:
6480b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
6490b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansengot_map_ptr:
6500b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
6510b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	return ret;
6520b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen}
6530b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
/*
 * Non-VMEMMAP variant: allocate a mem_map of @nr_pages entries via
 * __kmalloc_section_memmap().  @pnum and @nid are not used here.
 */
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
						  unsigned long nr_pages)
{
	struct page *memmap = __kmalloc_section_memmap(nr_pages);

	return memmap;
}
65998f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto
6600b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansenstatic void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
6610b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen{
6629e2779fa281cfda13ac060753d674bbcaa23367eChristoph Lameter	if (is_vmalloc_addr(memmap))
6630b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen		vfree(memmap);
6640b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	else
6650b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen		free_pages((unsigned long)memmap,
6660b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen			   get_order(sizeof(struct page) * nr_pages));
6670b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen}
6680c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto
6694edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes#ifdef CONFIG_MEMORY_HOTREMOVE
670ae64ffcac35de0db628ba9631edf8ff34c5cd7acJianguo Wustatic void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
6710c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto{
6720c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto	unsigned long maps_section_nr, removing_section_nr, i;
6735f24ce5fd34c3ca1b3d10d30da754732da64d5c0Andrea Arcangeli	unsigned long magic;
674ae64ffcac35de0db628ba9631edf8ff34c5cd7acJianguo Wu	struct page *page = virt_to_page(memmap);
6750c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto
6760c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto	for (i = 0; i < nr_pages; i++, page++) {
6775f24ce5fd34c3ca1b3d10d30da754732da64d5c0Andrea Arcangeli		magic = (unsigned long) page->lru.next;
6780c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto
6790c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		BUG_ON(magic == NODE_INFO);
6800c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto
6810c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
6820c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		removing_section_nr = page->private;
6830c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto
6840c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		/*
6850c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		 * When this function is called, the removing section is
6860c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		 * logical offlined state. This means all pages are isolated
6870c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		 * from page allocator. If removing section's memmap is placed
6880c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		 * on the same section, it must not be freed.
6890c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		 * If it is freed, page allocator may allocate it which will
6900c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		 * be removed physically soon.
6910c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		 */
6920c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		if (maps_section_nr != removing_section_nr)
6930c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto			put_page_bootmem(page);
6940c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto	}
6950c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto}
6964edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes#endif /* CONFIG_MEMORY_HOTREMOVE */
69798f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto#endif /* CONFIG_SPARSEMEM_VMEMMAP */
6980b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
69929751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft/*
70029751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft * returns the number of sections whose mem_maps were properly
70129751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft * set.  If this is <=0, then that means that the passed-in
70229751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft * map was not consumed and must be freed.
70329751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft */
70431168481c32c8a485e1003af9433124dede57f8dAl Viroint __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
7050b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen			   int nr_pages)
70629751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft{
7070b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	unsigned long section_nr = pfn_to_section_nr(start_pfn);
7080b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	struct pglist_data *pgdat = zone->zone_pgdat;
7090b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	struct mem_section *ms;
7100b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	struct page *memmap;
7115c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	unsigned long *usemap;
7120b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	unsigned long flags;
7130b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	int ret;
71429751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
7150b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	/*
7160b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	 * no locking for this, because it does its own
7170b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	 * plus, it does a kmalloc
7180b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	 */
719bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong	ret = sparse_index_init(section_nr, pgdat->node_id);
720bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong	if (ret < 0 && ret != -EEXIST)
721bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong		return ret;
72298f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto	memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages);
723bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong	if (!memmap)
724bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong		return -ENOMEM;
7255c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	usemap = __kmalloc_section_usemap();
726bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong	if (!usemap) {
727bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong		__kfree_section_memmap(memmap, nr_pages);
728bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong		return -ENOMEM;
729bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong	}
7300b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
7310b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	pgdat_resize_lock(pgdat, &flags);
73229751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
7330b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	ms = __pfn_to_section(start_pfn);
7340b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
7350b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen		ret = -EEXIST;
7360b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen		goto out;
7370b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	}
7385c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman
7393ac19f8efe26451cacac31d0be34fa9c51114c2aWen Congyang	memset(memmap, 0, sizeof(struct page) * nr_pages);
7403ac19f8efe26451cacac31d0be34fa9c51114c2aWen Congyang
74129751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	ms->section_mem_map |= SECTION_MARKED_PRESENT;
74229751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
7435c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	ret = sparse_init_one_section(ms, section_nr, memmap, usemap);
7440b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
7450b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansenout:
7460b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	pgdat_resize_unlock(pgdat, &flags);
747bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong	if (ret <= 0) {
748bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong		kfree(usemap);
74946a66eecdf7bc12562ecb492297447ed0e1ecf59Mike Kravetz		__kfree_section_memmap(memmap, nr_pages);
750bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong	}
7510b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	return ret;
75229751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft}
753ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty
75495a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang#ifdef CONFIG_MEMORY_FAILURE
75595a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyangstatic void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
75695a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang{
75795a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang	int i;
75895a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang
75995a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang	if (!memmap)
76095a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang		return;
76195a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang
76295a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang	for (i = 0; i < PAGES_PER_SECTION; i++) {
76395a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang		if (PageHWPoison(&memmap[i])) {
764293c07e31ab5a0b8df8c19b2a9e5c6fa30308849Xishi Qiu			atomic_long_sub(1, &num_poisoned_pages);
76595a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang			ClearPageHWPoison(&memmap[i]);
76695a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang		}
76795a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang	}
76895a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang}
76995a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang#else
/* Without CONFIG_MEMORY_FAILURE this is an empty stub. */
static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
	/* intentionally empty */
}
77395a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang#endif
77495a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang
7754edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes#ifdef CONFIG_MEMORY_HOTREMOVE
7764edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjesstatic void free_section_usemap(struct page *memmap, unsigned long *usemap)
7774edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes{
7784edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	struct page *usemap_page;
7794edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	unsigned long nr_pages;
7804edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes
7814edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	if (!usemap)
7824edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes		return;
7834edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes
7844edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	usemap_page = virt_to_page(usemap);
7854edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	/*
7864edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	 * Check to see if allocation came from hot-plug-add
7874edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	 */
7884edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
7894edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes		kfree(usemap);
7904edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes		if (memmap)
7914edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes			__kfree_section_memmap(memmap, PAGES_PER_SECTION);
7924edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes		return;
7934edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	}
7944edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes
7954edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	/*
7964edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	 * The usemap came from bootmem. This is packed with other usemaps
7974edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	 * on the section which has pgdat at boot time. Just keep it as is now.
7984edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	 */
7994edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes
8004edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	if (memmap) {
8014edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes		nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
8024edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes			>> PAGE_SHIFT;
8034edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes
8044edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes		free_map_bootmem(memmap, nr_pages);
8054edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes	}
8064edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes}
8074edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes
808ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavartyvoid sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
809ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty{
810ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty	struct page *memmap = NULL;
811cd099682e4c786c3a866e462b37fcac6e3a44a68Tang Chen	unsigned long *usemap = NULL, flags;
812cd099682e4c786c3a866e462b37fcac6e3a44a68Tang Chen	struct pglist_data *pgdat = zone->zone_pgdat;
813ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty
814cd099682e4c786c3a866e462b37fcac6e3a44a68Tang Chen	pgdat_resize_lock(pgdat, &flags);
815ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty	if (ms->section_mem_map) {
816ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty		usemap = ms->pageblock_flags;
817ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty		memmap = sparse_decode_mem_map(ms->section_mem_map,
818ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty						__section_nr(ms));
819ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty		ms->section_mem_map = 0;
820ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty		ms->pageblock_flags = NULL;
821ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty	}
822cd099682e4c786c3a866e462b37fcac6e3a44a68Tang Chen	pgdat_resize_unlock(pgdat, &flags);
823ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty
82495a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang	clear_hwpoisoned_pages(memmap, PAGES_PER_SECTION);
825ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty	free_section_usemap(memmap, usemap);
826ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty}
8274edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes#endif /* CONFIG_MEMORY_HOTREMOVE */
8284edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes#endif /* CONFIG_MEMORY_HOTPLUG */
829