1d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft/*
2d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft * sparse memory mappings.
3d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft */
4d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft#include <linux/mm.h>
55a0e3ad6af8660be21ca98a971cd00f331318c05Tejun Heo#include <linux/slab.h>
6d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft#include <linux/mmzone.h>
7d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft#include <linux/bootmem.h>
83b32123d734cb414e366b35a3b2142a995f9d1a0Gideon Israel Dsouza#include <linux/compiler.h>
90b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen#include <linux/highmem.h>
10b95f1b31b75588306e32b2afd32166cad48f670bPaul Gortmaker#include <linux/export.h>
1128ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen#include <linux/spinlock.h>
120b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen#include <linux/vmalloc.h>
133b32123d734cb414e366b35a3b2142a995f9d1a0Gideon Israel Dsouza
140c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto#include "internal.h"
15d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft#include <asm/dma.h>
168f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter#include <asm/pgalloc.h>
178f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter#include <asm/pgtable.h>
18d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft
19d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft/*
20d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft * Permanent SPARSEMEM data:
21d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft *
22d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft * 1) mem_section	- memory sections, mem_map's for valid memory
23d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft */
243e347261a80b57df792ab9464b5f0ed59add53a8Bob Picco#ifdef CONFIG_SPARSEMEM_EXTREME
25802f192e4a600f7ef84ca25c8b818c8830acef5aBob Piccostruct mem_section *mem_section[NR_SECTION_ROOTS]
2622fc6eccbf4ce4eb6265e6ada7b50a7b9cc57d05Ravikiran G Thirumalai	____cacheline_internodealigned_in_smp;
273e347261a80b57df792ab9464b5f0ed59add53a8Bob Picco#else
283e347261a80b57df792ab9464b5f0ed59add53a8Bob Piccostruct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
2922fc6eccbf4ce4eb6265e6ada7b50a7b9cc57d05Ravikiran G Thirumalai	____cacheline_internodealigned_in_smp;
303e347261a80b57df792ab9464b5f0ed59add53a8Bob Picco#endif
313e347261a80b57df792ab9464b5f0ed59add53a8Bob PiccoEXPORT_SYMBOL(mem_section);
323e347261a80b57df792ab9464b5f0ed59add53a8Bob Picco
3389689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter#ifdef NODE_NOT_IN_PAGE_FLAGS
3489689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter/*
3589689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter * If we did not store the node number in the page then we have to
3689689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter * do a lookup in the section_to_node_table in order to find which
3789689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter * node the page belongs to.
3889689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter */
3989689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter#if MAX_NUMNODES <= 256
4089689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameterstatic u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
4189689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter#else
4289689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameterstatic u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
4389689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter#endif
4489689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter
4533dd4e0ec91138c3d80e790c08a3db47426c81f2Ian Campbellint page_to_nid(const struct page *page)
4689689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter{
4789689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter	return section_to_node_table[page_to_section(page)];
4889689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter}
4989689ae7f95995723fbcd5c116c47933a3bb8b13Christoph LameterEXPORT_SYMBOL(page_to_nid);
5085770ffe4f0cdd4396b17f14762adc25a571a348Andy Whitcroft
5185770ffe4f0cdd4396b17f14762adc25a571a348Andy Whitcroftstatic void set_section_nid(unsigned long section_nr, int nid)
5285770ffe4f0cdd4396b17f14762adc25a571a348Andy Whitcroft{
5385770ffe4f0cdd4396b17f14762adc25a571a348Andy Whitcroft	section_to_node_table[section_nr] = nid;
5485770ffe4f0cdd4396b17f14762adc25a571a348Andy Whitcroft}
5585770ffe4f0cdd4396b17f14762adc25a571a348Andy Whitcroft#else /* !NODE_NOT_IN_PAGE_FLAGS */
/* No section_to_node_table in this configuration: nothing to record. */
static inline void set_section_nid(unsigned long section_nr, int nid)
{
}
5989689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter#endif
6089689ae7f95995723fbcd5c116c47933a3bb8b13Christoph Lameter
613e347261a80b57df792ab9464b5f0ed59add53a8Bob Picco#ifdef CONFIG_SPARSEMEM_EXTREME
62577a32f620271416d05f852477151fb51c790bc6Sam Ravnborgstatic struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
6328ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen{
6428ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	struct mem_section *section = NULL;
6528ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	unsigned long array_size = SECTIONS_PER_ROOT *
6628ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen				   sizeof(struct mem_section);
6728ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen
68f52407ce2deac76c87abc8211a63ea152ba72d54Shaohua Li	if (slab_is_available()) {
69f52407ce2deac76c87abc8211a63ea152ba72d54Shaohua Li		if (node_state(nid, N_HIGH_MEMORY))
705b760e64a64c8940cdccd0ba6fce19a9bd010d20Gavin Shan			section = kzalloc_node(array_size, GFP_KERNEL, nid);
71f52407ce2deac76c87abc8211a63ea152ba72d54Shaohua Li		else
725b760e64a64c8940cdccd0ba6fce19a9bd010d20Gavin Shan			section = kzalloc(array_size, GFP_KERNEL);
735b760e64a64c8940cdccd0ba6fce19a9bd010d20Gavin Shan	} else {
74bb016b84164554725899aef544331085e08cb402Santosh Shilimkar		section = memblock_virt_alloc_node(array_size, nid);
755b760e64a64c8940cdccd0ba6fce19a9bd010d20Gavin Shan	}
7628ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen
7728ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	return section;
783e347261a80b57df792ab9464b5f0ed59add53a8Bob Picco}
79802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco
80a3142c8e1dd57ff48040bdb3478cff9312543dc3Yasunori Gotostatic int __meminit sparse_index_init(unsigned long section_nr, int nid)
81802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco{
8228ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
8328ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	struct mem_section *section;
84802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco
85802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco	if (mem_section[root])
8628ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen		return -EEXIST;
873e347261a80b57df792ab9464b5f0ed59add53a8Bob Picco
8828ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	section = sparse_index_alloc(nid);
89af0cd5a7c3cded50c25e98acd94912d17a0eb914WANG Cong	if (!section)
90af0cd5a7c3cded50c25e98acd94912d17a0eb914WANG Cong		return -ENOMEM;
9128ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen
9228ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen	mem_section[root] = section;
93c1c9518331969f97ea403bac66f0fd4a85d204d5Gavin Shan
949d1936cf86be8dc0cc27365bd8f1efdf23941961Zhang Yanfei	return 0;
9528ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen}
9628ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen#else /* !SPARSEMEM_EXTREME */
/* The mem_section table is fully static here: no root to set up. */
static inline int sparse_index_init(unsigned long section_nr, int nid)
{
	return 0;
}
10128ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen#endif
10228ae55c98e4d16eac9a05a8a259d7763ef3aeb18Dave Hansen
1034ca644d970bf2542623228a4624af356d20ca267Dave Hansen/*
1044ca644d970bf2542623228a4624af356d20ca267Dave Hansen * Although written for the SPARSEMEM_EXTREME case, this happens
105cd881a6b22902b356cacf8fd2e4e895871068eecAndy Whitcroft * to also work for the flat array case because
1064ca644d970bf2542623228a4624af356d20ca267Dave Hansen * NR_SECTION_ROOTS==NR_MEM_SECTIONS.
1074ca644d970bf2542623228a4624af356d20ca267Dave Hansen */
1084ca644d970bf2542623228a4624af356d20ca267Dave Hansenint __section_nr(struct mem_section* ms)
1094ca644d970bf2542623228a4624af356d20ca267Dave Hansen{
1104ca644d970bf2542623228a4624af356d20ca267Dave Hansen	unsigned long root_nr;
1114ca644d970bf2542623228a4624af356d20ca267Dave Hansen	struct mem_section* root;
1124ca644d970bf2542623228a4624af356d20ca267Dave Hansen
11312783b002db1f02c29353c8f698a85514420b9f4Mike Kravetz	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
11412783b002db1f02c29353c8f698a85514420b9f4Mike Kravetz		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
1154ca644d970bf2542623228a4624af356d20ca267Dave Hansen		if (!root)
1164ca644d970bf2542623228a4624af356d20ca267Dave Hansen			continue;
1174ca644d970bf2542623228a4624af356d20ca267Dave Hansen
1184ca644d970bf2542623228a4624af356d20ca267Dave Hansen		if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
1194ca644d970bf2542623228a4624af356d20ca267Dave Hansen		     break;
1204ca644d970bf2542623228a4624af356d20ca267Dave Hansen	}
1214ca644d970bf2542623228a4624af356d20ca267Dave Hansen
122db36a46113e101a8aa2d6ede41e78f2eaabed3f1Gavin Shan	VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
123db36a46113e101a8aa2d6ede41e78f2eaabed3f1Gavin Shan
1244ca644d970bf2542623228a4624af356d20ca267Dave Hansen	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
1254ca644d970bf2542623228a4624af356d20ca267Dave Hansen}
1264ca644d970bf2542623228a4624af356d20ca267Dave Hansen
12730c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft/*
12830c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft * During early boot, before section_mem_map is used for an actual
12930c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft * mem_map, we use section_mem_map to store the section's NUMA
13030c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft * node.  This keeps us from having to use another data structure.  The
13130c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft * node information is cleared just before we store the real mem_map.
13230c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft */
13330c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroftstatic inline unsigned long sparse_encode_early_nid(int nid)
13430c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft{
13530c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft	return (nid << SECTION_NID_SHIFT);
13630c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft}
13730c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft
13830c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroftstatic inline int sparse_early_nid(struct mem_section *section)
13930c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft{
14030c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft	return (section->section_mem_map >> SECTION_NID_SHIFT);
14130c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft}
14230c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft
1432dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman/* Validate the physical addressing limitations of the model */
1442dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gormanvoid __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
1452dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman						unsigned long *end_pfn)
146d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft{
1472dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
148d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft
149bead9a3abd15710b0bdfd418daef606722d86282Ingo Molnar	/*
150bead9a3abd15710b0bdfd418daef606722d86282Ingo Molnar	 * Sanity checks - do not allow an architecture to pass
151bead9a3abd15710b0bdfd418daef606722d86282Ingo Molnar	 * in larger pfns than the maximum scope of sparsemem:
152bead9a3abd15710b0bdfd418daef606722d86282Ingo Molnar	 */
1532dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman	if (*start_pfn > max_sparsemem_pfn) {
1542dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
1552dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
1562dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman			*start_pfn, *end_pfn, max_sparsemem_pfn);
1572dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman		WARN_ON_ONCE(1);
1582dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman		*start_pfn = max_sparsemem_pfn;
1592dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman		*end_pfn = max_sparsemem_pfn;
160ef161a9863b045909142daea9490b067997f3dc5Cyrill Gorcunov	} else if (*end_pfn > max_sparsemem_pfn) {
1612dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
1622dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
1632dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman			*start_pfn, *end_pfn, max_sparsemem_pfn);
1642dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman		WARN_ON_ONCE(1);
1652dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman		*end_pfn = max_sparsemem_pfn;
1662dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman	}
1672dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman}
1682dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman
1692dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman/* Record a memory area against a node. */
1702dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gormanvoid __init memory_present(int nid, unsigned long start, unsigned long end)
1712dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman{
1722dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman	unsigned long pfn;
173bead9a3abd15710b0bdfd418daef606722d86282Ingo Molnar
174d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	start &= PAGE_SECTION_MASK;
1752dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman	mminit_validate_memmodel_limits(&start, &end);
176d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
177d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft		unsigned long section = pfn_to_section_nr(pfn);
178802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco		struct mem_section *ms;
179802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco
180802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco		sparse_index_init(section, nid);
18185770ffe4f0cdd4396b17f14762adc25a571a348Andy Whitcroft		set_section_nid(section, nid);
182802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco
183802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco		ms = __nr_to_section(section);
184802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco		if (!ms->section_mem_map)
18530c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft			ms->section_mem_map = sparse_encode_early_nid(nid) |
18630c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft							SECTION_MARKED_PRESENT;
187d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	}
188d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft}
189d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft
190d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft/*
191d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft * Only used by the i386 NUMA architecures, but relatively
192d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft * generic code.
193d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft */
194d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroftunsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
195d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft						     unsigned long end_pfn)
196d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft{
197d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	unsigned long pfn;
198d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	unsigned long nr_pages = 0;
199d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft
2002dbb51c49f4fecb8330e43247a0edfbc4b2b8974Mel Gorman	mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
201d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
202d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft		if (nid != early_pfn_to_nid(pfn))
203d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft			continue;
204d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft
205540557b9439ec19668553830c90222f9fb0c2e95Andy Whitcroft		if (pfn_present(pfn))
206d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft			nr_pages += PAGES_PER_SECTION;
207d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	}
208d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft
209d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft	return nr_pages * sizeof(struct page);
210d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft}
211d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft
212d41dee369bff3b9dcb6328d4d822926c28cc2594Andy Whitcroft/*
21329751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft * Subtle, we encode the real pfn into the mem_map such that
21429751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft * the identity pfn - section_mem_map will return the actual
21529751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft * physical page frame number.
21629751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft */
21729751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroftstatic unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
21829751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft{
21929751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
22029751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft}
22129751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
22229751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft/*
223ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty * Decode mem_map from the coded memmap
22429751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft */
22529751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroftstruct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
22629751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft{
227ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty	/* mask off the extra low bits of information */
228ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty	coded_mem_map &= SECTION_MAP_MASK;
22929751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
23029751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft}
23129751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
232a3142c8e1dd57ff48040bdb3478cff9312543dc3Yasunori Gotostatic int __meminit sparse_init_one_section(struct mem_section *ms,
2335c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman		unsigned long pnum, struct page *mem_map,
2345c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman		unsigned long *pageblock_bitmap)
23529751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft{
236540557b9439ec19668553830c90222f9fb0c2e95Andy Whitcroft	if (!present_section(ms))
23729751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft		return -EINVAL;
23829751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
23930c253e6da655d73eb8bfe2adca9b8f4d82fb81eAndy Whitcroft	ms->section_mem_map &= ~SECTION_MAP_MASK;
240540557b9439ec19668553830c90222f9fb0c2e95Andy Whitcroft	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
241540557b9439ec19668553830c90222f9fb0c2e95Andy Whitcroft							SECTION_HAS_MEM_MAP;
2425c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman 	ms->pageblock_flags = pageblock_bitmap;
24329751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
24429751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	return 1;
24529751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft}
24629751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
24704753278769f3b6c3b79a080edb52f21d83bf6e2Yasunori Gotounsigned long usemap_size(void)
2485c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman{
2495c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	unsigned long size_bytes;
2505c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;
2515c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	size_bytes = roundup(size_bytes, sizeof(unsigned long));
2525c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	return size_bytes;
2535c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman}
2545c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman
2555c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman#ifdef CONFIG_MEMORY_HOTPLUG
2565c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gormanstatic unsigned long *__kmalloc_section_usemap(void)
2575c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman{
2585c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	return kmalloc(usemap_size(), GFP_KERNEL);
2595c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman}
2605c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman#endif /* CONFIG_MEMORY_HOTPLUG */
2615c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman
26248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto#ifdef CONFIG_MEMORY_HOTREMOVE
26348c906823f3927b981db9f0b03c2e2499977ee93Yasunori Gotostatic unsigned long * __init
264a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lusparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
265238305bb4d418c95977162ba13c11880685fc731Johannes Weiner					 unsigned long size)
26648c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto{
26799ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	unsigned long goal, limit;
26899ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	unsigned long *p;
26999ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	int nid;
27048c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	/*
27148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * A page may contain usemaps for other sections preventing the
27248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * page being freed and making a section unremovable while
273c800bcd5f53fd9455fc6c68f1a34306e5aa4f79aLi Zhong	 * other sections referencing the usemap remain active. Similarly,
27448c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * a pgdat can prevent a section being removed. If section A
27548c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * contains a pgdat and section B contains the usemap, both
27648c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * sections become inter-dependent. This allocates usemaps
27748c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * from the same section as the pgdat where possible to avoid
27848c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * this problem.
27948c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 */
28007b4e2bc9c35ea88cbd36d806fcd5e3bcbf022beYinghai Lu	goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
28199ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	limit = goal + (1UL << PA_SECTION_SHIFT);
28299ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
28399ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Luagain:
284bb016b84164554725899aef544331085e08cb402Santosh Shilimkar	p = memblock_virt_alloc_try_nid_nopanic(size,
285bb016b84164554725899aef544331085e08cb402Santosh Shilimkar						SMP_CACHE_BYTES, goal, limit,
286bb016b84164554725899aef544331085e08cb402Santosh Shilimkar						nid);
28799ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	if (!p && limit) {
28899ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu		limit = 0;
28999ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu		goto again;
29099ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	}
29199ab7b19440a72ebdf225f99b20f8ef40decee86Yinghai Lu	return p;
29248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto}
29348c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
29448c906823f3927b981db9f0b03c2e2499977ee93Yasunori Gotostatic void __init check_usemap_section_nr(int nid, unsigned long *usemap)
29548c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto{
29648c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	unsigned long usemap_snr, pgdat_snr;
29748c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
29848c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
29948c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	struct pglist_data *pgdat = NODE_DATA(nid);
30048c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	int usemap_nid;
30148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
30248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
30348c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
30448c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	if (usemap_snr == pgdat_snr)
30548c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto		return;
30648c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
30748c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
30848c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto		/* skip redundant message */
30948c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto		return;
31048c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
31148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	old_usemap_snr = usemap_snr;
31248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	old_pgdat_snr = pgdat_snr;
31348c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
31448c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
31548c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	if (usemap_nid != nid) {
31648c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto		printk(KERN_INFO
31748c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto		       "node %d must be removed before remove section %ld\n",
31848c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto		       nid, usemap_snr);
31948c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto		return;
32048c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	}
32148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	/*
32248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * There is a circular dependency.
32348c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * Some platforms allow un-removable section because they will just
32448c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * gather other removable sections for dynamic partitioning.
32548c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 * Just notify un-removable section's number here.
32648c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	 */
32748c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	printk(KERN_INFO "Section %ld and %ld (node %d)", usemap_snr,
32848c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	       pgdat_snr, nid);
32948c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	printk(KERN_CONT
33048c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	       " have a circular dependency on usemap and pgdat allocations\n");
33148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto}
33248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto#else
33348c906823f3927b981db9f0b03c2e2499977ee93Yasunori Gotostatic unsigned long * __init
334a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lusparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
335238305bb4d418c95977162ba13c11880685fc731Johannes Weiner					 unsigned long size)
33648c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto{
337bb016b84164554725899aef544331085e08cb402Santosh Shilimkar	return memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
33848c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto}
33948c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
34048c906823f3927b981db9f0b03c2e2499977ee93Yasunori Gotostatic void __init check_usemap_section_nr(int nid, unsigned long *usemap)
34148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto{
34248c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto}
34348c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto#endif /* CONFIG_MEMORY_HOTREMOVE */
34448c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
345187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Listatic void __init sparse_early_usemaps_alloc_node(void *data,
346a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu				 unsigned long pnum_begin,
347a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu				 unsigned long pnum_end,
348a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu				 unsigned long usemap_count, int nodeid)
3495c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman{
350a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	void *usemap;
351a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	unsigned long pnum;
352187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	unsigned long **usemap_map = (unsigned long **)data;
353a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	int size = usemap_size();
3545c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman
355a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
356238305bb4d418c95977162ba13c11880685fc731Johannes Weiner							  size * usemap_count);
357f5bf18fa22f8c41a13eb8762c7373eb3a93a7333Nishanth Aravamudan	if (!usemap) {
358238305bb4d418c95977162ba13c11880685fc731Johannes Weiner		printk(KERN_WARNING "%s: allocation failed\n", __func__);
359238305bb4d418c95977162ba13c11880685fc731Johannes Weiner		return;
36048c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto	}
36148c906823f3927b981db9f0b03c2e2499977ee93Yasunori Goto
362f5bf18fa22f8c41a13eb8762c7373eb3a93a7333Nishanth Aravamudan	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
363f5bf18fa22f8c41a13eb8762c7373eb3a93a7333Nishanth Aravamudan		if (!present_section_nr(pnum))
364f5bf18fa22f8c41a13eb8762c7373eb3a93a7333Nishanth Aravamudan			continue;
365f5bf18fa22f8c41a13eb8762c7373eb3a93a7333Nishanth Aravamudan		usemap_map[pnum] = usemap;
366f5bf18fa22f8c41a13eb8762c7373eb3a93a7333Nishanth Aravamudan		usemap += size;
367f5bf18fa22f8c41a13eb8762c7373eb3a93a7333Nishanth Aravamudan		check_usemap_section_nr(nodeid, usemap_map[pnum]);
368a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu	}
3695c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman}
3705c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman
3718f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter#ifndef CONFIG_SPARSEMEM_VMEMMAP
37298f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Gotostruct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
37329751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft{
37429751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	struct page *map;
375e48e67e08c340def3d0349c2910d23c7985fb6faYinghai Lu	unsigned long size;
37629751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
37729751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
37829751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	if (map)
37929751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft		return map;
38029751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
381e48e67e08c340def3d0349c2910d23c7985fb6faYinghai Lu	size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
382bb016b84164554725899aef544331085e08cb402Santosh Shilimkar	map = memblock_virt_alloc_try_nid(size,
383bb016b84164554725899aef544331085e08cb402Santosh Shilimkar					  PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
384bb016b84164554725899aef544331085e08cb402Santosh Shilimkar					  BOOTMEM_ALLOC_ACCESSIBLE, nid);
3858f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter	return map;
3868f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter}
3879bdac914240759457175ac0d6529a37d2820bc4dYinghai Luvoid __init sparse_mem_maps_populate_node(struct page **map_map,
3889bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu					  unsigned long pnum_begin,
3899bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu					  unsigned long pnum_end,
3909bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu					  unsigned long map_count, int nodeid)
3919bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu{
3929bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	void *map;
3939bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	unsigned long pnum;
3949bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
3959bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu
3969bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	map = alloc_remap(nodeid, size * map_count);
3979bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	if (map) {
3989bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
3999bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			if (!present_section_nr(pnum))
4009bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu				continue;
4019bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			map_map[pnum] = map;
4029bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			map += size;
4039bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		}
4049bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		return;
4059bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	}
4069bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu
4079bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	size = PAGE_ALIGN(size);
408bb016b84164554725899aef544331085e08cb402Santosh Shilimkar	map = memblock_virt_alloc_try_nid(size * map_count,
409bb016b84164554725899aef544331085e08cb402Santosh Shilimkar					  PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
410bb016b84164554725899aef544331085e08cb402Santosh Shilimkar					  BOOTMEM_ALLOC_ACCESSIBLE, nodeid);
4119bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	if (map) {
4129bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
4139bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			if (!present_section_nr(pnum))
4149bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu				continue;
4159bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			map_map[pnum] = map;
4169bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			map += size;
4179bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		}
4189bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		return;
4199bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	}
4209bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu
4219bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	/* fallback */
4229bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
4239bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		struct mem_section *ms;
4249bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu
4259bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		if (!present_section_nr(pnum))
4269bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			continue;
4279bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
4289bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		if (map_map[pnum])
4299bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			continue;
4309bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		ms = __nr_to_section(pnum);
4319bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		printk(KERN_ERR "%s: sparsemem memory map backing failed "
4329bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu			"some memory will not be available.\n", __func__);
4339bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		ms->section_mem_map = 0;
4349bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	}
4359bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu}
4368f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
4378f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter
43881d0d950e5037a26b71e568ff235ff9e998f4ab3Yinghai Lu#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
439187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Listatic void __init sparse_early_mem_maps_alloc_node(void *data,
4409bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu				 unsigned long pnum_begin,
4419bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu				 unsigned long pnum_end,
4429bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu				 unsigned long map_count, int nodeid)
4439bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu{
444187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	struct page **map_map = (struct page **)data;
4459bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
4469bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu					 map_count, nodeid);
4479bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu}
44881d0d950e5037a26b71e568ff235ff9e998f4ab3Yinghai Lu#else
4499e5c6da71e89fa25ced6e88182225a99941bec90Adrian Bunkstatic struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
4508f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter{
4518f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter	struct page *map;
4528f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter	struct mem_section *ms = __nr_to_section(pnum);
4538f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter	int nid = sparse_early_nid(ms);
4548f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter
45598f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto	map = sparse_mem_map_populate(pnum, nid);
45629751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	if (map)
45729751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft		return map;
45829751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
4598f6aac419bd590f535fb110875a51f7db2b62b5bChristoph Lameter	printk(KERN_ERR "%s: sparsemem memory map backing failed "
460d40cee245ff6ad05d3448401d7320be82c1c5af1Harvey Harrison			"some memory will not be available.\n", __func__);
461802f192e4a600f7ef84ca25c8b818c8830acef5aBob Picco	ms->section_mem_map = 0;
46229751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	return NULL;
46329751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft}
4649bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#endif
46529751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
4663b32123d734cb414e366b35a3b2142a995f9d1a0Gideon Israel Dsouzavoid __weak __meminit vmemmap_populate_print_last(void)
467c2b91e2eec9678dbda274e906cc32ea8f711da3bYinghai Lu{
468c2b91e2eec9678dbda274e906cc32ea8f711da3bYinghai Lu}
469a4322e1bad91fbca27056fc38d2cbca3f1eae0cfYinghai Lu
470187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li/**
471187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li *  alloc_usemap_and_memmap - memory alloction for pageblock flags and vmemmap
472187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li *  @map: usemap_map for pageblock flags or mmap_map for vmemmap
473187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li */
474187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Listatic void __init alloc_usemap_and_memmap(void (*alloc_func)
475187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li					(void *, unsigned long, unsigned long,
476187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li					unsigned long, int), void *data)
477187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li{
478187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	unsigned long pnum;
479187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	unsigned long map_count;
480187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	int nodeid_begin = 0;
481187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	unsigned long pnum_begin = 0;
482187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li
483187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
484187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		struct mem_section *ms;
485187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li
486187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		if (!present_section_nr(pnum))
487187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li			continue;
488187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		ms = __nr_to_section(pnum);
489187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		nodeid_begin = sparse_early_nid(ms);
490187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		pnum_begin = pnum;
491187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		break;
492187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	}
493187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	map_count = 1;
494187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
495187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		struct mem_section *ms;
496187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		int nodeid;
497187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li
498187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		if (!present_section_nr(pnum))
499187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li			continue;
500187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		ms = __nr_to_section(pnum);
501187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		nodeid = sparse_early_nid(ms);
502187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		if (nodeid == nodeid_begin) {
503187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li			map_count++;
504187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li			continue;
505187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		}
506187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		/* ok, we need to take cake of from pnum_begin to pnum - 1*/
507187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		alloc_func(data, pnum_begin, pnum,
508187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li						map_count, nodeid_begin);
509187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		/* new start, update count etc*/
510187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		nodeid_begin = nodeid;
511187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		pnum_begin = pnum;
512187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li		map_count = 1;
513187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	}
514187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	/* ok, last chunk */
515187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	alloc_func(data, pnum_begin, NR_MEM_SECTIONS,
516187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li						map_count, nodeid_begin);
517187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li}
518187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li
519193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell/*
520193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell * Allocate the accumulated non-linear sections, allocate a mem_map
521193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell * for each and record the physical to section mapping.
522193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell */
523193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwellvoid __init sparse_init(void)
524193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell{
525193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell	unsigned long pnum;
526193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell	struct page *map;
5275c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	unsigned long *usemap;
528e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	unsigned long **usemap_map;
52981d0d950e5037a26b71e568ff235ff9e998f4ab3Yinghai Lu	int size;
53081d0d950e5037a26b71e568ff235ff9e998f4ab3Yinghai Lu#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
53181d0d950e5037a26b71e568ff235ff9e998f4ab3Yinghai Lu	int size2;
53281d0d950e5037a26b71e568ff235ff9e998f4ab3Yinghai Lu	struct page **map_map;
53381d0d950e5037a26b71e568ff235ff9e998f4ab3Yinghai Lu#endif
534e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu
53555878e88c59221c3187e1c24ec3b15eb79c374c0Cody P Schafer	/* see include/linux/mmzone.h 'struct mem_section' definition */
53655878e88c59221c3187e1c24ec3b15eb79c374c0Cody P Schafer	BUILD_BUG_ON(!is_power_of_2(sizeof(struct mem_section)));
53755878e88c59221c3187e1c24ec3b15eb79c374c0Cody P Schafer
538ca57df79d4f64e1a4886606af4289d40636189c5Xishi Qiu	/* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
539ca57df79d4f64e1a4886606af4289d40636189c5Xishi Qiu	set_pageblock_order();
540ca57df79d4f64e1a4886606af4289d40636189c5Xishi Qiu
541e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	/*
542e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 * map is using big page (aka 2M in x86 64 bit)
543e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 * usemap is less one page (aka 24 bytes)
544e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 * so alloc 2M (with 2M align) and 24 bytes in turn will
545e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 * make next 2M slip to one more 2M later.
546e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 * then in big system, the memory will have a lot of holes...
54725985edcedea6396277003854657b5f3cb31a628Lucas De Marchi	 * here try to allocate 2M pages continuously.
548e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 *
549e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 * powerpc need to call sparse_init_one_section right after each
550e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 * sparse_early_mem_map_alloc, so allocate usemap_map at first.
551e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	 */
552e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
553bb016b84164554725899aef544331085e08cb402Santosh Shilimkar	usemap_map = memblock_virt_alloc(size, 0);
554e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	if (!usemap_map)
555e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu		panic("can not allocate usemap_map\n");
556187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node,
557187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li							(void *)usemap_map);
558193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell
5599bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
5609bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
561bb016b84164554725899aef544331085e08cb402Santosh Shilimkar	map_map = memblock_virt_alloc(size2, 0);
5629bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu	if (!map_map)
5639bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		panic("can not allocate map_map\n");
564187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li	alloc_usemap_and_memmap(sparse_early_mem_maps_alloc_node,
565187320932dcece9c4b93f38f56d1f888bd5c325fWanpeng Li							(void *)map_map);
5669bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#endif
5679bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu
568e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
569e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu		if (!present_section_nr(pnum))
570193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell			continue;
5715c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman
572e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu		usemap = usemap_map[pnum];
5735c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman		if (!usemap)
5745c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman			continue;
5755c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman
5769bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
5779bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu		map = map_map[pnum];
5789bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#else
579e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu		map = sparse_early_mem_map_alloc(pnum);
5809bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#endif
581e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu		if (!map)
582e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu			continue;
583e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu
5845c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman		sparse_init_one_section(__nr_to_section(pnum), pnum, map,
5855c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman								usemap);
586193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell	}
587e123dd3f0ec1664576456ea1ea045591a0a95f0cYinghai Lu
588c2b91e2eec9678dbda274e906cc32ea8f711da3bYinghai Lu	vmemmap_populate_print_last();
589c2b91e2eec9678dbda274e906cc32ea8f711da3bYinghai Lu
5909bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
591bb016b84164554725899aef544331085e08cb402Santosh Shilimkar	memblock_free_early(__pa(map_map), size2);
5929bdac914240759457175ac0d6529a37d2820bc4dYinghai Lu#endif
593bb016b84164554725899aef544331085e08cb402Santosh Shilimkar	memblock_free_early(__pa(usemap_map), size);
594193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell}
595193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell
596193faea9280a809cc30e81d7e503e01b1d7b7042Stephen Rothwell#ifdef CONFIG_MEMORY_HOTPLUG
59798f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * CONFIG_SPARSEMEM_VMEMMAP variant: obtain the mem_map for hot-added section
 * @pnum on node @nid by delegating to sparse_mem_map_populate(), which is
 * expected to perform whatever allocations are required.
 */
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
{
	struct page *memmap = sparse_mem_map_populate(pnum, nid);

	return memmap;
}
60385b35feaecd4d2284505b22708795bc1f03fc897Zhang Yanfeistatic void __kfree_section_memmap(struct page *memmap)
60498f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto{
6050aad818b2de455f1bfd7ef87c28cdbbaaed9a699Johannes Weiner	unsigned long start = (unsigned long)memmap;
60685b35feaecd4d2284505b22708795bc1f03fc897Zhang Yanfei	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
6070aad818b2de455f1bfd7ef87c28cdbbaaed9a699Johannes Weiner
6080aad818b2de455f1bfd7ef87c28cdbbaaed9a699Johannes Weiner	vmemmap_free(start, end);
60998f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto}
6104edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes#ifdef CONFIG_MEMORY_HOTREMOVE
61181556b02525181e19ef073a798ba9d48db96f708Zhang Yanfeistatic void free_map_bootmem(struct page *memmap)
6120c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto{
6130aad818b2de455f1bfd7ef87c28cdbbaaed9a699Johannes Weiner	unsigned long start = (unsigned long)memmap;
61481556b02525181e19ef073a798ba9d48db96f708Zhang Yanfei	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
6150aad818b2de455f1bfd7ef87c28cdbbaaed9a699Johannes Weiner
6160aad818b2de455f1bfd7ef87c28cdbbaaed9a699Johannes Weiner	vmemmap_free(start, end);
6170c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto}
6184edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes#endif /* CONFIG_MEMORY_HOTREMOVE */
61998f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto#else
62085b35feaecd4d2284505b22708795bc1f03fc897Zhang Yanfeistatic struct page *__kmalloc_section_memmap(void)
6210b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen{
6220b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	struct page *page, *ret;
62385b35feaecd4d2284505b22708795bc1f03fc897Zhang Yanfei	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
6240b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
625f2d0aa5bf8d4f7ae4cb1a7feebf5b1afddd0b9b0Yasunori Goto	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
6260b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	if (page)
6270b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen		goto got_map_page;
6280b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
6290b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	ret = vmalloc(memmap_size);
6300b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	if (ret)
6310b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen		goto got_map_ptr;
6320b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
6330b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	return NULL;
6340b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansengot_map_page:
6350b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
6360b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansengot_map_ptr:
6370b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
6380b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	return ret;
6390b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen}
6400b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
/*
 * Non-vmemmap variant: @pnum and @nid are accepted for signature parity with
 * the vmemmap variant but are not used; the map comes straight from
 * __kmalloc_section_memmap().
 */
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
{
	struct page *memmap = __kmalloc_section_memmap();

	return memmap;
}
64598f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto
64685b35feaecd4d2284505b22708795bc1f03fc897Zhang Yanfeistatic void __kfree_section_memmap(struct page *memmap)
6470b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen{
6489e2779fa281cfda13ac060753d674bbcaa23367eChristoph Lameter	if (is_vmalloc_addr(memmap))
6490b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen		vfree(memmap);
6500b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	else
6510b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen		free_pages((unsigned long)memmap,
65285b35feaecd4d2284505b22708795bc1f03fc897Zhang Yanfei			   get_order(sizeof(struct page) * PAGES_PER_SECTION));
6530b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen}
6540c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto
6554edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes#ifdef CONFIG_MEMORY_HOTREMOVE
65681556b02525181e19ef073a798ba9d48db96f708Zhang Yanfeistatic void free_map_bootmem(struct page *memmap)
6570c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto{
6580c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto	unsigned long maps_section_nr, removing_section_nr, i;
65981556b02525181e19ef073a798ba9d48db96f708Zhang Yanfei	unsigned long magic, nr_pages;
660ae64ffcac35de0db628ba9631edf8ff34c5cd7acJianguo Wu	struct page *page = virt_to_page(memmap);
6610c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto
66281556b02525181e19ef073a798ba9d48db96f708Zhang Yanfei	nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
66381556b02525181e19ef073a798ba9d48db96f708Zhang Yanfei		>> PAGE_SHIFT;
66481556b02525181e19ef073a798ba9d48db96f708Zhang Yanfei
6650c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto	for (i = 0; i < nr_pages; i++, page++) {
6665f24ce5fd34c3ca1b3d10d30da754732da64d5c0Andrea Arcangeli		magic = (unsigned long) page->lru.next;
6670c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto
6680c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		BUG_ON(magic == NODE_INFO);
6690c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto
6700c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
6710c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		removing_section_nr = page->private;
6720c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto
6730c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		/*
6740c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		 * When this function is called, the removing section is
6750c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		 * logical offlined state. This means all pages are isolated
6760c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		 * from page allocator. If removing section's memmap is placed
6770c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		 * on the same section, it must not be freed.
6780c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		 * If it is freed, page allocator may allocate it which will
6790c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		 * be removed physically soon.
6800c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		 */
6810c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto		if (maps_section_nr != removing_section_nr)
6820c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto			put_page_bootmem(page);
6830c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto	}
6840c0a4a517a31e05efb38304668198a873bfec6caYasunori Goto}
6854edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes#endif /* CONFIG_MEMORY_HOTREMOVE */
68698f3cfc1dc7a53b629d43b7844a9b3f786213048Yasunori Goto#endif /* CONFIG_SPARSEMEM_VMEMMAP */
6870b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
68829751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft/*
68929751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft * returns the number of sections whose mem_maps were properly
69029751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft * set.  If this is <=0, then that means that the passed-in
69129751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft * map was not consumed and must be freed.
69229751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft */
69385b35feaecd4d2284505b22708795bc1f03fc897Zhang Yanfeiint __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn)
69429751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft{
6950b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	unsigned long section_nr = pfn_to_section_nr(start_pfn);
6960b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	struct pglist_data *pgdat = zone->zone_pgdat;
6970b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	struct mem_section *ms;
6980b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	struct page *memmap;
6995c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	unsigned long *usemap;
7000b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	unsigned long flags;
7010b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	int ret;
70229751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
7030b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	/*
7040b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	 * no locking for this, because it does its own
7050b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	 * plus, it does a kmalloc
7060b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	 */
707bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong	ret = sparse_index_init(section_nr, pgdat->node_id);
708bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong	if (ret < 0 && ret != -EEXIST)
709bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong		return ret;
71085b35feaecd4d2284505b22708795bc1f03fc897Zhang Yanfei	memmap = kmalloc_section_memmap(section_nr, pgdat->node_id);
711bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong	if (!memmap)
712bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong		return -ENOMEM;
7135c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	usemap = __kmalloc_section_usemap();
714bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong	if (!usemap) {
71585b35feaecd4d2284505b22708795bc1f03fc897Zhang Yanfei		__kfree_section_memmap(memmap);
716bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong		return -ENOMEM;
717bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong	}
7180b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
7190b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	pgdat_resize_lock(pgdat, &flags);
72029751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
7210b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	ms = __pfn_to_section(start_pfn);
7220b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
7230b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen		ret = -EEXIST;
7240b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen		goto out;
7250b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	}
7265c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman
72785b35feaecd4d2284505b22708795bc1f03fc897Zhang Yanfei	memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION);
7283ac19f8efe26451cacac31d0be34fa9c51114c2aWen Congyang
72929751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft	ms->section_mem_map |= SECTION_MARKED_PRESENT;
73029751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft
7315c0e3066474b57c56ff0d88ca31d95bd14232feeMel Gorman	ret = sparse_init_one_section(ms, section_nr, memmap, usemap);
7320b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen
7330b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansenout:
7340b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	pgdat_resize_unlock(pgdat, &flags);
735bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong	if (ret <= 0) {
736bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong		kfree(usemap);
73785b35feaecd4d2284505b22708795bc1f03fc897Zhang Yanfei		__kfree_section_memmap(memmap);
738bbd0682596f7a434467ee551fee18d5f0b818539WANG Cong	}
7390b0acbec1bed75ec1e1daa7f7006323a2a2b2844Dave Hansen	return ret;
74029751f6991e845f7d002a6ae520bf996b38c8dcdAndy Whitcroft}
741ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty
742f3deb6872b946a851a3799b315f3c85ce4c027fcZhang Yanfei#ifdef CONFIG_MEMORY_HOTREMOVE
74395a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang#ifdef CONFIG_MEMORY_FAILURE
74495a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyangstatic void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
74595a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang{
74695a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang	int i;
74795a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang
74895a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang	if (!memmap)
74995a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang		return;
75095a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang
75195a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang	for (i = 0; i < PAGES_PER_SECTION; i++) {
75295a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang		if (PageHWPoison(&memmap[i])) {
753293c07e31ab5a0b8df8c19b2a9e5c6fa30308849Xishi Qiu			atomic_long_sub(1, &num_poisoned_pages);
75495a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang			ClearPageHWPoison(&memmap[i]);
75595a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang		}
75695a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang	}
75795a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang}
75895a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang#else
/* Without CONFIG_MEMORY_FAILURE this is a no-op with the same signature. */
static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
	/* nothing to do */
}
76295a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang#endif
76395a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang
/*
 * Release the usemap and mem_map of a section that is being removed.
 *
 * Nothing happens when @usemap is NULL.  Otherwise the page backing @usemap
 * decides how both objects are treated:
 *  - slab or compound page: the usemap came from hot-add, so it is kfree()d
 *    and @memmap (if any) goes through __kfree_section_memmap();
 *  - anything else: the usemap came from bootmem, where it is packed with
 *    other usemaps on the section holding the pgdat at boot time, so it is
 *    left in place and only @memmap (if any) is passed to free_map_bootmem().
 */
static void free_section_usemap(struct page *memmap, unsigned long *usemap)
{
	struct page *usemap_page;
	int from_hotadd;

	if (!usemap)
		return;

	usemap_page = virt_to_page(usemap);
	/* Check to see if allocation came from hot-plug-add */
	from_hotadd = PageSlab(usemap_page) || PageCompound(usemap_page);

	if (from_hotadd)
		kfree(usemap);

	if (!memmap)
		return;

	if (from_hotadd)
		__kfree_section_memmap(memmap);
	else
		free_map_bootmem(memmap);
}
7904edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes
791ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavartyvoid sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
792ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty{
793ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty	struct page *memmap = NULL;
794cd099682e4c786c3a866e462b37fcac6e3a44a68Tang Chen	unsigned long *usemap = NULL, flags;
795cd099682e4c786c3a866e462b37fcac6e3a44a68Tang Chen	struct pglist_data *pgdat = zone->zone_pgdat;
796ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty
797cd099682e4c786c3a866e462b37fcac6e3a44a68Tang Chen	pgdat_resize_lock(pgdat, &flags);
798ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty	if (ms->section_mem_map) {
799ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty		usemap = ms->pageblock_flags;
800ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty		memmap = sparse_decode_mem_map(ms->section_mem_map,
801ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty						__section_nr(ms));
802ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty		ms->section_mem_map = 0;
803ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty		ms->pageblock_flags = NULL;
804ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty	}
805cd099682e4c786c3a866e462b37fcac6e3a44a68Tang Chen	pgdat_resize_unlock(pgdat, &flags);
806ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty
80795a4774d055c72d96ab192a1c6675cbf4d513f71Wen Congyang	clear_hwpoisoned_pages(memmap, PAGES_PER_SECTION);
808ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty	free_section_usemap(memmap, usemap);
809ea01ea937dcae2caa146dea1918cccf2f16ed3c4Badari Pulavarty}
8104edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes#endif /* CONFIG_MEMORY_HOTREMOVE */
8114edd7ceff0662afde195da6f6c43e7cbe1ed2dc4David Rientjes#endif /* CONFIG_MEMORY_HOTPLUG */
812