sparse.c revision 46a66eecdf7bc12562ecb492297447ed0e1ecf59
1/* 2 * sparse memory mappings. 3 */ 4#include <linux/config.h> 5#include <linux/mm.h> 6#include <linux/mmzone.h> 7#include <linux/bootmem.h> 8#include <linux/highmem.h> 9#include <linux/module.h> 10#include <linux/spinlock.h> 11#include <linux/vmalloc.h> 12#include <asm/dma.h> 13 14/* 15 * Permanent SPARSEMEM data: 16 * 17 * 1) mem_section - memory sections, mem_map's for valid memory 18 */ 19#ifdef CONFIG_SPARSEMEM_EXTREME 20struct mem_section *mem_section[NR_SECTION_ROOTS] 21 ____cacheline_internodealigned_in_smp; 22#else 23struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] 24 ____cacheline_internodealigned_in_smp; 25#endif 26EXPORT_SYMBOL(mem_section); 27 28#ifdef CONFIG_SPARSEMEM_EXTREME 29static struct mem_section *sparse_index_alloc(int nid) 30{ 31 struct mem_section *section = NULL; 32 unsigned long array_size = SECTIONS_PER_ROOT * 33 sizeof(struct mem_section); 34 35 if (system_state == SYSTEM_RUNNING) 36 section = kmalloc_node(array_size, GFP_KERNEL, nid); 37 else 38 section = alloc_bootmem_node(NODE_DATA(nid), array_size); 39 40 if (section) 41 memset(section, 0, array_size); 42 43 return section; 44} 45 46static int sparse_index_init(unsigned long section_nr, int nid) 47{ 48 static spinlock_t index_init_lock = SPIN_LOCK_UNLOCKED; 49 unsigned long root = SECTION_NR_TO_ROOT(section_nr); 50 struct mem_section *section; 51 int ret = 0; 52 53 if (mem_section[root]) 54 return -EEXIST; 55 56 section = sparse_index_alloc(nid); 57 /* 58 * This lock keeps two different sections from 59 * reallocating for the same index 60 */ 61 spin_lock(&index_init_lock); 62 63 if (mem_section[root]) { 64 ret = -EEXIST; 65 goto out; 66 } 67 68 mem_section[root] = section; 69out: 70 spin_unlock(&index_init_lock); 71 return ret; 72} 73#else /* !SPARSEMEM_EXTREME */ 74static inline int sparse_index_init(unsigned long section_nr, int nid) 75{ 76 return 0; 77} 78#endif 79 80/* 81 * Although written for the SPARSEMEM_EXTREME case, this happens 82 * to also work for the flat array case becase 83 * NR_SECTION_ROOTS==NR_MEM_SECTIONS. 84 */ 85int __section_nr(struct mem_section* ms) 86{ 87 unsigned long root_nr; 88 struct mem_section* root; 89 90 for (root_nr = 0; 91 root_nr < NR_MEM_SECTIONS; 92 root_nr += SECTIONS_PER_ROOT) { 93 root = __nr_to_section(root_nr); 94 95 if (!root) 96 continue; 97 98 if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT))) 99 break; 100 } 101 102 return (root_nr * SECTIONS_PER_ROOT) + (ms - root); 103} 104 105/* Record a memory area against a node. */ 106void memory_present(int nid, unsigned long start, unsigned long end) 107{ 108 unsigned long pfn; 109 110 start &= PAGE_SECTION_MASK; 111 for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { 112 unsigned long section = pfn_to_section_nr(pfn); 113 struct mem_section *ms; 114 115 sparse_index_init(section, nid); 116 117 ms = __nr_to_section(section); 118 if (!ms->section_mem_map) 119 ms->section_mem_map = SECTION_MARKED_PRESENT; 120 } 121} 122 123/* 124 * Only used by the i386 NUMA architecures, but relatively 125 * generic code. 126 */ 127unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn, 128 unsigned long end_pfn) 129{ 130 unsigned long pfn; 131 unsigned long nr_pages = 0; 132 133 for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 134 if (nid != early_pfn_to_nid(pfn)) 135 continue; 136 137 if (pfn_valid(pfn)) 138 nr_pages += PAGES_PER_SECTION; 139 } 140 141 return nr_pages * sizeof(struct page); 142} 143 144/* 145 * Subtle, we encode the real pfn into the mem_map such that 146 * the identity pfn - section_mem_map will return the actual 147 * physical page frame number. 148 */ 149static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum) 150{ 151 return (unsigned long)(mem_map - (section_nr_to_pfn(pnum))); 152} 153 154/* 155 * We need this if we ever free the mem_maps. While not implemented yet, 156 * this function is included for parity with its sibling. 157 */ 158static __attribute((unused)) 159struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum) 160{ 161 return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum); 162} 163 164static int sparse_init_one_section(struct mem_section *ms, 165 unsigned long pnum, struct page *mem_map) 166{ 167 if (!valid_section(ms)) 168 return -EINVAL; 169 170 ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum); 171 172 return 1; 173} 174 175static struct page *sparse_early_mem_map_alloc(unsigned long pnum) 176{ 177 struct page *map; 178 int nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); 179 struct mem_section *ms = __nr_to_section(pnum); 180 181 map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); 182 if (map) 183 return map; 184 185 map = alloc_bootmem_node(NODE_DATA(nid), 186 sizeof(struct page) * PAGES_PER_SECTION); 187 if (map) 188 return map; 189 190 printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__); 191 ms->section_mem_map = 0; 192 return NULL; 193} 194 195static struct page *__kmalloc_section_memmap(unsigned long nr_pages) 196{ 197 struct page *page, *ret; 198 unsigned long memmap_size = sizeof(struct page) * nr_pages; 199 200 page = alloc_pages(GFP_KERNEL, get_order(memmap_size)); 201 if (page) 202 goto got_map_page; 203 204 ret = vmalloc(memmap_size); 205 if (ret) 206 goto got_map_ptr; 207 208 return NULL; 209got_map_page: 210 ret = (struct page *)pfn_to_kaddr(page_to_pfn(page)); 211got_map_ptr: 212 memset(ret, 0, memmap_size); 213 214 return ret; 215} 216 217static int vaddr_in_vmalloc_area(void *addr) 218{ 219 if (addr >= (void *)VMALLOC_START && 220 addr < (void *)VMALLOC_END) 221 return 1; 222 return 0; 223} 224 225static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) 226{ 227 if (vaddr_in_vmalloc_area(memmap)) 228 vfree(memmap); 229 else 230 free_pages((unsigned long)memmap, 231 get_order(sizeof(struct page) * nr_pages)); 232} 233 234/* 235 * Allocate the accumulated non-linear sections, allocate a mem_map 236 * for each and record the physical to section mapping. 237 */ 238void sparse_init(void) 239{ 240 unsigned long pnum; 241 struct page *map; 242 243 for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { 244 if (!valid_section_nr(pnum)) 245 continue; 246 247 map = sparse_early_mem_map_alloc(pnum); 248 if (!map) 249 continue; 250 sparse_init_one_section(__nr_to_section(pnum), pnum, map); 251 } 252} 253 254/* 255 * returns the number of sections whose mem_maps were properly 256 * set. If this is <=0, then that means that the passed-in 257 * map was not consumed and must be freed. 258 */ 259int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, 260 int nr_pages) 261{ 262 unsigned long section_nr = pfn_to_section_nr(start_pfn); 263 struct pglist_data *pgdat = zone->zone_pgdat; 264 struct mem_section *ms; 265 struct page *memmap; 266 unsigned long flags; 267 int ret; 268 269 /* 270 * no locking for this, because it does its own 271 * plus, it does a kmalloc 272 */ 273 sparse_index_init(section_nr, pgdat->node_id); 274 memmap = __kmalloc_section_memmap(nr_pages); 275 276 pgdat_resize_lock(pgdat, &flags); 277 278 ms = __pfn_to_section(start_pfn); 279 if (ms->section_mem_map & SECTION_MARKED_PRESENT) { 280 ret = -EEXIST; 281 goto out; 282 } 283 ms->section_mem_map |= SECTION_MARKED_PRESENT; 284 285 ret = sparse_init_one_section(ms, section_nr, memmap); 286 287out: 288 pgdat_resize_unlock(pgdat, &flags); 289 if (ret <= 0) 290 __kfree_section_memmap(memmap, nr_pages); 291 return ret; 292} 293