bootmem.c revision 92aa63a5a1bf2e7b0c79e6716d24b76dbbdcf951
/*
 *  linux/mm/bootmem.c
 *
 *  Copyright (C) 1999 Ingo Molnar
 *  Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
 *
 *  simple boot-time physical memory area allocator and
 *  free memory collector. It's used to deal with reserved
 *  system memory and memory holes as well.
 */

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <asm/dma.h>
#include <asm/io.h>
#include "internal.h"

/*
 * Access to this subsystem has to be serialized externally. (this is
 * true for the boot process anyway)
 */
unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;

EXPORT_SYMBOL(max_pfn);		/* This is exported so
				 * dma_get_required_mask(), which uses
				 * it, can be an inline function */

#ifdef CONFIG_CRASH_DUMP
/*
 * If we have booted due to a crash, max_pfn will be a very low value. We need
 * to know the amount of memory that the previous kernel used.
 */
unsigned long saved_max_pfn;
#endif

/* return the number of _pages_ that will be allocated for the boot bitmap */
unsigned long __init bootmem_bootmap_pages (unsigned long pages)
{
	unsigned long mapsize;

	mapsize = (pages+7)/8;
	mapsize = (mapsize + ~PAGE_MASK) & PAGE_MASK;
	mapsize >>= PAGE_SHIFT;

	return mapsize;
}

/*
 * Called once to set up the allocator itself.
 */
static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
	unsigned long mapstart, unsigned long start, unsigned long end)
{
	bootmem_data_t *bdata = pgdat->bdata;
	unsigned long mapsize = ((end - start)+7)/8;

	pgdat->pgdat_next = pgdat_list;
	pgdat_list = pgdat;

	mapsize = (mapsize + (sizeof(long) - 1UL)) & ~(sizeof(long) - 1UL);
	bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
	bdata->node_boot_start = (start << PAGE_SHIFT);
	bdata->node_low_pfn = end;

	/*
	 * Initially all pages are reserved - setup_arch() has to
	 * register free RAM areas explicitly.
	 */
	memset(bdata->node_bootmem_map, 0xff, mapsize);

	return mapsize;
}

/*
 * Marks a particular physical memory range as unallocatable. Usable RAM
 * might be used for boot-time allocations - or it might get added
 * to the free page pool later on.
 */
static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size)
{
	unsigned long i;
	/*
	 * round up, partially reserved pages are considered
	 * fully reserved.
	 */
	unsigned long sidx = (addr - bdata->node_boot_start)/PAGE_SIZE;
	unsigned long eidx = (addr + size - bdata->node_boot_start +
							PAGE_SIZE-1)/PAGE_SIZE;
	unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE;

	BUG_ON(!size);
	BUG_ON(sidx >= eidx);
	BUG_ON((addr >> PAGE_SHIFT) >= bdata->node_low_pfn);
	BUG_ON(end > bdata->node_low_pfn);

	for (i = sidx; i < eidx; i++)
		if (test_and_set_bit(i, bdata->node_bootmem_map)) {
#ifdef CONFIG_DEBUG_BOOTMEM
			printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
#endif
		}
}

static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size)
{
	unsigned long i;
	unsigned long start;
	/*
	 * round down end of usable mem, partially free pages are
	 * considered reserved.
	 */
	unsigned long sidx;
	unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE;
	unsigned long end = (addr + size)/PAGE_SIZE;

	BUG_ON(!size);
	BUG_ON(end > bdata->node_low_pfn);

	if (addr < bdata->last_success)
		bdata->last_success = addr;

	/*
	 * Round up the beginning of the address.
	 */
	start = (addr + PAGE_SIZE-1) / PAGE_SIZE;
	sidx = start - (bdata->node_boot_start/PAGE_SIZE);

	for (i = sidx; i < eidx; i++) {
		if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
			BUG();
	}
}
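
/*
 * Worked example of the rounding above (illustrative values, assuming
 * PAGE_SIZE == 4096 and node_boot_start == 0): reserving addr = 0x1800,
 * size = 0x1000 yields sidx = 1, eidx = 3, so pages 1 and 2 are both
 * marked reserved - partially covered pages round outward.  Freeing the
 * same range rounds inward instead: start rounds up to page 2 while
 * eidx rounds down to 2, so no bits are cleared at all.  The asymmetry
 * is deliberate: a page frame only returns to the free pool once every
 * byte of it has been freed.
 */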

/*
 * We 'merge' subsequent allocations to save space. We might 'lose'
 * some fraction of a page if allocations cannot be satisfied due to
 * size constraints on boxes where there is physical RAM space
 * fragmentation - in these cases (mostly large memory boxes) this
 * is not a problem.
 *
 * On low memory boxes we get it right in 100% of the cases.
 *
 * alignment has to be a power of 2 value.
 *
 * NOTE:  This function is _not_ reentrant.
 */
static void * __init
__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
		unsigned long align, unsigned long goal)
{
	unsigned long offset, remaining_size, areasize, preferred;
	unsigned long i, start = 0, incr, eidx;
	void *ret;

	if(!size) {
		printk("__alloc_bootmem_core(): zero-sized request\n");
		BUG();
	}
	BUG_ON(align & (align-1));

	eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
	offset = 0;
	if (align &&
	    (bdata->node_boot_start & (align - 1UL)) != 0)
		offset = (align - (bdata->node_boot_start & (align - 1UL)));
	offset >>= PAGE_SHIFT;

	/*
	 * We try to allocate bootmem pages above 'goal'
	 * first, then we try to allocate lower pages.
	 */
	if (goal && (goal >= bdata->node_boot_start) &&
	    ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
		preferred = goal - bdata->node_boot_start;

		if (bdata->last_success >= preferred)
			preferred = bdata->last_success;
	} else
		preferred = 0;

	preferred = ((preferred + align - 1) & ~(align - 1)) >> PAGE_SHIFT;
	preferred += offset;
	areasize = (size+PAGE_SIZE-1)/PAGE_SIZE;
	incr = align >> PAGE_SHIFT ? : 1;

restart_scan:
	for (i = preferred; i < eidx; i += incr) {
		unsigned long j;
		i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
		i = ALIGN(i, incr);
		if (test_bit(i, bdata->node_bootmem_map))
			continue;
		for (j = i + 1; j < i + areasize; ++j) {
			if (j >= eidx)
				goto fail_block;
			if (test_bit (j, bdata->node_bootmem_map))
				goto fail_block;
		}
		start = i;
		goto found;
	fail_block:
		i = ALIGN(j, incr);
	}

	if (preferred > offset) {
		preferred = offset;
		goto restart_scan;
	}
	return NULL;

found:
	bdata->last_success = start << PAGE_SHIFT;
	BUG_ON(start >= eidx);

	/*
	 * Is the next page of the previous allocation-end the start
	 * of this allocation's buffer? If yes then we can 'merge'
	 * the previous partial page with this allocation.
	 */
	if (align < PAGE_SIZE &&
	    bdata->last_offset && bdata->last_pos+1 == start) {
		offset = (bdata->last_offset+align-1) & ~(align-1);
		BUG_ON(offset > PAGE_SIZE);
		remaining_size = PAGE_SIZE-offset;
		if (size < remaining_size) {
			areasize = 0;
			/* last_pos unchanged */
			bdata->last_offset = offset+size;
			ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
						bdata->node_boot_start);
		} else {
			remaining_size = size - remaining_size;
			areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE;
			ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
						bdata->node_boot_start);
			bdata->last_pos = start+areasize-1;
			bdata->last_offset = remaining_size;
		}
		bdata->last_offset &= ~PAGE_MASK;
	} else {
		bdata->last_pos = start + areasize - 1;
		bdata->last_offset = size & ~PAGE_MASK;
		ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
	}

	/*
	 * Reserve the area now:
	 */
	for (i = start; i < start+areasize; i++)
		if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
			BUG();
	memset(ret, 0, size);
	return ret;
}
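
/*
 * Worked example of the merge path (illustrative, assuming PAGE_SIZE ==
 * 4096 and that every page below the scan result is already taken): a
 * first 512-byte allocation with align == 16 claims a whole page p and
 * leaves last_pos == p, last_offset == 512.  A second 512-byte request
 * then finds start == p + 1 == last_pos + 1, so the merge branch rounds
 * last_offset up to the alignment (still 512 here), sees 3584 bytes
 * left in the partial page, and returns p*PAGE_SIZE + 512 with areasize
 * == 0 - the second allocation sets no new bitmap bits at all.
 */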

static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
{
	struct page *page;
	unsigned long pfn;
	bootmem_data_t *bdata = pgdat->bdata;
	unsigned long i, count, total = 0;
	unsigned long idx;
	unsigned long *map;
	int gofast = 0;

	BUG_ON(!bdata->node_bootmem_map);

	count = 0;
	/* first extant page of the node */
	pfn = bdata->node_boot_start >> PAGE_SHIFT;
	idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
	map = bdata->node_bootmem_map;
	/* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */
	if (bdata->node_boot_start == 0 ||
	    ffs(bdata->node_boot_start) - PAGE_SHIFT > ffs(BITS_PER_LONG))
		gofast = 1;
	for (i = 0; i < idx; ) {
		unsigned long v = ~map[i / BITS_PER_LONG];

		if (gofast && v == ~0UL) {
			int j, order;

			page = pfn_to_page(pfn);
			count += BITS_PER_LONG;
			__ClearPageReserved(page);
			order = ffs(BITS_PER_LONG) - 1;
			set_page_refs(page, order);
			for (j = 1; j < BITS_PER_LONG; j++) {
				if (j + 16 < BITS_PER_LONG)
					prefetchw(page + j + 16);
				__ClearPageReserved(page + j);
			}
			__free_pages(page, order);
			i += BITS_PER_LONG;
			page += BITS_PER_LONG;
		} else if (v) {
			unsigned long m;

			page = pfn_to_page(pfn);
			for (m = 1; m && i < idx; m<<=1, page++, i++) {
				if (v & m) {
					count++;
					__ClearPageReserved(page);
					set_page_refs(page, 0);
					__free_page(page);
				}
			}
		} else {
			i+=BITS_PER_LONG;
		}
		pfn += BITS_PER_LONG;
	}
	total += count;

	/*
	 * Now free the allocator bitmap itself, it's not
	 * needed anymore:
	 */
	page = virt_to_page(bdata->node_bootmem_map);
	count = 0;
	for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) {
		count++;
		__ClearPageReserved(page);
		set_page_count(page, 1);
		__free_page(page);
	}
	total += count;
	bdata->node_bootmem_map = NULL;

	return total;
}
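
/*
 * Illustrative arithmetic for the 'gofast' path above: with
 * BITS_PER_LONG == 64, a bitmap word whose pages are all free makes
 * v == ~0UL, and order = ffs(64) - 1 = 6, so those 64 pages go to
 * __free_pages() as a single order-6 block rather than 64 separate
 * order-0 frees.  A 32-bit box frees order-5 (32-page) blocks the same
 * way.  The shortcut is only taken when the node's start address is
 * sufficiently aligned, which is what the ffs() test above the loop
 * verifies.
 */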

unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn)
{
	return(init_bootmem_core(pgdat, freepfn, startpfn, endpfn));
}

void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size)
{
	reserve_bootmem_core(pgdat->bdata, physaddr, size);
}

void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size)
{
	free_bootmem_core(pgdat->bdata, physaddr, size);
}

unsigned long __init free_all_bootmem_node (pg_data_t *pgdat)
{
	return(free_all_bootmem_core(pgdat));
}

unsigned long __init init_bootmem (unsigned long start, unsigned long pages)
{
	max_low_pfn = pages;
	min_low_pfn = start;
	return(init_bootmem_core(NODE_DATA(0), start, 0, pages));
}

#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
void __init reserve_bootmem (unsigned long addr, unsigned long size)
{
	reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size);
}
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */

void __init free_bootmem (unsigned long addr, unsigned long size)
{
	free_bootmem_core(NODE_DATA(0)->bdata, addr, size);
}

unsigned long __init free_all_bootmem (void)
{
	return(free_all_bootmem_core(NODE_DATA(0)));
}

void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal)
{
	pg_data_t *pgdat = pgdat_list;
	void *ptr;

	for_each_pgdat(pgdat)
		if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
						align, goal)))
			return(ptr);

	/*
	 * Whoops, we cannot satisfy the allocation request.
	 */
	printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
	panic("Out of memory");
	return NULL;
}

void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal)
{
	void *ptr;

	ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal);
	if (ptr)
		return (ptr);

	return __alloc_bootmem(size, align, goal);
}
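
/*
 * Typical boot-time sequence, sketched for a single-node box.  This is
 * an illustrative outline only - start_pfn, usable_bytes and size are
 * placeholder names, not code from any particular setup_arch():
 *
 *	bitmap_size = init_bootmem(start_pfn, max_low_pfn);
 *	free_bootmem(start_pfn << PAGE_SHIFT, usable_bytes);
 *	reserve_bootmem(start_pfn << PAGE_SHIFT, bitmap_size);
 *	...
 *	ptr = __alloc_bootmem(size, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS));
 *	...
 *	totalram_pages += free_all_bootmem();
 *
 * free_all_bootmem() hands every still-free page to the buddy
 * allocator and returns how many it released; after that point this
 * allocator must not be used again.
 */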