slob.c revision ca2b84cb3c4a0d4d2143b46ec072cdff5d1b3b87
/*
 * SLOB Allocator: Simple List Of Blocks
 *
 * Matt Mackall <mpm@selenic.com> 12/30/03
 *
 * NUMA support by Paul Mundt, 2007.
 *
 * How SLOB works:
 *
 * The core of SLOB is a traditional K&R style heap allocator, with
 * support for returning aligned objects. The granularity of this
 * allocator is as little as 2 bytes; however, most architectures will
 * typically require 4 bytes on 32-bit and 8 bytes on 64-bit.
 *
 * The slob heap is a set of linked lists of pages from alloc_pages(),
 * and within each page, there is a singly-linked list of free blocks
 * (slob_t). The heap is grown on demand. To reduce fragmentation,
 * heap pages are segregated into three lists, with objects less than
 * 256 bytes, objects less than 1024 bytes, and all other objects.
 *
 * Allocation from the heap involves first searching for a page with
 * sufficient free blocks (using a next-fit-like approach) followed by
 * a first-fit scan of the page. Deallocation inserts objects back
 * into the free list in address order, so this is effectively an
 * address-ordered first fit.
 *
 * Above this is an implementation of kmalloc/kfree. Blocks returned
 * from kmalloc are prepended with a 4-byte header with the kmalloc size.
 * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
 * alloc_pages() directly, allocating compound pages so the page order
 * does not have to be separately tracked, and also stores the exact
 * allocation size in page->private so that it can be used to accurately
 * provide ksize(). These objects are detected in kfree() because slob_page()
 * is false for them.
 *
 * SLAB is emulated on top of SLOB by simply calling constructors and
 * destructors for every SLAB allocation. Objects are returned with the
 * 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
 * case the low-level allocator will fragment blocks to create the proper
 * alignment. Again, objects of page-size or greater are allocated by
 * calling alloc_pages(). As SLAB objects know their size, no separate
 * size bookkeeping is necessary and there is essentially no allocation
 * space overhead, and compound pages aren't needed for multi-page
 * allocations.
 *
 * NUMA support in SLOB is fairly simplistic, pushing most of the real
 * logic down to the page allocator, and simply doing the node accounting
 * on the upper levels. In the event that a node id is explicitly
 * provided, alloc_pages_node() with the specified node id is used
 * instead. The common case (or when the node id isn't explicitly provided)
 * will default to the current node, as per numa_node_id().
 *
 * Node aware pages are still inserted into the global freelist, and
 * these are scanned for by matching against the node id encoded in the
 * page flags. As a result, block allocations that can be satisfied from
 * the freelist will only be made from pages residing on the same node,
 * in order to prevent random node placement.
 */
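
/*
 * Worked example of the kmalloc() header described above (illustrative
 * only; the actual header size is the larger of ARCH_KMALLOC_MINALIGN and
 * ARCH_SLAB_MINALIGN, see __kmalloc_node() and kfree() below):
 *
 *	kmalloc(100):	m = slob_alloc(100 + align, gfp, align, node);
 *			*m = 100;		   record the requested size
 *			return (void *)m + align;  pointer given to the caller
 *
 *	kfree(p):	m = p - align;
 *			slob_free(m, *m + align);  header tells us the size
 *
 * so each small kmalloc() object carries 'align' bytes of overhead.
 */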

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <trace/kmemtrace.h>
#include <asm/atomic.h>

/*
 * slob_block has a field 'units', which indicates size of block if +ve,
 * or offset of next block if -ve (in SLOB_UNITs).
 *
 * Free blocks of size 1 unit simply contain the offset of the next block.
 * Those with larger size contain their size in the first SLOB_UNIT of
 * memory, and the offset of the next free block in the second SLOB_UNIT.
 */
#if PAGE_SIZE <= (32767 * 2)
typedef s16 slobidx_t;
#else
typedef s32 slobidx_t;
#endif

struct slob_block {
	slobidx_t units;
};
typedef struct slob_block slob_t;

/*
 * We use struct page fields to manage some slob allocation aspects,
 * however to avoid the horrible mess in include/linux/mm_types.h, we'll
 * just define our own struct page type variant here.
 */
struct slob_page {
	union {
		struct {
			unsigned long flags;	/* mandatory */
			atomic_t _count;	/* mandatory */
			slobidx_t units;	/* free units left in page */
			unsigned long pad[2];
			slob_t *free;		/* first free slob_t in page */
			struct list_head list;	/* linked list of free pages */
		};
		struct page page;
	};
};
static inline void struct_slob_page_wrong_size(void)
{ BUILD_BUG_ON(sizeof(struct slob_page) != sizeof(struct page)); }

/*
 * free_slob_page: call before a slob_page is returned to the page allocator.
 */
static inline void free_slob_page(struct slob_page *sp)
{
	reset_page_mapcount(&sp->page);
	sp->page.mapping = NULL;
}

/*
 * All partially free slob pages go on these lists.
 */
#define SLOB_BREAK1 256
#define SLOB_BREAK2 1024
static LIST_HEAD(free_slob_small);
static LIST_HEAD(free_slob_medium);
static LIST_HEAD(free_slob_large);

/*
 * is_slob_page: True for all slob pages (false for bigblock pages)
 */
static inline int is_slob_page(struct slob_page *sp)
{
	return PageSlobPage((struct page *)sp);
}

static inline void set_slob_page(struct slob_page *sp)
{
	__SetPageSlobPage((struct page *)sp);
}

static inline void clear_slob_page(struct slob_page *sp)
{
	__ClearPageSlobPage((struct page *)sp);
}

static inline struct slob_page *slob_page(const void *addr)
{
	return (struct slob_page *)virt_to_page(addr);
}

/*
 * slob_page_free: true for pages on one of the free_slob_* lists.
 */
static inline int slob_page_free(struct slob_page *sp)
{
	return PageSlobFree((struct page *)sp);
}

static void set_slob_page_free(struct slob_page *sp, struct list_head *list)
{
	list_add(&sp->list, list);
	__SetPageSlobFree((struct page *)sp);
}

static inline void clear_slob_page_free(struct slob_page *sp)
{
	list_del(&sp->list);
	__ClearPageSlobFree((struct page *)sp);
}

#define SLOB_UNIT sizeof(slob_t)
#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
#define SLOB_ALIGN L1_CACHE_BYTES

/*
 * struct slob_rcu is inserted at the tail of allocated slob blocks, which
 * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
 * the block using call_rcu.
 */
struct slob_rcu {
	struct rcu_head head;
	int size;
};

/*
 * slob_lock protects all slob allocator structures.
 */
static DEFINE_SPINLOCK(slob_lock);

/*
 * Encode the given size and next info into a free slob block s.
 */
static void set_slob(slob_t *s, slobidx_t size, slob_t *next)
{
	slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
	slobidx_t offset = next - base;

	if (size > 1) {
		s[0].units = size;
		s[1].units = offset;
	} else
		s[0].units = -offset;
}

/*
 * Return the size of a slob block.
 */
static slobidx_t slob_units(slob_t *s)
{
	if (s->units > 0)
		return s->units;
	return 1;
}

/*
 * Return the next free slob block pointer after this one.
 */
static slob_t *slob_next(slob_t *s)
{
	slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
	slobidx_t next;

	if (s[0].units < 0)
		next = -s[0].units;
	else
		next = s[1].units;
	return base+next;
}

/*
 * Returns true if s is the last free block in its page.
 */
static int slob_last(slob_t *s)
{
	return !((unsigned long)slob_next(s) & ~PAGE_MASK);
}
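
/*
 * Illustrative example of the encoding used by set_slob() and decoded by
 * the helpers above (not part of the allocator).  For a page-aligned base
 * b, the initialisation done in slob_alloc() for a brand new page,
 *
 *	set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
 *
 * stores SLOB_UNITS(PAGE_SIZE) in b[0].units and the same value as the
 * offset in b[1].units, so slob_units(b) returns the whole page in units,
 * slob_next(b) points just past the page, and slob_last(b) is true.  A
 * one-unit free block instead stores only the negated offset of its
 * successor in b[0].units.
 */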

static void *slob_new_pages(gfp_t gfp, int order, int node)
{
	void *page;

#ifdef CONFIG_NUMA
	if (node != -1)
		page = alloc_pages_node(node, gfp, order);
	else
#endif
		page = alloc_pages(gfp, order);

	if (!page)
		return NULL;

	return page_address(page);
}

static void slob_free_pages(void *b, int order)
{
	free_pages((unsigned long)b, order);
}

/*
 * Allocate a slob block within a given slob_page sp.
 */
static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
{
	slob_t *prev, *cur, *aligned = NULL;
	int delta = 0, units = SLOB_UNITS(size);

	for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
		slobidx_t avail = slob_units(cur);

		if (align) {
			aligned = (slob_t *)ALIGN((unsigned long)cur, align);
			delta = aligned - cur;
		}
		if (avail >= units + delta) { /* room enough? */
			slob_t *next;

			if (delta) { /* need to fragment head to align? */
				next = slob_next(cur);
				set_slob(aligned, avail - delta, next);
				set_slob(cur, delta, aligned);
				prev = cur;
				cur = aligned;
				avail = slob_units(cur);
			}

			next = slob_next(cur);
			if (avail == units) { /* exact fit? unlink. */
				if (prev)
					set_slob(prev, slob_units(prev), next);
				else
					sp->free = next;
			} else { /* fragment */
				if (prev)
					set_slob(prev, slob_units(prev), cur + units);
				else
					sp->free = cur + units;
				set_slob(cur + units, avail - units, next);
			}

			sp->units -= units;
			if (!sp->units)
				clear_slob_page_free(sp);
			return cur;
		}
		if (slob_last(cur))
			return NULL;
	}
}
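
/*
 * Worked example for slob_page_alloc() (illustrative; assumes a 2-byte
 * SLOB_UNIT, i.e. an s16 slobidx_t): a request for units = 8 with
 * align = 8 reaches a free block cur at byte offset 100 of its page with
 * avail = 20 units.  ALIGN() moves the start to offset 104, so delta = 2
 * units and 20 >= 8 + 2 holds.  The 2-unit head [100,104) is split off as
 * its own free block, the allocation proceeds at offset 104 with
 * avail = 18, and since 18 != 8 the tail becomes a 10-unit free block at
 * offset 120.
 */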

/*
 * slob_alloc: entry point into the slob allocator.
 */
static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
{
	struct slob_page *sp;
	struct list_head *prev;
	struct list_head *slob_list;
	slob_t *b = NULL;
	unsigned long flags;

	if (size < SLOB_BREAK1)
		slob_list = &free_slob_small;
	else if (size < SLOB_BREAK2)
		slob_list = &free_slob_medium;
	else
		slob_list = &free_slob_large;

	spin_lock_irqsave(&slob_lock, flags);
	/* Iterate through each partially free page, try to find room */
	list_for_each_entry(sp, slob_list, list) {
#ifdef CONFIG_NUMA
		/*
		 * If there's a node specification, search for a partial
		 * page with a matching node id in the freelist.
		 */
		if (node != -1 && page_to_nid(&sp->page) != node)
			continue;
#endif
		/* Enough room on this page? */
		if (sp->units < SLOB_UNITS(size))
			continue;

		/* Attempt to alloc */
		prev = sp->list.prev;
		b = slob_page_alloc(sp, size, align);
		if (!b)
			continue;

		/* Improve fragment distribution and reduce our average
		 * search time by starting our next search here. (see
		 * Knuth vol 1, sec 2.5, pg 449) */
		if (prev != slob_list->prev &&
				slob_list->next != prev->next)
			list_move_tail(slob_list, prev->next);
		break;
	}
	spin_unlock_irqrestore(&slob_lock, flags);

	/* Not enough space: must allocate a new page */
	if (!b) {
		b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node);
		if (!b)
			return NULL;
		sp = slob_page(b);
		set_slob_page(sp);

		spin_lock_irqsave(&slob_lock, flags);
		sp->units = SLOB_UNITS(PAGE_SIZE);
		sp->free = b;
		INIT_LIST_HEAD(&sp->list);
		set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
		set_slob_page_free(sp, slob_list);
		b = slob_page_alloc(sp, size, align);
		BUG_ON(!b);
		spin_unlock_irqrestore(&slob_lock, flags);
	}
	if (unlikely((gfp & __GFP_ZERO) && b))
		memset(b, 0, size);
	return b;
}

/*
 * slob_free: entry point into the slob allocator.
 */
static void slob_free(void *block, int size)
{
	struct slob_page *sp;
	slob_t *prev, *next, *b = (slob_t *)block;
	slobidx_t units;
	unsigned long flags;

	if (unlikely(ZERO_OR_NULL_PTR(block)))
		return;
	BUG_ON(!size);

	sp = slob_page(block);
	units = SLOB_UNITS(size);

	spin_lock_irqsave(&slob_lock, flags);

	if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
		/* Go directly to page allocator. Do not pass slob allocator */
		if (slob_page_free(sp))
			clear_slob_page_free(sp);
		spin_unlock_irqrestore(&slob_lock, flags);
		clear_slob_page(sp);
		free_slob_page(sp);
		free_page((unsigned long)b);
		return;
	}

	if (!slob_page_free(sp)) {
		/* This slob page is about to become partially free. Easy! */
		sp->units = units;
		sp->free = b;
		set_slob(b, units,
			(void *)((unsigned long)(b +
					SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
		set_slob_page_free(sp, &free_slob_small);
		goto out;
	}

	/*
	 * Otherwise the page is already partially free, so find reinsertion
	 * point.
	 */
	sp->units += units;

	if (b < sp->free) {
		if (b + units == sp->free) {
			units += slob_units(sp->free);
			sp->free = slob_next(sp->free);
		}
		set_slob(b, units, sp->free);
		sp->free = b;
	} else {
		prev = sp->free;
		next = slob_next(prev);
		while (b > next) {
			prev = next;
			next = slob_next(prev);
		}

		if (!slob_last(prev) && b + units == next) {
			units += slob_units(next);
			set_slob(b, units, slob_next(next));
		} else
			set_slob(b, units, next);

		if (prev + slob_units(prev) == b) {
			units = slob_units(b) + slob_units(prev);
			set_slob(prev, units, slob_next(b));
		} else
			set_slob(prev, slob_units(prev), b);
	}
out:
	spin_unlock_irqrestore(&slob_lock, flags);
}
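
/*
 * Worked example of the coalescing in slob_free() (illustrative): freeing
 * a block b that sits between two free neighbours first merges b with the
 * following block (b + units == next), then lets the preceding block
 * absorb the result (prev + slob_units(prev) == b), leaving one free block
 * spanning all three.  A block freed below sp->free simply becomes the new
 * list head, merging with the old head when the two are contiguous.
 */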

/*
 * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
 */

#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
#endif

void *__kmalloc_node(size_t size, gfp_t gfp, int node)
{
	unsigned int *m;
	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
	void *ret;

	lockdep_trace_alloc(gfp);

	if (size < PAGE_SIZE - align) {
		if (!size)
			return ZERO_SIZE_PTR;

		m = slob_alloc(size + align, gfp, align, node);

		if (!m)
			return NULL;
		*m = size;
		ret = (void *)m + align;

		trace_kmalloc_node(_RET_IP_, ret,
				   size, size + align, gfp, node);
	} else {
		unsigned int order = get_order(size);

		ret = slob_new_pages(gfp | __GFP_COMP, get_order(size), node);
		if (ret) {
			struct page *page;
			page = virt_to_page(ret);
			page->private = size;
		}

		trace_kmalloc_node(_RET_IP_, ret,
				   size, PAGE_SIZE << order, gfp, node);
	}

	return ret;
}
EXPORT_SYMBOL(__kmalloc_node);

void kfree(const void *block)
{
	struct slob_page *sp;

	if (unlikely(ZERO_OR_NULL_PTR(block)))
		return;

	sp = slob_page(block);
	if (is_slob_page(sp)) {
		int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
		unsigned int *m = (unsigned int *)(block - align);
		slob_free(m, *m + align);
	} else
		put_page(&sp->page);

	trace_kfree(_RET_IP_, block);
}
EXPORT_SYMBOL(kfree);

/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t ksize(const void *block)
{
	struct slob_page *sp;

	BUG_ON(!block);
	if (unlikely(block == ZERO_SIZE_PTR))
		return 0;

	sp = slob_page(block);
	if (is_slob_page(sp)) {
		int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
		unsigned int *m = (unsigned int *)(block - align);
		return SLOB_UNITS(*m) * SLOB_UNIT;
	} else
		return sp->page.private;
}
EXPORT_SYMBOL(ksize);

struct kmem_cache {
	unsigned int size, align;
	unsigned long flags;
	const char *name;
	void (*ctor)(void *);
};

struct kmem_cache *kmem_cache_create(const char *name, size_t size,
	size_t align, unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *c;

	c = slob_alloc(sizeof(struct kmem_cache),
		GFP_KERNEL, ARCH_KMALLOC_MINALIGN, -1);

	if (c) {
		c->name = name;
		c->size = size;
		if (flags & SLAB_DESTROY_BY_RCU) {
			/* leave room for rcu footer at the end of object */
			c->size += sizeof(struct slob_rcu);
		}
		c->flags = flags;
		c->ctor = ctor;
		/* ignore alignment unless it's forced */
		c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
		if (c->align < ARCH_SLAB_MINALIGN)
			c->align = ARCH_SLAB_MINALIGN;
		if (c->align < align)
			c->align = align;
	} else if (flags & SLAB_PANIC)
		panic("Cannot create slab cache %s\n", name);

	return c;
}
EXPORT_SYMBOL(kmem_cache_create);

void kmem_cache_destroy(struct kmem_cache *c)
{
	slob_free(c, sizeof(struct kmem_cache));
}
EXPORT_SYMBOL(kmem_cache_destroy);

void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
{
	void *b;

	if (c->size < PAGE_SIZE) {
		b = slob_alloc(c->size, flags, c->align, node);
		trace_kmem_cache_alloc_node(_RET_IP_, b, c->size,
					    SLOB_UNITS(c->size) * SLOB_UNIT,
					    flags, node);
	} else {
		b = slob_new_pages(flags, get_order(c->size), node);
		trace_kmem_cache_alloc_node(_RET_IP_, b, c->size,
					    PAGE_SIZE << get_order(c->size),
					    flags, node);
	}

	if (c->ctor)
		c->ctor(b);

	return b;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);

static void __kmem_cache_free(void *b, int size)
{
	if (size < PAGE_SIZE)
		slob_free(b, size);
	else
		slob_free_pages(b, get_order(size));
}

static void kmem_rcu_free(struct rcu_head *head)
{
	struct slob_rcu *slob_rcu = (struct slob_rcu *)head;
	void *b = (void *)slob_rcu - (slob_rcu->size - sizeof(struct slob_rcu));

	__kmem_cache_free(b, slob_rcu->size);
}

void kmem_cache_free(struct kmem_cache *c, void *b)
{
	if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
		struct slob_rcu *slob_rcu;
		slob_rcu = b + (c->size - sizeof(struct slob_rcu));
		INIT_RCU_HEAD(&slob_rcu->head);
		slob_rcu->size = c->size;
		call_rcu(&slob_rcu->head, kmem_rcu_free);
	} else {
		__kmem_cache_free(b, c->size);
	}

	trace_kmem_cache_free(_RET_IP_, b);
}
EXPORT_SYMBOL(kmem_cache_free);
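
/*
 * Typical use of the emulated SLAB interface above (illustrative sketch;
 * "foo_cache" and "struct foo" are made-up names):
 *
 *	struct kmem_cache *foo_cache;
 *
 *	foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0, 0, NULL);
 *	obj = kmem_cache_alloc_node(foo_cache, GFP_KERNEL, -1);
 *	...
 *	kmem_cache_free(foo_cache, obj);
 *	kmem_cache_destroy(foo_cache);
 *
 * With SLAB_DESTROY_BY_RCU each object is extended by sizeof(struct
 * slob_rcu) and kmem_cache_free() defers the real release to call_rcu().
 */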

unsigned int kmem_cache_size(struct kmem_cache *c)
{
	return c->size;
}
EXPORT_SYMBOL(kmem_cache_size);

const char *kmem_cache_name(struct kmem_cache *c)
{
	return c->name;
}
EXPORT_SYMBOL(kmem_cache_name);

int kmem_cache_shrink(struct kmem_cache *d)
{
	return 0;
}
EXPORT_SYMBOL(kmem_cache_shrink);

int kmem_ptr_validate(struct kmem_cache *a, const void *b)
{
	return 0;
}

static unsigned int slob_ready __read_mostly;

int slab_is_available(void)
{
	return slob_ready;
}

void __init kmem_cache_init(void)
{
	slob_ready = 1;
}