slob.c revision ef2ad80c7d255ed0449eda947c2d700635b7e0f5
/*
 * SLOB Allocator: Simple List Of Blocks
 *
 * Matt Mackall <mpm@selenic.com> 12/30/03
 *
 * NUMA support by Paul Mundt, 2007.
 *
 * How SLOB works:
 *
 * The core of SLOB is a traditional K&R style heap allocator, with
 * support for returning aligned objects. The granularity of this
 * allocator is as little as 2 bytes, however typically most architectures
 * will require 4 bytes on 32-bit and 8 bytes on 64-bit.
 *
 * The slob heap is a linked list of pages from alloc_pages(), and
 * within each page, there is a singly-linked list of free blocks (slob_t).
 * The heap is grown on demand and allocation from the heap is currently
 * first-fit.
 *
 * Above this is an implementation of kmalloc/kfree. Blocks returned
 * from kmalloc are prepended with a 4-byte header with the kmalloc size.
 * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
 * alloc_pages() directly, allocating compound pages so the page order
 * does not have to be separately tracked, and also stores the exact
 * allocation size in page->private so that it can be used to accurately
 * provide ksize(). These objects are detected in kfree() because slob_page()
 * is false for them.
 *
 * SLAB is emulated on top of SLOB by simply calling constructors and
 * destructors for every SLAB allocation. Objects are returned with the
 * 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
 * case the low-level allocator will fragment blocks to create the proper
 * alignment. Again, objects of page-size or greater are allocated by
 * calling alloc_pages(). As SLAB objects know their size, no separate
 * size bookkeeping is necessary and there is essentially no allocation
 * space overhead, and compound pages aren't needed for multi-page
 * allocations.
 *
 * NUMA support in SLOB is fairly simplistic, pushing most of the real
 * logic down to the page allocator, and simply doing the node accounting
 * on the upper levels. In the event that a node id is explicitly
 * provided, alloc_pages_node() with the specified node id is used
 * instead. The common case (or when the node id isn't explicitly provided)
 * will default to the current node, as per numa_node_id().
 *
 * Node-aware pages are still inserted into the global freelist, and
 * these are scanned for by matching against the node id encoded in the
 * page flags. As a result, block allocations that can be satisfied from
 * the freelist will only be satisfied from pages residing on the same
 * node, in order to prevent random node placement.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <asm/atomic.h>

/*
 * slob_block has a field 'units', which indicates size of block if +ve,
 * or offset of next block if -ve (in SLOB_UNITs).
 *
 * Free blocks of size 1 unit simply contain the offset of the next block.
 * Those with larger size contain their size in the first SLOB_UNIT of
 * memory, and the offset of the next free block in the second SLOB_UNIT.
 */
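/*
 * A worked example of the encoding above (a sketch, assuming 2-byte
 * units, i.e. slobidx_t is s16): a free block of 3 units whose next free
 * block starts 40 units into the same page is stored as s[0].units = 3,
 * s[1].units = 40, while a 1-unit free block with the same successor is
 * stored as s[0].units = -40.
 */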
#if PAGE_SIZE <= (32767 * 2)
typedef s16 slobidx_t;
#else
typedef s32 slobidx_t;
#endif

struct slob_block {
	slobidx_t units;
};
typedef struct slob_block slob_t;

/*
 * We use struct page fields to manage some slob allocation aspects,
 * however to avoid the horrible mess in include/linux/mm_types.h, we'll
 * just define our own struct page type variant here.
 */
struct slob_page {
	union {
		struct {
			unsigned long flags;	/* mandatory */
			atomic_t _count;	/* mandatory */
			slobidx_t units;	/* free units left in page */
			unsigned long pad[2];
			slob_t *free;		/* first free slob_t in page */
			struct list_head list;	/* linked list of free pages */
		};
		struct page page;
	};
};
static inline void struct_slob_page_wrong_size(void)
{ BUILD_BUG_ON(sizeof(struct slob_page) != sizeof(struct page)); }

/*
 * free_slob_page: call before a slob_page is returned to the page allocator.
 */
static inline void free_slob_page(struct slob_page *sp)
{
	reset_page_mapcount(&sp->page);
	sp->page.mapping = NULL;
}

/*
 * All (partially) free slob pages go on this list.
 */
static LIST_HEAD(free_slob_pages);

/*
 * slob_page: True for all slob pages (false for bigblock pages)
 */
static inline int slob_page(struct slob_page *sp)
{
	return test_bit(PG_active, &sp->flags);
}

static inline void set_slob_page(struct slob_page *sp)
{
	__set_bit(PG_active, &sp->flags);
}

static inline void clear_slob_page(struct slob_page *sp)
{
	__clear_bit(PG_active, &sp->flags);
}

/*
 * slob_page_free: true for pages on free_slob_pages list.
 */
static inline int slob_page_free(struct slob_page *sp)
{
	return test_bit(PG_private, &sp->flags);
}

static inline void set_slob_page_free(struct slob_page *sp)
{
	list_add(&sp->list, &free_slob_pages);
	__set_bit(PG_private, &sp->flags);
}

static inline void clear_slob_page_free(struct slob_page *sp)
{
	list_del(&sp->list);
	__clear_bit(PG_private, &sp->flags);
}

#define SLOB_UNIT sizeof(slob_t)
#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
#define SLOB_ALIGN L1_CACHE_BYTES

/*
 * struct slob_rcu is inserted at the tail of allocated slob blocks, which
 * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free
 * the block using call_rcu.
 */
struct slob_rcu {
	struct rcu_head head;
	int size;
};

/*
 * slob_lock protects all slob allocator structures.
 */
static DEFINE_SPINLOCK(slob_lock);

/*
 * Encode the given size and next info into a free slob block s.
 */
static void set_slob(slob_t *s, slobidx_t size, slob_t *next)
{
	slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
	slobidx_t offset = next - base;

	if (size > 1) {
		s[0].units = size;
		s[1].units = offset;
	} else
		s[0].units = -offset;
}

/*
 * Return the size of a slob block.
 */
static slobidx_t slob_units(slob_t *s)
{
	if (s->units > 0)
		return s->units;
	return 1;
}
/*
 * Return the next free slob block pointer after this one.
 */
static slob_t *slob_next(slob_t *s)
{
	slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
	slobidx_t next;

	if (s[0].units < 0)
		next = -s[0].units;
	else
		next = s[1].units;
	return base+next;
}

/*
 * Returns true if s is the last free block in its page.
 */
static int slob_last(slob_t *s)
{
	return !((unsigned long)slob_next(s) & ~PAGE_MASK);
}

static void *slob_new_page(gfp_t gfp, int order, int node)
{
	void *page;

#ifdef CONFIG_NUMA
	if (node != -1)
		page = alloc_pages_node(node, gfp, order);
	else
#endif
		page = alloc_pages(gfp, order);

	if (!page)
		return NULL;

	return page_address(page);
}

/*
 * Allocate a slob block within a given slob_page sp.
 */
static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
{
	slob_t *prev, *cur, *aligned = 0;
	int delta = 0, units = SLOB_UNITS(size);

	for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) {
		slobidx_t avail = slob_units(cur);

		if (align) {
			aligned = (slob_t *)ALIGN((unsigned long)cur, align);
			delta = aligned - cur;
		}
		if (avail >= units + delta) { /* room enough? */
			slob_t *next;

			if (delta) { /* need to fragment head to align? */
				next = slob_next(cur);
				set_slob(aligned, avail - delta, next);
				set_slob(cur, delta, aligned);
				prev = cur;
				cur = aligned;
				avail = slob_units(cur);
			}

			next = slob_next(cur);
			if (avail == units) { /* exact fit? unlink. */
				if (prev)
					set_slob(prev, slob_units(prev), next);
				else
					sp->free = next;
			} else { /* fragment */
				if (prev)
					set_slob(prev, slob_units(prev), cur + units);
				else
					sp->free = cur + units;
				set_slob(cur + units, avail - units, next);
			}

			sp->units -= units;
			if (!sp->units)
				clear_slob_page_free(sp);
			return cur;
		}
		if (slob_last(cur))
			return NULL;
	}
}

/*
 * slob_alloc: entry point into the slob allocator.
 */
static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
{
	struct slob_page *sp;
	slob_t *b = NULL;
	unsigned long flags;

	spin_lock_irqsave(&slob_lock, flags);
	/* Iterate through each partially free page, try to find room */
	list_for_each_entry(sp, &free_slob_pages, list) {
#ifdef CONFIG_NUMA
		/*
		 * If there's a node specification, search for a partial
		 * page with a matching node id in the freelist.
		 */
		if (node != -1 && page_to_nid(&sp->page) != node)
			continue;
#endif

		if (sp->units >= SLOB_UNITS(size)) {
			b = slob_page_alloc(sp, size, align);
			if (b)
				break;
		}
	}
	spin_unlock_irqrestore(&slob_lock, flags);

	/* Not enough space: must allocate a new page */
	if (!b) {
		b = slob_new_page(gfp, 0, node);
		if (!b)
			return 0;
		sp = (struct slob_page *)virt_to_page(b);
		set_slob_page(sp);

		spin_lock_irqsave(&slob_lock, flags);
		sp->units = SLOB_UNITS(PAGE_SIZE);
		sp->free = b;
		INIT_LIST_HEAD(&sp->list);
		set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
		set_slob_page_free(sp);
		b = slob_page_alloc(sp, size, align);
		BUG_ON(!b);
		spin_unlock_irqrestore(&slob_lock, flags);
	}
	return b;
}
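/*
 * A minimal illustrative sketch (not part of the allocator proper): how a
 * page's encoded free list can be walked with slob_units(), slob_last()
 * and slob_next(), the same traversal slob_page_alloc() above performs.
 * The helper name is hypothetical; it assumes sp is on free_slob_pages
 * (so sp->free is valid) and that the caller holds slob_lock.
 */
static slobidx_t slob_count_free_units(struct slob_page *sp)
{
	slob_t *cur = sp->free;
	slobidx_t total = 0;

	for (;;) {
		total += slob_units(cur);	/* 1 for 1-unit blocks, stored size otherwise */
		if (slob_last(cur))		/* last block: next offset wraps to page base */
			break;
		cur = slob_next(cur);
	}
	return total;
}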
/*
 * slob_free: entry point into the slob allocator.
 */
static void slob_free(void *block, int size)
{
	struct slob_page *sp;
	slob_t *prev, *next, *b = (slob_t *)block;
	slobidx_t units;
	unsigned long flags;

	if (!block)
		return;
	BUG_ON(!size);

	sp = (struct slob_page *)virt_to_page(block);
	units = SLOB_UNITS(size);

	spin_lock_irqsave(&slob_lock, flags);

	if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
		/* Go directly to page allocator. Do not pass slob allocator */
		if (slob_page_free(sp))
			clear_slob_page_free(sp);
		clear_slob_page(sp);
		free_slob_page(sp);
		free_page((unsigned long)b);
		goto out;
	}

	if (!slob_page_free(sp)) {
		/* This slob page is about to become partially free. Easy! */
		sp->units = units;
		sp->free = b;
		set_slob(b, units,
			(void *)((unsigned long)(b +
					SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
		set_slob_page_free(sp);
		goto out;
	}

	/*
	 * Otherwise the page is already partially free, so find reinsertion
	 * point.
	 */
	sp->units += units;

	if (b < sp->free) {
		set_slob(b, units, sp->free);
		sp->free = b;
	} else {
		prev = sp->free;
		next = slob_next(prev);
		while (b > next) {
			prev = next;
			next = slob_next(prev);
		}

		if (!slob_last(prev) && b + units == next) {
			units += slob_units(next);
			set_slob(b, units, slob_next(next));
		} else
			set_slob(b, units, next);

		if (prev + slob_units(prev) == b) {
			units = slob_units(b) + slob_units(prev);
			set_slob(prev, units, slob_next(b));
		} else
			set_slob(prev, slob_units(prev), b);
	}
out:
	spin_unlock_irqrestore(&slob_lock, flags);
}

/*
 * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
 */
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
#endif

void *__kmalloc_node(size_t size, gfp_t gfp, int node)
{
	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);

	if (size < PAGE_SIZE - align) {
		unsigned int *m;
		m = slob_alloc(size + align, gfp, align, node);
		if (m)
			*m = size;
		return (void *)m + align;
	} else {
		void *ret;

		ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node);
		if (ret) {
			struct page *page;
			page = virt_to_page(ret);
			page->private = size;
		}
		return ret;
	}
}
EXPORT_SYMBOL(__kmalloc_node);

void kfree(const void *block)
{
	struct slob_page *sp;

	if (!block)
		return;

	sp = (struct slob_page *)virt_to_page(block);
	if (slob_page(sp)) {
		int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
		unsigned int *m = (unsigned int *)(block - align);
		slob_free(m, *m + align);
	} else
		put_page(&sp->page);
}
EXPORT_SYMBOL(kfree);

/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t ksize(const void *block)
{
	struct slob_page *sp;

	if (!block)
		return 0;

	sp = (struct slob_page *)virt_to_page(block);
	if (slob_page(sp))
		return ((slob_t *)block - 1)->units + SLOB_UNIT;
	else
		return sp->page.private;
}
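/*
 * Layout of a small kmalloc() object as set up by __kmalloc_node() above
 * (a sketch; "align" is max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN)):
 *
 *	+----------------------------+-----------------------------+
 *	| unsigned int size, padded  |  caller-visible object of   |
 *	| out to "align" bytes       |  "size" bytes               |
 *	+----------------------------+-----------------------------+
 *	^ block given to slob_alloc  ^ pointer returned to caller
 *
 * kfree() steps back "align" bytes to recover the stored size and then
 * hands the whole block (size + align) to slob_free(). Objects too large
 * for this scheme (>= PAGE_SIZE - align) come straight from the page
 * allocator and are released with put_page() instead.
 */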
struct kmem_cache {
	unsigned int size, align;
	unsigned long flags;
	const char *name;
	void (*ctor)(void *, struct kmem_cache *, unsigned long);
};

struct kmem_cache *kmem_cache_create(const char *name, size_t size,
	size_t align, unsigned long flags,
	void (*ctor)(void*, struct kmem_cache *, unsigned long),
	void (*dtor)(void*, struct kmem_cache *, unsigned long))
{
	struct kmem_cache *c;

	c = slob_alloc(sizeof(struct kmem_cache), flags, 0, -1);

	if (c) {
		c->name = name;
		c->size = size;
		if (flags & SLAB_DESTROY_BY_RCU) {
			/* leave room for rcu footer at the end of object */
			c->size += sizeof(struct slob_rcu);
		}
		c->flags = flags;
		c->ctor = ctor;
		/* ignore alignment unless it's forced */
		c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
		if (c->align < ARCH_SLAB_MINALIGN)
			c->align = ARCH_SLAB_MINALIGN;
		if (c->align < align)
			c->align = align;
	} else if (flags & SLAB_PANIC)
		panic("Cannot create slab cache %s\n", name);

	return c;
}
EXPORT_SYMBOL(kmem_cache_create);

void kmem_cache_destroy(struct kmem_cache *c)
{
	slob_free(c, sizeof(struct kmem_cache));
}
EXPORT_SYMBOL(kmem_cache_destroy);

void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
{
	void *b;

	if (c->size < PAGE_SIZE)
		b = slob_alloc(c->size, flags, c->align, node);
	else
		b = slob_new_page(flags, get_order(c->size), node);

	if (c->ctor)
		c->ctor(b, c, 0);

	return b;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);

void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t flags)
{
	void *ret = kmem_cache_alloc(c, flags);
	if (ret)
		memset(ret, 0, c->size);

	return ret;
}
EXPORT_SYMBOL(kmem_cache_zalloc);

static void __kmem_cache_free(void *b, int size)
{
	if (size < PAGE_SIZE)
		slob_free(b, size);
	else
		free_pages((unsigned long)b, get_order(size));
}

static void kmem_rcu_free(struct rcu_head *head)
{
	struct slob_rcu *slob_rcu = (struct slob_rcu *)head;
	void *b = (void *)slob_rcu - (slob_rcu->size - sizeof(struct slob_rcu));

	__kmem_cache_free(b, slob_rcu->size);
}

void kmem_cache_free(struct kmem_cache *c, void *b)
{
	if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
		struct slob_rcu *slob_rcu;
		slob_rcu = b + (c->size - sizeof(struct slob_rcu));
		INIT_RCU_HEAD(&slob_rcu->head);
		slob_rcu->size = c->size;
		call_rcu(&slob_rcu->head, kmem_rcu_free);
	} else {
		__kmem_cache_free(b, c->size);
	}
}
EXPORT_SYMBOL(kmem_cache_free);

unsigned int kmem_cache_size(struct kmem_cache *c)
{
	return c->size;
}
EXPORT_SYMBOL(kmem_cache_size);

const char *kmem_cache_name(struct kmem_cache *c)
{
	return c->name;
}
EXPORT_SYMBOL(kmem_cache_name);

int kmem_cache_shrink(struct kmem_cache *d)
{
	return 0;
}
EXPORT_SYMBOL(kmem_cache_shrink);

int kmem_ptr_validate(struct kmem_cache *a, const void *b)
{
	return 0;
}

static unsigned int slob_ready __read_mostly;

int slab_is_available(void)
{
	return slob_ready;
}

void __init kmem_cache_init(void)
{
	slob_ready = 1;
}
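/*
 * Example use of the kmem_cache frontend above (a sketch; "foo" and
 * "foo_cache" are hypothetical names, not part of this file):
 *
 *	struct foo { int a, b; };
 *	static struct kmem_cache *foo_cache;
 *
 *	foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
 *				      SLAB_HWCACHE_ALIGN, NULL, NULL);
 *	p = kmem_cache_zalloc(foo_cache, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cache, p);
 *	kmem_cache_destroy(foo_cache);
 *
 * With SLAB_HWCACHE_ALIGN set, kmem_cache_create() raises c->align to
 * SLOB_ALIGN (L1_CACHE_BYTES); in all cases c->align is at least
 * ARCH_SLAB_MINALIGN and at least the caller-supplied align.
 */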