/* slub.c revision 20c2df83d25c6a95affe6157a4c9cac4cf5ffaac */
/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks and only
 * uses a centralized lock to manage a pool of partial slabs.
 *
 * (C) 2007 SGI, Christoph Lameter <clameter@sgi.com>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/kallsyms.h>

/*
 * Lock order:
 *   1. slab_lock(page)
 *   2. slab->list_lock
 *
 * The slab_lock protects operations on the object of a particular
 * slab and its metadata in the page struct. If the slab lock
 * has been taken then no allocations nor frees can be performed
 * on the objects in the slab nor can the slab be added or removed
 * from the partial or full lists since this would mean modifying
 * the page_struct of the slab.
 *
 * The list_lock protects the partial and full list on each node and
 * the partial slab counter. If taken then no new slabs may be added or
 * removed from the lists nor make the number of partial slabs be modified.
 * (Note that the total number of slabs is an atomic value that may be
 * modified without taking the list lock).
 *
 * The list_lock is a centralized lock and thus we avoid taking it as
 * much as possible. As long as SLUB does not have to handle partial
 * slabs, operations can continue without any centralized lock. F.e.
 * allocating a long series of objects that fill up slabs does not require
 * the list lock.
 *
 * The lock order is sometimes inverted when we are trying to get a slab
 * off a list. We take the list_lock and then look for a page on the list
 * to use. While we do that objects in the slabs may be freed. We can
 * only operate on the slab if we have also taken the slab_lock. So we use
 * a slab_trylock() on the slab. If trylock was successful then no frees
 * can occur anymore and we can use the slab for allocations etc. If the
 * slab_trylock() does not succeed then frees are in progress in the slab and
 * we must stay away from it for a while since we may cause a bouncing
 * cacheline if we try to acquire the lock. So go onto the next slab.
 * If all pages are busy then we may allocate a new slab instead of reusing
 * a partial slab. A new slab has no one operating on it and thus there is
 * no danger of cacheline contention.
 *
 * Interrupts are disabled during allocation and deallocation in order to
 * make the slab allocator safe to use in the context of an irq. In addition
 * interrupts are disabled to ensure that the processor does not change
 * while handling per_cpu slabs, due to kernel preemption.
 *
 * SLUB assigns one slab for allocation to each processor.
 * Allocations only occur from these slabs called cpu slabs.
 *
 * Slabs with free elements are kept on a partial list and during regular
 * operations no list for full slabs is used. If an object in a full slab is
 * freed then the slab will show up again on the partial lists.
 * We track full slabs for debugging purposes though because otherwise we
 * cannot scan all objects.
 *
 * Slabs are freed when they become empty. Teardown and setup is
 * minimal so we rely on the page allocators per cpu caches for
 * fast frees and allocs.
 *
 * Overloading of page flags that are otherwise used for LRU management.
 *
 * PageActive		The slab is frozen and exempt from list processing.
 *			This means that the slab is dedicated to a purpose
 *			such as satisfying allocations for a specific
 *			processor. Objects may be freed in the slab while
 *			it is frozen but slab_free will then skip the usual
 *			list operations.
 *			It is up to the processor holding
 *			the slab to integrate the slab into the slab lists
 *			when the slab is no longer needed.
 *
 *			One use of this flag is to mark slabs that are
 *			used for allocations. Then such a slab becomes a cpu
 *			slab. The cpu slab may be equipped with an additional
 *			lockless_freelist that allows lockless access to
 *			free objects in addition to the regular freelist
 *			that requires the slab lock.
 *
 * PageError		Slab requires special handling due to debug
 *			options set. This moves slab handling out of
 *			the fast path and disables lockless freelists.
 */

#define FROZEN (1 << PG_active)

#ifdef CONFIG_SLUB_DEBUG
#define SLABDEBUG (1 << PG_error)
#else
#define SLABDEBUG 0
#endif

/*
 * Accessors for the overloaded page flags described above. PG_active
 * marks a frozen (per-cpu) slab, PG_error marks a slab that needs the
 * debug slow path.
 */
static inline int SlabFrozen(struct page *page)
{
        return page->flags & FROZEN;
}

static inline void SetSlabFrozen(struct page *page)
{
        page->flags |= FROZEN;
}

static inline void ClearSlabFrozen(struct page *page)
{
        page->flags &= ~FROZEN;
}

static inline int SlabDebug(struct page *page)
{
        return page->flags & SLABDEBUG;
}

static inline void SetSlabDebug(struct page *page)
{
        page->flags |= SLABDEBUG;
}

static inline void ClearSlabDebug(struct page *page)
{
        page->flags &= ~SLABDEBUG;
}

/*
 * Issues still to be resolved:
 *
 * - The per cpu array is updated for each new slab and is a remote
 *   cacheline for most nodes. This could become a bouncing cacheline given
 *   enough frequent updates. There are 16 pointers in a cacheline, so at
 *   max 16 cpus could compete for the cacheline which may be okay.
 *
 * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
 *
 * - Variable sizing of the per node arrays
 */

/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST

#if PAGE_SHIFT <= 12

/*
 * Small page size. Make sure that we do not fragment memory
 */
#define DEFAULT_MAX_ORDER 1
#define DEFAULT_MIN_OBJECTS 4

#else

/*
 * Large page machines are customarily able to handle larger
 * page orders.
 */
#define DEFAULT_MAX_ORDER 2
#define DEFAULT_MIN_OBJECTS 8

#endif

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 2

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in them.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
                                SLAB_POISON | SLAB_STORE_USER)

/*
 * Set of flags that will prevent slab merging
 */
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
                SLAB_TRACE | SLAB_DESTROY_BY_RCU)

#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
                SLAB_CACHE_DMA)

#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif

/*
 * The page->inuse field is 16 bit thus we have this limitation
 */
#define MAX_OBJECTS_PER_SLAB 65535

/* Internal SLUB flags */
#define __OBJECT_POISON 0x80000000 /* Poison object */

/* Not all arches define cache_line_size */
#ifndef cache_line_size
#define cache_line_size() L1_CACHE_BYTES
#endif

static int kmem_size = sizeof(struct kmem_cache);

#ifdef CONFIG_SMP
static struct notifier_block slab_notifier;
#endif

/* Bootstrap state of the allocator; advances monotonically during boot. */
static enum {
        DOWN,           /* No slab functionality available */
        PARTIAL,        /* kmem_cache_open() works but kmalloc does not */
        UP,             /* Everything works but does not show up in sysfs */
        SYSFS           /* Sysfs up */
} slab_state = DOWN;

/* A list of all slab caches on the system */
static DECLARE_RWSEM(slub_lock);
static LIST_HEAD(slab_caches);

/*
 * Tracking user of a slab.
 */
struct track {
        void *addr;             /* Called from address */
        int cpu;                /* Was running on cpu */
        int pid;                /* Pid context */
        unsigned long when;     /* When did the operation occur */
};

enum track_item { TRACK_ALLOC, TRACK_FREE };

#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
                                                        { return 0; }
static inline void sysfs_slab_remove(struct kmem_cache *s) {}
#endif

/********************************************************************
 *                      Core slab cache functions
 *******************************************************************/

int slab_is_available(void)
{
        return slab_state >= UP;
}

static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
#ifdef CONFIG_NUMA
        return s->node[node];
#else
        return &s->local_node;
#endif
}

/*
 * Check that a pointer is a plausible object address within the slab page:
 * inside the page's object area and aligned on an object boundary.
 * A NULL pointer is considered valid (end of freelist).
 */
static inline int check_valid_pointer(struct kmem_cache *s,
                                struct page *page, const void *object)
{
        void *base;

        if (!object)
                return 1;

        base = page_address(page);
        if (object < base || object >= base + s->objects * s->size ||
                (object - base) % s->size) {
                return 0;
        }

        return 1;
}

/*
 * Slow version of get and set free pointer.
 *
 * This version requires touching the cache lines of kmem_cache which
 * we avoid to do in the fast alloc free paths. There we obtain the offset
 * from the page struct.
 */
static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
        return *(void **)(object + s->offset);
}

static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
        *(void **)(object + s->offset) = fp;
}

/* Loop over all objects in a slab */
#define for_each_object(__p, __s, __addr) \
        for (__p = (__addr); __p < (__addr) + (__s)->objects * (__s)->size;\
                        __p += (__s)->size)

/* Scan freelist */
#define for_each_free_object(__p, __s, __free) \
        for (__p = (__free); __p; __p = get_freepointer((__s), __p))

/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
{
        return (p - addr) / s->size;
}

#ifdef CONFIG_SLUB_DEBUG
/*
 * Debug settings:
 */
#ifdef CONFIG_SLUB_DEBUG_ON
static int slub_debug = DEBUG_DEFAULT_FLAGS;
#else
static int slub_debug;
#endif

static char *slub_debug_slabs;

/*
 * Object debugging
 */
/* Hex/ASCII dump of a memory range, 16 bytes per printed line. */
static void print_section(char *text, u8 *addr, unsigned int length)
{
        int i, offset;
        int newline = 1;
        char ascii[17];

        ascii[16] = 0;

        for (i = 0; i < length; i++) {
                if (newline) {
                        printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
                        newline = 0;
                }
                printk(" %02x", addr[i]);
                offset = i % 16;
                ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
                if (offset == 15) {
                        printk(" %s\n", ascii);
                        newline = 1;
                }
        }
        if (!newline) {
                /* Pad the last partial line so the ASCII column lines up */
                i %= 16;
                while (i < 16) {
                        printk(" ");
                        ascii[i] = ' ';
                        i++;
                }
                printk(" %s\n", ascii);
        }
}

/*
 * Tracking data lives after the free pointer (if any) or after the
 * object proper; TRACK_ALLOC and TRACK_FREE records are adjacent.
 */
static struct track *get_track(struct kmem_cache *s, void *object,
        enum track_item alloc)
{
        struct track *p;

        if (s->offset)
                p = object + s->offset + sizeof(void *);
        else
                p = object + s->inuse;

        return p + alloc;
}

/* Record (addr != NULL) or clear (addr == NULL) a track entry. */
static void set_track(struct kmem_cache *s, void *object,
                                enum track_item alloc, void *addr)
{
        struct track *p;

        if (s->offset)
                p = object + s->offset + sizeof(void *);
        else
                p = object + s->inuse;

        p += alloc;
        if (addr) {
                p->addr = addr;
                p->cpu = smp_processor_id();
                p->pid = current ? current->pid : -1;
                p->when = jiffies;
        } else
                memset(p, 0, sizeof(struct track));
}

static void init_tracking(struct kmem_cache *s, void *object)
{
        if (!(s->flags & SLAB_STORE_USER))
                return;

        set_track(s, object, TRACK_FREE, NULL);
        set_track(s, object, TRACK_ALLOC, NULL);
}

static void print_track(const char *s, struct track *t)
{
        if (!t->addr)
                return;

        printk(KERN_ERR "INFO: %s in ", s);
        __print_symbol("%s", (unsigned long)t->addr);
        printk(" age=%lu cpu=%u pid=%d\n", jiffies - t->when, t->cpu, t->pid);
}

static void print_tracking(struct kmem_cache *s, void *object)
{
        if (!(s->flags & SLAB_STORE_USER))
                return;

        print_track("Allocated", get_track(s, object, TRACK_ALLOC));
        print_track("Freed", get_track(s, object, TRACK_FREE));
}

static void print_page_info(struct page *page)
{
        printk(KERN_ERR "INFO: Slab 0x%p used=%u fp=0x%p flags=0x%04lx\n",
                page, page->inuse, page->freelist, page->flags);

}

static void slab_bug(struct kmem_cache *s, char *fmt, ...)
444{ 445 va_list args; 446 char buf[100]; 447 448 va_start(args, fmt); 449 vsnprintf(buf, sizeof(buf), fmt, args); 450 va_end(args); 451 printk(KERN_ERR "========================================" 452 "=====================================\n"); 453 printk(KERN_ERR "BUG %s: %s\n", s->name, buf); 454 printk(KERN_ERR "----------------------------------------" 455 "-------------------------------------\n\n"); 456} 457 458static void slab_fix(struct kmem_cache *s, char *fmt, ...) 459{ 460 va_list args; 461 char buf[100]; 462 463 va_start(args, fmt); 464 vsnprintf(buf, sizeof(buf), fmt, args); 465 va_end(args); 466 printk(KERN_ERR "FIX %s: %s\n", s->name, buf); 467} 468 469static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) 470{ 471 unsigned int off; /* Offset of last byte */ 472 u8 *addr = page_address(page); 473 474 print_tracking(s, p); 475 476 print_page_info(page); 477 478 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", 479 p, p - addr, get_freepointer(s, p)); 480 481 if (p > addr + 16) 482 print_section("Bytes b4", p - 16, 16); 483 484 print_section("Object", p, min(s->objsize, 128)); 485 486 if (s->flags & SLAB_RED_ZONE) 487 print_section("Redzone", p + s->objsize, 488 s->inuse - s->objsize); 489 490 if (s->offset) 491 off = s->offset + sizeof(void *); 492 else 493 off = s->inuse; 494 495 if (s->flags & SLAB_STORE_USER) 496 off += 2 * sizeof(struct track); 497 498 if (off != s->size) 499 /* Beginning of the filler is the free pointer */ 500 print_section("Padding", p + off, s->size - off); 501 502 dump_stack(); 503} 504 505static void object_err(struct kmem_cache *s, struct page *page, 506 u8 *object, char *reason) 507{ 508 slab_bug(s, reason); 509 print_trailer(s, page, object); 510} 511 512static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...) 
513{ 514 va_list args; 515 char buf[100]; 516 517 va_start(args, fmt); 518 vsnprintf(buf, sizeof(buf), fmt, args); 519 va_end(args); 520 slab_bug(s, fmt); 521 print_page_info(page); 522 dump_stack(); 523} 524 525static void init_object(struct kmem_cache *s, void *object, int active) 526{ 527 u8 *p = object; 528 529 if (s->flags & __OBJECT_POISON) { 530 memset(p, POISON_FREE, s->objsize - 1); 531 p[s->objsize -1] = POISON_END; 532 } 533 534 if (s->flags & SLAB_RED_ZONE) 535 memset(p + s->objsize, 536 active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE, 537 s->inuse - s->objsize); 538} 539 540static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes) 541{ 542 while (bytes) { 543 if (*start != (u8)value) 544 return start; 545 start++; 546 bytes--; 547 } 548 return NULL; 549} 550 551static void restore_bytes(struct kmem_cache *s, char *message, u8 data, 552 void *from, void *to) 553{ 554 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data); 555 memset(from, data, to - from); 556} 557 558static int check_bytes_and_report(struct kmem_cache *s, struct page *page, 559 u8 *object, char *what, 560 u8* start, unsigned int value, unsigned int bytes) 561{ 562 u8 *fault; 563 u8 *end; 564 565 fault = check_bytes(start, value, bytes); 566 if (!fault) 567 return 1; 568 569 end = start + bytes; 570 while (end > fault && end[-1] == value) 571 end--; 572 573 slab_bug(s, "%s overwritten", what); 574 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n", 575 fault, end - 1, fault[0], value); 576 print_trailer(s, page, object); 577 578 restore_bytes(s, what, value, fault, end); 579 return 0; 580} 581 582/* 583 * Object layout: 584 * 585 * object address 586 * Bytes of the object to be managed. 587 * If the freepointer may overlay the object then the free 588 * pointer is the first word of the object. 
589 * 590 * Poisoning uses 0x6b (POISON_FREE) and the last byte is 591 * 0xa5 (POISON_END) 592 * 593 * object + s->objsize 594 * Padding to reach word boundary. This is also used for Redzoning. 595 * Padding is extended by another word if Redzoning is enabled and 596 * objsize == inuse. 597 * 598 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with 599 * 0xcc (RED_ACTIVE) for objects in use. 600 * 601 * object + s->inuse 602 * Meta data starts here. 603 * 604 * A. Free pointer (if we cannot overwrite object on free) 605 * B. Tracking data for SLAB_STORE_USER 606 * C. Padding to reach required alignment boundary or at mininum 607 * one word if debuggin is on to be able to detect writes 608 * before the word boundary. 609 * 610 * Padding is done using 0x5a (POISON_INUSE) 611 * 612 * object + s->size 613 * Nothing is used beyond s->size. 614 * 615 * If slabcaches are merged then the objsize and inuse boundaries are mostly 616 * ignored. And therefore no slab options that rely on these boundaries 617 * may be used with merged slabcaches. 618 */ 619 620static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) 621{ 622 unsigned long off = s->inuse; /* The end of info */ 623 624 if (s->offset) 625 /* Freepointer is placed after the object. 
*/ 626 off += sizeof(void *); 627 628 if (s->flags & SLAB_STORE_USER) 629 /* We also have user information there */ 630 off += 2 * sizeof(struct track); 631 632 if (s->size == off) 633 return 1; 634 635 return check_bytes_and_report(s, page, p, "Object padding", 636 p + off, POISON_INUSE, s->size - off); 637} 638 639static int slab_pad_check(struct kmem_cache *s, struct page *page) 640{ 641 u8 *start; 642 u8 *fault; 643 u8 *end; 644 int length; 645 int remainder; 646 647 if (!(s->flags & SLAB_POISON)) 648 return 1; 649 650 start = page_address(page); 651 end = start + (PAGE_SIZE << s->order); 652 length = s->objects * s->size; 653 remainder = end - (start + length); 654 if (!remainder) 655 return 1; 656 657 fault = check_bytes(start + length, POISON_INUSE, remainder); 658 if (!fault) 659 return 1; 660 while (end > fault && end[-1] == POISON_INUSE) 661 end--; 662 663 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); 664 print_section("Padding", start, length); 665 666 restore_bytes(s, "slab padding", POISON_INUSE, start, end); 667 return 0; 668} 669 670static int check_object(struct kmem_cache *s, struct page *page, 671 void *object, int active) 672{ 673 u8 *p = object; 674 u8 *endobject = object + s->objsize; 675 676 if (s->flags & SLAB_RED_ZONE) { 677 unsigned int red = 678 active ? 
SLUB_RED_ACTIVE : SLUB_RED_INACTIVE; 679 680 if (!check_bytes_and_report(s, page, object, "Redzone", 681 endobject, red, s->inuse - s->objsize)) 682 return 0; 683 } else { 684 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) 685 check_bytes_and_report(s, page, p, "Alignment padding", endobject, 686 POISON_INUSE, s->inuse - s->objsize); 687 } 688 689 if (s->flags & SLAB_POISON) { 690 if (!active && (s->flags & __OBJECT_POISON) && 691 (!check_bytes_and_report(s, page, p, "Poison", p, 692 POISON_FREE, s->objsize - 1) || 693 !check_bytes_and_report(s, page, p, "Poison", 694 p + s->objsize -1, POISON_END, 1))) 695 return 0; 696 /* 697 * check_pad_bytes cleans up on its own. 698 */ 699 check_pad_bytes(s, page, p); 700 } 701 702 if (!s->offset && active) 703 /* 704 * Object and freepointer overlap. Cannot check 705 * freepointer while object is allocated. 706 */ 707 return 1; 708 709 /* Check free pointer validity */ 710 if (!check_valid_pointer(s, page, get_freepointer(s, p))) { 711 object_err(s, page, p, "Freepointer corrupt"); 712 /* 713 * No choice but to zap it and thus loose the remainder 714 * of the free objects in this slab. May cause 715 * another error because the object count is now wrong. 716 */ 717 set_freepointer(s, p, NULL); 718 return 0; 719 } 720 return 1; 721} 722 723static int check_slab(struct kmem_cache *s, struct page *page) 724{ 725 VM_BUG_ON(!irqs_disabled()); 726 727 if (!PageSlab(page)) { 728 slab_err(s, page, "Not a valid slab page"); 729 return 0; 730 } 731 if (page->offset * sizeof(void *) != s->offset) { 732 slab_err(s, page, "Corrupted offset %lu", 733 (unsigned long)(page->offset * sizeof(void *))); 734 return 0; 735 } 736 if (page->inuse > s->objects) { 737 slab_err(s, page, "inuse %u > max %u", 738 s->name, page->inuse, s->objects); 739 return 0; 740 } 741 /* Slab_pad_check fixes things up after itself */ 742 slab_pad_check(s, page); 743 return 1; 744} 745 746/* 747 * Determine if a certain object on a page is on the freelist. 
 * Must hold the slab lock to guarantee that the chains are in a
 * consistent state.
 */
static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
{
        int nr = 0;
        void *fp = page->freelist;
        void *object = NULL;

        /* nr <= s->objects bounds the walk in case the chain has a cycle */
        while (fp && nr <= s->objects) {
                if (fp == search)
                        return 1;
                if (!check_valid_pointer(s, page, fp)) {
                        if (object) {
                                /* Truncate the chain at the last good object */
                                object_err(s, page, object,
                                        "Freechain corrupt");
                                set_freepointer(s, object, NULL);
                                break;
                        } else {
                                /* Head pointer itself is bad: drop the list */
                                slab_err(s, page, "Freepointer corrupt");
                                page->freelist = NULL;
                                page->inuse = s->objects;
                                slab_fix(s, "Freelist cleared");
                                return 0;
                        }
                        break;  /* not reached; both branches leave above */
                }
                object = fp;
                fp = get_freepointer(s, object);
                nr++;
        }

        if (page->inuse != s->objects - nr) {
                slab_err(s, page, "Wrong object count. Counter is %d but "
                        "counted were %d", page->inuse, s->objects - nr);
                page->inuse = s->objects - nr;
                slab_fix(s, "Object count adjusted.");
        }
        return search == NULL;
}

/* Emit a SLAB_TRACE event for an alloc (alloc=1) or free (alloc=0). */
static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc)
{
        if (s->flags & SLAB_TRACE) {
                printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
                        s->name,
                        alloc ? "alloc" : "free",
                        object, page->inuse,
                        page->freelist);

                if (!alloc)
                        print_section("Object", (void *)object, s->objsize);

                dump_stack();
        }
}

/*
 * Tracking of fully allocated slabs for debugging purposes.
 */
static void add_full(struct kmem_cache_node *n, struct page *page)
{
        spin_lock(&n->list_lock);
        list_add(&page->lru, &n->full);
        spin_unlock(&n->list_lock);
}

static void remove_full(struct kmem_cache *s, struct page *page)
{
        struct kmem_cache_node *n;

        /* Full slabs are only tracked when user tracking is enabled */
        if (!(s->flags & SLAB_STORE_USER))
                return;

        n = get_node(s, page_to_nid(page));

        spin_lock(&n->list_lock);
        list_del(&page->lru);
        spin_unlock(&n->list_lock);
}

/* Initialize debug payload (poison/redzone/tracking) of a new object. */
static void setup_object_debug(struct kmem_cache *s, struct page *page,
                                void *object)
{
        if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
                return;

        init_object(s, object, 0);
        init_tracking(s, object);
}

/*
 * Debug checks run on every allocation. Returns 1 if the allocation may
 * proceed; on failure the whole slab is marked used to contain the damage.
 */
static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
                                        void *object, void *addr)
{
        if (!check_slab(s, page))
                goto bad;

        if (object && !on_freelist(s, page, object)) {
                object_err(s, page, object, "Object already allocated");
                goto bad;
        }

        if (!check_valid_pointer(s, page, object)) {
                object_err(s, page, object, "Freelist Pointer check fails");
                goto bad;
        }

        if (object && !check_object(s, page, object, 0))
                goto bad;

        /* Success perform special debug activities for allocs */
        if (s->flags & SLAB_STORE_USER)
                set_track(s, object, TRACK_ALLOC, addr);
        trace(s, page, object, 1);
        init_object(s, object, 1);
        return 1;

bad:
        if (PageSlab(page)) {
                /*
                 * If this is a slab page then lets do the best we can
                 * to avoid issues in the future. Marking all objects
                 * as used avoids touching the remaining objects.
                 */
                slab_fix(s, "Marking all objects used");
                page->inuse = s->objects;
                page->freelist = NULL;
                /* Fix up fields that may be corrupted */
                page->offset = s->offset / sizeof(void *);
        }
        return 0;
}

/*
 * Debug checks run on every free. Returns 1 if the free may proceed.
 * Detects double frees, frees of pointers that do not belong to this
 * cache and general slab corruption.
 */
static int free_debug_processing(struct kmem_cache *s, struct page *page,
                                        void *object, void *addr)
{
        if (!check_slab(s, page))
                goto fail;

        if (!check_valid_pointer(s, page, object)) {
                slab_err(s, page, "Invalid object pointer 0x%p", object);
                goto fail;
        }

        if (on_freelist(s, page, object)) {
                object_err(s, page, object, "Object already free");
                goto fail;
        }

        if (!check_object(s, page, object, 1))
                return 0;

        if (unlikely(s != page->slab)) {
                if (!PageSlab(page))
                        slab_err(s, page, "Attempt to free object(0x%p) "
                                "outside of slab", object);
                else
                if (!page->slab) {
                        printk(KERN_ERR
                                "SLUB <none>: no slab for object 0x%p.\n",
                                                object);
                        dump_stack();
                }
                else
                        object_err(s, page, object,
                                        "page slab pointer corrupt.");
                goto fail;
        }

        /* Special debug activities for freeing objects */
        if (!SlabFrozen(page) && !page->freelist)
                remove_full(s, page);
        if (s->flags & SLAB_STORE_USER)
                set_track(s, object, TRACK_FREE, addr);
        trace(s, page, object, 0);
        init_object(s, object, 0);
        return 1;

fail:
        slab_fix(s, "Object at 0x%p not freed", object);
        return 0;
}

/*
 * Parse the slub_debug= kernel parameter:
 * slub_debug			enable everything
 * slub_debug=<flags>		enable selected checks (f,z,p,u,t)
 * slub_debug=<flags>,<slabs>	restrict to caches matching <slabs>
 * slub_debug=-			disable all debugging
 */
static int __init setup_slub_debug(char *str)
{
        slub_debug = DEBUG_DEFAULT_FLAGS;
        if (*str++ != '=' || !*str)
                /*
                 * No options specified. Switch on full debugging.
                 */
                goto out;

        if (*str == ',')
                /*
                 * No options but restriction on slabs. This means full
                 * debugging for slabs matching a pattern.
                 */
                goto check_slabs;

        slub_debug = 0;
        if (*str == '-')
                /*
                 * Switch off all debugging measures.
                 */
                goto out;

        /*
         * Determine which debug features should be switched on
         */
        for ( ;*str && *str != ','; str++) {
                switch (tolower(*str)) {
                case 'f':
                        slub_debug |= SLAB_DEBUG_FREE;
                        break;
                case 'z':
                        slub_debug |= SLAB_RED_ZONE;
                        break;
                case 'p':
                        slub_debug |= SLAB_POISON;
                        break;
                case 'u':
                        slub_debug |= SLAB_STORE_USER;
                        break;
                case 't':
                        slub_debug |= SLAB_TRACE;
                        break;
                default:
                        printk(KERN_ERR "slub_debug option '%c' "
                                "unknown. skipped\n",*str);
                }
        }

check_slabs:
        if (*str == ',')
                slub_debug_slabs = str + 1;
out:
        return 1;
}

__setup("slub_debug", setup_slub_debug);

static void kmem_cache_open_debug_check(struct kmem_cache *s)
{
        /*
         * The page->offset field is only 16 bit wide. This is an offset
         * in units of words from the beginning of an object. If the slab
         * size is bigger then we cannot move the free pointer behind the
         * object anymore.
         *
         * On 32 bit platforms the limit is 256k. On 64bit platforms
         * the limit is 512k.
         *
         * Debugging or ctor may create a need to move the free
         * pointer. Fail if this happens.
         */
        if (s->objsize >= 65535 * sizeof(void *)) {
                BUG_ON(s->flags & (SLAB_RED_ZONE | SLAB_POISON |
                                SLAB_STORE_USER | SLAB_DESTROY_BY_RCU));
                BUG_ON(s->ctor);
        }
        else
                /*
                 * Enable debugging if selected on the kernel commandline.
                 */
                if (slub_debug && (!slub_debug_slabs ||
                        strncmp(slub_debug_slabs, s->name,
                                strlen(slub_debug_slabs)) == 0))
                                s->flags |= slub_debug;
}
#else
/* !CONFIG_SLUB_DEBUG: stubs that compile the debug hooks away. */
static inline void setup_object_debug(struct kmem_cache *s,
                        struct page *page, void *object) {}

static inline int alloc_debug_processing(struct kmem_cache *s,
        struct page *page, void *object, void *addr) { return 0; }

static inline int free_debug_processing(struct kmem_cache *s,
        struct page *page, void *object, void *addr) { return 0; }

static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
                        { return 1; }
static inline int check_object(struct kmem_cache *s, struct page *page,
                        void *object, int active) { return 1; }
static inline void add_full(struct kmem_cache_node *n, struct page *page) {}
static inline void kmem_cache_open_debug_check(struct kmem_cache *s) {}
#define slub_debug 0
#endif
/*
 * Slab allocation and freeing
 */
static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{
        struct page * page;
        int pages = 1 << s->order;

        if (s->order)
                flags |= __GFP_COMP;

        if (s->flags & SLAB_CACHE_DMA)
                flags |= SLUB_DMA;

        /* node == -1 means no node preference */
        if (node == -1)
                page = alloc_pages(flags, s->order);
        else
                page = alloc_pages_node(node, flags, s->order);

        if (!page)
                return NULL;

        mod_zone_page_state(page_zone(page),
                (s->flags & SLAB_RECLAIM_ACCOUNT) ?
                NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
                pages);

        return page;
}

/* Run debug setup and the constructor (if any) on a fresh object. */
static void setup_object(struct kmem_cache *s, struct page *page,
                                void *object)
{
        setup_object_debug(s, page, object);
        if (unlikely(s->ctor))
                s->ctor(object, s, 0);
}

/*
 * Allocate and fully initialize a new slab: page flags, poison fill,
 * constructed objects and a freelist linking all of them.
 */
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
        struct page *page;
        struct kmem_cache_node *n;
        void *start;
        void *end;
        void *last;
        void *p;

        BUG_ON(flags & ~(GFP_DMA | __GFP_ZERO | GFP_LEVEL_MASK));

        /* Page allocation may sleep; reenable interrupts while it runs */
        if (flags & __GFP_WAIT)
                local_irq_enable();

        page = allocate_slab(s, flags & GFP_LEVEL_MASK, node);
        if (!page)
                goto out;

        n = get_node(s, page_to_nid(page));
        if (n)
                atomic_long_inc(&n->nr_slabs);
        page->offset = s->offset / sizeof(void *);
        page->slab = s;
        page->flags |= 1 << PG_slab;
        if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
                        SLAB_STORE_USER | SLAB_TRACE))
                SetSlabDebug(page);

        start = page_address(page);
        end = start + s->objects * s->size;

        if (unlikely(s->flags & SLAB_POISON))
                memset(start, POISON_INUSE, PAGE_SIZE << s->order);

        /* Chain all objects into the initial freelist */
        last = start;
        for_each_object(p, s, start) {
                setup_object(s, page, last);
                set_freepointer(s, last, p);
                last = p;
        }
        setup_object(s, page, last);
        set_freepointer(s, last, NULL);

        page->freelist = start;
        page->lockless_freelist = NULL;
        page->inuse = 0;
out:
        if (flags & __GFP_WAIT)
                local_irq_disable();
        return page;
}

static void __free_slab(struct kmem_cache *s, struct page *page)
{
        int pages = 1 << s->order;

        if (unlikely(SlabDebug(page))) {
                void *p;

                /* Final consistency check before the page goes back */
                slab_pad_check(s, page);
                for_each_object(p, s, page_address(page))
                        check_object(s, page, p, 0);
        }

        mod_zone_page_state(page_zone(page),
                (s->flags & SLAB_RECLAIM_ACCOUNT) ?
                NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
                - pages);

        page->mapping = NULL;
        __free_pages(page, s->order);
}

static void rcu_free_slab(struct rcu_head *h)
{
        struct page *page;

        /* The rcu_head was overlaid on page->lru by free_slab() */
        page = container_of((struct list_head *)h, struct page, lru);
        __free_slab(page->slab, page);
}

static void free_slab(struct kmem_cache *s, struct page *page)
{
        if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
                /*
                 * RCU free overloads the RCU head over the LRU
                 */
                struct rcu_head *head = (void *)&page->lru;

                call_rcu(head, rcu_free_slab);
        } else
                __free_slab(s, page);
}

/* Undo slab state on the page and free it (possibly via RCU). */
static void discard_slab(struct kmem_cache *s, struct page *page)
{
        struct kmem_cache_node *n = get_node(s, page_to_nid(page));

        atomic_long_dec(&n->nr_slabs);
        reset_page_mapcount(page);
        ClearSlabDebug(page);
        __ClearPageSlab(page);
        free_slab(s, page);
}

/*
 * Per slab locking using the pagelock
 */
static __always_inline void slab_lock(struct page *page)
{
        bit_spin_lock(PG_locked, &page->flags);
}

static __always_inline void slab_unlock(struct page *page)
{
        bit_spin_unlock(PG_locked, &page->flags);
}

static __always_inline int slab_trylock(struct page *page)
{
        int rc = 1;

        rc = bit_spin_trylock(PG_locked, &page->flags);
        return rc;
}

/*
 * Management of partially allocated slabs
 */
static void add_partial_tail(struct kmem_cache_node *n, struct page *page)
{
        spin_lock(&n->list_lock);
        n->nr_partial++;
        list_add_tail(&page->lru, &n->partial);
        spin_unlock(&n->list_lock);
}

static void add_partial(struct kmem_cache_node *n, struct page *page)
{
        spin_lock(&n->list_lock);
        n->nr_partial++;
        list_add(&page->lru, &n->partial);
        spin_unlock(&n->list_lock);
}

/*
 * Take a slab off its node's partial list. Takes and releases the
 * node's list_lock itself.
 */
static void remove_partial(struct kmem_cache *s,
						struct page *page)
{
	struct kmem_cache_node *n = get_node(s, page_to_nid(page));

	spin_lock(&n->list_lock);
	list_del(&page->lru);
	n->nr_partial--;
	spin_unlock(&n->list_lock);
}

/*
 * Lock slab and remove from the partial list.
 *
 * Must hold list_lock.
 */
static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page)
{
	if (slab_trylock(page)) {
		list_del(&page->lru);
		n->nr_partial--;
		SetSlabFrozen(page);
		return 1;
	}
	return 0;
}

/*
 * Try to allocate a partial slab from a specific node. Returns the
 * locked, frozen slab or NULL if none could be obtained.
 */
static struct page *get_partial_node(struct kmem_cache_node *n)
{
	struct page *page;

	/*
	 * Racy check. If we mistakenly see no partial slabs then we
	 * just allocate an empty slab. If we mistakenly try to get a
	 * partial slab and there is none available then get_partials()
	 * will return NULL.
	 */
	if (!n || !n->nr_partial)
		return NULL;

	spin_lock(&n->list_lock);
	list_for_each_entry(page, &n->partial, lru)
		if (lock_and_freeze_slab(n, page))
			goto out;
	page = NULL;
out:
	spin_unlock(&n->list_lock);
	return page;
}

/*
 * Get a page from somewhere. Search in increasing NUMA distances.
 */
static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
{
#ifdef CONFIG_NUMA
	struct zonelist *zonelist;
	struct zone **z;
	struct page *page;

	/*
	 * The defrag ratio allows a configuration of the tradeoffs between
	 * inter node defragmentation and node local allocations. A lower
	 * defrag_ratio increases the tendency to do local allocations
	 * instead of attempting to obtain partial slabs from other nodes.
	 *
	 * If the defrag_ratio is set to 0 then kmalloc() always
	 * returns node local objects. If the ratio is higher then kmalloc()
	 * may return off node objects because partial slabs are obtained
	 * from other nodes and filled up.
	 *
	 * If /sys/slab/xx/defrag_ratio is set to 100 (which makes
	 * defrag_ratio = 1000) then every (well almost) allocation will
	 * first attempt to defrag slab caches on other nodes. This means
	 * scanning over all nodes to look for partial slabs which may be
	 * expensive if we do it every time we are trying to find a slab
	 * with available objects.
	 */
	if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio)
		return NULL;

	zonelist = &NODE_DATA(slab_node(current->mempolicy))
					->node_zonelists[gfp_zone(flags)];
	for (z = zonelist->zones; *z; z++) {
		struct kmem_cache_node *n;

		n = get_node(s, zone_to_nid(*z));

		/* Only steal from nodes with a surplus of partial slabs */
		if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
				n->nr_partial > MIN_PARTIAL) {
			page = get_partial_node(n);
			if (page)
				return page;
		}
	}
#endif
	return NULL;
}

/*
 * Get a partial page, lock it and return it. Prefers the local (or
 * requested) node; falls back to other nodes unless __GFP_THISNODE.
 */
static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
{
	struct page *page;
	int searchnode = (node == -1) ? numa_node_id() : node;

	page = get_partial_node(get_node(s, searchnode));
	if (page || (flags & __GFP_THISNODE))
		return page;

	return get_any_partial(s, flags);
}

/*
 * Move a page back to the lists.
 *
 * Must be called with the slab lock held.
 *
 * On exit the slab lock will have been dropped.
 */
static void unfreeze_slab(struct kmem_cache *s, struct page *page)
{
	struct kmem_cache_node *n = get_node(s, page_to_nid(page));

	ClearSlabFrozen(page);
	if (page->inuse) {

		if (page->freelist)
			add_partial(n, page);
		else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
			add_full(n, page);
		slab_unlock(page);

	} else {
		if (n->nr_partial < MIN_PARTIAL) {
			/*
			 * Adding an empty slab to the partial slabs in order
			 * to avoid page allocator overhead. This slab needs
			 * to come after the other slabs with objects in
			 * order to fill them up. That way the size of the
			 * partial list stays small. kmem_cache_shrink can
			 * reclaim empty slabs from the partial list.
			 */
			add_partial_tail(n, page);
			slab_unlock(page);
		} else {
			slab_unlock(page);
			discard_slab(s, page);
		}
	}
}

/*
 * Remove the cpu slab: drain its lockless freelist back into the
 * regular freelist and give the slab back to the lists via
 * unfreeze_slab() (which drops the slab lock).
 */
static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu)
{
	/*
	 * Merge cpu freelist into freelist. Typically we get here
	 * because both freelists are empty. So this is unlikely
	 * to occur.
	 */
	while (unlikely(page->lockless_freelist)) {
		void **object;

		/* Retrieve object from cpu_freelist */
		object = page->lockless_freelist;
		page->lockless_freelist = page->lockless_freelist[page->offset];

		/* And put onto the regular freelist */
		object[page->offset] = page->freelist;
		page->freelist = object;
		page->inuse--;
	}
	s->cpu_slab[cpu] = NULL;
	unfreeze_slab(s, page);
}

/* Lock the slab and deactivate it (slab lock is dropped on return). */
static inline void flush_slab(struct kmem_cache *s, struct page *page, int cpu)
{
	slab_lock(page);
	deactivate_slab(s, page, cpu);
}

/*
 * Flush cpu slab.
 * Called from IPI handler with interrupts disabled.
 */
static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
{
	struct page *page = s->cpu_slab[cpu];

	if (likely(page))
		flush_slab(s, page, cpu);
}

/* IPI callback: flush the executing processor's cpu slab for cache d. */
static void flush_cpu_slab(void *d)
{
	struct kmem_cache *s = d;
	int cpu = smp_processor_id();

	__flush_cpu_slab(s, cpu);
}

/* Flush the cpu slab of every processor for this cache. */
static void flush_all(struct kmem_cache *s)
{
#ifdef CONFIG_SMP
	on_each_cpu(flush_cpu_slab, s, 1, 1);
#else
	unsigned long flags;

	local_irq_save(flags);
	flush_cpu_slab(s);
	local_irq_restore(flags);
#endif
}

/*
 * Slow path. The lockless freelist is empty or we need to perform
 * debugging duties.
 *
 * Interrupts are disabled.
 *
 * Processing is still very fast if new objects have been freed to the
 * regular freelist. In that case we simply take over the regular freelist
 * as the lockless freelist and zap the regular freelist.
 *
 * If that is not working then we fall back to the partial lists. We take the
 * first element of the freelist as the object to allocate now and move the
 * rest of the freelist to the lockless freelist.
 *
 * And if we were unable to get a new slab from the partial slab lists then
 * we need to allocate a new slab. This is slowest path since we may sleep.
1454 */ 1455static void *__slab_alloc(struct kmem_cache *s, 1456 gfp_t gfpflags, int node, void *addr, struct page *page) 1457{ 1458 void **object; 1459 int cpu = smp_processor_id(); 1460 1461 if (!page) 1462 goto new_slab; 1463 1464 slab_lock(page); 1465 if (unlikely(node != -1 && page_to_nid(page) != node)) 1466 goto another_slab; 1467load_freelist: 1468 object = page->freelist; 1469 if (unlikely(!object)) 1470 goto another_slab; 1471 if (unlikely(SlabDebug(page))) 1472 goto debug; 1473 1474 object = page->freelist; 1475 page->lockless_freelist = object[page->offset]; 1476 page->inuse = s->objects; 1477 page->freelist = NULL; 1478 slab_unlock(page); 1479 return object; 1480 1481another_slab: 1482 deactivate_slab(s, page, cpu); 1483 1484new_slab: 1485 page = get_partial(s, gfpflags, node); 1486 if (page) { 1487 s->cpu_slab[cpu] = page; 1488 goto load_freelist; 1489 } 1490 1491 page = new_slab(s, gfpflags, node); 1492 if (page) { 1493 cpu = smp_processor_id(); 1494 if (s->cpu_slab[cpu]) { 1495 /* 1496 * Someone else populated the cpu_slab while we 1497 * enabled interrupts, or we have gotten scheduled 1498 * on another cpu. The page may not be on the 1499 * requested node even if __GFP_THISNODE was 1500 * specified. So we need to recheck. 
1501 */ 1502 if (node == -1 || 1503 page_to_nid(s->cpu_slab[cpu]) == node) { 1504 /* 1505 * Current cpuslab is acceptable and we 1506 * want the current one since its cache hot 1507 */ 1508 discard_slab(s, page); 1509 page = s->cpu_slab[cpu]; 1510 slab_lock(page); 1511 goto load_freelist; 1512 } 1513 /* New slab does not fit our expectations */ 1514 flush_slab(s, s->cpu_slab[cpu], cpu); 1515 } 1516 slab_lock(page); 1517 SetSlabFrozen(page); 1518 s->cpu_slab[cpu] = page; 1519 goto load_freelist; 1520 } 1521 return NULL; 1522debug: 1523 object = page->freelist; 1524 if (!alloc_debug_processing(s, page, object, addr)) 1525 goto another_slab; 1526 1527 page->inuse++; 1528 page->freelist = object[page->offset]; 1529 slab_unlock(page); 1530 return object; 1531} 1532 1533/* 1534 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) 1535 * have the fastpath folded into their functions. So no function call 1536 * overhead for requests that can be satisfied on the fastpath. 1537 * 1538 * The fastpath works by first checking if the lockless freelist can be used. 1539 * If not then __slab_alloc is called for slow processing. 1540 * 1541 * Otherwise we can simply pick the next object from the lockless free list. 
1542 */ 1543static void __always_inline *slab_alloc(struct kmem_cache *s, 1544 gfp_t gfpflags, int node, void *addr) 1545{ 1546 struct page *page; 1547 void **object; 1548 unsigned long flags; 1549 1550 local_irq_save(flags); 1551 page = s->cpu_slab[smp_processor_id()]; 1552 if (unlikely(!page || !page->lockless_freelist || 1553 (node != -1 && page_to_nid(page) != node))) 1554 1555 object = __slab_alloc(s, gfpflags, node, addr, page); 1556 1557 else { 1558 object = page->lockless_freelist; 1559 page->lockless_freelist = object[page->offset]; 1560 } 1561 local_irq_restore(flags); 1562 1563 if (unlikely((gfpflags & __GFP_ZERO) && object)) 1564 memset(object, 0, s->objsize); 1565 1566 return object; 1567} 1568 1569void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) 1570{ 1571 return slab_alloc(s, gfpflags, -1, __builtin_return_address(0)); 1572} 1573EXPORT_SYMBOL(kmem_cache_alloc); 1574 1575#ifdef CONFIG_NUMA 1576void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) 1577{ 1578 return slab_alloc(s, gfpflags, node, __builtin_return_address(0)); 1579} 1580EXPORT_SYMBOL(kmem_cache_alloc_node); 1581#endif 1582 1583/* 1584 * Slow patch handling. This may still be called frequently since objects 1585 * have a longer lifetime than the cpu slabs in most processing loads. 1586 * 1587 * So we still attempt to reduce cache line usage. Just take the slab 1588 * lock and free the item. If there is no additional partial page 1589 * handling required then we can return immediately. 
 */
static void __slab_free(struct kmem_cache *s, struct page *page,
					void *x, void *addr)
{
	void *prior;
	void **object = (void *)x;

	slab_lock(page);

	if (unlikely(SlabDebug(page)))
		goto debug;
checks_ok:
	/* Push the object onto the regular freelist; prior = old head */
	prior = object[page->offset] = page->freelist;
	page->freelist = object;
	page->inuse--;

	/* A frozen slab is owned by a cpu; leave list management to it */
	if (unlikely(SlabFrozen(page)))
		goto out_unlock;

	if (unlikely(!page->inuse))
		goto slab_empty;

	/*
	 * Objects left in the slab. If it
	 * was not on the partial list before
	 * then add it.
	 */
	if (unlikely(!prior))
		add_partial(get_node(s, page_to_nid(page)), page);

out_unlock:
	slab_unlock(page);
	return;

slab_empty:
	if (prior)
		/*
		 * Slab still on the partial list.
		 */
		remove_partial(s, page);

	slab_unlock(page);
	discard_slab(s, page);
	return;

debug:
	if (!free_debug_processing(s, page, x, addr))
		goto out_unlock;
	goto checks_ok;
}

/*
 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
 * can perform fastpath freeing without additional function calls.
 *
 * The fastpath is only possible if we are freeing to the current cpu slab
 * of this processor. This typically the case if we have just allocated
 * the item before.
 *
 * If fastpath is not possible then fall back to __slab_free where we deal
 * with all sorts of special processing.
 */
static void __always_inline slab_free(struct kmem_cache *s,
			struct page *page, void *x, void *addr)
{
	void **object = (void *)x;
	unsigned long flags;

	local_irq_save(flags);
	if (likely(page == s->cpu_slab[smp_processor_id()] &&
						!SlabDebug(page))) {
		/* Fastpath: push onto this cpu's lockless freelist */
		object[page->offset] = page->lockless_freelist;
		page->lockless_freelist = object;
	} else
		__slab_free(s, page, x, addr);

	local_irq_restore(flags);
}

void kmem_cache_free(struct kmem_cache *s, void *x)
{
	struct page *page;

	page = virt_to_head_page(x);

	slab_free(s, page, x, __builtin_return_address(0));
}
EXPORT_SYMBOL(kmem_cache_free);

/* Figure out on which slab object the object resides */
static struct page *get_object_page(const void *x)
{
	struct page *page = virt_to_head_page(x);

	if (!PageSlab(page))
		return NULL;

	return page;
}

/*
 * Object placement in a slab is made very easy because we always start at
 * offset 0. If we tune the size of the object to the alignment then we can
 * get the required alignment by putting one properly sized object after
 * another.
 *
 * Notice that the allocation order determines the sizes of the per cpu
 * caches. Each processor has always one slab available for allocations.
 * Increasing the allocation order reduces the number of times that slabs
 * must be moved on and off the partial lists and is therefore a factor in
 * locking overhead.
 */

/*
 * Mininum / Maximum order of slab pages. This influences locking overhead
 * and slab fragmentation. A higher order reduces the number of partial slabs
 * and increases the number of allocations possible without having to
 * take the list_lock.
 */
static int slub_min_order;
static int slub_max_order = DEFAULT_MAX_ORDER;
static int slub_min_objects = DEFAULT_MIN_OBJECTS;

/*
 * Merge control. If this is set then no merging of slab caches will occur.
 * (Could be removed. This was introduced to pacify the merge skeptics.)
 */
static int slub_nomerge;

/*
 * Calculate the order of allocation given an slab object size.
 *
 * The order of allocation has significant impact on performance and other
 * system components. Generally order 0 allocations should be preferred since
 * order 0 does not cause fragmentation in the page allocator. Larger objects
 * be problematic to put into order 0 slabs because there may be too much
 * unused space left. We go to a higher order if more than 1/8th of the slab
 * would be wasted.
 *
 * In order to reach satisfactory performance we must ensure that a minimum
 * number of objects is in one slab. Otherwise we may generate too much
 * activity on the partial lists which requires taking the list_lock. This is
 * less a concern for large slabs though which are rarely used.
 *
 * slub_max_order specifies the order where we begin to stop considering the
 * number of objects in a slab as critical. If we reach slub_max_order then
 * we try to keep the page order as low as possible. So we accept more waste
 * of space in favor of a small page order.
 *
 * Higher order allocations also allow the placement of more objects in a
 * slab and thereby reduce object handling overhead. If the user has
 * requested a higher mininum order then we start with that one instead of
 * the smallest order which will fit the object.
 */
static inline int slab_order(int size, int min_objects,
				int max_order, int fract_leftover)
{
	int order;
	int rem;
	int min_order = slub_min_order;

	/*
	 * If we would create too many object per slab then reduce
	 * the slab order even if it goes below slub_min_order.
	 */
	while (min_order > 0 &&
		(PAGE_SIZE << min_order) >= MAX_OBJECTS_PER_SLAB * size)
			min_order--;

	/* Start at the smallest order that can hold min_objects objects */
	for (order = max(min_order,
				fls(min_objects * size - 1) - PAGE_SHIFT);
			order <= max_order; order++) {

		unsigned long slab_size = PAGE_SIZE << order;

		if (slab_size < min_objects * size)
			continue;

		/* Accept this order if the wasted space is small enough */
		rem = slab_size % size;

		if (rem <= slab_size / fract_leftover)
			break;

		/* If the next size is too high then exit now */
		if (slab_size * 2 >= MAX_OBJECTS_PER_SLAB * size)
			break;
	}

	return order;
}

static inline int calculate_order(int size)
{
	int order;
	int min_objects;
	int fraction;

	/*
	 * Attempt to find best configuration for a slab. This
	 * works by first attempting to generate a layout with
	 * the best configuration and backing off gradually.
	 *
	 * First we reduce the acceptable waste in a slab. Then
	 * we reduce the minimum objects required in a slab.
	 */
	min_objects = slub_min_objects;
	while (min_objects > 1) {
		fraction = 8;
		while (fraction >= 4) {
			order = slab_order(size, min_objects,
						slub_max_order, fraction);
			if (order <= slub_max_order)
				return order;
			fraction /= 2;
		}
		min_objects /= 2;
	}

	/*
	 * We were unable to place multiple objects in a slab. Now
	 * lets see if we can place a single object there.
	 */
	order = slab_order(size, 1, slub_max_order, 1);
	if (order <= slub_max_order)
		return order;

	/*
	 * Doh this slab cannot be placed using slub_max_order.
	 */
	order = slab_order(size, 1, MAX_ORDER, 1);
	if (order <= MAX_ORDER)
		return order;
	return -ENOSYS;
}

/*
 * Figure out what the alignment of the objects will be.
 */
static unsigned long calculate_alignment(unsigned long flags,
		unsigned long align, unsigned long size)
{
	/*
	 * If the user wants hardware cache aligned objects then
	 * follow that suggestion if the object is sufficiently
	 * large.
	 *
	 * The hardware cache alignment cannot override the
	 * specified alignment though. If that is greater
	 * then use it.
	 */
	if ((flags & SLAB_HWCACHE_ALIGN) &&
			size > cache_line_size() / 2)
		return max_t(unsigned long, align, cache_line_size());

	if (align < ARCH_SLAB_MINALIGN)
		return ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));
}

/* Initialize a per node structure to an empty state */
static void init_kmem_cache_node(struct kmem_cache_node *n)
{
	n->nr_partial = 0;
	atomic_long_set(&n->nr_slabs, 0);
	spin_lock_init(&n->list_lock);
	INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
	INIT_LIST_HEAD(&n->full);
#endif
}

#ifdef CONFIG_NUMA
/*
 * No kmalloc_node yet so do it by hand. We know that this is the first
 * slab on the node for this slabcache. There are no concurrent accesses
 * possible.
 *
 * Note that this function only works on the kmalloc_node_cache
 * when allocating for the kmalloc_node_cache.
 */
static struct kmem_cache_node * __init early_kmem_cache_node_alloc(gfp_t gfpflags,
								int node)
{
	struct page *page;
	struct kmem_cache_node *n;

	BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));

	page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node);

	BUG_ON(!page);
	/* Carve the kmem_cache_node out of the slab's first object by hand */
	n = page->freelist;
	BUG_ON(!n);
	page->freelist = get_freepointer(kmalloc_caches, n);
	page->inuse++;
	kmalloc_caches->node[node] = n;
#ifdef CONFIG_SLUB_DEBUG
	init_object(kmalloc_caches, n, 1);
	init_tracking(kmalloc_caches, n);
#endif
	init_kmem_cache_node(n);
	atomic_long_inc(&n->nr_slabs);
	add_partial(n, page);

	/*
	 * new_slab() disables interrupts. If we do not reenable interrupts here
	 * then bootup would continue with interrupts disabled.
	 */
	local_irq_enable();
	return n;
}

/* Free all per node structures except the statically embedded local one */
static void free_kmem_cache_nodes(struct kmem_cache *s)
{
	int node;

	for_each_online_node(node) {
		struct kmem_cache_node *n = s->node[node];
		if (n && n != &s->local_node)
			kmem_cache_free(kmalloc_caches, n);
		s->node[node] = NULL;
	}
}

/*
 * Allocate and initialize the per node structures for a cache.
 * Returns 1 on success, 0 on allocation failure (nodes freed).
 */
static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
{
	int node;
	int local_node;

	if (slab_state >= UP)
		local_node = page_to_nid(virt_to_page(s));
	else
		local_node = 0;

	for_each_online_node(node) {
		struct kmem_cache_node *n;

		if (local_node == node)
			n = &s->local_node;
		else {
			if (slab_state == DOWN) {
				/* Bootstrap: no kmalloc_node available yet */
				n = early_kmem_cache_node_alloc(gfpflags,
								node);
				continue;
			}
			n = kmem_cache_alloc_node(kmalloc_caches,
							gfpflags, node);

			if (!n) {
				free_kmem_cache_nodes(s);
				return 0;
			}

		}
		s->node[node] = n;
		init_kmem_cache_node(n);
	}
	return 1;
}
#else
static void free_kmem_cache_nodes(struct kmem_cache *s)
{
}

static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
{
	init_kmem_cache_node(&s->local_node);
	return 1;
}
#endif

/*
 * calculate_sizes() determines the order and the distribution of data within
 * a slab object.
 */
static int calculate_sizes(struct kmem_cache *s)
{
	unsigned long flags = s->flags;
	unsigned long size = s->objsize;
	unsigned long align = s->align;

	/*
	 * Determine if we can poison the object itself. If the user of
	 * the slab may touch the object after free or before allocation
	 * then we should never poison the object itself.
	 */
	if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
			!s->ctor)
		s->flags |= __OBJECT_POISON;
	else
		s->flags &= ~__OBJECT_POISON;

	/*
	 * Round up object size to the next word boundary. We can only
	 * place the free pointer at word boundaries and this determines
	 * the possible location of the free pointer.
	 */
	size = ALIGN(size, sizeof(void *));

#ifdef CONFIG_SLUB_DEBUG
	/*
	 * If we are Redzoning then check if there is some space between the
	 * end of the object and the free pointer. If not then add an
	 * additional word to have some bytes to store Redzone information.
	 */
	if ((flags & SLAB_RED_ZONE) && size == s->objsize)
		size += sizeof(void *);
#endif

	/*
	 * With that we have determined the number of bytes in actual use
	 * by the object. This is the potential offset to the free pointer.
	 */
	s->inuse = size;

	if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
		s->ctor)) {
		/*
		 * Relocate free pointer after the object if it is not
		 * permitted to overwrite the first word of the object on
		 * kmem_cache_free.
		 *
		 * This is the case if we do RCU, have a constructor or
		 * destructor or are poisoning the objects.
		 */
		s->offset = size;
		size += sizeof(void *);
	}

#ifdef CONFIG_SLUB_DEBUG
	if (flags & SLAB_STORE_USER)
		/*
		 * Need to store information about allocs and frees after
		 * the object.
		 */
		size += 2 * sizeof(struct track);

	if (flags & SLAB_RED_ZONE)
		/*
		 * Add some empty padding so that we can catch
		 * overwrites from earlier objects rather than let
		 * tracking information or the free pointer be
		 * corrupted if an user writes before the start
		 * of the object.
		 */
		size += sizeof(void *);
#endif

	/*
	 * Determine the alignment based on various parameters that the
	 * user specified and the dynamic determination of cache line size
	 * on bootup.
	 */
	align = calculate_alignment(flags, align, s->objsize);

	/*
	 * SLUB stores one object immediately after another beginning from
	 * offset 0. In order to align the objects we have to simply size
	 * each object to conform to the alignment.
	 */
	size = ALIGN(size, align);
	s->size = size;

	s->order = calculate_order(size);
	if (s->order < 0)
		return 0;

	/*
	 * Determine the number of objects per slab
	 */
	s->objects = (PAGE_SIZE << s->order) / size;

	/*
	 * Verify that the number of objects is within permitted limits.
	 * The page->inuse field is only 16 bit wide! So we cannot have
	 * more than 64k objects per slab.
	 */
	if (!s->objects || s->objects > MAX_OBJECTS_PER_SLAB)
		return 0;
	return 1;

}

/*
 * Initialize a kmem_cache structure. Returns 1 on success, 0 on
 * failure (panics instead if SLAB_PANIC was requested).
 */
static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
		const char *name, size_t size,
		size_t align, unsigned long flags,
		void (*ctor)(void *, struct kmem_cache *, unsigned long))
{
	memset(s, 0, kmem_size);
	s->name = name;
	s->ctor = ctor;
	s->objsize = size;
	s->flags = flags;
	s->align = align;
	kmem_cache_open_debug_check(s);

	if (!calculate_sizes(s))
		goto error;

	s->refcount = 1;
#ifdef CONFIG_NUMA
	s->defrag_ratio = 100;
#endif

	if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
		return 1;
error:
	if (flags & SLAB_PANIC)
		panic("Cannot create slab %s size=%lu realsize=%u "
			"order=%u offset=%u flags=%lx\n",
			s->name, (unsigned long)size, s->size, s->order,
			s->offset, flags);
	return 0;
}

/*
 * Check if a given pointer is valid
 */
int kmem_ptr_validate(struct kmem_cache *s, const void *object)
{
	struct page * page;

	page = get_object_page(object);

	if (!page || s != page->slab)
		/* No slab or wrong slab */
		return 0;

	if (!check_valid_pointer(s, page, object))
		return 0;

	/*
	 * We could also check if the object is on the slabs freelist.
	 * But this would be too expensive and it seems that the main
	 * purpose of kmem_ptr_valid is to check if the object belongs
	 * to a certain slab.
	 */
	return 1;
}
EXPORT_SYMBOL(kmem_ptr_validate);

/*
 * Determine the size of a slab object
 */
unsigned int kmem_cache_size(struct kmem_cache *s)
{
	return s->objsize;
}
EXPORT_SYMBOL(kmem_cache_size);

const char *kmem_cache_name(struct kmem_cache *s)
{
	return s->name;
}
EXPORT_SYMBOL(kmem_cache_name);

/*
 * Attempt to free all slabs on a node.
 * Return the number of slabs we
 * were unable to free.
 */
static int free_list(struct kmem_cache *s, struct kmem_cache_node *n,
						struct list_head *list)
{
	int slabs_inuse = 0;
	unsigned long flags;
	struct page *page, *h;

	spin_lock_irqsave(&n->list_lock, flags);
	list_for_each_entry_safe(page, h, list, lru)
		if (!page->inuse) {
			list_del(&page->lru);
			discard_slab(s, page);
		} else
			slabs_inuse++;
	spin_unlock_irqrestore(&n->list_lock, flags);
	return slabs_inuse;
}

/*
 * Release all resources used by a slab cache.
 * Returns 1 if objects are still in use on some node, 0 on success.
 */
static inline int kmem_cache_close(struct kmem_cache *s)
{
	int node;

	flush_all(s);

	/* Attempt to free all objects */
	for_each_online_node(node) {
		struct kmem_cache_node *n = get_node(s, node);

		n->nr_partial -= free_list(s, n, &n->partial);
		if (atomic_long_read(&n->nr_slabs))
			return 1;
	}
	free_kmem_cache_nodes(s);
	return 0;
}

/*
 * Close a cache and release the kmem_cache structure
 * (must be used for caches created using kmem_cache_create)
 */
void kmem_cache_destroy(struct kmem_cache *s)
{
	down_write(&slub_lock);
	s->refcount--;
	if (!s->refcount) {
		list_del(&s->list);
		up_write(&slub_lock);
		if (kmem_cache_close(s))
			WARN_ON(1);
		sysfs_slab_remove(s);
		kfree(s);
	} else
		up_write(&slub_lock);
}
EXPORT_SYMBOL(kmem_cache_destroy);

/********************************************************************
 *		Kmalloc subsystem
 *******************************************************************/

struct kmem_cache kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __cacheline_aligned;
EXPORT_SYMBOL(kmalloc_caches);

#ifdef CONFIG_ZONE_DMA
static struct kmem_cache *kmalloc_caches_dma[KMALLOC_SHIFT_HIGH + 1];
#endif

/* Boot parameter parsing for the slub_* tunables */
static int __init setup_slub_min_order(char *str)
{
	get_option (&str, &slub_min_order);

	return 1;
}

__setup("slub_min_order=", setup_slub_min_order);

static int __init setup_slub_max_order(char *str)
{
	get_option (&str, &slub_max_order);

	return 1;
}

__setup("slub_max_order=", setup_slub_max_order);

static int __init setup_slub_min_objects(char *str)
{
	get_option (&str, &slub_min_objects);

	return 1;
}

__setup("slub_min_objects=", setup_slub_min_objects);

static int __init setup_slub_nomerge(char *str)
{
	slub_nomerge = 1;
	return 1;
}

__setup("slub_nomerge", setup_slub_nomerge);

/* Open a kmalloc cache in the given preallocated kmem_cache structure */
static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
		const char *name, int size, gfp_t gfp_flags)
{
	unsigned int flags = 0;

	if (gfp_flags & SLUB_DMA)
		flags = SLAB_CACHE_DMA;

	down_write(&slub_lock);
	if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
			flags, NULL))
		goto panic;

	list_add(&s->list, &slab_caches);
	up_write(&slub_lock);
	if (sysfs_slab_add(s))
		goto panic;
	return s;

panic:
	panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
}

#ifdef CONFIG_ZONE_DMA
/*
 * Lazily create the DMA kmalloc cache for the given size index.
 * Concurrent creators may race; the loser destroys its copy.
 */
static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
{
	struct kmem_cache *s;
	struct kmem_cache *x;
	char *text;
	size_t realsize;

	s = kmalloc_caches_dma[index];
	if (s)
		return s;

	/* Dynamically create dma cache */
	x = kmalloc(kmem_size, flags & ~SLUB_DMA);
	if (!x)
		panic("Unable to allocate memory for dma cache\n");

	realsize = kmalloc_caches[index].objsize;
	text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
			(unsigned int)realsize);
	s = create_kmalloc_cache(x, text, realsize, flags);
	down_write(&slub_lock);
	if (!kmalloc_caches_dma[index]) {
		kmalloc_caches_dma[index] = s;
		up_write(&slub_lock);
		return s;
	}
	up_write(&slub_lock);
	/*
	 * NOTE(review): on this race-lose path the kasprintf()ed name
	 * string referenced by the destroyed cache appears to leak —
	 * confirm against kmem_cache_destroy()'s cleanup.
	 */
	kmem_cache_destroy(s);
	return kmalloc_caches_dma[index];
}
#endif

/*
 * Conversion table for small slabs sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
 * of two cache sizes there. The size of larger slabs can be determined using
 * fls.
 */
static s8 size_index[24] = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};

/*
 * Map a request size and gfp flags to the kmalloc cache serving it.
 * Returns ZERO_SIZE_PTR for size 0 and NULL for oversized requests.
 */
static struct kmem_cache *get_slab(size_t size, gfp_t flags)
{
	int index;

	if (size <= 192) {
		if (!size)
			return ZERO_SIZE_PTR;

		index = size_index[(size - 1) / 8];
	} else {
		if (size > KMALLOC_MAX_SIZE)
			return NULL;

		index = fls(size - 1);
	}

#ifdef CONFIG_ZONE_DMA
	if (unlikely((flags & SLUB_DMA)))
		return dma_kmalloc_cache(index, flags);

#endif
	return &kmalloc_caches[index];
}

void *__kmalloc(size_t size, gfp_t flags)
{
	struct kmem_cache *s = get_slab(size, flags);

	if (ZERO_OR_NULL_PTR(s))
		return s;

	return slab_alloc(s, flags, -1, __builtin_return_address(0));
}
EXPORT_SYMBOL(__kmalloc);

#ifdef CONFIG_NUMA
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
	struct kmem_cache *s = get_slab(size, flags);

	if (ZERO_OR_NULL_PTR(s))
		return s;

	return slab_alloc(s, flags, node, __builtin_return_address(0));
}
EXPORT_SYMBOL(__kmalloc_node);
#endif
/*
 * Return the number of bytes actually usable in an allocated object,
 * which may exceed the requested size depending on debug options.
 */
size_t ksize(const void *object)
{
	struct page *page;
	struct kmem_cache *s;

	if (ZERO_OR_NULL_PTR(object))
		return 0;

	page = get_object_page(object);
	BUG_ON(!page);
	s = page->slab;
	BUG_ON(!s);

	/*
	 * Debugging requires use of the padding between object
	 * and whatever may come after it.
	 */
	if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
		return s->objsize;

	/*
	 * If we have the need to store the freelist pointer
	 * back there or track user information then we can
	 * only use the space before that information.
	 */
	if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
		return s->inuse;

	/*
	 * Else we can use all the padding etc for the allocation
	 */
	return s->size;
}
EXPORT_SYMBOL(ksize);

/* Free an object previously returned by kmalloc(). NULL/ZERO_SIZE_PTR ok. */
void kfree(const void *x)
{
	struct kmem_cache *s;
	struct page *page;

	/*
	 * This has to be an unsigned comparison. According to Linus
	 * some gcc version treat a pointer as a signed entity. Then
	 * this comparison would be true for all "negative" pointers
	 * (which would cover the whole upper half of the address space).
	 */
	if (ZERO_OR_NULL_PTR(x))
		return;

	page = virt_to_head_page(x);
	s = page->slab;

	slab_free(s, page, (void *)x, __builtin_return_address(0));
}
EXPORT_SYMBOL(kfree);

/*
 * kmem_cache_shrink removes empty slabs from the partial lists and sorts
 * the remaining slabs by the number of items in use. The slabs with the
 * most items in use come first. New allocations will then fill those up
 * and thus they can be removed from the partial lists.
 *
 * The slabs with the least items are placed last. This results in them
 * being allocated from last increasing the chance that the last objects
 * are freed in them.
 */
int kmem_cache_shrink(struct kmem_cache *s)
{
	int node;
	int i;
	struct kmem_cache_node *n;
	struct page *page;
	struct page *t;
	/* One bucket list per possible inuse count (0 .. objects-1). */
	struct list_head *slabs_by_inuse =
		kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL);
	unsigned long flags;

	if (!slabs_by_inuse)
		return -ENOMEM;

	flush_all(s);
	for_each_online_node(node) {
		n = get_node(s, node);

		if (!n->nr_partial)
			continue;

		for (i = 0; i < s->objects; i++)
			INIT_LIST_HEAD(slabs_by_inuse + i);

		spin_lock_irqsave(&n->list_lock, flags);

		/*
		 * Build lists indexed by the items in use in each slab.
		 *
		 * Note that concurrent frees may occur while we hold the
		 * list_lock. page->inuse here is the upper limit.
		 */
		list_for_each_entry_safe(page, t, &n->partial, lru) {
			if (!page->inuse && slab_trylock(page)) {
				/*
				 * Must hold slab lock here because slab_free
				 * may have freed the last object and be
				 * waiting to release the slab.
				 */
				list_del(&page->lru);
				n->nr_partial--;
				slab_unlock(page);
				discard_slab(s, page);
			} else {
				if (n->nr_partial > MAX_PARTIAL)
					list_move(&page->lru,
					slabs_by_inuse + page->inuse);
			}
		}

		/* Small partial lists are left in their current order. */
		if (n->nr_partial <= MAX_PARTIAL)
			goto out;

		/*
		 * Rebuild the partial list with the slabs filled up most
		 * first and the least used slabs at the end.
		 */
		for (i = s->objects - 1; i >= 0; i--)
			list_splice(slabs_by_inuse + i, n->partial.prev);

	out:
		spin_unlock_irqrestore(&n->list_lock, flags);
	}

	kfree(slabs_by_inuse);
	return 0;
}
EXPORT_SYMBOL(kmem_cache_shrink);

/********************************************************************
 *			Basic setup of slabs
 *******************************************************************/

/*
 * Boot-time initialization of the kmalloc cache array. Walks through
 * slab_state stages (DOWN -> PARTIAL -> UP) so that the bootstrap
 * allocations in kmem_cache_open can be satisfied.
 */
void __init kmem_cache_init(void)
{
	int i;
	int caches = 0;

#ifdef CONFIG_NUMA
	/*
	 * Must first have the slab cache available for the allocations of the
	 * struct kmem_cache_node's. There is special bootstrap code in
	 * kmem_cache_open for slab_state == DOWN.
	 */
	create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node",
		sizeof(struct kmem_cache_node), GFP_KERNEL);
	/* Negative refcount marks this cache as never mergeable. */
	kmalloc_caches[0].refcount = -1;
	caches++;
#endif

	/* Able to allocate the per node structures */
	slab_state = PARTIAL;

	/* Caches that are not of the two-to-the-power-of size */
	if (KMALLOC_MIN_SIZE <= 64) {
		create_kmalloc_cache(&kmalloc_caches[1],
				"kmalloc-96", 96, GFP_KERNEL);
		caches++;
	}
	if (KMALLOC_MIN_SIZE <= 128) {
		create_kmalloc_cache(&kmalloc_caches[2],
				"kmalloc-192", 192, GFP_KERNEL);
		caches++;
	}

	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
		create_kmalloc_cache(&kmalloc_caches[i],
			"kmalloc", 1 << i, GFP_KERNEL);
		caches++;
	}


	/*
	 * Patch up the size_index table if we have strange large alignment
	 * requirements for the kmalloc array. This is only the case for
	 * mips it seems. The standard arches will not generate any code here.
	 *
	 * Largest permitted alignment is 256 bytes due to the way we
	 * handle the index determination for the smaller caches.
	 *
	 * Make sure that nothing crazy happens if someone starts tinkering
	 * around with ARCH_KMALLOC_MINALIGN
	 */
	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
		size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;

	slab_state = UP;

	/* Provide the correct kmalloc names now that the caches are up */
	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
		kmalloc_caches[i]. name =
			kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);

#ifdef CONFIG_SMP
	register_cpu_notifier(&slab_notifier);
#endif

	kmem_size = offsetof(struct kmem_cache, cpu_slab) +
				nr_cpu_ids * sizeof(struct page *);

	printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
		" CPUs=%d, Nodes=%d\n",
		caches, cache_line_size(),
		slub_min_order, slub_max_order, slub_min_objects,
		nr_cpu_ids, nr_node_ids);
}

/*
 * Find a mergeable slab cache
 */
static int slab_unmergeable(struct kmem_cache *s)
{
	if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
		return 1;

	if (s->ctor)
		return 1;

	/*
	 * We may have set a slab to be unmergeable during bootstrap.
	 */
	if (s->refcount < 0)
		return 1;

	return 0;
}

/*
 * Search slab_caches for an existing cache that a new cache of the
 * given geometry and flags could be merged into. Returns NULL when no
 * compatible cache exists.
 */
static struct kmem_cache *find_mergeable(size_t size,
		size_t align, unsigned long flags,
		void (*ctor)(void *, struct kmem_cache *, unsigned long))
{
	struct kmem_cache *s;

	if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
		return NULL;

	if (ctor)
		return NULL;

	size = ALIGN(size, sizeof(void *));
	align = calculate_alignment(flags, align, size);
	size = ALIGN(size, align);

	list_for_each_entry(s, &slab_caches, list) {
		if (slab_unmergeable(s))
			continue;

		if (size > s->size)
			continue;

		if (((flags | slub_debug) & SLUB_MERGE_SAME) !=
			(s->flags & SLUB_MERGE_SAME))
				continue;
		/*
		 * Check if alignment is compatible.
		 * Courtesy of Adrian Drzewiecki
		 */
		if ((s->size & ~(align -1)) != s->size)
			continue;

		/* Reject if the fit would waste a pointer's worth or more. */
		if (s->size - size >= sizeof(void *))
			continue;

		return s;
	}
	return NULL;
}

/*
 * Public cache-creation entry point. Either aliases an existing
 * mergeable cache (bumping its refcount and widening its object size)
 * or opens a fresh cache. Panics on failure when SLAB_PANIC is set.
 */
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
		size_t align, unsigned long flags,
		void (*ctor)(void *, struct kmem_cache *, unsigned long))
{
	struct kmem_cache *s;

	down_write(&slub_lock);
	s = find_mergeable(size, align, flags, ctor);
	if (s) {
		s->refcount++;
		/*
		 * Adjust the object sizes so that we clear
		 * the complete object on kzalloc.
		 */
		s->objsize = max(s->objsize, (int)size);
		s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
		up_write(&slub_lock);
		if (sysfs_slab_alias(s, name))
			goto err;
		return s;
	}
	s = kmalloc(kmem_size, GFP_KERNEL);
	if (s) {
		if (kmem_cache_open(s, GFP_KERNEL, name,
				size, align, flags, ctor)) {
			list_add(&s->list, &slab_caches);
			up_write(&slub_lock);
			if (sysfs_slab_add(s))
				goto err;
			return s;
		}
		kfree(s);
	}
	up_write(&slub_lock);

err:
	if (flags & SLAB_PANIC)
		panic("Cannot create slabcache %s\n", name);
	else
		s = NULL;
	return s;
}
EXPORT_SYMBOL(kmem_cache_create);

#ifdef CONFIG_SMP
/*
 * Use the cpu notifier to insure that the cpu slabs are flushed when
 * necessary.
 */
static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
		unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	struct kmem_cache *s;
	unsigned long flags;

	switch (action) {
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/* Flush the dying cpu's slab of every cache. */
		down_read(&slub_lock);
		list_for_each_entry(s, &slab_caches, list) {
			local_irq_save(flags);
			__flush_cpu_slab(s, cpu);
			local_irq_restore(flags);
		}
		up_read(&slub_lock);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata slab_notifier =
	{ &slab_cpuup_callback, NULL, 0 };

#endif

/* Like __kmalloc() but records @caller as the allocation site. */
void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
{
	struct kmem_cache *s = get_slab(size, gfpflags);

	if (ZERO_OR_NULL_PTR(s))
		return s;

	return slab_alloc(s, gfpflags, -1, caller);
}

/* Like __kmalloc_node() but records @caller as the allocation site. */
void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
					int node, void *caller)
{
	struct kmem_cache *s = get_slab(size, gfpflags);

	if (ZERO_OR_NULL_PTR(s))
		return s;

	return slab_alloc(s, gfpflags, node, caller);
}

#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
/*
 * Verify the metadata of one slab: freelist consistency plus object
 * checks for both free (pass 1) and allocated (pass 2) objects.
 * @map is scratch bitmap space sized for s->objects bits.
 */
static int validate_slab(struct kmem_cache *s, struct page *page,
						unsigned long *map)
{
	void *p;
	void *addr = page_address(page);

	if (!check_slab(s, page) ||
			!on_freelist(s, page, NULL))
		return 0;

	/* Now we know that a valid freelist exists */
	bitmap_zero(map, s->objects);

	for_each_free_object(p, s, page->freelist) {
		set_bit(slab_index(p, s, addr), map);
		if (!check_object(s, page, p, 0))
			return 0;
	}

	/* Objects not on the freelist must validate as allocated. */
	for_each_object(p, s, addr)
		if (!test_bit(slab_index(p, s, addr), map))
			if (!check_object(s, page, p, 1))
				return 0;
	return 1;
}

/*
 * Validate one slab under its slab lock (skipping busy slabs) and warn
 * when the SlabDebug page flag disagrees with the cache's debug flags.
 */
static void validate_slab_slab(struct kmem_cache *s, struct page *page,
						unsigned long *map)
{
	if (slab_trylock(page)) {
		validate_slab(s, page, map);
		slab_unlock(page);
	} else
		printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
			s->name, page);

	if (s->flags & DEBUG_DEFAULT_FLAGS) {
		if (!SlabDebug(page))
			printk(KERN_ERR "SLUB %s: SlabDebug not set "
				"on slab 0x%p\n", s->name, page);
	} else {
		if (SlabDebug(page))
			printk(KERN_ERR "SLUB %s: SlabDebug set on "
				"slab 0x%p\n", s->name, page);
	}
}

/*
 * Validate all slabs on one node's partial (and, with SLAB_STORE_USER,
 * full) lists, cross-checking the counted totals against the node's
 * counters. Returns the number of slabs visited.
 */
static int validate_slab_node(struct kmem_cache *s,
		struct kmem_cache_node *n, unsigned long *map)
{
	unsigned long count = 0;
	struct page *page;
	unsigned long flags;

	spin_lock_irqsave(&n->list_lock, flags);

	list_for_each_entry(page, &n->partial, lru) {
		validate_slab_slab(s, page, map);
		count++;
	}
	if (count != n->nr_partial)
		printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
			"counter=%ld\n", s->name, count, n->nr_partial);

	/* Full slabs are only tracked when user tracking is enabled. */
	if (!(s->flags & SLAB_STORE_USER))
		goto out;

	list_for_each_entry(page, &n->full, lru) {
		validate_slab_slab(s, page, map);
		count++;
	}
	if (count != atomic_long_read(&n->nr_slabs))
		printk(KERN_ERR "SLUB: %s %ld slabs counted but "
			"counter=%ld\n", s->name, count,
			atomic_long_read(&n->nr_slabs));

out:
	spin_unlock_irqrestore(&n->list_lock, flags);
	return count;
}

/*
 * Validate every slab of a cache on all online nodes. Returns the slab
 * count, or -ENOMEM if the scratch bitmap could not be allocated.
 */
static long validate_slab_cache(struct kmem_cache *s)
{
	int node;
	unsigned long count = 0;
	unsigned long *map = kmalloc(BITS_TO_LONGS(s->objects) *
				sizeof(unsigned long), GFP_KERNEL);

	if (!map)
		return -ENOMEM;

	flush_all(s);
	for_each_online_node(node) {
		struct kmem_cache_node *n = get_node(s, node);

		count += validate_slab_node(s, n, map);
	}
	kfree(map);
	return count;
}

#ifdef SLUB_RESILIENCY_TEST
/*
 * Deliberately corrupt kmalloc objects (overwrites past the end and
 * writes after free) and run the validator to prove it catches them.
 * Only built when SLUB_RESILIENCY_TEST is defined.
 */
static void resiliency_test(void)
{
	u8 *p;

	printk(KERN_ERR "SLUB resiliency testing\n");
	printk(KERN_ERR "-----------------------\n");
	printk(KERN_ERR "A. Corruption after allocation\n");

	p = kzalloc(16, GFP_KERNEL);
	p[16] = 0x12;
	printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
			" 0x12->0x%p\n\n", p + 16);

	validate_slab_cache(kmalloc_caches + 4);

	/* Hmmm... The next two are dangerous */
	p = kzalloc(32, GFP_KERNEL);
	p[32 + sizeof(void *)] = 0x34;
	printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
			" 0x34 -> -0x%p\n", p);
	printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n");

	validate_slab_cache(kmalloc_caches + 5);
	p = kzalloc(64, GFP_KERNEL);
	p += 64 + (get_cycles() & 0xff) * sizeof(void *);
	*p = 0x56;
	printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
									p);
	printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n");
	validate_slab_cache(kmalloc_caches + 6);

	printk(KERN_ERR "\nB. Corruption after free\n");
	p = kzalloc(128, GFP_KERNEL);
	kfree(p);
	*p = 0x78;
	printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches + 7);

	p = kzalloc(256, GFP_KERNEL);
	kfree(p);
	p[50] = 0x9a;
	printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches + 8);

	p = kzalloc(512, GFP_KERNEL);
	kfree(p);
	p[512] = 0xab;
	printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches + 9);
}
#else
static void resiliency_test(void) {};
#endif

/*
 * Generate lists of code addresses where slabcache objects are allocated
 * and freed.
 */

/* Aggregated statistics for one allocation/free call site. */
struct location {
	unsigned long count;	/* number of objects tracked to this site */
	void *addr;		/* caller address, sort key */
	long long sum_time;	/* sum of object ages, for the mean */
	long min_time;
	long max_time;
	long min_pid;
	long max_pid;
	cpumask_t cpus;		/* cpus that hit this site */
	nodemask_t nodes;	/* nodes the tracked objects live on */
};

/* Growable, address-sorted array of struct location. */
struct loc_track {
	unsigned long max;	/* capacity, in elements */
	unsigned long count;	/* elements in use */
	struct location *loc;
};

static void free_loc_track(struct loc_track *t)
{
	if (t->max)
		free_pages((unsigned long)t->loc,
			get_order(sizeof(struct location) * t->max))(
}

/*
 * (Re)allocate the location array to hold @max entries, copying over any
 * existing entries. Returns 1 on success, 0 on allocation failure (the
 * old array is kept intact in that case).
 */
static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
{
	struct location *l;
	int order;

	order = get_order(sizeof(struct location) * max);

	l = (void *)__get_free_pages(flags, order);
	if (!l)
		return 0;

	if (t->count) {
		memcpy(l, t->loc, sizeof(struct location) * t->count);
		free_loc_track(t);
	}
	t->max = max;
	t->loc = l;
	return 1;
}

/*
 * Fold one track record into the sorted location array: binary-search
 * for the caller address, update its statistics when found, otherwise
 * insert a new element at the search position. Returns 0 only when a
 * needed array growth fails.
 */
static int add_location(struct loc_track *t, struct kmem_cache *s,
				const struct track *track)
{
	long start, end, pos;
	struct location *l;
	void *caddr;
	unsigned long age = jiffies - track->when;

	start = -1;
	end = t->count;

	for ( ; ; ) {
		pos = start + (end - start + 1) / 2;

		/*
		 * There is nothing at "end". If we end up there
		 * we need to add something to before end.
		 */
		if (pos == end)
			break;

		caddr = t->loc[pos].addr;
		if (track->addr == caddr) {

			l = &t->loc[pos];
			l->count++;
			if (track->when) {
				l->sum_time += age;
				if (age < l->min_time)
					l->min_time = age;
				if (age > l->max_time)
					l->max_time = age;

				if (track->pid < l->min_pid)
					l->min_pid = track->pid;
				if (track->pid > l->max_pid)
					l->max_pid = track->pid;

				cpu_set(track->cpu, l->cpus);
			}
			node_set(page_to_nid(virt_to_page(track)), l->nodes);
			return 1;
		}

		if (track->addr < caddr)
			end = pos;
		else
			start = pos;
	}

	/*
	 * Not found. Insert new tracking element.
	 */
	if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
		return 0;

	l = t->loc + pos;
	if (pos < t->count)
		memmove(l + 1, l,
			(t->count - pos) * sizeof(struct location));
	t->count++;
	l->count = 1;
	l->addr = track->addr;
	l->sum_time = age;
	l->min_time = age;
	l->max_time = age;
	l->min_pid = track->pid;
	l->max_pid = track->pid;
	cpus_clear(l->cpus);
	cpu_set(track->cpu, l->cpus);
	nodes_clear(l->nodes);
	node_set(page_to_nid(virt_to_page(track)), l->nodes);
	return 1;
}

/*
 * Walk all allocated objects of one slab and record their alloc or free
 * track (selected by @alloc) into the location tracker.
 */
static void process_slab(struct loc_track *t, struct kmem_cache *s,
		struct page *page, enum track_item alloc)
{
	void *addr = page_address(page);
	DECLARE_BITMAP(map, s->objects);
	void *p;

	/* Mark free objects so the second loop visits only allocated ones. */
	bitmap_zero(map, s->objects);
	for_each_free_object(p, s, page->freelist)
		set_bit(slab_index(p, s, addr), map);

	for_each_object(p, s, addr)
		if (!test_bit(slab_index(p, s, addr), map))
			add_location(t, s, get_track(s, p, alloc));
}

/*
 * Format the per-call-site statistics of a cache into @buf (one sysfs
 * page). Output is truncated once the page is nearly full.
 */
static int list_locations(struct kmem_cache *s, char *buf,
					enum track_item alloc)
{
	int n = 0;
	unsigned long i;
	struct loc_track t = { 0, 0, NULL };
	int node;

	if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
			GFP_KERNEL))
		return sprintf(buf, "Out of memory\n");

	/* Push back cpu slabs */
	flush_all(s);

	for_each_online_node(node) {
		struct kmem_cache_node *n = get_node(s, node);
		unsigned long flags;
		struct page *page;

		if (!atomic_read(&n->nr_slabs))
			continue;

		spin_lock_irqsave(&n->list_lock, flags);
		list_for_each_entry(page, &n->partial, lru)
			process_slab(&t, s, page, alloc);
		list_for_each_entry(page, &n->full, lru)
			process_slab(&t, s, page, alloc);
		spin_unlock_irqrestore(&n->list_lock, flags);
	}

	for (i = 0; i < t.count; i++) {
		struct location *l = &t.loc[i];

		if (n > PAGE_SIZE - 100)
			break;
		n += sprintf(buf + n, "%7ld ", l->count);

		if (l->addr)
			n += sprint_symbol(buf + n, (unsigned long)l->addr);
		else
			n += sprintf(buf + n, "<not-available>");

		if (l->sum_time != l->min_time) {
			unsigned long remainder;

			n += sprintf(buf + n, " age=%ld/%ld/%ld",
			l->min_time,
			div_long_long_rem(l->sum_time, l->count, &remainder),
			l->max_time);
		} else
			n += sprintf(buf + n, " age=%ld",
				l->min_time);

		if (l->min_pid != l->max_pid)
			n += sprintf(buf + n, " pid=%ld-%ld",
				l->min_pid, l->max_pid);
		else
			n += sprintf(buf + n, " pid=%ld",
				l->min_pid);

		if (num_online_cpus() > 1 && !cpus_empty(l->cpus) &&
				n < PAGE_SIZE - 60) {
			n += sprintf(buf + n, " cpus=");
			n += cpulist_scnprintf(buf + n, PAGE_SIZE - n - 50,
					l->cpus);
		}

		if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
				n < PAGE_SIZE - 60) {
			n += sprintf(buf + n, " nodes=");
			n += nodelist_scnprintf(buf + n, PAGE_SIZE - n - 50,
					l->nodes);
		}

		n += sprintf(buf + n, "\n");
	}

	free_loc_track(&t);
	/* Empty tracker: the loop above wrote nothing, so n is still 0. */
	if (!t.count)
		n += sprintf(buf, "No data\n");
	return n;
}

/* Sum the inuse counts of all slabs on a node's partial list. */
static unsigned long count_partial(struct kmem_cache_node *n)
{
	unsigned long flags;
	unsigned long x = 0;
	struct page *page;

	spin_lock_irqsave(&n->list_lock, flags);
	list_for_each_entry(page, &n->partial, lru)
		x += page->inuse;
	spin_unlock_irqrestore(&n->list_lock, flags);
	return x;
}

/* Selectors for slab_objects(): which slabs to count and in what unit. */
enum slab_stat_type {
	SL_FULL,
	SL_PARTIAL,
	SL_CPU,
	SL_OBJECTS
};

#define SO_FULL		(1 << SL_FULL)
#define SO_PARTIAL	(1 << SL_PARTIAL)
#define SO_CPU		(1 << SL_CPU)
#define SO_OBJECTS	(1 << SL_OBJECTS)

/*
 * Count slabs (or objects, with SO_OBJECTS) of the categories selected
 * in @flags, formatting the total and a per-node breakdown into @buf.
 *
 * NOTE(review): the kzalloc() result below is used without a NULL
 * check; an allocation failure would oops here.
 */
static unsigned long slab_objects(struct kmem_cache *s,
			char *buf, unsigned long flags)
{
	unsigned long total = 0;
	int cpu;
	int node;
	int x;
	unsigned long *nodes;
	unsigned long *per_cpu;

	/* One array, two halves: per-node totals and per-node cpu-slab counts. */
	nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
	per_cpu = nodes + nr_node_ids;

	for_each_possible_cpu(cpu) {
		struct page *page = s->cpu_slab[cpu];
		int node;

		if (page) {
			node = page_to_nid(page);
			if (flags & SO_CPU) {
				int x = 0;

				if (flags & SO_OBJECTS)
					x = page->inuse;
				else
					x = 1;
				total += x;
				nodes[node] += x;
			}
			per_cpu[node]++;
		}
	}

	for_each_online_node(node) {
		struct kmem_cache_node *n = get_node(s, node);

		if (flags & SO_PARTIAL) {
			if (flags & SO_OBJECTS)
				x = count_partial(n);
			else
				x = n->nr_partial;
			total += x;
			nodes[node] += x;
		}

		if (flags & SO_FULL) {
			/* Full slabs = all slabs minus cpu and partial slabs. */
			int full_slabs = atomic_read(&n->nr_slabs)
					- per_cpu[node]
					- n->nr_partial;

			if (flags & SO_OBJECTS)
				x = full_slabs * s->objects;
			else
				x = full_slabs;
			total += x;
			nodes[node] += x;
		}
	}

	x = sprintf(buf, "%lu", total);
#ifdef CONFIG_NUMA
	for_each_online_node(node)
		if (nodes[node])
			x += sprintf(buf + x, " N%d=%lu",
					node, nodes[node]);
#endif
	kfree(nodes);
	return x + sprintf(buf + x, "\n");
}

/* Return 1 if the cache has any slab (cpu, partial or full) anywhere. */
static int any_slab_objects(struct kmem_cache *s)
{
	int node;
	int cpu;

	for_each_possible_cpu(cpu)
		if (s->cpu_slab[cpu])
			return 1;

	for_each_node(node) {
		struct kmem_cache_node *n = get_node(s, node);

		if (n->nr_partial || atomic_read(&n->nr_slabs))
			return 1;
	}
	return 0;
}

#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
/*
 * NOTE(review): the trailing semicolon below is spurious; it is
 * harmless only because every current use is a full statement of the
 * form "s = to_slab(kobj);".
 */
#define to_slab(n) container_of(n, struct kmem_cache, kobj);

struct slab_attribute {
	struct attribute attr;
	ssize_t (*show)(struct kmem_cache *s, char *buf);
	ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
};

#define SLAB_ATTR_RO(_name) \
	static struct slab_attribute _name##_attr = __ATTR_RO(_name)

#define SLAB_ATTR(_name) \
	static struct slab_attribute _name##_attr =  \
	__ATTR(_name, 0644, _name##_show, _name##_store)

static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->size);
}
SLAB_ATTR_RO(slab_size);

static ssize_t align_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->align);
}
SLAB_ATTR_RO(align);

static ssize_t object_size_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->objsize);
}
SLAB_ATTR_RO(object_size);

static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->objects);
}
SLAB_ATTR_RO(objs_per_slab);

static ssize_t order_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->order);
}
SLAB_ATTR_RO(order);

static ssize_t ctor_show(struct kmem_cache *s, char *buf)
{
	if (s->ctor) {
		int n = sprint_symbol(buf, (unsigned long)s->ctor);

		return n + sprintf(buf + n, "\n");
	}
	return 0;
}
SLAB_ATTR_RO(ctor);

static ssize_t aliases_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->refcount - 1);
}
SLAB_ATTR_RO(aliases);

static ssize_t slabs_show(struct kmem_cache *s, char *buf)
{
	return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU);
}
SLAB_ATTR_RO(slabs);

static ssize_t partial_show(struct kmem_cache *s, char *buf)
{
	return slab_objects(s, buf, SO_PARTIAL);
}
SLAB_ATTR_RO(partial);

static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
{
	return slab_objects(s, buf, SO_CPU);
}
SLAB_ATTR_RO(cpu_slabs);

static ssize_t objects_show(struct kmem_cache *s, char *buf)
{
	return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS);
}
SLAB_ATTR_RO(objects);

static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
}

static ssize_t sanity_checks_store(struct kmem_cache *s,
				const char *buf, size_t length)
{
	s->flags &= ~SLAB_DEBUG_FREE;
	if (buf[0] == '1')
		s->flags |= SLAB_DEBUG_FREE;
	return length;
}
SLAB_ATTR(sanity_checks);

static ssize_t trace_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
}

static ssize_t trace_store(struct kmem_cache *s, const char *buf,
							size_t length)
{
	s->flags &= ~SLAB_TRACE;
	if (buf[0] == '1')
		s->flags |= SLAB_TRACE;
	return length;
}
SLAB_ATTR(trace);

static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
}

static ssize_t reclaim_account_store(struct kmem_cache *s,
				const char *buf, size_t length)
{
	s->flags &= ~SLAB_RECLAIM_ACCOUNT;
	if (buf[0] == '1')
		s->flags |= SLAB_RECLAIM_ACCOUNT;
	return length;
}
SLAB_ATTR(reclaim_account);

static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
}
SLAB_ATTR_RO(hwcache_align);

#ifdef CONFIG_ZONE_DMA
static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
}
SLAB_ATTR_RO(cache_dma);
#endif

static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
}
SLAB_ATTR_RO(destroy_by_rcu);

static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
}

/* Writable only while the cache holds no objects (layout changes). */
static ssize_t red_zone_store(struct kmem_cache *s,
				const char *buf, size_t length)
{
	if (any_slab_objects(s))
		return -EBUSY;

	s->flags &= ~SLAB_RED_ZONE;
	if (buf[0] == '1')
		s->flags |= SLAB_RED_ZONE;
	calculate_sizes(s);
	return length;
}
SLAB_ATTR(red_zone);

static ssize_t poison_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
}

/* Writable only while the cache holds no objects (layout changes). */
static ssize_t poison_store(struct kmem_cache *s,
				const char *buf, size_t length)
{
	if (any_slab_objects(s))
		return -EBUSY;

	s->flags &= ~SLAB_POISON;
	if (buf[0] == '1')
		s->flags |= SLAB_POISON;
	calculate_sizes(s);
	return length;
}
SLAB_ATTR(poison);

static ssize_t store_user_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
}

/* Writable only while the cache holds no objects (layout changes). */
static ssize_t store_user_store(struct kmem_cache *s,
				const char *buf, size_t length)
{
	if (any_slab_objects(s))
		return -EBUSY;

	s->flags &= ~SLAB_STORE_USER;
	if (buf[0] == '1')
		s->flags |= SLAB_STORE_USER;
	calculate_sizes(s);
	return length;
}
SLAB_ATTR(store_user);

static ssize_t validate_show(struct kmem_cache *s, char *buf)
{
	return 0;
}

/* Writing '1' runs the full slab validator on the cache. */
static ssize_t validate_store(struct kmem_cache *s,
			const char *buf, size_t length)
{
	int ret = -EINVAL;

	if (buf[0] == '1') {
		ret = validate_slab_cache(s);
		if (ret >= 0)
			ret = length;
	}
	return ret;
}
SLAB_ATTR(validate);

static ssize_t shrink_show(struct kmem_cache *s, char *buf)
{
	return 0;
}

/* Writing '1' triggers kmem_cache_shrink() on the cache. */
static ssize_t shrink_store(struct kmem_cache *s,
			const char *buf, size_t length)
{
	if (buf[0] == '1') {
		int rc = kmem_cache_shrink(s);

		if (rc)
			return rc;
	} else
		return -EINVAL;
	return length;
}
SLAB_ATTR(shrink);

static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
{
	if (!(s->flags & SLAB_STORE_USER))
		return -ENOSYS;
	return list_locations(s, buf, TRACK_ALLOC);
}
SLAB_ATTR_RO(alloc_calls);

static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
{
	if (!(s->flags & SLAB_STORE_USER))
		return -ENOSYS;
	return list_locations(s, buf, TRACK_FREE);
}
SLAB_ATTR_RO(free_calls);

#ifdef CONFIG_NUMA
static ssize_t defrag_ratio_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->defrag_ratio / 10);
}

/* Stored internally scaled by 10; values >= 100 are silently ignored. */
static ssize_t defrag_ratio_store(struct kmem_cache *s,
				const char *buf, size_t length)
{
	int n = simple_strtoul(buf, NULL, 10);

	if (n < 100)
		s->defrag_ratio = n * 10;
	return length;
}
SLAB_ATTR(defrag_ratio);
#endif

static struct attribute * slab_attrs[] = {
	&slab_size_attr.attr,
	&object_size_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&objects_attr.attr,
	&slabs_attr.attr,
	&partial_attr.attr,
	&cpu_slabs_attr.attr,
	&ctor_attr.attr,
	&aliases_attr.attr,
	&align_attr.attr,
	&sanity_checks_attr.attr,
	&trace_attr.attr,
	&hwcache_align_attr.attr,
	&reclaim_account_attr.attr,
	&destroy_by_rcu_attr.attr,
	&red_zone_attr.attr,
	&poison_attr.attr,
	&store_user_attr.attr,
	&validate_attr.attr,
	&shrink_attr.attr,
	&alloc_calls_attr.attr,
	&free_calls_attr.attr,
#ifdef CONFIG_ZONE_DMA
	&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
	&defrag_ratio_attr.attr,
#endif
	NULL
};

static struct attribute_group slab_attr_group = {
	.attrs = slab_attrs,
};

/* sysfs show dispatch: route to the slab_attribute's show handler. */
static ssize_t slab_attr_show(struct kobject *kobj,
				struct attribute *attr,
				char *buf)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->show)
		return -EIO;

	err = attribute->show(s, buf);

	return err;
}

/* sysfs store dispatch: route to the slab_attribute's store handler. */
static ssize_t slab_attr_store(struct kobject *kobj,
				struct attribute *attr,
				const char *buf, size_t len)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->store)
		return -EIO;

	err = attribute->store(s, buf, len);

	return err;
}

static struct sysfs_ops slab_sysfs_ops = {
	.show = slab_attr_show,
	.store = slab_attr_store,
};

static struct kobj_type slab_ktype = {
	.sysfs_ops = &slab_sysfs_ops,
};

/* Only emit uevents for kobjects of our own ktype. */
static int uevent_filter(struct kset *kset, struct kobject *kobj)
{
	struct kobj_type *ktype = get_ktype(kobj);

	if (ktype == &slab_ktype)
		return 1;
	return 0;
}

static struct kset_uevent_ops slab_uevent_ops = {
	.filter = uevent_filter,
};

static decl_subsys(slab, &slab_ktype, &slab_uevent_ops);

#define ID_STR_LENGTH 64

/* Create a unique string id for a slab cache:
 * format
 * :[flags-]size:[memory address of kmemcache]
 */
static char *create_unique_id(struct kmem_cache *s)
{
	char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
	char *p = name;

	/* Allocation failure is treated as a bug, not an error return. */
	BUG_ON(!name);

	*p++ = ':';
	/*
	 * First flags affecting slabcache operations. We will only
	 * get here for aliasable slabs so we do not need to support
	 * too many flags. The flags here must cover all flags that
	 * are matched during merging to guarantee that the id is
	 * unique.
	 */
	if (s->flags & SLAB_CACHE_DMA)
		*p++ = 'd';
	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		*p++ = 'a';
	if (s->flags & SLAB_DEBUG_FREE)
		*p++ = 'F';
	if (p != name + 1)
		*p++ = '-';
	p += sprintf(p, "%07d", s->size);
	BUG_ON(p > name + ID_STR_LENGTH - 1);
	return name;
}

/*
 * Register a cache in sysfs. Unmergeable caches use their own name;
 * mergeable ones get a unique generated id plus an alias symlink.
 *
 * NOTE(review): on the two early error returns below, a name obtained
 * from create_unique_id() is not freed.
 */
static int sysfs_slab_add(struct kmem_cache *s)
{
	int err;
	const char *name;
	int unmergeable;

	if (slab_state < SYSFS)
		/* Defer until later */
		return 0;

	unmergeable = slab_unmergeable(s);
	if (unmergeable) {
		/*
		 * Slabcache can never be merged so we can use the name proper.
		 * This is typically the case for debug situations. In that
		 * case we can catch duplicate names easily.
		 */
		sysfs_remove_link(&slab_subsys.kobj, s->name);
		name = s->name;
	} else {
		/*
		 * Create a unique name for the slab as a target
		 * for the symlinks.
		 */
		name = create_unique_id(s);
	}

	kobj_set_kset_s(s, slab_subsys);
	kobject_set_name(&s->kobj, name);
	kobject_init(&s->kobj);
	err = kobject_add(&s->kobj);
	if (err)
		return err;

	err = sysfs_create_group(&s->kobj, &slab_attr_group);
	if (err)
		return err;
	kobject_uevent(&s->kobj, KOBJ_ADD);
	if (!unmergeable) {
		/* Setup first alias */
		sysfs_slab_alias(s, s->name);
		kfree(name);
	}
	return 0;
}

/* Unregister a cache's kobject from sysfs. */
static void sysfs_slab_remove(struct kmem_cache *s)
{
	kobject_uevent(&s->kobj, KOBJ_REMOVE);
	kobject_del(&s->kobj);
}

/*
 * Need to buffer aliases during bootup until sysfs becomes
 * available lest we loose that information.
 */
struct saved_alias {
	struct kmem_cache *s;
	const char *name;
	struct saved_alias *next;
};

static struct saved_alias *alias_list;

static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
{
	struct saved_alias *al;

	if (slab_state == SYSFS) {
		/*
		 * If we have a leftover link then remove it.
3750 */ 3751 sysfs_remove_link(&slab_subsys.kobj, name); 3752 return sysfs_create_link(&slab_subsys.kobj, 3753 &s->kobj, name); 3754 } 3755 3756 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL); 3757 if (!al) 3758 return -ENOMEM; 3759 3760 al->s = s; 3761 al->name = name; 3762 al->next = alias_list; 3763 alias_list = al; 3764 return 0; 3765} 3766 3767static int __init slab_sysfs_init(void) 3768{ 3769 struct kmem_cache *s; 3770 int err; 3771 3772 err = subsystem_register(&slab_subsys); 3773 if (err) { 3774 printk(KERN_ERR "Cannot register slab subsystem.\n"); 3775 return -ENOSYS; 3776 } 3777 3778 slab_state = SYSFS; 3779 3780 list_for_each_entry(s, &slab_caches, list) { 3781 err = sysfs_slab_add(s); 3782 BUG_ON(err); 3783 } 3784 3785 while (alias_list) { 3786 struct saved_alias *al = alias_list; 3787 3788 alias_list = alias_list->next; 3789 err = sysfs_slab_alias(al->s, al->name); 3790 BUG_ON(err); 3791 kfree(al); 3792 } 3793 3794 resiliency_test(); 3795 return 0; 3796} 3797 3798__initcall(slab_sysfs_init); 3799#endif 3800