slub.c revision 434e245ddd3f14aa8eef97cae16c71b863ab092a
1/* 2 * SLUB: A slab allocator that limits cache line use instead of queuing 3 * objects in per cpu and per node lists. 4 * 5 * The allocator synchronizes using per slab locks and only 6 * uses a centralized lock to manage a pool of partial slabs. 7 * 8 * (C) 2007 SGI, Christoph Lameter <clameter@sgi.com> 9 */ 10 11#include <linux/mm.h> 12#include <linux/module.h> 13#include <linux/bit_spinlock.h> 14#include <linux/interrupt.h> 15#include <linux/bitops.h> 16#include <linux/slab.h> 17#include <linux/seq_file.h> 18#include <linux/cpu.h> 19#include <linux/cpuset.h> 20#include <linux/mempolicy.h> 21#include <linux/ctype.h> 22#include <linux/kallsyms.h> 23 24/* 25 * Lock order: 26 * 1. slab_lock(page) 27 * 2. slab->list_lock 28 * 29 * The slab_lock protects operations on the objects of a particular 30 * slab and its metadata in the page struct. If the slab lock 31 * has been taken then no allocations nor frees can be performed 32 * on the objects in the slab nor can the slab be added or removed 33 * from the partial or full lists since this would mean modifying 34 * the page struct of the slab. 35 * 36 * The list_lock protects the partial and full list on each node and 37 * the partial slab counter. If taken then no new slabs may be added or 38 * removed from the lists nor can the number of partial slabs be modified. 39 * (Note that the total number of slabs is an atomic value that may be 40 * modified without taking the list lock). 41 * 42 * The list_lock is a centralized lock and thus we avoid taking it as 43 * much as possible. As long as SLUB does not have to handle partial 44 * slabs, operations can continue without any centralized lock. F.e. 45 * allocating a long series of objects that fill up slabs does not require 46 * the list lock. 47 * 48 * The lock order is sometimes inverted when we are trying to get a slab 49 * off a list. We take the list_lock and then look for a page on the list 50 * to use. While we do that objects in the slabs may be freed. We can 51 * only operate on the slab if we have also taken the slab_lock. So we use 52 * a slab_trylock() on the slab. If trylock was successful then no frees 53 * can occur anymore and we can use the slab for allocations etc. If the 54 * slab_trylock() does not succeed then frees are in progress in the slab and 55 * we must stay away from it for a while since we may cause a bouncing 56 * cacheline if we try to acquire the lock. So go onto the next slab. 57 * If all pages are busy then we may allocate a new slab instead of reusing 58 * a partial slab. A new slab has no one operating on it and thus there is 59 * no danger of cacheline contention. 60 * 61 * Interrupts are disabled during allocation and deallocation in order to 62 * make the slab allocator safe to use in the context of an irq. In addition 63 * interrupts are disabled to ensure that the processor does not change 64 * while handling per_cpu slabs, due to kernel preemption. 65 * 66 * SLUB assigns one slab for allocation to each processor. 67 * Allocations only occur from these slabs called cpu slabs. 68 * 69 * Slabs with free elements are kept on a partial list and during regular 70 * operations no list for full slabs is used. If an object in a full slab is 71 * freed then the slab will show up again on the partial lists. 72 * We track full slabs for debugging purposes though because otherwise we 73 * cannot scan all objects. 74 * 75 * Slabs are freed when they become empty.
Teardown and setup is 76 * minimal so we rely on the page allocator's per cpu caches for 77 * fast frees and allocs. 78 * 79 * Overloading of page flags that are otherwise used for LRU management. 80 * 81 * PageActive The slab is frozen and exempt from list processing. 82 * This means that the slab is dedicated to a purpose 83 * such as satisfying allocations for a specific 84 * processor. Objects may be freed in the slab while 85 * it is frozen but slab_free will then skip the usual 86 * list operations. It is up to the processor holding 87 * the slab to integrate the slab into the slab lists 88 * when the slab is no longer needed. 89 * 90 * One use of this flag is to mark slabs that are 91 * used for allocations. Then such a slab becomes a cpu 92 * slab. The cpu slab may be equipped with an additional 93 * lockless_freelist that allows lockless access to 94 * free objects in addition to the regular freelist 95 * that requires the slab lock. 96 * 97 * PageError Slab requires special handling due to debug 98 * options set. This moves slab handling out of 99 * the fast path and disables lockless freelists. 100 */ 101 102#define FROZEN (1 << PG_active) 103 104#ifdef CONFIG_SLUB_DEBUG 105#define SLABDEBUG (1 << PG_error) 106#else 107#define SLABDEBUG 0 108#endif 109 110static inline int SlabFrozen(struct page *page) 111{ 112 return page->flags & FROZEN; 113} 114 115static inline void SetSlabFrozen(struct page *page) 116{ 117 page->flags |= FROZEN; 118} 119 120static inline void ClearSlabFrozen(struct page *page) 121{ 122 page->flags &= ~FROZEN; 123} 124 125static inline int SlabDebug(struct page *page) 126{ 127 return page->flags & SLABDEBUG; 128} 129 130static inline void SetSlabDebug(struct page *page) 131{ 132 page->flags |= SLABDEBUG; 133} 134 135static inline void ClearSlabDebug(struct page *page) 136{ 137 page->flags &= ~SLABDEBUG; 138} 139 140/* 141 * Issues still to be resolved: 142 * 143 * - The per cpu array is updated for each new slab and is a remote 144 * cacheline for most nodes. This could become a bouncing cacheline given 145 * enough frequent updates. There are 16 pointers in a cacheline, so at 146 * max 16 cpus could compete for the cacheline which may be okay. 147 * 148 * - Support PAGE_ALLOC_DEBUG. Should be easy to do. 149 * 150 * - Variable sizing of the per node arrays 151 */ 152 153/* Enable to test recovery from slab corruption on boot */ 154#undef SLUB_RESILIENCY_TEST 155 156#if PAGE_SHIFT <= 12 157 158/* 159 * Small page size. Make sure that we do not fragment memory 160 */ 161#define DEFAULT_MAX_ORDER 1 162#define DEFAULT_MIN_OBJECTS 4 163 164#else 165 166/* 167 * Large page machines are customarily able to handle larger 168 * page orders. 169 */ 170#define DEFAULT_MAX_ORDER 2 171#define DEFAULT_MIN_OBJECTS 8 172 173#endif 174 175/* 176 * Minimum number of partial slabs. These will be left on the partial 177 * lists even if they are empty. kmem_cache_shrink may reclaim them. 178 */ 179#define MIN_PARTIAL 2 180 181/* 182 * Maximum number of desirable partial slabs. 183 * The existence of more partial slabs makes kmem_cache_shrink 184 * sort the partial list by the number of objects in use.
185 */ 186#define MAX_PARTIAL 10 187 188#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \ 189 SLAB_POISON | SLAB_STORE_USER) 190 191/* 192 * Set of flags that will prevent slab merging 193 */ 194#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ 195 SLAB_TRACE | SLAB_DESTROY_BY_RCU) 196 197#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ 198 SLAB_CACHE_DMA) 199 200#ifndef ARCH_KMALLOC_MINALIGN 201#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) 202#endif 203 204#ifndef ARCH_SLAB_MINALIGN 205#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) 206#endif 207 208/* 209 * The page->inuse field is 16 bit thus we have this limitation 210 */ 211#define MAX_OBJECTS_PER_SLAB 65535 212 213/* Internal SLUB flags */ 214#define __OBJECT_POISON 0x80000000 /* Poison object */ 215 216/* Not all arches define cache_line_size */ 217#ifndef cache_line_size 218#define cache_line_size() L1_CACHE_BYTES 219#endif 220 221static int kmem_size = sizeof(struct kmem_cache); 222 223#ifdef CONFIG_SMP 224static struct notifier_block slab_notifier; 225#endif 226 227static enum { 228 DOWN, /* No slab functionality available */ 229 PARTIAL, /* kmem_cache_open() works but kmalloc does not */ 230 UP, /* Everything works but does not show up in sysfs */ 231 SYSFS /* Sysfs up */ 232} slab_state = DOWN; 233 234/* A list of all slab caches on the system */ 235static DECLARE_RWSEM(slub_lock); 236static LIST_HEAD(slab_caches); 237 238/* 239 * Tracking user of a slab. 240 */ 241struct track { 242 void *addr; /* Called from address */ 243 int cpu; /* Was running on cpu */ 244 int pid; /* Pid context */ 245 unsigned long when; /* When did the operation occur */ 246}; 247 248enum track_item { TRACK_ALLOC, TRACK_FREE }; 249 250#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) 251static int sysfs_slab_add(struct kmem_cache *); 252static int sysfs_slab_alias(struct kmem_cache *, const char *); 253static void sysfs_slab_remove(struct kmem_cache *); 254#else 255static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } 256static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) 257 { return 0; } 258static inline void sysfs_slab_remove(struct kmem_cache *s) {} 259#endif 260 261/******************************************************************** 262 * Core slab cache functions 263 *******************************************************************/ 264 265int slab_is_available(void) 266{ 267 return slab_state >= UP; 268} 269 270static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) 271{ 272#ifdef CONFIG_NUMA 273 return s->node[node]; 274#else 275 return &s->local_node; 276#endif 277} 278 279static inline int check_valid_pointer(struct kmem_cache *s, 280 struct page *page, const void *object) 281{ 282 void *base; 283 284 if (!object) 285 return 1; 286 287 base = page_address(page); 288 if (object < base || object >= base + s->objects * s->size || 289 (object - base) % s->size) { 290 return 0; 291 } 292 293 return 1; 294} 295 296/* 297 * Slow version of get and set free pointer. 298 * 299 * This version requires touching the cache lines of kmem_cache which 300 * we avoid to do in the fast alloc free paths. There we obtain the offset 301 * from the page struct. 
302 */ 303static inline void *get_freepointer(struct kmem_cache *s, void *object) 304{ 305 return *(void **)(object + s->offset); 306} 307 308static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) 309{ 310 *(void **)(object + s->offset) = fp; 311} 312 313/* Loop over all objects in a slab */ 314#define for_each_object(__p, __s, __addr) \ 315 for (__p = (__addr); __p < (__addr) + (__s)->objects * (__s)->size;\ 316 __p += (__s)->size) 317 318/* Scan freelist */ 319#define for_each_free_object(__p, __s, __free) \ 320 for (__p = (__free); __p; __p = get_freepointer((__s), __p)) 321 322/* Determine object index from a given position */ 323static inline int slab_index(void *p, struct kmem_cache *s, void *addr) 324{ 325 return (p - addr) / s->size; 326} 327 328#ifdef CONFIG_SLUB_DEBUG 329/* 330 * Debug settings: 331 */ 332#ifdef CONFIG_SLUB_DEBUG_ON 333static int slub_debug = DEBUG_DEFAULT_FLAGS; 334#else 335static int slub_debug; 336#endif 337 338static char *slub_debug_slabs; 339 340/* 341 * Object debugging 342 */ 343static void print_section(char *text, u8 *addr, unsigned int length) 344{ 345 int i, offset; 346 int newline = 1; 347 char ascii[17]; 348 349 ascii[16] = 0; 350 351 for (i = 0; i < length; i++) { 352 if (newline) { 353 printk(KERN_ERR "%8s 0x%p: ", text, addr + i); 354 newline = 0; 355 } 356 printk(" %02x", addr[i]); 357 offset = i % 16; 358 ascii[offset] = isgraph(addr[i]) ? addr[i] : '.'; 359 if (offset == 15) { 360 printk(" %s\n",ascii); 361 newline = 1; 362 } 363 } 364 if (!newline) { 365 i %= 16; 366 while (i < 16) { 367 printk(" "); 368 ascii[i] = ' '; 369 i++; 370 } 371 printk(" %s\n", ascii); 372 } 373} 374 375static struct track *get_track(struct kmem_cache *s, void *object, 376 enum track_item alloc) 377{ 378 struct track *p; 379 380 if (s->offset) 381 p = object + s->offset + sizeof(void *); 382 else 383 p = object + s->inuse; 384 385 return p + alloc; 386} 387 388static void set_track(struct kmem_cache *s, void *object, 389 enum track_item alloc, void *addr) 390{ 391 struct track *p; 392 393 if (s->offset) 394 p = object + s->offset + sizeof(void *); 395 else 396 p = object + s->inuse; 397 398 p += alloc; 399 if (addr) { 400 p->addr = addr; 401 p->cpu = smp_processor_id(); 402 p->pid = current ? current->pid : -1; 403 p->when = jiffies; 404 } else 405 memset(p, 0, sizeof(struct track)); 406} 407 408static void init_tracking(struct kmem_cache *s, void *object) 409{ 410 if (!(s->flags & SLAB_STORE_USER)) 411 return; 412 413 set_track(s, object, TRACK_FREE, NULL); 414 set_track(s, object, TRACK_ALLOC, NULL); 415} 416 417static void print_track(const char *s, struct track *t) 418{ 419 if (!t->addr) 420 return; 421 422 printk(KERN_ERR "INFO: %s in ", s); 423 __print_symbol("%s", (unsigned long)t->addr); 424 printk(" age=%lu cpu=%u pid=%d\n", jiffies - t->when, t->cpu, t->pid); 425} 426 427static void print_tracking(struct kmem_cache *s, void *object) 428{ 429 if (!(s->flags & SLAB_STORE_USER)) 430 return; 431 432 print_track("Allocated", get_track(s, object, TRACK_ALLOC)); 433 print_track("Freed", get_track(s, object, TRACK_FREE)); 434} 435 436static void print_page_info(struct page *page) 437{ 438 printk(KERN_ERR "INFO: Slab 0x%p used=%u fp=0x%p flags=0x%04lx\n", 439 page, page->inuse, page->freelist, page->flags); 440 441} 442 443static void slab_bug(struct kmem_cache *s, char *fmt, ...) 
444{ 445 va_list args; 446 char buf[100]; 447 448 va_start(args, fmt); 449 vsnprintf(buf, sizeof(buf), fmt, args); 450 va_end(args); 451 printk(KERN_ERR "========================================" 452 "=====================================\n"); 453 printk(KERN_ERR "BUG %s: %s\n", s->name, buf); 454 printk(KERN_ERR "----------------------------------------" 455 "-------------------------------------\n\n"); 456} 457 458static void slab_fix(struct kmem_cache *s, char *fmt, ...) 459{ 460 va_list args; 461 char buf[100]; 462 463 va_start(args, fmt); 464 vsnprintf(buf, sizeof(buf), fmt, args); 465 va_end(args); 466 printk(KERN_ERR "FIX %s: %s\n", s->name, buf); 467} 468 469static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) 470{ 471 unsigned int off; /* Offset of last byte */ 472 u8 *addr = page_address(page); 473 474 print_tracking(s, p); 475 476 print_page_info(page); 477 478 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", 479 p, p - addr, get_freepointer(s, p)); 480 481 if (p > addr + 16) 482 print_section("Bytes b4", p - 16, 16); 483 484 print_section("Object", p, min(s->objsize, 128)); 485 486 if (s->flags & SLAB_RED_ZONE) 487 print_section("Redzone", p + s->objsize, 488 s->inuse - s->objsize); 489 490 if (s->offset) 491 off = s->offset + sizeof(void *); 492 else 493 off = s->inuse; 494 495 if (s->flags & SLAB_STORE_USER) 496 off += 2 * sizeof(struct track); 497 498 if (off != s->size) 499 /* Beginning of the filler is the free pointer */ 500 print_section("Padding", p + off, s->size - off); 501 502 dump_stack(); 503} 504 505static void object_err(struct kmem_cache *s, struct page *page, 506 u8 *object, char *reason) 507{ 508 slab_bug(s, reason); 509 print_trailer(s, page, object); 510} 511 512static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...) 513{ 514 va_list args; 515 char buf[100]; 516 517 va_start(args, fmt); 518 vsnprintf(buf, sizeof(buf), fmt, args); 519 va_end(args); 520 slab_bug(s, fmt); 521 print_page_info(page); 522 dump_stack(); 523} 524 525static void init_object(struct kmem_cache *s, void *object, int active) 526{ 527 u8 *p = object; 528 529 if (s->flags & __OBJECT_POISON) { 530 memset(p, POISON_FREE, s->objsize - 1); 531 p[s->objsize -1] = POISON_END; 532 } 533 534 if (s->flags & SLAB_RED_ZONE) 535 memset(p + s->objsize, 536 active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE, 537 s->inuse - s->objsize); 538} 539 540static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes) 541{ 542 while (bytes) { 543 if (*start != (u8)value) 544 return start; 545 start++; 546 bytes--; 547 } 548 return NULL; 549} 550 551static void restore_bytes(struct kmem_cache *s, char *message, u8 data, 552 void *from, void *to) 553{ 554 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data); 555 memset(from, data, to - from); 556} 557 558static int check_bytes_and_report(struct kmem_cache *s, struct page *page, 559 u8 *object, char *what, 560 u8* start, unsigned int value, unsigned int bytes) 561{ 562 u8 *fault; 563 u8 *end; 564 565 fault = check_bytes(start, value, bytes); 566 if (!fault) 567 return 1; 568 569 end = start + bytes; 570 while (end > fault && end[-1] == value) 571 end--; 572 573 slab_bug(s, "%s overwritten", what); 574 printk(KERN_ERR "INFO: 0x%p-0x%p. 
First byte 0x%x instead of 0x%x\n", 575 fault, end - 1, fault[0], value); 576 print_trailer(s, page, object); 577 578 restore_bytes(s, what, value, fault, end); 579 return 0; 580} 581 582/* 583 * Object layout: 584 * 585 * object address 586 * Bytes of the object to be managed. 587 * If the freepointer may overlay the object then the free 588 * pointer is the first word of the object. 589 * 590 * Poisoning uses 0x6b (POISON_FREE) and the last byte is 591 * 0xa5 (POISON_END) 592 * 593 * object + s->objsize 594 * Padding to reach word boundary. This is also used for Redzoning. 595 * Padding is extended by another word if Redzoning is enabled and 596 * objsize == inuse. 597 * 598 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with 599 * 0xcc (RED_ACTIVE) for objects in use. 600 * 601 * object + s->inuse 602 * Meta data starts here. 603 * 604 * A. Free pointer (if we cannot overwrite object on free) 605 * B. Tracking data for SLAB_STORE_USER 606 * C. Padding to reach required alignment boundary or at minimum 607 * one word if debugging is on to be able to detect writes 608 * before the word boundary. 609 * 610 * Padding is done using 0x5a (POISON_INUSE) 611 * 612 * object + s->size 613 * Nothing is used beyond s->size. 614 * 615 * If slabcaches are merged then the objsize and inuse boundaries are mostly 616 * ignored. And therefore no slab options that rely on these boundaries 617 * may be used with merged slabcaches. 618 */ 619
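/*
 * A worked example (illustrative only; it assumes a 64-bit machine where
 * sizeof(void *) == 8 and sizeof(struct track) == 24, and a hypothetical
 * cache with objsize 24, no constructor and
 * SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER set):
 *
 *	bytes  0..23	object payload (0x6b poison, last byte 0xa5, while free)
 *	bytes 24..31	red zone word (objsize is already word aligned, so one
 *			extra word is added); s->inuse = 32
 *	bytes 32..39	free pointer, relocated behind the object because
 *			poisoning may not overwrite it; s->offset = 32
 *	bytes 40..87	two struct track records (alloc and free)
 *	bytes 88..95	padding word added for SLAB_RED_ZONE; s->size = 96
 *
 * The exact numbers depend on the architecture and the flags; see
 * calculate_sizes() below for how they are derived.
 */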
620static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) 621{ 622 unsigned long off = s->inuse; /* The end of info */ 623 624 if (s->offset) 625 /* Freepointer is placed after the object. */ 626 off += sizeof(void *); 627 628 if (s->flags & SLAB_STORE_USER) 629 /* We also have user information there */ 630 off += 2 * sizeof(struct track); 631 632 if (s->size == off) 633 return 1; 634 635 return check_bytes_and_report(s, page, p, "Object padding", 636 p + off, POISON_INUSE, s->size - off); 637} 638 639static int slab_pad_check(struct kmem_cache *s, struct page *page) 640{ 641 u8 *start; 642 u8 *fault; 643 u8 *end; 644 int length; 645 int remainder; 646 647 if (!(s->flags & SLAB_POISON)) 648 return 1; 649 650 start = page_address(page); 651 end = start + (PAGE_SIZE << s->order); 652 length = s->objects * s->size; 653 remainder = end - (start + length); 654 if (!remainder) 655 return 1; 656 657 fault = check_bytes(start + length, POISON_INUSE, remainder); 658 if (!fault) 659 return 1; 660 while (end > fault && end[-1] == POISON_INUSE) 661 end--; 662 663 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); 664 print_section("Padding", start, length); 665 666 restore_bytes(s, "slab padding", POISON_INUSE, start, end); 667 return 0; 668} 669 670static int check_object(struct kmem_cache *s, struct page *page, 671 void *object, int active) 672{ 673 u8 *p = object; 674 u8 *endobject = object + s->objsize; 675 676 if (s->flags & SLAB_RED_ZONE) { 677 unsigned int red = 678 active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE; 679 680 if (!check_bytes_and_report(s, page, object, "Redzone", 681 endobject, red, s->inuse - s->objsize)) 682 return 0; 683 } else { 684 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) 685 check_bytes_and_report(s, page, p, "Alignment padding", endobject, 686 POISON_INUSE, s->inuse - s->objsize); 687 } 688 689 if (s->flags & SLAB_POISON) { 690 if (!active && (s->flags & __OBJECT_POISON) && 691 (!check_bytes_and_report(s, page, p, "Poison", p, 692 POISON_FREE, s->objsize - 1) || 693 !check_bytes_and_report(s, page, p, "Poison", 694 p + s->objsize - 1, POISON_END, 1))) 695 return 0; 696 /* 697 * check_pad_bytes cleans up on its own. 698 */ 699 check_pad_bytes(s, page, p); 700 } 701 702 if (!s->offset && active) 703 /* 704 * Object and freepointer overlap. Cannot check 705 * freepointer while object is allocated. 706 */ 707 return 1; 708 709 /* Check free pointer validity */ 710 if (!check_valid_pointer(s, page, get_freepointer(s, p))) { 711 object_err(s, page, p, "Freepointer corrupt"); 712 /* 713 * No choice but to zap it and thus lose the remainder 714 * of the free objects in this slab. May cause 715 * another error because the object count is now wrong. 716 */ 717 set_freepointer(s, p, NULL); 718 return 0; 719 } 720 return 1; 721} 722 723static int check_slab(struct kmem_cache *s, struct page *page) 724{ 725 VM_BUG_ON(!irqs_disabled()); 726 727 if (!PageSlab(page)) { 728 slab_err(s, page, "Not a valid slab page"); 729 return 0; 730 } 731 if (page->offset * sizeof(void *) != s->offset) { 732 slab_err(s, page, "Corrupted offset %lu", 733 (unsigned long)(page->offset * sizeof(void *))); 734 return 0; 735 } 736 if (page->inuse > s->objects) { 737 slab_err(s, page, "inuse %u > max %u", 738 page->inuse, s->objects); 739 return 0; 740 } 741 /* Slab_pad_check fixes things up after itself */ 742 slab_pad_check(s, page); 743 return 1; 744} 745 746/* 747 * Determine if a certain object on a page is on the freelist. Must hold the 748 * slab lock to guarantee that the chains are in a consistent state. 749 */ 750static int on_freelist(struct kmem_cache *s, struct page *page, void *search) 751{ 752 int nr = 0; 753 void *fp = page->freelist; 754 void *object = NULL; 755 756 while (fp && nr <= s->objects) { 757 if (fp == search) 758 return 1; 759 if (!check_valid_pointer(s, page, fp)) { 760 if (object) { 761 object_err(s, page, object, 762 "Freechain corrupt"); 763 set_freepointer(s, object, NULL); 764 break; 765 } else { 766 slab_err(s, page, "Freepointer corrupt"); 767 page->freelist = NULL; 768 page->inuse = s->objects; 769 slab_fix(s, "Freelist cleared"); 770 return 0; 771 } 772 break; 773 } 774 object = fp; 775 fp = get_freepointer(s, object); 776 nr++; 777 } 778 779 if (page->inuse != s->objects - nr) { 780 slab_err(s, page, "Wrong object count. Counter is %d but " 781 "counted were %d", page->inuse, s->objects - nr); 782 page->inuse = s->objects - nr; 783 slab_fix(s, "Object count adjusted."); 784 } 785 return search == NULL; 786} 787 788static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc) 789{ 790 if (s->flags & SLAB_TRACE) { 791 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n", 792 s->name, 793 alloc ? "alloc" : "free", 794 object, page->inuse, 795 page->freelist); 796 797 if (!alloc) 798 print_section("Object", (void *)object, s->objsize); 799 800 dump_stack(); 801 } 802} 803 804/* 805 * Tracking of fully allocated slabs for debugging purposes.
806 */ 807static void add_full(struct kmem_cache_node *n, struct page *page) 808{ 809 spin_lock(&n->list_lock); 810 list_add(&page->lru, &n->full); 811 spin_unlock(&n->list_lock); 812} 813 814static void remove_full(struct kmem_cache *s, struct page *page) 815{ 816 struct kmem_cache_node *n; 817 818 if (!(s->flags & SLAB_STORE_USER)) 819 return; 820 821 n = get_node(s, page_to_nid(page)); 822 823 spin_lock(&n->list_lock); 824 list_del(&page->lru); 825 spin_unlock(&n->list_lock); 826} 827 828static void setup_object_debug(struct kmem_cache *s, struct page *page, 829 void *object) 830{ 831 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))) 832 return; 833 834 init_object(s, object, 0); 835 init_tracking(s, object); 836} 837 838static int alloc_debug_processing(struct kmem_cache *s, struct page *page, 839 void *object, void *addr) 840{ 841 if (!check_slab(s, page)) 842 goto bad; 843 844 if (object && !on_freelist(s, page, object)) { 845 object_err(s, page, object, "Object already allocated"); 846 goto bad; 847 } 848 849 if (!check_valid_pointer(s, page, object)) { 850 object_err(s, page, object, "Freelist Pointer check fails"); 851 goto bad; 852 } 853 854 if (object && !check_object(s, page, object, 0)) 855 goto bad; 856 857 /* Success perform special debug activities for allocs */ 858 if (s->flags & SLAB_STORE_USER) 859 set_track(s, object, TRACK_ALLOC, addr); 860 trace(s, page, object, 1); 861 init_object(s, object, 1); 862 return 1; 863 864bad: 865 if (PageSlab(page)) { 866 /* 867 * If this is a slab page then lets do the best we can 868 * to avoid issues in the future. Marking all objects 869 * as used avoids touching the remaining objects. 870 */ 871 slab_fix(s, "Marking all objects used"); 872 page->inuse = s->objects; 873 page->freelist = NULL; 874 /* Fix up fields that may be corrupted */ 875 page->offset = s->offset / sizeof(void *); 876 } 877 return 0; 878} 879 880static int free_debug_processing(struct kmem_cache *s, struct page *page, 881 void *object, void *addr) 882{ 883 if (!check_slab(s, page)) 884 goto fail; 885 886 if (!check_valid_pointer(s, page, object)) { 887 slab_err(s, page, "Invalid object pointer 0x%p", object); 888 goto fail; 889 } 890 891 if (on_freelist(s, page, object)) { 892 object_err(s, page, object, "Object already free"); 893 goto fail; 894 } 895 896 if (!check_object(s, page, object, 1)) 897 return 0; 898 899 if (unlikely(s != page->slab)) { 900 if (!PageSlab(page)) 901 slab_err(s, page, "Attempt to free object(0x%p) " 902 "outside of slab", object); 903 else 904 if (!page->slab) { 905 printk(KERN_ERR 906 "SLUB <none>: no slab for object 0x%p.\n", 907 object); 908 dump_stack(); 909 } 910 else 911 object_err(s, page, object, 912 "page slab pointer corrupt."); 913 goto fail; 914 } 915 916 /* Special debug activities for freeing objects */ 917 if (!SlabFrozen(page) && !page->freelist) 918 remove_full(s, page); 919 if (s->flags & SLAB_STORE_USER) 920 set_track(s, object, TRACK_FREE, addr); 921 trace(s, page, object, 0); 922 init_object(s, object, 0); 923 return 1; 924 925fail: 926 slab_fix(s, "Object at 0x%p not freed", object); 927 return 0; 928} 929 930static int __init setup_slub_debug(char *str) 931{ 932 slub_debug = DEBUG_DEFAULT_FLAGS; 933 if (*str++ != '=' || !*str) 934 /* 935 * No options specified. Switch on full debugging. 936 */ 937 goto out; 938 939 if (*str == ',') 940 /* 941 * No options but restriction on slabs. This means full 942 * debugging for slabs matching a pattern. 
943 */ 944 goto check_slabs; 945 946 slub_debug = 0; 947 if (*str == '-') 948 /* 949 * Switch off all debugging measures. 950 */ 951 goto out; 952 953 /* 954 * Determine which debug features should be switched on 955 */ 956 for ( ;*str && *str != ','; str++) { 957 switch (tolower(*str)) { 958 case 'f': 959 slub_debug |= SLAB_DEBUG_FREE; 960 break; 961 case 'z': 962 slub_debug |= SLAB_RED_ZONE; 963 break; 964 case 'p': 965 slub_debug |= SLAB_POISON; 966 break; 967 case 'u': 968 slub_debug |= SLAB_STORE_USER; 969 break; 970 case 't': 971 slub_debug |= SLAB_TRACE; 972 break; 973 default: 974 printk(KERN_ERR "slub_debug option '%c' " 975 "unknown. skipped\n",*str); 976 } 977 } 978 979check_slabs: 980 if (*str == ',') 981 slub_debug_slabs = str + 1; 982out: 983 return 1; 984} 985 986__setup("slub_debug", setup_slub_debug); 987 988static void kmem_cache_open_debug_check(struct kmem_cache *s) 989{ 990 /* 991 * The page->offset field is only 16 bit wide. This is an offset 992 * in units of words from the beginning of an object. If the slab 993 * size is bigger then we cannot move the free pointer behind the 994 * object anymore. 995 * 996 * On 32 bit platforms the limit is 256k. On 64bit platforms 997 * the limit is 512k. 998 * 999 * Debugging or ctor may create a need to move the free 1000 * pointer. Fail if this happens. 1001 */ 1002 if (s->objsize >= 65535 * sizeof(void *)) { 1003 BUG_ON(s->flags & (SLAB_RED_ZONE | SLAB_POISON | 1004 SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); 1005 BUG_ON(s->ctor); 1006 } 1007 else 1008 /* 1009 * Enable debugging if selected on the kernel commandline. 1010 */ 1011 if (slub_debug && (!slub_debug_slabs || 1012 strncmp(slub_debug_slabs, s->name, 1013 strlen(slub_debug_slabs)) == 0)) 1014 s->flags |= slub_debug; 1015} 1016#else 1017static inline void setup_object_debug(struct kmem_cache *s, 1018 struct page *page, void *object) {} 1019 1020static inline int alloc_debug_processing(struct kmem_cache *s, 1021 struct page *page, void *object, void *addr) { return 0; } 1022 1023static inline int free_debug_processing(struct kmem_cache *s, 1024 struct page *page, void *object, void *addr) { return 0; } 1025 1026static inline int slab_pad_check(struct kmem_cache *s, struct page *page) 1027 { return 1; } 1028static inline int check_object(struct kmem_cache *s, struct page *page, 1029 void *object, int active) { return 1; } 1030static inline void add_full(struct kmem_cache_node *n, struct page *page) {} 1031static inline void kmem_cache_open_debug_check(struct kmem_cache *s) {} 1032#define slub_debug 0 1033#endif 1034/* 1035 * Slab allocation and freeing 1036 */ 1037static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) 1038{ 1039 struct page * page; 1040 int pages = 1 << s->order; 1041 1042 if (s->order) 1043 flags |= __GFP_COMP; 1044 1045 if (s->flags & SLAB_CACHE_DMA) 1046 flags |= SLUB_DMA; 1047 1048 if (node == -1) 1049 page = alloc_pages(flags, s->order); 1050 else 1051 page = alloc_pages_node(node, flags, s->order); 1052 1053 if (!page) 1054 return NULL; 1055 1056 mod_zone_page_state(page_zone(page), 1057 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 
1058 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1059 pages); 1060 1061 return page; 1062} 1063 1064static void setup_object(struct kmem_cache *s, struct page *page, 1065 void *object) 1066{ 1067 setup_object_debug(s, page, object); 1068 if (unlikely(s->ctor)) 1069 s->ctor(object, s, 0); 1070} 1071 1072static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) 1073{ 1074 struct page *page; 1075 struct kmem_cache_node *n; 1076 void *start; 1077 void *end; 1078 void *last; 1079 void *p; 1080 1081 BUG_ON(flags & ~(GFP_DMA | __GFP_ZERO | GFP_LEVEL_MASK)); 1082 1083 if (flags & __GFP_WAIT) 1084 local_irq_enable(); 1085 1086 page = allocate_slab(s, flags & GFP_LEVEL_MASK, node); 1087 if (!page) 1088 goto out; 1089 1090 n = get_node(s, page_to_nid(page)); 1091 if (n) 1092 atomic_long_inc(&n->nr_slabs); 1093 page->offset = s->offset / sizeof(void *); 1094 page->slab = s; 1095 page->flags |= 1 << PG_slab; 1096 if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | 1097 SLAB_STORE_USER | SLAB_TRACE)) 1098 SetSlabDebug(page); 1099 1100 start = page_address(page); 1101 end = start + s->objects * s->size; 1102 1103 if (unlikely(s->flags & SLAB_POISON)) 1104 memset(start, POISON_INUSE, PAGE_SIZE << s->order); 1105 1106 last = start; 1107 for_each_object(p, s, start) { 1108 setup_object(s, page, last); 1109 set_freepointer(s, last, p); 1110 last = p; 1111 } 1112 setup_object(s, page, last); 1113 set_freepointer(s, last, NULL); 1114 1115 page->freelist = start; 1116 page->lockless_freelist = NULL; 1117 page->inuse = 0; 1118out: 1119 if (flags & __GFP_WAIT) 1120 local_irq_disable(); 1121 return page; 1122} 1123 1124static void __free_slab(struct kmem_cache *s, struct page *page) 1125{ 1126 int pages = 1 << s->order; 1127 1128 if (unlikely(SlabDebug(page))) { 1129 void *p; 1130 1131 slab_pad_check(s, page); 1132 for_each_object(p, s, page_address(page)) 1133 check_object(s, page, p, 0); 1134 } 1135 1136 mod_zone_page_state(page_zone(page), 1137 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 
1138 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1139 - pages); 1140 1141 page->mapping = NULL; 1142 __free_pages(page, s->order); 1143} 1144 1145static void rcu_free_slab(struct rcu_head *h) 1146{ 1147 struct page *page; 1148 1149 page = container_of((struct list_head *)h, struct page, lru); 1150 __free_slab(page->slab, page); 1151} 1152 1153static void free_slab(struct kmem_cache *s, struct page *page) 1154{ 1155 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { 1156 /* 1157 * RCU free overloads the RCU head over the LRU 1158 */ 1159 struct rcu_head *head = (void *)&page->lru; 1160 1161 call_rcu(head, rcu_free_slab); 1162 } else 1163 __free_slab(s, page); 1164} 1165 1166static void discard_slab(struct kmem_cache *s, struct page *page) 1167{ 1168 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1169 1170 atomic_long_dec(&n->nr_slabs); 1171 reset_page_mapcount(page); 1172 ClearSlabDebug(page); 1173 __ClearPageSlab(page); 1174 free_slab(s, page); 1175} 1176 1177/* 1178 * Per slab locking using the pagelock 1179 */ 1180static __always_inline void slab_lock(struct page *page) 1181{ 1182 bit_spin_lock(PG_locked, &page->flags); 1183} 1184 1185static __always_inline void slab_unlock(struct page *page) 1186{ 1187 bit_spin_unlock(PG_locked, &page->flags); 1188} 1189 1190static __always_inline int slab_trylock(struct page *page) 1191{ 1192 int rc = 1; 1193 1194 rc = bit_spin_trylock(PG_locked, &page->flags); 1195 return rc; 1196} 1197 1198/* 1199 * Management of partially allocated slabs 1200 */ 1201static void add_partial_tail(struct kmem_cache_node *n, struct page *page) 1202{ 1203 spin_lock(&n->list_lock); 1204 n->nr_partial++; 1205 list_add_tail(&page->lru, &n->partial); 1206 spin_unlock(&n->list_lock); 1207} 1208 1209static void add_partial(struct kmem_cache_node *n, struct page *page) 1210{ 1211 spin_lock(&n->list_lock); 1212 n->nr_partial++; 1213 list_add(&page->lru, &n->partial); 1214 spin_unlock(&n->list_lock); 1215} 1216 1217static void remove_partial(struct kmem_cache *s, 1218 struct page *page) 1219{ 1220 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1221 1222 spin_lock(&n->list_lock); 1223 list_del(&page->lru); 1224 n->nr_partial--; 1225 spin_unlock(&n->list_lock); 1226} 1227 1228/* 1229 * Lock slab and remove from the partial list. 1230 * 1231 * Must hold list_lock. 1232 */ 1233static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page) 1234{ 1235 if (slab_trylock(page)) { 1236 list_del(&page->lru); 1237 n->nr_partial--; 1238 SetSlabFrozen(page); 1239 return 1; 1240 } 1241 return 0; 1242} 1243 1244/* 1245 * Try to allocate a partial slab from a specific node. 1246 */ 1247static struct page *get_partial_node(struct kmem_cache_node *n) 1248{ 1249 struct page *page; 1250 1251 /* 1252 * Racy check. If we mistakenly see no partial slabs then we 1253 * just allocate an empty slab. If we mistakenly try to get a 1254 * partial slab and there is none available then get_partials() 1255 * will return NULL. 1256 */ 1257 if (!n || !n->nr_partial) 1258 return NULL; 1259 1260 spin_lock(&n->list_lock); 1261 list_for_each_entry(page, &n->partial, lru) 1262 if (lock_and_freeze_slab(n, page)) 1263 goto out; 1264 page = NULL; 1265out: 1266 spin_unlock(&n->list_lock); 1267 return page; 1268} 1269 1270/* 1271 * Get a page from somewhere. Search in increasing NUMA distances. 
1272 */ 1273static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) 1274{ 1275#ifdef CONFIG_NUMA 1276 struct zonelist *zonelist; 1277 struct zone **z; 1278 struct page *page; 1279 1280 /* 1281 * The defrag ratio allows a configuration of the tradeoffs between 1282 * inter node defragmentation and node local allocations. A lower 1283 * defrag_ratio increases the tendency to do local allocations 1284 * instead of attempting to obtain partial slabs from other nodes. 1285 * 1286 * If the defrag_ratio is set to 0 then kmalloc() always 1287 * returns node local objects. If the ratio is higher then kmalloc() 1288 * may return off node objects because partial slabs are obtained 1289 * from other nodes and filled up. 1290 * 1291 * If /sys/slab/xx/defrag_ratio is set to 100 (which makes 1292 * defrag_ratio = 1000) then every (well almost) allocation will 1293 * first attempt to defrag slab caches on other nodes. This means 1294 * scanning over all nodes to look for partial slabs which may be 1295 * expensive if we do it every time we are trying to find a slab 1296 * with available objects. 1297 */ 1298 if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio) 1299 return NULL; 1300 1301 zonelist = &NODE_DATA(slab_node(current->mempolicy)) 1302 ->node_zonelists[gfp_zone(flags)]; 1303 for (z = zonelist->zones; *z; z++) { 1304 struct kmem_cache_node *n; 1305 1306 n = get_node(s, zone_to_nid(*z)); 1307 1308 if (n && cpuset_zone_allowed_hardwall(*z, flags) && 1309 n->nr_partial > MIN_PARTIAL) { 1310 page = get_partial_node(n); 1311 if (page) 1312 return page; 1313 } 1314 } 1315#endif 1316 return NULL; 1317} 1318 1319/* 1320 * Get a partial page, lock it and return it. 1321 */ 1322static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) 1323{ 1324 struct page *page; 1325 int searchnode = (node == -1) ? numa_node_id() : node; 1326 1327 page = get_partial_node(get_node(s, searchnode)); 1328 if (page || (flags & __GFP_THISNODE)) 1329 return page; 1330 1331 return get_any_partial(s, flags); 1332} 1333 1334/* 1335 * Move a page back to the lists. 1336 * 1337 * Must be called with the slab lock held. 1338 * 1339 * On exit the slab lock will have been dropped. 1340 */ 1341static void unfreeze_slab(struct kmem_cache *s, struct page *page) 1342{ 1343 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1344 1345 ClearSlabFrozen(page); 1346 if (page->inuse) { 1347 1348 if (page->freelist) 1349 add_partial(n, page); 1350 else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER)) 1351 add_full(n, page); 1352 slab_unlock(page); 1353 1354 } else { 1355 if (n->nr_partial < MIN_PARTIAL) { 1356 /* 1357 * Adding an empty slab to the partial slabs in order 1358 * to avoid page allocator overhead. This slab needs 1359 * to come after the other slabs with objects in 1360 * order to fill them up. That way the size of the 1361 * partial list stays small. kmem_cache_shrink can 1362 * reclaim empty slabs from the partial list. 1363 */ 1364 add_partial_tail(n, page); 1365 slab_unlock(page); 1366 } else { 1367 slab_unlock(page); 1368 discard_slab(s, page); 1369 } 1370 } 1371} 1372 1373/* 1374 * Remove the cpu slab 1375 */ 1376static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu) 1377{ 1378 /* 1379 * Merge cpu freelist into freelist. Typically we get here 1380 * because both freelists are empty. So this is unlikely 1381 * to occur. 
1382 */ 1383 while (unlikely(page->lockless_freelist)) { 1384 void **object; 1385 1386 /* Retrieve object from cpu_freelist */ 1387 object = page->lockless_freelist; 1388 page->lockless_freelist = page->lockless_freelist[page->offset]; 1389 1390 /* And put onto the regular freelist */ 1391 object[page->offset] = page->freelist; 1392 page->freelist = object; 1393 page->inuse--; 1394 } 1395 s->cpu_slab[cpu] = NULL; 1396 unfreeze_slab(s, page); 1397} 1398 1399static inline void flush_slab(struct kmem_cache *s, struct page *page, int cpu) 1400{ 1401 slab_lock(page); 1402 deactivate_slab(s, page, cpu); 1403} 1404 1405/* 1406 * Flush cpu slab. 1407 * Called from IPI handler with interrupts disabled. 1408 */ 1409static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) 1410{ 1411 struct page *page = s->cpu_slab[cpu]; 1412 1413 if (likely(page)) 1414 flush_slab(s, page, cpu); 1415} 1416 1417static void flush_cpu_slab(void *d) 1418{ 1419 struct kmem_cache *s = d; 1420 int cpu = smp_processor_id(); 1421 1422 __flush_cpu_slab(s, cpu); 1423} 1424 1425static void flush_all(struct kmem_cache *s) 1426{ 1427#ifdef CONFIG_SMP 1428 on_each_cpu(flush_cpu_slab, s, 1, 1); 1429#else 1430 unsigned long flags; 1431 1432 local_irq_save(flags); 1433 flush_cpu_slab(s); 1434 local_irq_restore(flags); 1435#endif 1436} 1437 1438/* 1439 * Slow path. The lockless freelist is empty or we need to perform 1440 * debugging duties. 1441 * 1442 * Interrupts are disabled. 1443 * 1444 * Processing is still very fast if new objects have been freed to the 1445 * regular freelist. In that case we simply take over the regular freelist 1446 * as the lockless freelist and zap the regular freelist. 1447 * 1448 * If that is not working then we fall back to the partial lists. We take the 1449 * first element of the freelist as the object to allocate now and move the 1450 * rest of the freelist to the lockless freelist. 1451 * 1452 * And if we were unable to get a new slab from the partial slab lists then 1453 * we need to allocate a new slab. This is slowest path since we may sleep. 1454 */ 1455static void *__slab_alloc(struct kmem_cache *s, 1456 gfp_t gfpflags, int node, void *addr, struct page *page) 1457{ 1458 void **object; 1459 int cpu = smp_processor_id(); 1460 1461 if (!page) 1462 goto new_slab; 1463 1464 slab_lock(page); 1465 if (unlikely(node != -1 && page_to_nid(page) != node)) 1466 goto another_slab; 1467load_freelist: 1468 object = page->freelist; 1469 if (unlikely(!object)) 1470 goto another_slab; 1471 if (unlikely(SlabDebug(page))) 1472 goto debug; 1473 1474 object = page->freelist; 1475 page->lockless_freelist = object[page->offset]; 1476 page->inuse = s->objects; 1477 page->freelist = NULL; 1478 slab_unlock(page); 1479 return object; 1480 1481another_slab: 1482 deactivate_slab(s, page, cpu); 1483 1484new_slab: 1485 page = get_partial(s, gfpflags, node); 1486 if (page) { 1487 s->cpu_slab[cpu] = page; 1488 goto load_freelist; 1489 } 1490 1491 page = new_slab(s, gfpflags, node); 1492 if (page) { 1493 cpu = smp_processor_id(); 1494 if (s->cpu_slab[cpu]) { 1495 /* 1496 * Someone else populated the cpu_slab while we 1497 * enabled interrupts, or we have gotten scheduled 1498 * on another cpu. The page may not be on the 1499 * requested node even if __GFP_THISNODE was 1500 * specified. So we need to recheck. 
1501 */ 1502 if (node == -1 || 1503 page_to_nid(s->cpu_slab[cpu]) == node) { 1504 /* 1505 * Current cpu slab is acceptable and we 1506 * want the current one since it is cache hot 1507 */ 1508 discard_slab(s, page); 1509 page = s->cpu_slab[cpu]; 1510 slab_lock(page); 1511 goto load_freelist; 1512 } 1513 /* New slab does not fit our expectations */ 1514 flush_slab(s, s->cpu_slab[cpu], cpu); 1515 } 1516 slab_lock(page); 1517 SetSlabFrozen(page); 1518 s->cpu_slab[cpu] = page; 1519 goto load_freelist; 1520 } 1521 return NULL; 1522debug: 1523 object = page->freelist; 1524 if (!alloc_debug_processing(s, page, object, addr)) 1525 goto another_slab; 1526 1527 page->inuse++; 1528 page->freelist = object[page->offset]; 1529 slab_unlock(page); 1530 return object; 1531} 1532 1533/* 1534 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) 1535 * have the fastpath folded into their functions. So no function call 1536 * overhead for requests that can be satisfied on the fastpath. 1537 * 1538 * The fastpath works by first checking if the lockless freelist can be used. 1539 * If not then __slab_alloc is called for slow processing. 1540 * 1541 * Otherwise we can simply pick the next object from the lockless free list. 1542 */ 1543static void __always_inline *slab_alloc(struct kmem_cache *s, 1544 gfp_t gfpflags, int node, void *addr) 1545{ 1546 struct page *page; 1547 void **object; 1548 unsigned long flags; 1549 1550 local_irq_save(flags); 1551 page = s->cpu_slab[smp_processor_id()]; 1552 if (unlikely(!page || !page->lockless_freelist || 1553 (node != -1 && page_to_nid(page) != node))) 1554 1555 object = __slab_alloc(s, gfpflags, node, addr, page); 1556 1557 else { 1558 object = page->lockless_freelist; 1559 page->lockless_freelist = object[page->offset]; 1560 } 1561 local_irq_restore(flags); 1562 1563 if (unlikely((gfpflags & __GFP_ZERO) && object)) 1564 memset(object, 0, s->objsize); 1565 1566 return object; 1567} 1568 1569void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) 1570{ 1571 return slab_alloc(s, gfpflags, -1, __builtin_return_address(0)); 1572} 1573EXPORT_SYMBOL(kmem_cache_alloc); 1574 1575#ifdef CONFIG_NUMA 1576void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) 1577{ 1578 return slab_alloc(s, gfpflags, node, __builtin_return_address(0)); 1579} 1580EXPORT_SYMBOL(kmem_cache_alloc_node); 1581#endif 1582 1583/* 1584 * Slow path handling. This may still be called frequently since objects 1585 * have a longer lifetime than the cpu slabs in most processing loads. 1586 * 1587 * So we still attempt to reduce cache line usage. Just take the slab 1588 * lock and free the item. If there is no additional partial page 1589 * handling required then we can return immediately. 1590 */ 1591static void __slab_free(struct kmem_cache *s, struct page *page, 1592 void *x, void *addr) 1593{ 1594 void *prior; 1595 void **object = (void *)x; 1596 1597 slab_lock(page); 1598 1599 if (unlikely(SlabDebug(page))) 1600 goto debug; 1601checks_ok: 1602 prior = object[page->offset] = page->freelist; 1603 page->freelist = object; 1604 page->inuse--; 1605 1606 if (unlikely(SlabFrozen(page))) 1607 goto out_unlock; 1608 1609 if (unlikely(!page->inuse)) 1610 goto slab_empty; 1611 1612 /* 1613 * Objects left in the slab. If it 1614 * was not on the partial list before 1615 * then add it.
1616 */ 1617 if (unlikely(!prior)) 1618 add_partial(get_node(s, page_to_nid(page)), page); 1619 1620out_unlock: 1621 slab_unlock(page); 1622 return; 1623 1624slab_empty: 1625 if (prior) 1626 /* 1627 * Slab still on the partial list. 1628 */ 1629 remove_partial(s, page); 1630 1631 slab_unlock(page); 1632 discard_slab(s, page); 1633 return; 1634 1635debug: 1636 if (!free_debug_processing(s, page, x, addr)) 1637 goto out_unlock; 1638 goto checks_ok; 1639} 1640 1641/* 1642 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that 1643 * can perform fastpath freeing without additional function calls. 1644 * 1645 * The fastpath is only possible if we are freeing to the current cpu slab 1646 * of this processor. This is typically the case if we have just allocated 1647 * the item before. 1648 * 1649 * If fastpath is not possible then fall back to __slab_free where we deal 1650 * with all sorts of special processing. 1651 */ 1652static void __always_inline slab_free(struct kmem_cache *s, 1653 struct page *page, void *x, void *addr) 1654{ 1655 void **object = (void *)x; 1656 unsigned long flags; 1657 1658 local_irq_save(flags); 1659 if (likely(page == s->cpu_slab[smp_processor_id()] && 1660 !SlabDebug(page))) { 1661 object[page->offset] = page->lockless_freelist; 1662 page->lockless_freelist = object; 1663 } else 1664 __slab_free(s, page, x, addr); 1665 1666 local_irq_restore(flags); 1667} 1668 1669void kmem_cache_free(struct kmem_cache *s, void *x) 1670{ 1671 struct page *page; 1672 1673 page = virt_to_head_page(x); 1674 1675 slab_free(s, page, x, __builtin_return_address(0)); 1676} 1677EXPORT_SYMBOL(kmem_cache_free); 1678 1679/* Figure out on which slab object the object resides */ 1680static struct page *get_object_page(const void *x) 1681{ 1682 struct page *page = virt_to_head_page(x); 1683 1684 if (!PageSlab(page)) 1685 return NULL; 1686 1687 return page; 1688} 1689 1690/* 1691 * Object placement in a slab is made very easy because we always start at 1692 * offset 0. If we tune the size of the object to the alignment then we can 1693 * get the required alignment by putting one properly sized object after 1694 * another. 1695 * 1696 * Notice that the allocation order determines the sizes of the per cpu 1697 * caches. Each processor always has one slab available for allocations. 1698 * Increasing the allocation order reduces the number of times that slabs 1699 * must be moved on and off the partial lists and is therefore a factor in 1700 * locking overhead. 1701 */ 1702 1703/* 1704 * Minimum / Maximum order of slab pages. This influences locking overhead 1705 * and slab fragmentation. A higher order reduces the number of partial slabs 1706 * and increases the number of allocations possible without having to 1707 * take the list_lock. 1708 */ 1709static int slub_min_order; 1710static int slub_max_order = DEFAULT_MAX_ORDER; 1711static int slub_min_objects = DEFAULT_MIN_OBJECTS; 1712 1713/* 1714 * Merge control. If this is set then no merging of slab caches will occur. 1715 * (Could be removed. This was introduced to pacify the merge skeptics.) 1716 */ 1717static int slub_nomerge; 1718 1719/* 1720 * Calculate the order of allocation given a slab object size. 1721 * 1722 * The order of allocation has significant impact on performance and other 1723 * system components. Generally order 0 allocations should be preferred since 1724 * order 0 does not cause fragmentation in the page allocator. Larger objects can 1725 * be problematic to put into order 0 slabs because there may be too much 1726 * unused space left. We go to a higher order if more than 1/8th of the slab 1727 * would be wasted. 1728 * 1729 * In order to reach satisfactory performance we must ensure that a minimum 1730 * number of objects is in one slab. Otherwise we may generate too much 1731 * activity on the partial lists which requires taking the list_lock. This is 1732 * less a concern for large slabs though which are rarely used. 1733 * 1734 * slub_max_order specifies the order where we begin to stop considering the 1735 * number of objects in a slab as critical. If we reach slub_max_order then 1736 * we try to keep the page order as low as possible. So we accept more waste 1737 * of space in favor of a small page order. 1738 * 1739 * Higher order allocations also allow the placement of more objects in a 1740 * slab and thereby reduce object handling overhead. If the user has 1741 * requested a higher minimum order then we start with that one instead of 1742 * the smallest order which will fit the object. 1743 */
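/*
 * Rough worked example (illustrative numbers only; assumes 4 KiB pages and
 * the small-page defaults slub_min_objects = 4, slub_max_order = 1 and
 * fract_leftover = 8): for a 700 byte object an order 0 slab holds
 * 5 objects and leaves 4096 % 700 = 596 bytes unused, which exceeds
 * 4096 / 8 = 512, so the 1/8th waste test fails. An order 1 slab holds
 * 11 objects and wastes only 8192 % 700 = 492 bytes, well under
 * 8192 / 8 = 1024, so slab_order() settles on order 1.
 */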
1744static inline int slab_order(int size, int min_objects, 1745 int max_order, int fract_leftover) 1746{ 1747 int order; 1748 int rem; 1749 int min_order = slub_min_order; 1750 1751 /* 1752 * If we would create too many objects per slab then reduce 1753 * the slab order even if it goes below slub_min_order. 1754 */ 1755 while (min_order > 0 && 1756 (PAGE_SIZE << min_order) >= MAX_OBJECTS_PER_SLAB * size) 1757 min_order--; 1758 1759 for (order = max(min_order, 1760 fls(min_objects * size - 1) - PAGE_SHIFT); 1761 order <= max_order; order++) { 1762 1763 unsigned long slab_size = PAGE_SIZE << order; 1764 1765 if (slab_size < min_objects * size) 1766 continue; 1767 1768 rem = slab_size % size; 1769 1770 if (rem <= slab_size / fract_leftover) 1771 break; 1772 1773 /* If the next size is too high then exit now */ 1774 if (slab_size * 2 >= MAX_OBJECTS_PER_SLAB * size) 1775 break; 1776 } 1777 1778 return order; 1779} 1780 1781static inline int calculate_order(int size) 1782{ 1783 int order; 1784 int min_objects; 1785 int fraction; 1786 1787 /* 1788 * Attempt to find best configuration for a slab. This 1789 * works by first attempting to generate a layout with 1790 * the best configuration and backing off gradually. 1791 * 1792 * First we reduce the acceptable waste in a slab. Then 1793 * we reduce the minimum objects required in a slab. 1794 */ 1795 min_objects = slub_min_objects; 1796 while (min_objects > 1) { 1797 fraction = 8; 1798 while (fraction >= 4) { 1799 order = slab_order(size, min_objects, 1800 slub_max_order, fraction); 1801 if (order <= slub_max_order) 1802 return order; 1803 fraction /= 2; 1804 } 1805 min_objects /= 2; 1806 } 1807 1808 /* 1809 * We were unable to place multiple objects in a slab. Now 1810 * let's see if we can place a single object there. 1811 */ 1812 order = slab_order(size, 1, slub_max_order, 1); 1813 if (order <= slub_max_order) 1814 return order; 1815 1816 /* 1817 * Doh this slab cannot be placed using slub_max_order. 1818 */ 1819 order = slab_order(size, 1, MAX_ORDER, 1); 1820 if (order <= MAX_ORDER) 1821 return order; 1822 return -ENOSYS; 1823} 1824 1825/* 1826 * Figure out what the alignment of the objects will be.
1827 */ 1828static unsigned long calculate_alignment(unsigned long flags, 1829 unsigned long align, unsigned long size) 1830{ 1831 /* 1832 * If the user wants hardware cache aligned objects then 1833 * follow that suggestion if the object is sufficiently 1834 * large. 1835 * 1836 * The hardware cache alignment cannot override the 1837 * specified alignment though. If that is greater 1838 * then use it. 1839 */ 1840 if ((flags & SLAB_HWCACHE_ALIGN) && 1841 size > cache_line_size() / 2) 1842 return max_t(unsigned long, align, cache_line_size()); 1843 1844 if (align < ARCH_SLAB_MINALIGN) 1845 return ARCH_SLAB_MINALIGN; 1846 1847 return ALIGN(align, sizeof(void *)); 1848} 1849 1850static void init_kmem_cache_node(struct kmem_cache_node *n) 1851{ 1852 n->nr_partial = 0; 1853 atomic_long_set(&n->nr_slabs, 0); 1854 spin_lock_init(&n->list_lock); 1855 INIT_LIST_HEAD(&n->partial); 1856 INIT_LIST_HEAD(&n->full); 1857} 1858 1859#ifdef CONFIG_NUMA 1860/* 1861 * No kmalloc_node yet so do it by hand. We know that this is the first 1862 * slab on the node for this slabcache. There are no concurrent accesses 1863 * possible. 1864 * 1865 * Note that this function only works on the kmalloc_node_cache 1866 * when allocating for the kmalloc_node_cache. 1867 */ 1868static struct kmem_cache_node * __init early_kmem_cache_node_alloc(gfp_t gfpflags, 1869 int node) 1870{ 1871 struct page *page; 1872 struct kmem_cache_node *n; 1873 1874 BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node)); 1875 1876 page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node); 1877 1878 BUG_ON(!page); 1879 n = page->freelist; 1880 BUG_ON(!n); 1881 page->freelist = get_freepointer(kmalloc_caches, n); 1882 page->inuse++; 1883 kmalloc_caches->node[node] = n; 1884 init_object(kmalloc_caches, n, 1); 1885 init_tracking(kmalloc_caches, n); 1886 init_kmem_cache_node(n); 1887 atomic_long_inc(&n->nr_slabs); 1888 add_partial(n, page); 1889 1890 /* 1891 * new_slab() disables interrupts. If we do not reenable interrupts here 1892 * then bootup would continue with interrupts disabled. 1893 */ 1894 local_irq_enable(); 1895 return n; 1896} 1897 1898static void free_kmem_cache_nodes(struct kmem_cache *s) 1899{ 1900 int node; 1901 1902 for_each_online_node(node) { 1903 struct kmem_cache_node *n = s->node[node]; 1904 if (n && n != &s->local_node) 1905 kmem_cache_free(kmalloc_caches, n); 1906 s->node[node] = NULL; 1907 } 1908} 1909 1910static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) 1911{ 1912 int node; 1913 int local_node; 1914 1915 if (slab_state >= UP) 1916 local_node = page_to_nid(virt_to_page(s)); 1917 else 1918 local_node = 0; 1919 1920 for_each_online_node(node) { 1921 struct kmem_cache_node *n; 1922 1923 if (local_node == node) 1924 n = &s->local_node; 1925 else { 1926 if (slab_state == DOWN) { 1927 n = early_kmem_cache_node_alloc(gfpflags, 1928 node); 1929 continue; 1930 } 1931 n = kmem_cache_alloc_node(kmalloc_caches, 1932 gfpflags, node); 1933 1934 if (!n) { 1935 free_kmem_cache_nodes(s); 1936 return 0; 1937 } 1938 1939 } 1940 s->node[node] = n; 1941 init_kmem_cache_node(n); 1942 } 1943 return 1; 1944} 1945#else 1946static void free_kmem_cache_nodes(struct kmem_cache *s) 1947{ 1948} 1949 1950static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) 1951{ 1952 init_kmem_cache_node(&s->local_node); 1953 return 1; 1954} 1955#endif 1956 1957/* 1958 * calculate_sizes() determines the order and the distribution of data within 1959 * a slab object. 1960 */
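/*
 * Brief illustration (hypothetical values; assumes a 64-bit machine,
 * 4 KiB pages and no debug flags): for a cache with objsize 192, no
 * constructor and no SLAB_DESTROY_BY_RCU the free pointer stays in the
 * first word of each free object, so s->offset remains 0 and s->inuse
 * and s->size both stay at 192; calculate_order(192) then picks order 0
 * with 4096 / 192 = 21 objects per slab (the 64 leftover bytes are below
 * the 4096 / 8 = 512 waste limit). Debug options grow the object as
 * described in the layout comment above check_pad_bytes().
 */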
1961static int calculate_sizes(struct kmem_cache *s) 1962{ 1963 unsigned long flags = s->flags; 1964 unsigned long size = s->objsize; 1965 unsigned long align = s->align; 1966 1967 /* 1968 * Determine if we can poison the object itself. If the user of 1969 * the slab may touch the object after free or before allocation 1970 * then we should never poison the object itself. 1971 */ 1972 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) && 1973 !s->ctor) 1974 s->flags |= __OBJECT_POISON; 1975 else 1976 s->flags &= ~__OBJECT_POISON; 1977 1978 /* 1979 * Round up object size to the next word boundary. We can only 1980 * place the free pointer at word boundaries and this determines 1981 * the possible location of the free pointer. 1982 */ 1983 size = ALIGN(size, sizeof(void *)); 1984 1985#ifdef CONFIG_SLUB_DEBUG 1986 /* 1987 * If we are Redzoning then check if there is some space between the 1988 * end of the object and the free pointer. If not then add an 1989 * additional word to have some bytes to store Redzone information. 1990 */ 1991 if ((flags & SLAB_RED_ZONE) && size == s->objsize) 1992 size += sizeof(void *); 1993#endif 1994 1995 /* 1996 * With that we have determined the number of bytes in actual use 1997 * by the object. This is the potential offset to the free pointer. 1998 */ 1999 s->inuse = size; 2000 2001 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || 2002 s->ctor)) { 2003 /* 2004 * Relocate free pointer after the object if it is not 2005 * permitted to overwrite the first word of the object on 2006 * kmem_cache_free. 2007 * 2008 * This is the case if we do RCU, have a constructor or 2009 * destructor or are poisoning the objects. 2010 */ 2011 s->offset = size; 2012 size += sizeof(void *); 2013 } 2014 2015#ifdef CONFIG_SLUB_DEBUG 2016 if (flags & SLAB_STORE_USER) 2017 /* 2018 * Need to store information about allocs and frees after 2019 * the object. 2020 */ 2021 size += 2 * sizeof(struct track); 2022 2023 if (flags & SLAB_RED_ZONE) 2024 /* 2025 * Add some empty padding so that we can catch 2026 * overwrites from earlier objects rather than let 2027 * tracking information or the free pointer be 2028 * corrupted if a user writes before the start 2029 * of the object. 2030 */ 2031 size += sizeof(void *); 2032#endif 2033 2034 /* 2035 * Determine the alignment based on various parameters that the 2036 * user specified and the dynamic determination of cache line size 2037 * on bootup. 2038 */ 2039 align = calculate_alignment(flags, align, s->objsize); 2040 2041 /* 2042 * SLUB stores one object immediately after another beginning from 2043 * offset 0. In order to align the objects we have to simply size 2044 * each object to conform to the alignment. 2045 */ 2046 size = ALIGN(size, align); 2047 s->size = size; 2048 2049 s->order = calculate_order(size); 2050 if (s->order < 0) 2051 return 0; 2052 2053 /* 2054 * Determine the number of objects per slab 2055 */ 2056 s->objects = (PAGE_SIZE << s->order) / size; 2057 2058 /* 2059 * Verify that the number of objects is within permitted limits. 2060 * The page->inuse field is only 16 bit wide! So we cannot have 2061 * more than 64k objects per slab.
2062 */ 2063 if (!s->objects || s->objects > MAX_OBJECTS_PER_SLAB) 2064 return 0; 2065 return 1; 2066 2067} 2068 2069static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, 2070 const char *name, size_t size, 2071 size_t align, unsigned long flags, 2072 void (*ctor)(void *, struct kmem_cache *, unsigned long)) 2073{ 2074 memset(s, 0, kmem_size); 2075 s->name = name; 2076 s->ctor = ctor; 2077 s->objsize = size; 2078 s->flags = flags; 2079 s->align = align; 2080 kmem_cache_open_debug_check(s); 2081 2082 if (!calculate_sizes(s)) 2083 goto error; 2084 2085 s->refcount = 1; 2086#ifdef CONFIG_NUMA 2087 s->defrag_ratio = 100; 2088#endif 2089 2090 if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) 2091 return 1; 2092error: 2093 if (flags & SLAB_PANIC) 2094 panic("Cannot create slab %s size=%lu realsize=%u " 2095 "order=%u offset=%u flags=%lx\n", 2096 s->name, (unsigned long)size, s->size, s->order, 2097 s->offset, flags); 2098 return 0; 2099} 2100 2101/* 2102 * Check if a given pointer is valid 2103 */ 2104int kmem_ptr_validate(struct kmem_cache *s, const void *object) 2105{ 2106 struct page * page; 2107 2108 page = get_object_page(object); 2109 2110 if (!page || s != page->slab) 2111 /* No slab or wrong slab */ 2112 return 0; 2113 2114 if (!check_valid_pointer(s, page, object)) 2115 return 0; 2116 2117 /* 2118 * We could also check if the object is on the slabs freelist. 2119 * But this would be too expensive and it seems that the main 2120 * purpose of kmem_ptr_valid is to check if the object belongs 2121 * to a certain slab. 2122 */ 2123 return 1; 2124} 2125EXPORT_SYMBOL(kmem_ptr_validate); 2126 2127/* 2128 * Determine the size of a slab object 2129 */ 2130unsigned int kmem_cache_size(struct kmem_cache *s) 2131{ 2132 return s->objsize; 2133} 2134EXPORT_SYMBOL(kmem_cache_size); 2135 2136const char *kmem_cache_name(struct kmem_cache *s) 2137{ 2138 return s->name; 2139} 2140EXPORT_SYMBOL(kmem_cache_name); 2141 2142/* 2143 * Attempt to free all slabs on a node. Return the number of slabs we 2144 * were unable to free. 2145 */ 2146static int free_list(struct kmem_cache *s, struct kmem_cache_node *n, 2147 struct list_head *list) 2148{ 2149 int slabs_inuse = 0; 2150 unsigned long flags; 2151 struct page *page, *h; 2152 2153 spin_lock_irqsave(&n->list_lock, flags); 2154 list_for_each_entry_safe(page, h, list, lru) 2155 if (!page->inuse) { 2156 list_del(&page->lru); 2157 discard_slab(s, page); 2158 } else 2159 slabs_inuse++; 2160 spin_unlock_irqrestore(&n->list_lock, flags); 2161 return slabs_inuse; 2162} 2163 2164/* 2165 * Release all resources used by a slab cache. 
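 *
 * Returns 1 if slabs with objects in use remain and the cache cannot
 * be released, 0 on success.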
2166 */ 2167static inline int kmem_cache_close(struct kmem_cache *s) 2168{ 2169 int node; 2170 2171 flush_all(s); 2172 2173 /* Attempt to free all objects */ 2174 for_each_online_node(node) { 2175 struct kmem_cache_node *n = get_node(s, node); 2176 2177 n->nr_partial -= free_list(s, n, &n->partial); 2178 if (atomic_long_read(&n->nr_slabs)) 2179 return 1; 2180 } 2181 free_kmem_cache_nodes(s); 2182 return 0; 2183} 2184 2185/* 2186 * Close a cache and release the kmem_cache structure 2187 * (must be used for caches created using kmem_cache_create) 2188 */ 2189void kmem_cache_destroy(struct kmem_cache *s) 2190{ 2191 down_write(&slub_lock); 2192 s->refcount--; 2193 if (!s->refcount) { 2194 list_del(&s->list); 2195 if (kmem_cache_close(s)) 2196 WARN_ON(1); 2197 sysfs_slab_remove(s); 2198 kfree(s); 2199 } 2200 up_write(&slub_lock); 2201} 2202EXPORT_SYMBOL(kmem_cache_destroy); 2203 2204/******************************************************************** 2205 * Kmalloc subsystem 2206 *******************************************************************/ 2207 2208struct kmem_cache kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __cacheline_aligned; 2209EXPORT_SYMBOL(kmalloc_caches); 2210 2211#ifdef CONFIG_ZONE_DMA 2212static struct kmem_cache *kmalloc_caches_dma[KMALLOC_SHIFT_HIGH + 1]; 2213#endif 2214 2215static int __init setup_slub_min_order(char *str) 2216{ 2217 get_option (&str, &slub_min_order); 2218 2219 return 1; 2220} 2221 2222__setup("slub_min_order=", setup_slub_min_order); 2223 2224static int __init setup_slub_max_order(char *str) 2225{ 2226 get_option (&str, &slub_max_order); 2227 2228 return 1; 2229} 2230 2231__setup("slub_max_order=", setup_slub_max_order); 2232 2233static int __init setup_slub_min_objects(char *str) 2234{ 2235 get_option (&str, &slub_min_objects); 2236 2237 return 1; 2238} 2239 2240__setup("slub_min_objects=", setup_slub_min_objects); 2241 2242static int __init setup_slub_nomerge(char *str) 2243{ 2244 slub_nomerge = 1; 2245 return 1; 2246} 2247 2248__setup("slub_nomerge", setup_slub_nomerge); 2249 2250static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, 2251 const char *name, int size, gfp_t gfp_flags) 2252{ 2253 unsigned int flags = 0; 2254 2255 if (gfp_flags & SLUB_DMA) 2256 flags = SLAB_CACHE_DMA; 2257 2258 down_write(&slub_lock); 2259 if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, 2260 flags, NULL)) 2261 goto panic; 2262 2263 list_add(&s->list, &slab_caches); 2264 up_write(&slub_lock); 2265 if (sysfs_slab_add(s)) 2266 goto panic; 2267 return s; 2268 2269panic: 2270 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); 2271} 2272 2273#ifdef CONFIG_ZONE_DMA 2274static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) 2275{ 2276 struct kmem_cache *s; 2277 struct kmem_cache *x; 2278 char *text; 2279 size_t realsize; 2280 2281 s = kmalloc_caches_dma[index]; 2282 if (s) 2283 return s; 2284 2285 /* Dynamically create dma cache */ 2286 x = kmalloc(kmem_size, flags & ~SLUB_DMA); 2287 if (!x) 2288 panic("Unable to allocate memory for dma cache\n"); 2289 2290 realsize = kmalloc_caches[index].objsize; 2291 text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", 2292 (unsigned int)realsize); 2293 s = create_kmalloc_cache(x, text, realsize, flags); 2294 down_write(&slub_lock); 2295 if (!kmalloc_caches_dma[index]) { 2296 kmalloc_caches_dma[index] = s; 2297 up_write(&slub_lock); 2298 return s; 2299 } 2300 up_write(&slub_lock); 2301 kmem_cache_destroy(s); 2302 return kmalloc_caches_dma[index]; 2303} 2304#endif 2305 
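/*
 * Illustrative examples of the size to cache mapping done by
 * size_index[] and get_slab() below (assuming the default table, i.e.
 * no patching for a large KMALLOC_MIN_SIZE):
 *
 *	kmalloc(100): 100 <= 192, so size_index[(100 - 1) / 8] == 7
 *		and the 128 byte cache is used.
 *	kmalloc(300): 300 > 192, so fls(300 - 1) == 9 and the
 *		512 byte cache is used.
 */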
2306/* 2307 * Conversion table for small slab sizes / 8 to the index in the 2308 * kmalloc array. This is necessary for slabs < 192 since we have non power 2309 * of two cache sizes there. The size of larger slabs can be determined using 2310 * fls. 2311 */ 2312static s8 size_index[24] = { 2313 3, /* 8 */ 2314 4, /* 16 */ 2315 5, /* 24 */ 2316 5, /* 32 */ 2317 6, /* 40 */ 2318 6, /* 48 */ 2319 6, /* 56 */ 2320 6, /* 64 */ 2321 1, /* 72 */ 2322 1, /* 80 */ 2323 1, /* 88 */ 2324 1, /* 96 */ 2325 7, /* 104 */ 2326 7, /* 112 */ 2327 7, /* 120 */ 2328 7, /* 128 */ 2329 2, /* 136 */ 2330 2, /* 144 */ 2331 2, /* 152 */ 2332 2, /* 160 */ 2333 2, /* 168 */ 2334 2, /* 176 */ 2335 2, /* 184 */ 2336 2 /* 192 */ 2337}; 2338 2339static struct kmem_cache *get_slab(size_t size, gfp_t flags) 2340{ 2341 int index; 2342 2343 if (size <= 192) { 2344 if (!size) 2345 return ZERO_SIZE_PTR; 2346 2347 index = size_index[(size - 1) / 8]; 2348 } else { 2349 if (size > KMALLOC_MAX_SIZE) 2350 return NULL; 2351 2352 index = fls(size - 1); 2353 } 2354 2355#ifdef CONFIG_ZONE_DMA 2356 if (unlikely((flags & SLUB_DMA))) 2357 return dma_kmalloc_cache(index, flags); 2358 2359#endif 2360 return &kmalloc_caches[index]; 2361} 2362 2363void *__kmalloc(size_t size, gfp_t flags) 2364{ 2365 struct kmem_cache *s = get_slab(size, flags); 2366 2367 if (ZERO_OR_NULL_PTR(s)) 2368 return s; 2369 2370 return slab_alloc(s, flags, -1, __builtin_return_address(0)); 2371} 2372EXPORT_SYMBOL(__kmalloc); 2373 2374#ifdef CONFIG_NUMA 2375void *__kmalloc_node(size_t size, gfp_t flags, int node) 2376{ 2377 struct kmem_cache *s = get_slab(size, flags); 2378 2379 if (ZERO_OR_NULL_PTR(s)) 2380 return s; 2381 2382 return slab_alloc(s, flags, node, __builtin_return_address(0)); 2383} 2384EXPORT_SYMBOL(__kmalloc_node); 2385#endif 2386 2387size_t ksize(const void *object) 2388{ 2389 struct page *page; 2390 struct kmem_cache *s; 2391 2392 if (object == ZERO_SIZE_PTR) 2393 return 0; 2394 2395 page = get_object_page(object); 2396 BUG_ON(!page); 2397 s = page->slab; 2398 BUG_ON(!s); 2399 2400 /* 2401 * Debugging requires use of the padding between object 2402 * and whatever may come after it. 2403 */ 2404 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) 2405 return s->objsize; 2406 2407 /* 2408 * If we have the need to store the freelist pointer 2409 * back there or track user information then we can 2410 * only use the space before that information. 2411 */ 2412 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) 2413 return s->inuse; 2414 2415 /* 2416 * Else we can use all the padding etc for the allocation 2417 */ 2418 return s->size; 2419} 2420EXPORT_SYMBOL(ksize); 2421 2422void kfree(const void *x) 2423{ 2424 struct kmem_cache *s; 2425 struct page *page; 2426 2427 /* 2428 * This has to be an unsigned comparison. According to Linus 2429 * some gcc versions treat a pointer as a signed entity. Then 2430 * this comparison would be true for all "negative" pointers 2431 * (which would cover the whole upper half of the address space). 2432 */ 2433 if (ZERO_OR_NULL_PTR(x)) 2434 return; 2435 2436 page = virt_to_head_page(x); 2437 s = page->slab; 2438 2439 slab_free(s, page, (void *)x, __builtin_return_address(0)); 2440} 2441EXPORT_SYMBOL(kfree); 2442 2443/* 2444 * kmem_cache_shrink removes empty slabs from the partial lists and sorts 2445 * the remaining slabs by the number of items in use. The slabs with the 2446 * most items in use come first. New allocations will then fill those up 2447 * and thus they can be removed from the partial lists.
2448 * 2449 * The slabs with the least items are placed last. This results in them 2450 * being allocated from last, increasing the chance that the last objects 2451 * in them are freed. 2452 */ 2453int kmem_cache_shrink(struct kmem_cache *s) 2454{ 2455 int node; 2456 int i; 2457 struct kmem_cache_node *n; 2458 struct page *page; 2459 struct page *t; 2460 struct list_head *slabs_by_inuse = 2461 kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL); 2462 unsigned long flags; 2463 2464 if (!slabs_by_inuse) 2465 return -ENOMEM; 2466 2467 flush_all(s); 2468 for_each_online_node(node) { 2469 n = get_node(s, node); 2470 2471 if (!n->nr_partial) 2472 continue; 2473 2474 for (i = 0; i < s->objects; i++) 2475 INIT_LIST_HEAD(slabs_by_inuse + i); 2476 2477 spin_lock_irqsave(&n->list_lock, flags); 2478 2479 /* 2480 * Build lists indexed by the items in use in each slab. 2481 * 2482 * Note that concurrent frees may occur while we hold the 2483 * list_lock. page->inuse here is the upper limit. 2484 */ 2485 list_for_each_entry_safe(page, t, &n->partial, lru) { 2486 if (!page->inuse && slab_trylock(page)) { 2487 /* 2488 * Must hold slab lock here because slab_free 2489 * may have freed the last object and be 2490 * waiting to release the slab. 2491 */ 2492 list_del(&page->lru); 2493 n->nr_partial--; 2494 slab_unlock(page); 2495 discard_slab(s, page); 2496 } else { 2497 if (n->nr_partial > MAX_PARTIAL) 2498 list_move(&page->lru, 2499 slabs_by_inuse + page->inuse); 2500 } 2501 } 2502 2503 if (n->nr_partial <= MAX_PARTIAL) 2504 goto out; 2505 2506 /* 2507 * Rebuild the partial list with the slabs filled up most 2508 * first and the least used slabs at the end. 2509 */ 2510 for (i = s->objects - 1; i >= 0; i--) 2511 list_splice(slabs_by_inuse + i, n->partial.prev); 2512 2513 out: 2514 spin_unlock_irqrestore(&n->list_lock, flags); 2515 } 2516 2517 kfree(slabs_by_inuse); 2518 return 0; 2519} 2520EXPORT_SYMBOL(kmem_cache_shrink); 2521 2522/******************************************************************** 2523 * Basic setup of slabs 2524 *******************************************************************/ 2525 2526void __init kmem_cache_init(void) 2527{ 2528 int i; 2529 int caches = 0; 2530 2531#ifdef CONFIG_NUMA 2532 /* 2533 * Must first have the slab cache available for the allocations of the 2534 * struct kmem_cache_node's. There is special bootstrap code in 2535 * kmem_cache_open for slab_state == DOWN. 2536 */ 2537 create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", 2538 sizeof(struct kmem_cache_node), GFP_KERNEL); 2539 kmalloc_caches[0].refcount = -1; 2540 caches++; 2541#endif 2542 2543 /* Able to allocate the per node structures */ 2544 slab_state = PARTIAL; 2545 2546 /* Caches that are not of a power-of-two size */ 2547 if (KMALLOC_MIN_SIZE <= 64) { 2548 create_kmalloc_cache(&kmalloc_caches[1], 2549 "kmalloc-96", 96, GFP_KERNEL); 2550 caches++; 2551 } 2552 if (KMALLOC_MIN_SIZE <= 128) { 2553 create_kmalloc_cache(&kmalloc_caches[2], 2554 "kmalloc-192", 192, GFP_KERNEL); 2555 caches++; 2556 } 2557 2558 for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { 2559 create_kmalloc_cache(&kmalloc_caches[i], 2560 "kmalloc", 1 << i, GFP_KERNEL); 2561 caches++; 2562 } 2563 2564 2565 /* 2566 * Patch up the size_index table if we have strange large alignment 2567 * requirements for the kmalloc array. This is only the case for 2568 * MIPS, it seems. The standard arches will not generate any code here.
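 *
 * For example (illustrative): an arch with a large enough
 * ARCH_KMALLOC_MINALIGN that KMALLOC_MIN_SIZE becomes 128 has the loop
 * below redirect the size_index slots for sizes 8..120 to
 * KMALLOC_SHIFT_LOW, so every small kmalloc request falls through to
 * the 128 byte cache.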
2569 * 2570 * Largest permitted alignment is 256 bytes due to the way we 2571 * handle the index determination for the smaller caches. 2572 * 2573 * Make sure that nothing crazy happens if someone starts tinkering 2574 * around with ARCH_KMALLOC_MINALIGN 2575 */ 2576 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || 2577 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); 2578 2579 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) 2580 size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW; 2581 2582 slab_state = UP; 2583 2584 /* Provide the correct kmalloc names now that the caches are up */ 2585 for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) 2586 kmalloc_caches[i]. name = 2587 kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); 2588 2589#ifdef CONFIG_SMP 2590 register_cpu_notifier(&slab_notifier); 2591#endif 2592 2593 kmem_size = offsetof(struct kmem_cache, cpu_slab) + 2594 nr_cpu_ids * sizeof(struct page *); 2595 2596 printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," 2597 " CPUs=%d, Nodes=%d\n", 2598 caches, cache_line_size(), 2599 slub_min_order, slub_max_order, slub_min_objects, 2600 nr_cpu_ids, nr_node_ids); 2601} 2602 2603/* 2604 * Find a mergeable slab cache 2605 */ 2606static int slab_unmergeable(struct kmem_cache *s) 2607{ 2608 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE)) 2609 return 1; 2610 2611 if (s->ctor) 2612 return 1; 2613 2614 /* 2615 * We may have set a slab to be unmergeable during bootstrap. 2616 */ 2617 if (s->refcount < 0) 2618 return 1; 2619 2620 return 0; 2621} 2622 2623static struct kmem_cache *find_mergeable(size_t size, 2624 size_t align, unsigned long flags, 2625 void (*ctor)(void *, struct kmem_cache *, unsigned long)) 2626{ 2627 struct kmem_cache *s; 2628 2629 if (slub_nomerge || (flags & SLUB_NEVER_MERGE)) 2630 return NULL; 2631 2632 if (ctor) 2633 return NULL; 2634 2635 size = ALIGN(size, sizeof(void *)); 2636 align = calculate_alignment(flags, align, size); 2637 size = ALIGN(size, align); 2638 2639 list_for_each_entry(s, &slab_caches, list) { 2640 if (slab_unmergeable(s)) 2641 continue; 2642 2643 if (size > s->size) 2644 continue; 2645 2646 if (((flags | slub_debug) & SLUB_MERGE_SAME) != 2647 (s->flags & SLUB_MERGE_SAME)) 2648 continue; 2649 /* 2650 * Check if alignment is compatible. 2651 * Courtesy of Adrian Drzewiecki 2652 */ 2653 if ((s->size & ~(align -1)) != s->size) 2654 continue; 2655 2656 if (s->size - size >= sizeof(void *)) 2657 continue; 2658 2659 return s; 2660 } 2661 return NULL; 2662} 2663 2664struct kmem_cache *kmem_cache_create(const char *name, size_t size, 2665 size_t align, unsigned long flags, 2666 void (*ctor)(void *, struct kmem_cache *, unsigned long), 2667 void (*dtor)(void *, struct kmem_cache *, unsigned long)) 2668{ 2669 struct kmem_cache *s; 2670 2671 BUG_ON(dtor); 2672 down_write(&slub_lock); 2673 s = find_mergeable(size, align, flags, ctor); 2674 if (s) { 2675 s->refcount++; 2676 /* 2677 * Adjust the object sizes so that we clear 2678 * the complete object on kzalloc. 
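 *
 * Example (illustrative): merging a 60 byte request into an existing
 * cache with objsize 56 and size 64 raises objsize to 60 so that
 * kzalloc clears all 60 bytes the new user relies on.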
2679 */ 2680 s->objsize = max(s->objsize, (int)size); 2681 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 2682 if (sysfs_slab_alias(s, name)) 2683 goto err; 2684 } else { 2685 s = kmalloc(kmem_size, GFP_KERNEL); 2686 if (s && kmem_cache_open(s, GFP_KERNEL, name, 2687 size, align, flags, ctor)) { 2688 if (sysfs_slab_add(s)) { 2689 kfree(s); 2690 goto err; 2691 } 2692 list_add(&s->list, &slab_caches); 2693 } else 2694 kfree(s); 2695 } 2696 up_write(&slub_lock); 2697 return s; 2698 2699err: 2700 up_write(&slub_lock); 2701 if (flags & SLAB_PANIC) 2702 panic("Cannot create slabcache %s\n", name); 2703 else 2704 s = NULL; 2705 return s; 2706} 2707EXPORT_SYMBOL(kmem_cache_create); 2708 2709#ifdef CONFIG_SMP 2710/* 2711 * Use the cpu notifier to ensure that the cpu slabs are flushed when 2712 * necessary. 2713 */ 2714static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, 2715 unsigned long action, void *hcpu) 2716{ 2717 long cpu = (long)hcpu; 2718 struct kmem_cache *s; 2719 unsigned long flags; 2720 2721 switch (action) { 2722 case CPU_UP_CANCELED: 2723 case CPU_UP_CANCELED_FROZEN: 2724 case CPU_DEAD: 2725 case CPU_DEAD_FROZEN: 2726 down_read(&slub_lock); 2727 list_for_each_entry(s, &slab_caches, list) { 2728 local_irq_save(flags); 2729 __flush_cpu_slab(s, cpu); 2730 local_irq_restore(flags); 2731 } 2732 up_read(&slub_lock); 2733 break; 2734 default: 2735 break; 2736 } 2737 return NOTIFY_OK; 2738} 2739 2740static struct notifier_block __cpuinitdata slab_notifier = 2741 { &slab_cpuup_callback, NULL, 0 }; 2742 2743#endif 2744 2745void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) 2746{ 2747 struct kmem_cache *s = get_slab(size, gfpflags); 2748 2749 if (ZERO_OR_NULL_PTR(s)) 2750 return s; 2751 2752 return slab_alloc(s, gfpflags, -1, caller); 2753} 2754 2755void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, 2756 int node, void *caller) 2757{ 2758 struct kmem_cache *s = get_slab(size, gfpflags); 2759 2760 if (ZERO_OR_NULL_PTR(s)) 2761 return s; 2762 2763 return slab_alloc(s, gfpflags, node, caller); 2764} 2765 2766#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) 2767static int validate_slab(struct kmem_cache *s, struct page *page, 2768 unsigned long *map) 2769{ 2770 void *p; 2771 void *addr = page_address(page); 2772 2773 if (!check_slab(s, page) || 2774 !on_freelist(s, page, NULL)) 2775 return 0; 2776 2777 /* Now we know that a valid freelist exists */ 2778 bitmap_zero(map, s->objects); 2779 2780 for_each_free_object(p, s, page->freelist) { 2781 set_bit(slab_index(p, s, addr), map); 2782 if (!check_object(s, page, p, 0)) 2783 return 0; 2784 } 2785 2786 for_each_object(p, s, addr) 2787 if (!test_bit(slab_index(p, s, addr), map)) 2788 if (!check_object(s, page, p, 1)) 2789 return 0; 2790 return 1; 2791} 2792 2793static void validate_slab_slab(struct kmem_cache *s, struct page *page, 2794 unsigned long *map) 2795{ 2796 if (slab_trylock(page)) { 2797 validate_slab(s, page, map); 2798 slab_unlock(page); 2799 } else 2800 printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n", 2801 s->name, page); 2802 2803 if (s->flags & DEBUG_DEFAULT_FLAGS) { 2804 if (!SlabDebug(page)) 2805 printk(KERN_ERR "SLUB %s: SlabDebug not set " 2806 "on slab 0x%p\n", s->name, page); 2807 } else { 2808 if (SlabDebug(page)) 2809 printk(KERN_ERR "SLUB %s: SlabDebug set on " 2810 "slab 0x%p\n", s->name, page); 2811 } 2812} 2813 2814static int validate_slab_node(struct kmem_cache *s, 2815 struct kmem_cache_node *n, unsigned long *map) 2816{ 2817 unsigned long count = 0;
2818 struct page *page; 2819 unsigned long flags; 2820 2821 spin_lock_irqsave(&n->list_lock, flags); 2822 2823 list_for_each_entry(page, &n->partial, lru) { 2824 validate_slab_slab(s, page, map); 2825 count++; 2826 } 2827 if (count != n->nr_partial) 2828 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but " 2829 "counter=%ld\n", s->name, count, n->nr_partial); 2830 2831 if (!(s->flags & SLAB_STORE_USER)) 2832 goto out; 2833 2834 list_for_each_entry(page, &n->full, lru) { 2835 validate_slab_slab(s, page, map); 2836 count++; 2837 } 2838 if (count != atomic_long_read(&n->nr_slabs)) 2839 printk(KERN_ERR "SLUB: %s %ld slabs counted but " 2840 "counter=%ld\n", s->name, count, 2841 atomic_long_read(&n->nr_slabs)); 2842 2843out: 2844 spin_unlock_irqrestore(&n->list_lock, flags); 2845 return count; 2846} 2847 2848static long validate_slab_cache(struct kmem_cache *s) 2849{ 2850 int node; 2851 unsigned long count = 0; 2852 unsigned long *map = kmalloc(BITS_TO_LONGS(s->objects) * 2853 sizeof(unsigned long), GFP_KERNEL); 2854 2855 if (!map) 2856 return -ENOMEM; 2857 2858 flush_all(s); 2859 for_each_online_node(node) { 2860 struct kmem_cache_node *n = get_node(s, node); 2861 2862 count += validate_slab_node(s, n, map); 2863 } 2864 kfree(map); 2865 return count; 2866} 2867 2868#ifdef SLUB_RESILIENCY_TEST 2869static void resiliency_test(void) 2870{ 2871 u8 *p; 2872 2873 printk(KERN_ERR "SLUB resiliency testing\n"); 2874 printk(KERN_ERR "-----------------------\n"); 2875 printk(KERN_ERR "A. Corruption after allocation\n"); 2876 2877 p = kzalloc(16, GFP_KERNEL); 2878 p[16] = 0x12; 2879 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer" 2880 " 0x12->0x%p\n\n", p + 16); 2881 2882 validate_slab_cache(kmalloc_caches + 4); 2883 2884 /* Hmmm... The next two are dangerous */ 2885 p = kzalloc(32, GFP_KERNEL); 2886 p[32 + sizeof(void *)] = 0x34; 2887 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" 2888 " 0x34 -> -0x%p\n", p); 2889 printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); 2890 2891 validate_slab_cache(kmalloc_caches + 5); 2892 p = kzalloc(64, GFP_KERNEL); 2893 p += 64 + (get_cycles() & 0xff) * sizeof(void *); 2894 *p = 0x56; 2895 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", 2896 p); 2897 printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); 2898 validate_slab_cache(kmalloc_caches + 6); 2899 2900 printk(KERN_ERR "\nB. Corruption after free\n"); 2901 p = kzalloc(128, GFP_KERNEL); 2902 kfree(p); 2903 *p = 0x78; 2904 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); 2905 validate_slab_cache(kmalloc_caches + 7); 2906 2907 p = kzalloc(256, GFP_KERNEL); 2908 kfree(p); 2909 p[50] = 0x9a; 2910 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p); 2911 validate_slab_cache(kmalloc_caches + 8); 2912 2913 p = kzalloc(512, GFP_KERNEL); 2914 kfree(p); 2915 p[512] = 0xab; 2916 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); 2917 validate_slab_cache(kmalloc_caches + 9); 2918} 2919#else 2920static void resiliency_test(void) {}; 2921#endif 2922 2923/* 2924 * Generate lists of code addresses where slabcache objects are allocated 2925 * and freed. 
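 * These lists back the alloc_calls and free_calls sysfs attributes
 * defined further down.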
2926 */ 2927 2928struct location { 2929 unsigned long count; 2930 void *addr; 2931 long long sum_time; 2932 long min_time; 2933 long max_time; 2934 long min_pid; 2935 long max_pid; 2936 cpumask_t cpus; 2937 nodemask_t nodes; 2938}; 2939 2940struct loc_track { 2941 unsigned long max; 2942 unsigned long count; 2943 struct location *loc; 2944}; 2945 2946static void free_loc_track(struct loc_track *t) 2947{ 2948 if (t->max) 2949 free_pages((unsigned long)t->loc, 2950 get_order(sizeof(struct location) * t->max)); 2951} 2952 2953static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags) 2954{ 2955 struct location *l; 2956 int order; 2957 2958 order = get_order(sizeof(struct location) * max); 2959 2960 l = (void *)__get_free_pages(flags, order); 2961 if (!l) 2962 return 0; 2963 2964 if (t->count) { 2965 memcpy(l, t->loc, sizeof(struct location) * t->count); 2966 free_loc_track(t); 2967 } 2968 t->max = max; 2969 t->loc = l; 2970 return 1; 2971} 2972 2973static int add_location(struct loc_track *t, struct kmem_cache *s, 2974 const struct track *track) 2975{ 2976 long start, end, pos; 2977 struct location *l; 2978 void *caddr; 2979 unsigned long age = jiffies - track->when; 2980 2981 start = -1; 2982 end = t->count; 2983 2984 for ( ; ; ) { 2985 pos = start + (end - start + 1) / 2; 2986 2987 /* 2988 * There is nothing at "end". If we end up there 2989 * we need to add something to before end. 2990 */ 2991 if (pos == end) 2992 break; 2993 2994 caddr = t->loc[pos].addr; 2995 if (track->addr == caddr) { 2996 2997 l = &t->loc[pos]; 2998 l->count++; 2999 if (track->when) { 3000 l->sum_time += age; 3001 if (age < l->min_time) 3002 l->min_time = age; 3003 if (age > l->max_time) 3004 l->max_time = age; 3005 3006 if (track->pid < l->min_pid) 3007 l->min_pid = track->pid; 3008 if (track->pid > l->max_pid) 3009 l->max_pid = track->pid; 3010 3011 cpu_set(track->cpu, l->cpus); 3012 } 3013 node_set(page_to_nid(virt_to_page(track)), l->nodes); 3014 return 1; 3015 } 3016 3017 if (track->addr < caddr) 3018 end = pos; 3019 else 3020 start = pos; 3021 } 3022 3023 /* 3024 * Not found. Insert new tracking element. 
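 *
 * The location array is kept sorted by address so that the binary
 * search above keeps working; memmove() below opens a hole at the
 * insertion point.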
3025 */ 3026 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC)) 3027 return 0; 3028 3029 l = t->loc + pos; 3030 if (pos < t->count) 3031 memmove(l + 1, l, 3032 (t->count - pos) * sizeof(struct location)); 3033 t->count++; 3034 l->count = 1; 3035 l->addr = track->addr; 3036 l->sum_time = age; 3037 l->min_time = age; 3038 l->max_time = age; 3039 l->min_pid = track->pid; 3040 l->max_pid = track->pid; 3041 cpus_clear(l->cpus); 3042 cpu_set(track->cpu, l->cpus); 3043 nodes_clear(l->nodes); 3044 node_set(page_to_nid(virt_to_page(track)), l->nodes); 3045 return 1; 3046} 3047 3048static void process_slab(struct loc_track *t, struct kmem_cache *s, 3049 struct page *page, enum track_item alloc) 3050{ 3051 void *addr = page_address(page); 3052 DECLARE_BITMAP(map, s->objects); 3053 void *p; 3054 3055 bitmap_zero(map, s->objects); 3056 for_each_free_object(p, s, page->freelist) 3057 set_bit(slab_index(p, s, addr), map); 3058 3059 for_each_object(p, s, addr) 3060 if (!test_bit(slab_index(p, s, addr), map)) 3061 add_location(t, s, get_track(s, p, alloc)); 3062} 3063 3064static int list_locations(struct kmem_cache *s, char *buf, 3065 enum track_item alloc) 3066{ 3067 int n = 0; 3068 unsigned long i; 3069 struct loc_track t = { 0, 0, NULL }; 3070 int node; 3071 3072 if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), 3073 GFP_KERNEL)) 3074 return sprintf(buf, "Out of memory\n"); 3075 3076 /* Push back cpu slabs */ 3077 flush_all(s); 3078 3079 for_each_online_node(node) { 3080 struct kmem_cache_node *n = get_node(s, node); 3081 unsigned long flags; 3082 struct page *page; 3083 3084 if (!atomic_read(&n->nr_slabs)) 3085 continue; 3086 3087 spin_lock_irqsave(&n->list_lock, flags); 3088 list_for_each_entry(page, &n->partial, lru) 3089 process_slab(&t, s, page, alloc); 3090 list_for_each_entry(page, &n->full, lru) 3091 process_slab(&t, s, page, alloc); 3092 spin_unlock_irqrestore(&n->list_lock, flags); 3093 } 3094 3095 for (i = 0; i < t.count; i++) { 3096 struct location *l = &t.loc[i]; 3097 3098 if (n > PAGE_SIZE - 100) 3099 break; 3100 n += sprintf(buf + n, "%7ld ", l->count); 3101 3102 if (l->addr) 3103 n += sprint_symbol(buf + n, (unsigned long)l->addr); 3104 else 3105 n += sprintf(buf + n, "<not-available>"); 3106 3107 if (l->sum_time != l->min_time) { 3108 unsigned long remainder; 3109 3110 n += sprintf(buf + n, " age=%ld/%ld/%ld", 3111 l->min_time, 3112 div_long_long_rem(l->sum_time, l->count, &remainder), 3113 l->max_time); 3114 } else 3115 n += sprintf(buf + n, " age=%ld", 3116 l->min_time); 3117 3118 if (l->min_pid != l->max_pid) 3119 n += sprintf(buf + n, " pid=%ld-%ld", 3120 l->min_pid, l->max_pid); 3121 else 3122 n += sprintf(buf + n, " pid=%ld", 3123 l->min_pid); 3124 3125 if (num_online_cpus() > 1 && !cpus_empty(l->cpus) && 3126 n < PAGE_SIZE - 60) { 3127 n += sprintf(buf + n, " cpus="); 3128 n += cpulist_scnprintf(buf + n, PAGE_SIZE - n - 50, 3129 l->cpus); 3130 } 3131 3132 if (num_online_nodes() > 1 && !nodes_empty(l->nodes) && 3133 n < PAGE_SIZE - 60) { 3134 n += sprintf(buf + n, " nodes="); 3135 n += nodelist_scnprintf(buf + n, PAGE_SIZE - n - 50, 3136 l->nodes); 3137 } 3138 3139 n += sprintf(buf + n, "\n"); 3140 } 3141 3142 free_loc_track(&t); 3143 if (!t.count) 3144 n += sprintf(buf, "No data\n"); 3145 return n; 3146} 3147 3148static unsigned long count_partial(struct kmem_cache_node *n) 3149{ 3150 unsigned long flags; 3151 unsigned long x = 0; 3152 struct page *page; 3153 3154 spin_lock_irqsave(&n->list_lock, flags); 3155 list_for_each_entry(page, &n->partial, 
lru) 3156 x += page->inuse; 3157 spin_unlock_irqrestore(&n->list_lock, flags); 3158 return x; 3159} 3160 3161enum slab_stat_type { 3162 SL_FULL, 3163 SL_PARTIAL, 3164 SL_CPU, 3165 SL_OBJECTS 3166}; 3167 3168#define SO_FULL (1 << SL_FULL) 3169#define SO_PARTIAL (1 << SL_PARTIAL) 3170#define SO_CPU (1 << SL_CPU) 3171#define SO_OBJECTS (1 << SL_OBJECTS) 3172 3173static unsigned long slab_objects(struct kmem_cache *s, 3174 char *buf, unsigned long flags) 3175{ 3176 unsigned long total = 0; 3177 int cpu; 3178 int node; 3179 int x; 3180 unsigned long *nodes; 3181 unsigned long *per_cpu; 3182 3183 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); 3184 per_cpu = nodes + nr_node_ids; 3185 3186 for_each_possible_cpu(cpu) { 3187 struct page *page = s->cpu_slab[cpu]; 3188 int node; 3189 3190 if (page) { 3191 node = page_to_nid(page); 3192 if (flags & SO_CPU) { 3193 int x = 0; 3194 3195 if (flags & SO_OBJECTS) 3196 x = page->inuse; 3197 else 3198 x = 1; 3199 total += x; 3200 nodes[node] += x; 3201 } 3202 per_cpu[node]++; 3203 } 3204 } 3205 3206 for_each_online_node(node) { 3207 struct kmem_cache_node *n = get_node(s, node); 3208 3209 if (flags & SO_PARTIAL) { 3210 if (flags & SO_OBJECTS) 3211 x = count_partial(n); 3212 else 3213 x = n->nr_partial; 3214 total += x; 3215 nodes[node] += x; 3216 } 3217 3218 if (flags & SO_FULL) { 3219 int full_slabs = atomic_read(&n->nr_slabs) 3220 - per_cpu[node] 3221 - n->nr_partial; 3222 3223 if (flags & SO_OBJECTS) 3224 x = full_slabs * s->objects; 3225 else 3226 x = full_slabs; 3227 total += x; 3228 nodes[node] += x; 3229 } 3230 } 3231 3232 x = sprintf(buf, "%lu", total); 3233#ifdef CONFIG_NUMA 3234 for_each_online_node(node) 3235 if (nodes[node]) 3236 x += sprintf(buf + x, " N%d=%lu", 3237 node, nodes[node]); 3238#endif 3239 kfree(nodes); 3240 return x + sprintf(buf + x, "\n"); 3241} 3242 3243static int any_slab_objects(struct kmem_cache *s) 3244{ 3245 int node; 3246 int cpu; 3247 3248 for_each_possible_cpu(cpu) 3249 if (s->cpu_slab[cpu]) 3250 return 1; 3251 3252 for_each_node(node) { 3253 struct kmem_cache_node *n = get_node(s, node); 3254 3255 if (n->nr_partial || atomic_read(&n->nr_slabs)) 3256 return 1; 3257 } 3258 return 0; 3259} 3260 3261#define to_slab_attr(n) container_of(n, struct slab_attribute, attr) 3262#define to_slab(n) container_of(n, struct kmem_cache, kobj); 3263 3264struct slab_attribute { 3265 struct attribute attr; 3266 ssize_t (*show)(struct kmem_cache *s, char *buf); 3267 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count); 3268}; 3269 3270#define SLAB_ATTR_RO(_name) \ 3271 static struct slab_attribute _name##_attr = __ATTR_RO(_name) 3272 3273#define SLAB_ATTR(_name) \ 3274 static struct slab_attribute _name##_attr = \ 3275 __ATTR(_name, 0644, _name##_show, _name##_store) 3276 3277static ssize_t slab_size_show(struct kmem_cache *s, char *buf) 3278{ 3279 return sprintf(buf, "%d\n", s->size); 3280} 3281SLAB_ATTR_RO(slab_size); 3282 3283static ssize_t align_show(struct kmem_cache *s, char *buf) 3284{ 3285 return sprintf(buf, "%d\n", s->align); 3286} 3287SLAB_ATTR_RO(align); 3288 3289static ssize_t object_size_show(struct kmem_cache *s, char *buf) 3290{ 3291 return sprintf(buf, "%d\n", s->objsize); 3292} 3293SLAB_ATTR_RO(object_size); 3294 3295static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf) 3296{ 3297 return sprintf(buf, "%d\n", s->objects); 3298} 3299SLAB_ATTR_RO(objs_per_slab); 3300 3301static ssize_t order_show(struct kmem_cache *s, char *buf) 3302{ 3303 return sprintf(buf, "%d\n", s->order); 
3304} 3305SLAB_ATTR_RO(order); 3306 3307static ssize_t ctor_show(struct kmem_cache *s, char *buf) 3308{ 3309 if (s->ctor) { 3310 int n = sprint_symbol(buf, (unsigned long)s->ctor); 3311 3312 return n + sprintf(buf + n, "\n"); 3313 } 3314 return 0; 3315} 3316SLAB_ATTR_RO(ctor); 3317 3318static ssize_t aliases_show(struct kmem_cache *s, char *buf) 3319{ 3320 return sprintf(buf, "%d\n", s->refcount - 1); 3321} 3322SLAB_ATTR_RO(aliases); 3323 3324static ssize_t slabs_show(struct kmem_cache *s, char *buf) 3325{ 3326 return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); 3327} 3328SLAB_ATTR_RO(slabs); 3329 3330static ssize_t partial_show(struct kmem_cache *s, char *buf) 3331{ 3332 return slab_objects(s, buf, SO_PARTIAL); 3333} 3334SLAB_ATTR_RO(partial); 3335 3336static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) 3337{ 3338 return slab_objects(s, buf, SO_CPU); 3339} 3340SLAB_ATTR_RO(cpu_slabs); 3341 3342static ssize_t objects_show(struct kmem_cache *s, char *buf) 3343{ 3344 return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); 3345} 3346SLAB_ATTR_RO(objects); 3347 3348static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) 3349{ 3350 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); 3351} 3352 3353static ssize_t sanity_checks_store(struct kmem_cache *s, 3354 const char *buf, size_t length) 3355{ 3356 s->flags &= ~SLAB_DEBUG_FREE; 3357 if (buf[0] == '1') 3358 s->flags |= SLAB_DEBUG_FREE; 3359 return length; 3360} 3361SLAB_ATTR(sanity_checks); 3362 3363static ssize_t trace_show(struct kmem_cache *s, char *buf) 3364{ 3365 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE)); 3366} 3367 3368static ssize_t trace_store(struct kmem_cache *s, const char *buf, 3369 size_t length) 3370{ 3371 s->flags &= ~SLAB_TRACE; 3372 if (buf[0] == '1') 3373 s->flags |= SLAB_TRACE; 3374 return length; 3375} 3376SLAB_ATTR(trace); 3377 3378static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) 3379{ 3380 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); 3381} 3382 3383static ssize_t reclaim_account_store(struct kmem_cache *s, 3384 const char *buf, size_t length) 3385{ 3386 s->flags &= ~SLAB_RECLAIM_ACCOUNT; 3387 if (buf[0] == '1') 3388 s->flags |= SLAB_RECLAIM_ACCOUNT; 3389 return length; 3390} 3391SLAB_ATTR(reclaim_account); 3392 3393static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf) 3394{ 3395 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN)); 3396} 3397SLAB_ATTR_RO(hwcache_align); 3398 3399#ifdef CONFIG_ZONE_DMA 3400static ssize_t cache_dma_show(struct kmem_cache *s, char *buf) 3401{ 3402 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA)); 3403} 3404SLAB_ATTR_RO(cache_dma); 3405#endif 3406 3407static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) 3408{ 3409 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU)); 3410} 3411SLAB_ATTR_RO(destroy_by_rcu); 3412 3413static ssize_t red_zone_show(struct kmem_cache *s, char *buf) 3414{ 3415 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE)); 3416} 3417 3418static ssize_t red_zone_store(struct kmem_cache *s, 3419 const char *buf, size_t length) 3420{ 3421 if (any_slab_objects(s)) 3422 return -EBUSY; 3423 3424 s->flags &= ~SLAB_RED_ZONE; 3425 if (buf[0] == '1') 3426 s->flags |= SLAB_RED_ZONE; 3427 calculate_sizes(s); 3428 return length; 3429} 3430SLAB_ATTR(red_zone); 3431 3432static ssize_t poison_show(struct kmem_cache *s, char *buf) 3433{ 3434 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON)); 3435} 3436 3437static ssize_t 
poison_store(struct kmem_cache *s, 3438 const char *buf, size_t length) 3439{ 3440 if (any_slab_objects(s)) 3441 return -EBUSY; 3442 3443 s->flags &= ~SLAB_POISON; 3444 if (buf[0] == '1') 3445 s->flags |= SLAB_POISON; 3446 calculate_sizes(s); 3447 return length; 3448} 3449SLAB_ATTR(poison); 3450 3451static ssize_t store_user_show(struct kmem_cache *s, char *buf) 3452{ 3453 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER)); 3454} 3455 3456static ssize_t store_user_store(struct kmem_cache *s, 3457 const char *buf, size_t length) 3458{ 3459 if (any_slab_objects(s)) 3460 return -EBUSY; 3461 3462 s->flags &= ~SLAB_STORE_USER; 3463 if (buf[0] == '1') 3464 s->flags |= SLAB_STORE_USER; 3465 calculate_sizes(s); 3466 return length; 3467} 3468SLAB_ATTR(store_user); 3469 3470static ssize_t validate_show(struct kmem_cache *s, char *buf) 3471{ 3472 return 0; 3473} 3474 3475static ssize_t validate_store(struct kmem_cache *s, 3476 const char *buf, size_t length) 3477{ 3478 int ret = -EINVAL; 3479 3480 if (buf[0] == '1') { 3481 ret = validate_slab_cache(s); 3482 if (ret >= 0) 3483 ret = length; 3484 } 3485 return ret; 3486} 3487SLAB_ATTR(validate); 3488 3489static ssize_t shrink_show(struct kmem_cache *s, char *buf) 3490{ 3491 return 0; 3492} 3493 3494static ssize_t shrink_store(struct kmem_cache *s, 3495 const char *buf, size_t length) 3496{ 3497 if (buf[0] == '1') { 3498 int rc = kmem_cache_shrink(s); 3499 3500 if (rc) 3501 return rc; 3502 } else 3503 return -EINVAL; 3504 return length; 3505} 3506SLAB_ATTR(shrink); 3507 3508static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf) 3509{ 3510 if (!(s->flags & SLAB_STORE_USER)) 3511 return -ENOSYS; 3512 return list_locations(s, buf, TRACK_ALLOC); 3513} 3514SLAB_ATTR_RO(alloc_calls); 3515 3516static ssize_t free_calls_show(struct kmem_cache *s, char *buf) 3517{ 3518 if (!(s->flags & SLAB_STORE_USER)) 3519 return -ENOSYS; 3520 return list_locations(s, buf, TRACK_FREE); 3521} 3522SLAB_ATTR_RO(free_calls); 3523 3524#ifdef CONFIG_NUMA 3525static ssize_t defrag_ratio_show(struct kmem_cache *s, char *buf) 3526{ 3527 return sprintf(buf, "%d\n", s->defrag_ratio / 10); 3528} 3529 3530static ssize_t defrag_ratio_store(struct kmem_cache *s, 3531 const char *buf, size_t length) 3532{ 3533 int n = simple_strtoul(buf, NULL, 10); 3534 3535 if (n < 100) 3536 s->defrag_ratio = n * 10; 3537 return length; 3538} 3539SLAB_ATTR(defrag_ratio); 3540#endif 3541 3542static struct attribute * slab_attrs[] = { 3543 &slab_size_attr.attr, 3544 &object_size_attr.attr, 3545 &objs_per_slab_attr.attr, 3546 &order_attr.attr, 3547 &objects_attr.attr, 3548 &slabs_attr.attr, 3549 &partial_attr.attr, 3550 &cpu_slabs_attr.attr, 3551 &ctor_attr.attr, 3552 &aliases_attr.attr, 3553 &align_attr.attr, 3554 &sanity_checks_attr.attr, 3555 &trace_attr.attr, 3556 &hwcache_align_attr.attr, 3557 &reclaim_account_attr.attr, 3558 &destroy_by_rcu_attr.attr, 3559 &red_zone_attr.attr, 3560 &poison_attr.attr, 3561 &store_user_attr.attr, 3562 &validate_attr.attr, 3563 &shrink_attr.attr, 3564 &alloc_calls_attr.attr, 3565 &free_calls_attr.attr, 3566#ifdef CONFIG_ZONE_DMA 3567 &cache_dma_attr.attr, 3568#endif 3569#ifdef CONFIG_NUMA 3570 &defrag_ratio_attr.attr, 3571#endif 3572 NULL 3573}; 3574 3575static struct attribute_group slab_attr_group = { 3576 .attrs = slab_attrs, 3577}; 3578 3579static ssize_t slab_attr_show(struct kobject *kobj, 3580 struct attribute *attr, 3581 char *buf) 3582{ 3583 struct slab_attribute *attribute; 3584 struct kmem_cache *s; 3585 int err; 3586 3587 attribute = 
to_slab_attr(attr); 3588 s = to_slab(kobj); 3589 3590 if (!attribute->show) 3591 return -EIO; 3592 3593 err = attribute->show(s, buf); 3594 3595 return err; 3596} 3597 3598static ssize_t slab_attr_store(struct kobject *kobj, 3599 struct attribute *attr, 3600 const char *buf, size_t len) 3601{ 3602 struct slab_attribute *attribute; 3603 struct kmem_cache *s; 3604 int err; 3605 3606 attribute = to_slab_attr(attr); 3607 s = to_slab(kobj); 3608 3609 if (!attribute->store) 3610 return -EIO; 3611 3612 err = attribute->store(s, buf, len); 3613 3614 return err; 3615} 3616 3617static struct sysfs_ops slab_sysfs_ops = { 3618 .show = slab_attr_show, 3619 .store = slab_attr_store, 3620}; 3621 3622static struct kobj_type slab_ktype = { 3623 .sysfs_ops = &slab_sysfs_ops, 3624}; 3625 3626static int uevent_filter(struct kset *kset, struct kobject *kobj) 3627{ 3628 struct kobj_type *ktype = get_ktype(kobj); 3629 3630 if (ktype == &slab_ktype) 3631 return 1; 3632 return 0; 3633} 3634 3635static struct kset_uevent_ops slab_uevent_ops = { 3636 .filter = uevent_filter, 3637}; 3638 3639static decl_subsys(slab, &slab_ktype, &slab_uevent_ops); 3640 3641#define ID_STR_LENGTH 64 3642 3643/* Create a unique string id for a slab cache: 3644 * format 3645 * :[flags-]size:[memory address of kmemcache] 3646 */ 3647static char *create_unique_id(struct kmem_cache *s) 3648{ 3649 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL); 3650 char *p = name; 3651 3652 BUG_ON(!name); 3653 3654 *p++ = ':'; 3655 /* 3656 * First flags affecting slabcache operations. We will only 3657 * get here for aliasable slabs so we do not need to support 3658 * too many flags. The flags here must cover all flags that 3659 * are matched during merging to guarantee that the id is 3660 * unique. 3661 */ 3662 if (s->flags & SLAB_CACHE_DMA) 3663 *p++ = 'd'; 3664 if (s->flags & SLAB_RECLAIM_ACCOUNT) 3665 *p++ = 'a'; 3666 if (s->flags & SLAB_DEBUG_FREE) 3667 *p++ = 'F'; 3668 if (p != name + 1) 3669 *p++ = '-'; 3670 p += sprintf(p, "%07d", s->size); 3671 BUG_ON(p > name + ID_STR_LENGTH - 1); 3672 return name; 3673} 3674 3675static int sysfs_slab_add(struct kmem_cache *s) 3676{ 3677 int err; 3678 const char *name; 3679 int unmergeable; 3680 3681 if (slab_state < SYSFS) 3682 /* Defer until later */ 3683 return 0; 3684 3685 unmergeable = slab_unmergeable(s); 3686 if (unmergeable) { 3687 /* 3688 * Slabcache can never be merged so we can use the name proper. 3689 * This is typically the case for debug situations. In that 3690 * case we can catch duplicate names easily. 3691 */ 3692 sysfs_remove_link(&slab_subsys.kobj, s->name); 3693 name = s->name; 3694 } else { 3695 /* 3696 * Create a unique name for the slab as a target 3697 * for the symlinks. 3698 */ 3699 name = create_unique_id(s); 3700 } 3701 3702 kobj_set_kset_s(s, slab_subsys); 3703 kobject_set_name(&s->kobj, name); 3704 kobject_init(&s->kobj); 3705 err = kobject_add(&s->kobj); 3706 if (err) 3707 return err; 3708 3709 err = sysfs_create_group(&s->kobj, &slab_attr_group); 3710 if (err) 3711 return err; 3712 kobject_uevent(&s->kobj, KOBJ_ADD); 3713 if (!unmergeable) { 3714 /* Setup first alias */ 3715 sysfs_slab_alias(s, s->name); 3716 kfree(name); 3717 } 3718 return 0; 3719} 3720 3721static void sysfs_slab_remove(struct kmem_cache *s) 3722{ 3723 kobject_uevent(&s->kobj, KOBJ_REMOVE); 3724 kobject_del(&s->kobj); 3725} 3726 3727/* 3728 * Need to buffer aliases during bootup until sysfs becomes 3729 * available lest we lose that information.
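 * The saved aliases are replayed by slab_sysfs_init() once the slab
 * subsystem has been registered.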
3730 */ 3731struct saved_alias { 3732 struct kmem_cache *s; 3733 const char *name; 3734 struct saved_alias *next; 3735}; 3736 3737static struct saved_alias *alias_list; 3738 3739static int sysfs_slab_alias(struct kmem_cache *s, const char *name) 3740{ 3741 struct saved_alias *al; 3742 3743 if (slab_state == SYSFS) { 3744 /* 3745 * If we have a leftover link then remove it. 3746 */ 3747 sysfs_remove_link(&slab_subsys.kobj, name); 3748 return sysfs_create_link(&slab_subsys.kobj, 3749 &s->kobj, name); 3750 } 3751 3752 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL); 3753 if (!al) 3754 return -ENOMEM; 3755 3756 al->s = s; 3757 al->name = name; 3758 al->next = alias_list; 3759 alias_list = al; 3760 return 0; 3761} 3762 3763static int __init slab_sysfs_init(void) 3764{ 3765 struct kmem_cache *s; 3766 int err; 3767 3768 err = subsystem_register(&slab_subsys); 3769 if (err) { 3770 printk(KERN_ERR "Cannot register slab subsystem.\n"); 3771 return -ENOSYS; 3772 } 3773 3774 slab_state = SYSFS; 3775 3776 list_for_each_entry(s, &slab_caches, list) { 3777 err = sysfs_slab_add(s); 3778 BUG_ON(err); 3779 } 3780 3781 while (alias_list) { 3782 struct saved_alias *al = alias_list; 3783 3784 alias_list = alias_list->next; 3785 err = sysfs_slab_alias(al->s, al->name); 3786 BUG_ON(err); 3787 kfree(al); 3788 } 3789 3790 resiliency_test(); 3791 return 0; 3792} 3793 3794__initcall(slab_sysfs_init); 3795#endif 3796
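/*
 * Usage sketch (illustrative only; the exact sysfs location of the slab
 * subsystem registered above depends on the kernel configuration):
 *
 *	cat <sysfs>/slab/<cache>/objects	- report object counts
 *	echo 1 > <sysfs>/slab/<cache>/validate	- walk and check all slabs
 *	echo 1 > <sysfs>/slab/<cache>/shrink	- discard empty partial slabs
 */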