slab.c revision c5e3b83e97be4e09961c0af101644643e5d03d17
1/* 2 * linux/mm/slab.c 3 * Written by Mark Hemment, 1996/97. 4 * (markhe@nextd.demon.co.uk) 5 * 6 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli 7 * 8 * Major cleanup, different bufctl logic, per-cpu arrays 9 * (c) 2000 Manfred Spraul 10 * 11 * Cleanup, make the head arrays unconditional, preparation for NUMA 12 * (c) 2002 Manfred Spraul 13 * 14 * An implementation of the Slab Allocator as described in outline in; 15 * UNIX Internals: The New Frontiers by Uresh Vahalia 16 * Pub: Prentice Hall ISBN 0-13-101908-2 17 * or with a little more detail in; 18 * The Slab Allocator: An Object-Caching Kernel Memory Allocator 19 * Jeff Bonwick (Sun Microsystems). 20 * Presented at: USENIX Summer 1994 Technical Conference 21 * 22 * The memory is organized in caches, one cache for each object type. 23 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct) 24 * Each cache consists out of many slabs (they are small (usually one 25 * page long) and always contiguous), and each slab contains multiple 26 * initialized objects. 27 * 28 * This means, that your constructor is used only for newly allocated 29 * slabs and you must pass objects with the same intializations to 30 * kmem_cache_free. 31 * 32 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM, 33 * normal). If you need a special memory type, then must create a new 34 * cache for that memory type. 35 * 36 * In order to reduce fragmentation, the slabs are sorted in 3 groups: 37 * full slabs with 0 free objects 38 * partial slabs 39 * empty slabs with no allocated objects 40 * 41 * If partial slabs exist, then new allocations come from these slabs, 42 * otherwise from empty slabs or new slabs are allocated. 43 * 44 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache 45 * during kmem_cache_destroy(). The caller must prevent concurrent allocs. 46 * 47 * Each cache has a short per-cpu head array, most allocs 48 * and frees go into that array, and if that array overflows, then 1/2 49 * of the entries in the array are given back into the global cache. 50 * The head array is strictly LIFO and should improve the cache hit rates. 51 * On SMP, it additionally reduces the spinlock operations. 52 * 53 * The c_cpuarray may not be read with enabled local interrupts - 54 * it's changed with a smp_call_function(). 55 * 56 * SMP synchronization: 57 * constructors and destructors are called without any locking. 58 * Several members in struct kmem_cache and struct slab never change, they 59 * are accessed without any locking. 60 * The per-cpu arrays are never accessed from the wrong cpu, no locking, 61 * and local interrupts are disabled so slab code is preempt-safe. 62 * The non-constant members are protected with a per-cache irq spinlock. 63 * 64 * Many thanks to Mark Hemment, who wrote another per-cpu slab patch 65 * in 2000 - many ideas in the current implementation are derived from 66 * his patch. 67 * 68 * Further notes from the original documentation: 69 * 70 * 11 April '97. Started multi-threading - markhe 71 * The global cache-chain is protected by the mutex 'cache_chain_mutex'. 72 * The sem is only needed when accessing/extending the cache-chain, which 73 * can never happen inside an interrupt (kmem_cache_create(), 74 * kmem_cache_shrink() and kmem_cache_reap()). 75 * 76 * At present, each engine can be growing a cache. This should be blocked. 77 * 78 * 15 March 2005. NUMA slab allocator. 79 * Shai Fultheim <shai@scalex86.org>. 80 * Shobhit Dayal <shobhit@calsoftinc.com> 81 * Alok N Kataria <alokk@calsoftinc.com> 82 * Christoph Lameter <christoph@lameter.com> 83 * 84 * Modified the slab allocator to be node aware on NUMA systems. 85 * Each node has its own list of partial, free and full slabs. 86 * All object allocations for a node occur from node specific slab lists. 87 */ 88 89#include <linux/config.h> 90#include <linux/slab.h> 91#include <linux/mm.h> 92#include <linux/swap.h> 93#include <linux/cache.h> 94#include <linux/interrupt.h> 95#include <linux/init.h> 96#include <linux/compiler.h> 97#include <linux/cpuset.h> 98#include <linux/seq_file.h> 99#include <linux/notifier.h> 100#include <linux/kallsyms.h> 101#include <linux/cpu.h> 102#include <linux/sysctl.h> 103#include <linux/module.h> 104#include <linux/rcupdate.h> 105#include <linux/string.h> 106#include <linux/nodemask.h> 107#include <linux/mempolicy.h> 108#include <linux/mutex.h> 109 110#include <asm/uaccess.h> 111#include <asm/cacheflush.h> 112#include <asm/tlbflush.h> 113#include <asm/page.h> 114 115/* 116 * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL, 117 * SLAB_RED_ZONE & SLAB_POISON. 118 * 0 for faster, smaller code (especially in the critical paths). 119 * 120 * STATS - 1 to collect stats for /proc/slabinfo. 121 * 0 for faster, smaller code (especially in the critical paths). 122 * 123 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible) 124 */ 125 126#ifdef CONFIG_DEBUG_SLAB 127#define DEBUG 1 128#define STATS 1 129#define FORCED_DEBUG 1 130#else 131#define DEBUG 0 132#define STATS 0 133#define FORCED_DEBUG 0 134#endif 135 136/* Shouldn't this be in a header file somewhere? */ 137#define BYTES_PER_WORD sizeof(void *) 138 139#ifndef cache_line_size 140#define cache_line_size() L1_CACHE_BYTES 141#endif 142 143#ifndef ARCH_KMALLOC_MINALIGN 144/* 145 * Enforce a minimum alignment for the kmalloc caches. 146 * Usually, the kmalloc caches are cache_line_size() aligned, except when 147 * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned. 148 * Some archs want to perform DMA into kmalloc caches and need a guaranteed 149 * alignment larger than BYTES_PER_WORD. ARCH_KMALLOC_MINALIGN allows that. 150 * Note that this flag disables some debug features. 151 */ 152#define ARCH_KMALLOC_MINALIGN 0 153#endif 154 155#ifndef ARCH_SLAB_MINALIGN 156/* 157 * Enforce a minimum alignment for all caches. 158 * Intended for archs that get misalignment faults even for BYTES_PER_WORD 159 * aligned buffers. Includes ARCH_KMALLOC_MINALIGN. 160 * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables 161 * some debug features. 162 */ 163#define ARCH_SLAB_MINALIGN 0 164#endif 165 166#ifndef ARCH_KMALLOC_FLAGS 167#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN 168#endif 169 170/* Legal flag mask for kmem_cache_create(). */ 171#if DEBUG 172# define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ 173 SLAB_POISON | SLAB_HWCACHE_ALIGN | \ 174 SLAB_CACHE_DMA | \ 175 SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ 176 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ 177 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) 178#else 179# define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ 180 SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ 181 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ 182 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) 183#endif 184 185/* 186 * kmem_bufctl_t: 187 * 188 * Bufctl's are used for linking objs within a slab 189 * linked offsets. 190 * 191 * This implementation relies on "struct page" for locating the cache & 192 * slab an object belongs to. 193 * This allows the bufctl structure to be small (one int), but limits 194 * the number of objects a slab (not a cache) can contain when off-slab 195 * bufctls are used. The limit is the size of the largest general cache 196 * that does not use off-slab slabs. 197 * For 32bit archs with 4 kB pages, is this 56. 198 * This is not serious, as it is only for large objects, when it is unwise 199 * to have too many per slab. 200 * Note: This limit can be raised by introducing a general cache whose size 201 * is less than 512 (PAGE_SIZE<<3), but greater than 256. 202 */ 203 204typedef unsigned int kmem_bufctl_t; 205#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) 206#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) 207#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2) 208#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) 209 210/* Max number of objs-per-slab for caches which use off-slab slabs. 211 * Needed to avoid a possible looping condition in cache_grow(). 212 */ 213static unsigned long offslab_limit; 214 215/* 216 * struct slab 217 * 218 * Manages the objs in a slab. Placed either at the beginning of mem allocated 219 * for a slab, or allocated from an general cache. 220 * Slabs are chained into three list: fully used, partial, fully free slabs. 221 */ 222struct slab { 223 struct list_head list; 224 unsigned long colouroff; 225 void *s_mem; /* including colour offset */ 226 unsigned int inuse; /* num of objs active in slab */ 227 kmem_bufctl_t free; 228 unsigned short nodeid; 229}; 230 231/* 232 * struct slab_rcu 233 * 234 * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to 235 * arrange for kmem_freepages to be called via RCU. This is useful if 236 * we need to approach a kernel structure obliquely, from its address 237 * obtained without the usual locking. We can lock the structure to 238 * stabilize it and check it's still at the given address, only if we 239 * can be sure that the memory has not been meanwhile reused for some 240 * other kind of object (which our subsystem's lock might corrupt). 241 * 242 * rcu_read_lock before reading the address, then rcu_read_unlock after 243 * taking the spinlock within the structure expected at that address. 244 * 245 * We assume struct slab_rcu can overlay struct slab when destroying. 246 */ 247struct slab_rcu { 248 struct rcu_head head; 249 struct kmem_cache *cachep; 250 void *addr; 251}; 252 253/* 254 * struct array_cache 255 * 256 * Purpose: 257 * - LIFO ordering, to hand out cache-warm objects from _alloc 258 * - reduce the number of linked list operations 259 * - reduce spinlock operations 260 * 261 * The limit is stored in the per-cpu structure to reduce the data cache 262 * footprint. 263 * 264 */ 265struct array_cache { 266 unsigned int avail; 267 unsigned int limit; 268 unsigned int batchcount; 269 unsigned int touched; 270 spinlock_t lock; 271 void *entry[0]; /* 272 * Must have this definition in here for the proper 273 * alignment of array_cache. Also simplifies accessing 274 * the entries. 275 * [0] is for gcc 2.95. It should really be []. 276 */ 277}; 278 279/* 280 * bootstrap: The caches do not work without cpuarrays anymore, but the 281 * cpuarrays are allocated from the generic caches... 282 */ 283#define BOOT_CPUCACHE_ENTRIES 1 284struct arraycache_init { 285 struct array_cache cache; 286 void *entries[BOOT_CPUCACHE_ENTRIES]; 287}; 288 289/* 290 * The slab lists for all objects. 291 */ 292struct kmem_list3 { 293 struct list_head slabs_partial; /* partial list first, better asm code */ 294 struct list_head slabs_full; 295 struct list_head slabs_free; 296 unsigned long free_objects; 297 unsigned int free_limit; 298 unsigned int colour_next; /* Per-node cache coloring */ 299 spinlock_t list_lock; 300 struct array_cache *shared; /* shared per node */ 301 struct array_cache **alien; /* on other nodes */ 302 unsigned long next_reap; /* updated without locking */ 303 int free_touched; /* updated without locking */ 304}; 305 306/* 307 * Need this for bootstrapping a per node allocator. 308 */ 309#define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1) 310struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; 311#define CACHE_CACHE 0 312#define SIZE_AC 1 313#define SIZE_L3 (1 + MAX_NUMNODES) 314 315/* 316 * This function must be completely optimized away if a constant is passed to 317 * it. Mostly the same as what is in linux/slab.h except it returns an index. 318 */ 319static __always_inline int index_of(const size_t size) 320{ 321 extern void __bad_size(void); 322 323 if (__builtin_constant_p(size)) { 324 int i = 0; 325 326#define CACHE(x) \ 327 if (size <=x) \ 328 return i; \ 329 else \ 330 i++; 331#include "linux/kmalloc_sizes.h" 332#undef CACHE 333 __bad_size(); 334 } else 335 __bad_size(); 336 return 0; 337} 338 339#define INDEX_AC index_of(sizeof(struct arraycache_init)) 340#define INDEX_L3 index_of(sizeof(struct kmem_list3)) 341 342static void kmem_list3_init(struct kmem_list3 *parent) 343{ 344 INIT_LIST_HEAD(&parent->slabs_full); 345 INIT_LIST_HEAD(&parent->slabs_partial); 346 INIT_LIST_HEAD(&parent->slabs_free); 347 parent->shared = NULL; 348 parent->alien = NULL; 349 parent->colour_next = 0; 350 spin_lock_init(&parent->list_lock); 351 parent->free_objects = 0; 352 parent->free_touched = 0; 353} 354 355#define MAKE_LIST(cachep, listp, slab, nodeid) \ 356 do { \ 357 INIT_LIST_HEAD(listp); \ 358 list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ 359 } while (0) 360 361#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ 362 do { \ 363 MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ 364 MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ 365 MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ 366 } while (0) 367 368/* 369 * struct kmem_cache 370 * 371 * manages a cache. 372 */ 373 374struct kmem_cache { 375/* 1) per-cpu data, touched during every alloc/free */ 376 struct array_cache *array[NR_CPUS]; 377/* 2) Cache tunables. Protected by cache_chain_mutex */ 378 unsigned int batchcount; 379 unsigned int limit; 380 unsigned int shared; 381 382 unsigned int buffer_size; 383/* 3) touched by every alloc & free from the backend */ 384 struct kmem_list3 *nodelists[MAX_NUMNODES]; 385 386 unsigned int flags; /* constant flags */ 387 unsigned int num; /* # of objs per slab */ 388 389/* 4) cache_grow/shrink */ 390 /* order of pgs per slab (2^n) */ 391 unsigned int gfporder; 392 393 /* force GFP flags, e.g. GFP_DMA */ 394 gfp_t gfpflags; 395 396 size_t colour; /* cache colouring range */ 397 unsigned int colour_off; /* colour offset */ 398 struct kmem_cache *slabp_cache; 399 unsigned int slab_size; 400 unsigned int dflags; /* dynamic flags */ 401 402 /* constructor func */ 403 void (*ctor) (void *, struct kmem_cache *, unsigned long); 404 405 /* de-constructor func */ 406 void (*dtor) (void *, struct kmem_cache *, unsigned long); 407 408/* 5) cache creation/removal */ 409 const char *name; 410 struct list_head next; 411 412/* 6) statistics */ 413#if STATS 414 unsigned long num_active; 415 unsigned long num_allocations; 416 unsigned long high_mark; 417 unsigned long grown; 418 unsigned long reaped; 419 unsigned long errors; 420 unsigned long max_freeable; 421 unsigned long node_allocs; 422 unsigned long node_frees; 423 atomic_t allochit; 424 atomic_t allocmiss; 425 atomic_t freehit; 426 atomic_t freemiss; 427#endif 428#if DEBUG 429 /* 430 * If debugging is enabled, then the allocator can add additional 431 * fields and/or padding to every object. buffer_size contains the total 432 * object size including these internal fields, the following two 433 * variables contain the offset to the user object and its size. 434 */ 435 int obj_offset; 436 int obj_size; 437#endif 438}; 439 440#define CFLGS_OFF_SLAB (0x80000000UL) 441#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) 442 443#define BATCHREFILL_LIMIT 16 444/* 445 * Optimization question: fewer reaps means less probability for unnessary 446 * cpucache drain/refill cycles. 447 * 448 * OTOH the cpuarrays can contain lots of objects, 449 * which could lock up otherwise freeable slabs. 450 */ 451#define REAPTIMEOUT_CPUC (2*HZ) 452#define REAPTIMEOUT_LIST3 (4*HZ) 453 454#if STATS 455#define STATS_INC_ACTIVE(x) ((x)->num_active++) 456#define STATS_DEC_ACTIVE(x) ((x)->num_active--) 457#define STATS_INC_ALLOCED(x) ((x)->num_allocations++) 458#define STATS_INC_GROWN(x) ((x)->grown++) 459#define STATS_INC_REAPED(x) ((x)->reaped++) 460#define STATS_SET_HIGH(x) \ 461 do { \ 462 if ((x)->num_active > (x)->high_mark) \ 463 (x)->high_mark = (x)->num_active; \ 464 } while (0) 465#define STATS_INC_ERR(x) ((x)->errors++) 466#define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) 467#define STATS_INC_NODEFREES(x) ((x)->node_frees++) 468#define STATS_SET_FREEABLE(x, i) \ 469 do { \ 470 if ((x)->max_freeable < i) \ 471 (x)->max_freeable = i; \ 472 } while (0) 473#define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) 474#define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) 475#define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) 476#define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss) 477#else 478#define STATS_INC_ACTIVE(x) do { } while (0) 479#define STATS_DEC_ACTIVE(x) do { } while (0) 480#define STATS_INC_ALLOCED(x) do { } while (0) 481#define STATS_INC_GROWN(x) do { } while (0) 482#define STATS_INC_REAPED(x) do { } while (0) 483#define STATS_SET_HIGH(x) do { } while (0) 484#define STATS_INC_ERR(x) do { } while (0) 485#define STATS_INC_NODEALLOCS(x) do { } while (0) 486#define STATS_INC_NODEFREES(x) do { } while (0) 487#define STATS_SET_FREEABLE(x, i) do { } while (0) 488#define STATS_INC_ALLOCHIT(x) do { } while (0) 489#define STATS_INC_ALLOCMISS(x) do { } while (0) 490#define STATS_INC_FREEHIT(x) do { } while (0) 491#define STATS_INC_FREEMISS(x) do { } while (0) 492#endif 493 494#if DEBUG 495/* 496 * Magic nums for obj red zoning. 497 * Placed in the first word before and the first word after an obj. 498 */ 499#define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */ 500#define RED_ACTIVE 0x170FC2A5UL /* when obj is active */ 501 502/* ...and for poisoning */ 503#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */ 504#define POISON_FREE 0x6b /* for use-after-free poisoning */ 505#define POISON_END 0xa5 /* end-byte of poisoning */ 506 507/* 508 * memory layout of objects: 509 * 0 : objp 510 * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that 511 * the end of an object is aligned with the end of the real 512 * allocation. Catches writes behind the end of the allocation. 513 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1: 514 * redzone word. 515 * cachep->obj_offset: The real object. 516 * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] 517 * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address 518 * [BYTES_PER_WORD long] 519 */ 520static int obj_offset(struct kmem_cache *cachep) 521{ 522 return cachep->obj_offset; 523} 524 525static int obj_size(struct kmem_cache *cachep) 526{ 527 return cachep->obj_size; 528} 529 530static unsigned long *dbg_redzone1(struct kmem_cache *cachep, void *objp) 531{ 532 BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); 533 return (unsigned long*) (objp+obj_offset(cachep)-BYTES_PER_WORD); 534} 535 536static unsigned long *dbg_redzone2(struct kmem_cache *cachep, void *objp) 537{ 538 BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); 539 if (cachep->flags & SLAB_STORE_USER) 540 return (unsigned long *)(objp + cachep->buffer_size - 541 2 * BYTES_PER_WORD); 542 return (unsigned long *)(objp + cachep->buffer_size - BYTES_PER_WORD); 543} 544 545static void **dbg_userword(struct kmem_cache *cachep, void *objp) 546{ 547 BUG_ON(!(cachep->flags & SLAB_STORE_USER)); 548 return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD); 549} 550 551#else 552 553#define obj_offset(x) 0 554#define obj_size(cachep) (cachep->buffer_size) 555#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long *)NULL;}) 556#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long *)NULL;}) 557#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;}) 558 559#endif 560 561/* 562 * Maximum size of an obj (in 2^order pages) and absolute limit for the gfp 563 * order. 564 */ 565#if defined(CONFIG_LARGE_ALLOCS) 566#define MAX_OBJ_ORDER 13 /* up to 32Mb */ 567#define MAX_GFP_ORDER 13 /* up to 32Mb */ 568#elif defined(CONFIG_MMU) 569#define MAX_OBJ_ORDER 5 /* 32 pages */ 570#define MAX_GFP_ORDER 5 /* 32 pages */ 571#else 572#define MAX_OBJ_ORDER 8 /* up to 1Mb */ 573#define MAX_GFP_ORDER 8 /* up to 1Mb */ 574#endif 575 576/* 577 * Do not go above this order unless 0 objects fit into the slab. 578 */ 579#define BREAK_GFP_ORDER_HI 1 580#define BREAK_GFP_ORDER_LO 0 581static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; 582 583/* 584 * Functions for storing/retrieving the cachep and or slab from the page 585 * allocator. These are used to find the slab an obj belongs to. With kfree(), 586 * these are used to find the cache which an obj belongs to. 587 */ 588static inline void page_set_cache(struct page *page, struct kmem_cache *cache) 589{ 590 page->lru.next = (struct list_head *)cache; 591} 592 593static inline struct kmem_cache *page_get_cache(struct page *page) 594{ 595 if (unlikely(PageCompound(page))) 596 page = (struct page *)page_private(page); 597 return (struct kmem_cache *)page->lru.next; 598} 599 600static inline void page_set_slab(struct page *page, struct slab *slab) 601{ 602 page->lru.prev = (struct list_head *)slab; 603} 604 605static inline struct slab *page_get_slab(struct page *page) 606{ 607 if (unlikely(PageCompound(page))) 608 page = (struct page *)page_private(page); 609 return (struct slab *)page->lru.prev; 610} 611 612static inline struct kmem_cache *virt_to_cache(const void *obj) 613{ 614 struct page *page = virt_to_page(obj); 615 return page_get_cache(page); 616} 617 618static inline struct slab *virt_to_slab(const void *obj) 619{ 620 struct page *page = virt_to_page(obj); 621 return page_get_slab(page); 622} 623 624static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, 625 unsigned int idx) 626{ 627 return slab->s_mem + cache->buffer_size * idx; 628} 629 630static inline unsigned int obj_to_index(struct kmem_cache *cache, 631 struct slab *slab, void *obj) 632{ 633 return (unsigned)(obj - slab->s_mem) / cache->buffer_size; 634} 635 636/* 637 * These are the default caches for kmalloc. Custom caches can have other sizes. 638 */ 639struct cache_sizes malloc_sizes[] = { 640#define CACHE(x) { .cs_size = (x) }, 641#include <linux/kmalloc_sizes.h> 642 CACHE(ULONG_MAX) 643#undef CACHE 644}; 645EXPORT_SYMBOL(malloc_sizes); 646 647/* Must match cache_sizes above. Out of line to keep cache footprint low. */ 648struct cache_names { 649 char *name; 650 char *name_dma; 651}; 652 653static struct cache_names __initdata cache_names[] = { 654#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" }, 655#include <linux/kmalloc_sizes.h> 656 {NULL,} 657#undef CACHE 658}; 659 660static struct arraycache_init initarray_cache __initdata = 661 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; 662static struct arraycache_init initarray_generic = 663 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; 664 665/* internal cache of cache description objs */ 666static struct kmem_cache cache_cache = { 667 .batchcount = 1, 668 .limit = BOOT_CPUCACHE_ENTRIES, 669 .shared = 1, 670 .buffer_size = sizeof(struct kmem_cache), 671 .name = "kmem_cache", 672#if DEBUG 673 .obj_size = sizeof(struct kmem_cache), 674#endif 675}; 676 677/* Guard access to the cache-chain. */ 678static DEFINE_MUTEX(cache_chain_mutex); 679static struct list_head cache_chain; 680 681/* 682 * vm_enough_memory() looks at this to determine how many slab-allocated pages 683 * are possibly freeable under pressure 684 * 685 * SLAB_RECLAIM_ACCOUNT turns this on per-slab 686 */ 687atomic_t slab_reclaim_pages; 688 689/* 690 * chicken and egg problem: delay the per-cpu array allocation 691 * until the general caches are up. 692 */ 693static enum { 694 NONE, 695 PARTIAL_AC, 696 PARTIAL_L3, 697 FULL 698} g_cpucache_up; 699 700static DEFINE_PER_CPU(struct work_struct, reap_work); 701 702static void free_block(struct kmem_cache *cachep, void **objpp, int len, 703 int node); 704static void enable_cpucache(struct kmem_cache *cachep); 705static void cache_reap(void *unused); 706static int __node_shrink(struct kmem_cache *cachep, int node); 707 708static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) 709{ 710 return cachep->array[smp_processor_id()]; 711} 712 713static inline struct kmem_cache *__find_general_cachep(size_t size, 714 gfp_t gfpflags) 715{ 716 struct cache_sizes *csizep = malloc_sizes; 717 718#if DEBUG 719 /* This happens if someone tries to call 720 * kmem_cache_create(), or __kmalloc(), before 721 * the generic caches are initialized. 722 */ 723 BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL); 724#endif 725 while (size > csizep->cs_size) 726 csizep++; 727 728 /* 729 * Really subtle: The last entry with cs->cs_size==ULONG_MAX 730 * has cs_{dma,}cachep==NULL. Thus no special case 731 * for large kmalloc calls required. 732 */ 733 if (unlikely(gfpflags & GFP_DMA)) 734 return csizep->cs_dmacachep; 735 return csizep->cs_cachep; 736} 737 738struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) 739{ 740 return __find_general_cachep(size, gfpflags); 741} 742EXPORT_SYMBOL(kmem_find_general_cachep); 743 744static size_t slab_mgmt_size(size_t nr_objs, size_t align) 745{ 746 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); 747} 748 749/* 750 * Calculate the number of objects and left-over bytes for a given buffer size. 751 */ 752static void cache_estimate(unsigned long gfporder, size_t buffer_size, 753 size_t align, int flags, size_t *left_over, 754 unsigned int *num) 755{ 756 int nr_objs; 757 size_t mgmt_size; 758 size_t slab_size = PAGE_SIZE << gfporder; 759 760 /* 761 * The slab management structure can be either off the slab or 762 * on it. For the latter case, the memory allocated for a 763 * slab is used for: 764 * 765 * - The struct slab 766 * - One kmem_bufctl_t for each object 767 * - Padding to respect alignment of @align 768 * - @buffer_size bytes for each object 769 * 770 * If the slab management structure is off the slab, then the 771 * alignment will already be calculated into the size. Because 772 * the slabs are all pages aligned, the objects will be at the 773 * correct alignment when allocated. 774 */ 775 if (flags & CFLGS_OFF_SLAB) { 776 mgmt_size = 0; 777 nr_objs = slab_size / buffer_size; 778 779 if (nr_objs > SLAB_LIMIT) 780 nr_objs = SLAB_LIMIT; 781 } else { 782 /* 783 * Ignore padding for the initial guess. The padding 784 * is at most @align-1 bytes, and @buffer_size is at 785 * least @align. In the worst case, this result will 786 * be one greater than the number of objects that fit 787 * into the memory allocation when taking the padding 788 * into account. 789 */ 790 nr_objs = (slab_size - sizeof(struct slab)) / 791 (buffer_size + sizeof(kmem_bufctl_t)); 792 793 /* 794 * This calculated number will be either the right 795 * amount, or one greater than what we want. 796 */ 797 if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size 798 > slab_size) 799 nr_objs--; 800 801 if (nr_objs > SLAB_LIMIT) 802 nr_objs = SLAB_LIMIT; 803 804 mgmt_size = slab_mgmt_size(nr_objs, align); 805 } 806 *num = nr_objs; 807 *left_over = slab_size - nr_objs*buffer_size - mgmt_size; 808} 809 810#define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) 811 812static void __slab_error(const char *function, struct kmem_cache *cachep, 813 char *msg) 814{ 815 printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", 816 function, cachep->name, msg); 817 dump_stack(); 818} 819 820#ifdef CONFIG_NUMA 821/* 822 * Special reaping functions for NUMA systems called from cache_reap(). 823 * These take care of doing round robin flushing of alien caches (containing 824 * objects freed on different nodes from which they were allocated) and the 825 * flushing of remote pcps by calling drain_node_pages. 826 */ 827static DEFINE_PER_CPU(unsigned long, reap_node); 828 829static void init_reap_node(int cpu) 830{ 831 int node; 832 833 node = next_node(cpu_to_node(cpu), node_online_map); 834 if (node == MAX_NUMNODES) 835 node = first_node(node_online_map); 836 837 __get_cpu_var(reap_node) = node; 838} 839 840static void next_reap_node(void) 841{ 842 int node = __get_cpu_var(reap_node); 843 844 /* 845 * Also drain per cpu pages on remote zones 846 */ 847 if (node != numa_node_id()) 848 drain_node_pages(node); 849 850 node = next_node(node, node_online_map); 851 if (unlikely(node >= MAX_NUMNODES)) 852 node = first_node(node_online_map); 853 __get_cpu_var(reap_node) = node; 854} 855 856#else 857#define init_reap_node(cpu) do { } while (0) 858#define next_reap_node(void) do { } while (0) 859#endif 860 861/* 862 * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz 863 * via the workqueue/eventd. 864 * Add the CPU number into the expiration time to minimize the possibility of 865 * the CPUs getting into lockstep and contending for the global cache chain 866 * lock. 867 */ 868static void __devinit start_cpu_timer(int cpu) 869{ 870 struct work_struct *reap_work = &per_cpu(reap_work, cpu); 871 872 /* 873 * When this gets called from do_initcalls via cpucache_init(), 874 * init_workqueues() has already run, so keventd will be setup 875 * at that time. 876 */ 877 if (keventd_up() && reap_work->func == NULL) { 878 init_reap_node(cpu); 879 INIT_WORK(reap_work, cache_reap, NULL); 880 schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); 881 } 882} 883 884static struct array_cache *alloc_arraycache(int node, int entries, 885 int batchcount) 886{ 887 int memsize = sizeof(void *) * entries + sizeof(struct array_cache); 888 struct array_cache *nc = NULL; 889 890 nc = kmalloc_node(memsize, GFP_KERNEL, node); 891 if (nc) { 892 nc->avail = 0; 893 nc->limit = entries; 894 nc->batchcount = batchcount; 895 nc->touched = 0; 896 spin_lock_init(&nc->lock); 897 } 898 return nc; 899} 900 901#ifdef CONFIG_NUMA 902static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int); 903static void *alternate_node_alloc(struct kmem_cache *, gfp_t); 904 905static struct array_cache **alloc_alien_cache(int node, int limit) 906{ 907 struct array_cache **ac_ptr; 908 int memsize = sizeof(void *) * MAX_NUMNODES; 909 int i; 910 911 if (limit > 1) 912 limit = 12; 913 ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node); 914 if (ac_ptr) { 915 for_each_node(i) { 916 if (i == node || !node_online(i)) { 917 ac_ptr[i] = NULL; 918 continue; 919 } 920 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d); 921 if (!ac_ptr[i]) { 922 for (i--; i <= 0; i--) 923 kfree(ac_ptr[i]); 924 kfree(ac_ptr); 925 return NULL; 926 } 927 } 928 } 929 return ac_ptr; 930} 931 932static void free_alien_cache(struct array_cache **ac_ptr) 933{ 934 int i; 935 936 if (!ac_ptr) 937 return; 938 for_each_node(i) 939 kfree(ac_ptr[i]); 940 kfree(ac_ptr); 941} 942 943static void __drain_alien_cache(struct kmem_cache *cachep, 944 struct array_cache *ac, int node) 945{ 946 struct kmem_list3 *rl3 = cachep->nodelists[node]; 947 948 if (ac->avail) { 949 spin_lock(&rl3->list_lock); 950 free_block(cachep, ac->entry, ac->avail, node); 951 ac->avail = 0; 952 spin_unlock(&rl3->list_lock); 953 } 954} 955 956/* 957 * Called from cache_reap() to regularly drain alien caches round robin. 958 */ 959static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) 960{ 961 int node = __get_cpu_var(reap_node); 962 963 if (l3->alien) { 964 struct array_cache *ac = l3->alien[node]; 965 if (ac && ac->avail) { 966 spin_lock_irq(&ac->lock); 967 __drain_alien_cache(cachep, ac, node); 968 spin_unlock_irq(&ac->lock); 969 } 970 } 971} 972 973static void drain_alien_cache(struct kmem_cache *cachep, 974 struct array_cache **alien) 975{ 976 int i = 0; 977 struct array_cache *ac; 978 unsigned long flags; 979 980 for_each_online_node(i) { 981 ac = alien[i]; 982 if (ac) { 983 spin_lock_irqsave(&ac->lock, flags); 984 __drain_alien_cache(cachep, ac, i); 985 spin_unlock_irqrestore(&ac->lock, flags); 986 } 987 } 988} 989#else 990 991#define drain_alien_cache(cachep, alien) do { } while (0) 992#define reap_alien(cachep, l3) do { } while (0) 993 994static inline struct array_cache **alloc_alien_cache(int node, int limit) 995{ 996 return (struct array_cache **) 0x01020304ul; 997} 998 999static inline void free_alien_cache(struct array_cache **ac_ptr) 1000{ 1001} 1002 1003#endif 1004 1005static int __devinit cpuup_callback(struct notifier_block *nfb, 1006 unsigned long action, void *hcpu) 1007{ 1008 long cpu = (long)hcpu; 1009 struct kmem_cache *cachep; 1010 struct kmem_list3 *l3 = NULL; 1011 int node = cpu_to_node(cpu); 1012 int memsize = sizeof(struct kmem_list3); 1013 1014 switch (action) { 1015 case CPU_UP_PREPARE: 1016 mutex_lock(&cache_chain_mutex); 1017 /* 1018 * We need to do this right in the beginning since 1019 * alloc_arraycache's are going to use this list. 1020 * kmalloc_node allows us to add the slab to the right 1021 * kmem_list3 and not this cpu's kmem_list3 1022 */ 1023 1024 list_for_each_entry(cachep, &cache_chain, next) { 1025 /* 1026 * Set up the size64 kmemlist for cpu before we can 1027 * begin anything. Make sure some other cpu on this 1028 * node has not already allocated this 1029 */ 1030 if (!cachep->nodelists[node]) { 1031 l3 = kmalloc_node(memsize, GFP_KERNEL, node); 1032 if (!l3) 1033 goto bad; 1034 kmem_list3_init(l3); 1035 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + 1036 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 1037 1038 /* 1039 * The l3s don't come and go as CPUs come and 1040 * go. cache_chain_mutex is sufficient 1041 * protection here. 1042 */ 1043 cachep->nodelists[node] = l3; 1044 } 1045 1046 spin_lock_irq(&cachep->nodelists[node]->list_lock); 1047 cachep->nodelists[node]->free_limit = 1048 (1 + nr_cpus_node(node)) * 1049 cachep->batchcount + cachep->num; 1050 spin_unlock_irq(&cachep->nodelists[node]->list_lock); 1051 } 1052 1053 /* 1054 * Now we can go ahead with allocating the shared arrays and 1055 * array caches 1056 */ 1057 list_for_each_entry(cachep, &cache_chain, next) { 1058 struct array_cache *nc; 1059 struct array_cache *shared; 1060 struct array_cache **alien; 1061 1062 nc = alloc_arraycache(node, cachep->limit, 1063 cachep->batchcount); 1064 if (!nc) 1065 goto bad; 1066 shared = alloc_arraycache(node, 1067 cachep->shared * cachep->batchcount, 1068 0xbaadf00d); 1069 if (!shared) 1070 goto bad; 1071 1072 alien = alloc_alien_cache(node, cachep->limit); 1073 if (!alien) 1074 goto bad; 1075 cachep->array[cpu] = nc; 1076 l3 = cachep->nodelists[node]; 1077 BUG_ON(!l3); 1078 1079 spin_lock_irq(&l3->list_lock); 1080 if (!l3->shared) { 1081 /* 1082 * We are serialised from CPU_DEAD or 1083 * CPU_UP_CANCELLED by the cpucontrol lock 1084 */ 1085 l3->shared = shared; 1086 shared = NULL; 1087 } 1088#ifdef CONFIG_NUMA 1089 if (!l3->alien) { 1090 l3->alien = alien; 1091 alien = NULL; 1092 } 1093#endif 1094 spin_unlock_irq(&l3->list_lock); 1095 kfree(shared); 1096 free_alien_cache(alien); 1097 } 1098 mutex_unlock(&cache_chain_mutex); 1099 break; 1100 case CPU_ONLINE: 1101 start_cpu_timer(cpu); 1102 break; 1103#ifdef CONFIG_HOTPLUG_CPU 1104 case CPU_DEAD: 1105 /* 1106 * Even if all the cpus of a node are down, we don't free the 1107 * kmem_list3 of any cache. This to avoid a race between 1108 * cpu_down, and a kmalloc allocation from another cpu for 1109 * memory from the node of the cpu going down. The list3 1110 * structure is usually allocated from kmem_cache_create() and 1111 * gets destroyed at kmem_cache_destroy(). 1112 */ 1113 /* fall thru */ 1114 case CPU_UP_CANCELED: 1115 mutex_lock(&cache_chain_mutex); 1116 list_for_each_entry(cachep, &cache_chain, next) { 1117 struct array_cache *nc; 1118 struct array_cache *shared; 1119 struct array_cache **alien; 1120 cpumask_t mask; 1121 1122 mask = node_to_cpumask(node); 1123 /* cpu is dead; no one can alloc from it. */ 1124 nc = cachep->array[cpu]; 1125 cachep->array[cpu] = NULL; 1126 l3 = cachep->nodelists[node]; 1127 1128 if (!l3) 1129 goto free_array_cache; 1130 1131 spin_lock_irq(&l3->list_lock); 1132 1133 /* Free limit for this kmem_list3 */ 1134 l3->free_limit -= cachep->batchcount; 1135 if (nc) 1136 free_block(cachep, nc->entry, nc->avail, node); 1137 1138 if (!cpus_empty(mask)) { 1139 spin_unlock_irq(&l3->list_lock); 1140 goto free_array_cache; 1141 } 1142 1143 shared = l3->shared; 1144 if (shared) { 1145 free_block(cachep, l3->shared->entry, 1146 l3->shared->avail, node); 1147 l3->shared = NULL; 1148 } 1149 1150 alien = l3->alien; 1151 l3->alien = NULL; 1152 1153 spin_unlock_irq(&l3->list_lock); 1154 1155 kfree(shared); 1156 if (alien) { 1157 drain_alien_cache(cachep, alien); 1158 free_alien_cache(alien); 1159 } 1160free_array_cache: 1161 kfree(nc); 1162 } 1163 /* 1164 * In the previous loop, all the objects were freed to 1165 * the respective cache's slabs, now we can go ahead and 1166 * shrink each nodelist to its limit. 1167 */ 1168 list_for_each_entry(cachep, &cache_chain, next) { 1169 l3 = cachep->nodelists[node]; 1170 if (!l3) 1171 continue; 1172 spin_lock_irq(&l3->list_lock); 1173 /* free slabs belonging to this node */ 1174 __node_shrink(cachep, node); 1175 spin_unlock_irq(&l3->list_lock); 1176 } 1177 mutex_unlock(&cache_chain_mutex); 1178 break; 1179#endif 1180 } 1181 return NOTIFY_OK; 1182bad: 1183 mutex_unlock(&cache_chain_mutex); 1184 return NOTIFY_BAD; 1185} 1186 1187static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 }; 1188 1189/* 1190 * swap the static kmem_list3 with kmalloced memory 1191 */ 1192static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, 1193 int nodeid) 1194{ 1195 struct kmem_list3 *ptr; 1196 1197 BUG_ON(cachep->nodelists[nodeid] != list); 1198 ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid); 1199 BUG_ON(!ptr); 1200 1201 local_irq_disable(); 1202 memcpy(ptr, list, sizeof(struct kmem_list3)); 1203 MAKE_ALL_LISTS(cachep, ptr, nodeid); 1204 cachep->nodelists[nodeid] = ptr; 1205 local_irq_enable(); 1206} 1207 1208/* 1209 * Initialisation. Called after the page allocator have been initialised and 1210 * before smp_init(). 1211 */ 1212void __init kmem_cache_init(void) 1213{ 1214 size_t left_over; 1215 struct cache_sizes *sizes; 1216 struct cache_names *names; 1217 int i; 1218 int order; 1219 1220 for (i = 0; i < NUM_INIT_LISTS; i++) { 1221 kmem_list3_init(&initkmem_list3[i]); 1222 if (i < MAX_NUMNODES) 1223 cache_cache.nodelists[i] = NULL; 1224 } 1225 1226 /* 1227 * Fragmentation resistance on low memory - only use bigger 1228 * page orders on machines with more than 32MB of memory. 1229 */ 1230 if (num_physpages > (32 << 20) >> PAGE_SHIFT) 1231 slab_break_gfp_order = BREAK_GFP_ORDER_HI; 1232 1233 /* Bootstrap is tricky, because several objects are allocated 1234 * from caches that do not exist yet: 1235 * 1) initialize the cache_cache cache: it contains the struct 1236 * kmem_cache structures of all caches, except cache_cache itself: 1237 * cache_cache is statically allocated. 1238 * Initially an __init data area is used for the head array and the 1239 * kmem_list3 structures, it's replaced with a kmalloc allocated 1240 * array at the end of the bootstrap. 1241 * 2) Create the first kmalloc cache. 1242 * The struct kmem_cache for the new cache is allocated normally. 1243 * An __init data area is used for the head array. 1244 * 3) Create the remaining kmalloc caches, with minimally sized 1245 * head arrays. 1246 * 4) Replace the __init data head arrays for cache_cache and the first 1247 * kmalloc cache with kmalloc allocated arrays. 1248 * 5) Replace the __init data for kmem_list3 for cache_cache and 1249 * the other cache's with kmalloc allocated memory. 1250 * 6) Resize the head arrays of the kmalloc caches to their final sizes. 1251 */ 1252 1253 /* 1) create the cache_cache */ 1254 INIT_LIST_HEAD(&cache_chain); 1255 list_add(&cache_cache.next, &cache_chain); 1256 cache_cache.colour_off = cache_line_size(); 1257 cache_cache.array[smp_processor_id()] = &initarray_cache.cache; 1258 cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE]; 1259 1260 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, 1261 cache_line_size()); 1262 1263 for (order = 0; order < MAX_ORDER; order++) { 1264 cache_estimate(order, cache_cache.buffer_size, 1265 cache_line_size(), 0, &left_over, &cache_cache.num); 1266 if (cache_cache.num) 1267 break; 1268 } 1269 if (!cache_cache.num) 1270 BUG(); 1271 cache_cache.gfporder = order; 1272 cache_cache.colour = left_over / cache_cache.colour_off; 1273 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + 1274 sizeof(struct slab), cache_line_size()); 1275 1276 /* 2+3) create the kmalloc caches */ 1277 sizes = malloc_sizes; 1278 names = cache_names; 1279 1280 /* 1281 * Initialize the caches that provide memory for the array cache and the 1282 * kmem_list3 structures first. Without this, further allocations will 1283 * bug. 1284 */ 1285 1286 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, 1287 sizes[INDEX_AC].cs_size, 1288 ARCH_KMALLOC_MINALIGN, 1289 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1290 NULL, NULL); 1291 1292 if (INDEX_AC != INDEX_L3) { 1293 sizes[INDEX_L3].cs_cachep = 1294 kmem_cache_create(names[INDEX_L3].name, 1295 sizes[INDEX_L3].cs_size, 1296 ARCH_KMALLOC_MINALIGN, 1297 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1298 NULL, NULL); 1299 } 1300 1301 while (sizes->cs_size != ULONG_MAX) { 1302 /* 1303 * For performance, all the general caches are L1 aligned. 1304 * This should be particularly beneficial on SMP boxes, as it 1305 * eliminates "false sharing". 1306 * Note for systems short on memory removing the alignment will 1307 * allow tighter packing of the smaller caches. 1308 */ 1309 if (!sizes->cs_cachep) { 1310 sizes->cs_cachep = kmem_cache_create(names->name, 1311 sizes->cs_size, 1312 ARCH_KMALLOC_MINALIGN, 1313 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1314 NULL, NULL); 1315 } 1316 1317 /* Inc off-slab bufctl limit until the ceiling is hit. */ 1318 if (!(OFF_SLAB(sizes->cs_cachep))) { 1319 offslab_limit = sizes->cs_size - sizeof(struct slab); 1320 offslab_limit /= sizeof(kmem_bufctl_t); 1321 } 1322 1323 sizes->cs_dmacachep = kmem_cache_create(names->name_dma, 1324 sizes->cs_size, 1325 ARCH_KMALLOC_MINALIGN, 1326 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| 1327 SLAB_PANIC, 1328 NULL, NULL); 1329 sizes++; 1330 names++; 1331 } 1332 /* 4) Replace the bootstrap head arrays */ 1333 { 1334 void *ptr; 1335 1336 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); 1337 1338 local_irq_disable(); 1339 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); 1340 memcpy(ptr, cpu_cache_get(&cache_cache), 1341 sizeof(struct arraycache_init)); 1342 cache_cache.array[smp_processor_id()] = ptr; 1343 local_irq_enable(); 1344 1345 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); 1346 1347 local_irq_disable(); 1348 BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) 1349 != &initarray_generic.cache); 1350 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), 1351 sizeof(struct arraycache_init)); 1352 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = 1353 ptr; 1354 local_irq_enable(); 1355 } 1356 /* 5) Replace the bootstrap kmem_list3's */ 1357 { 1358 int node; 1359 /* Replace the static kmem_list3 structures for the boot cpu */ 1360 init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], 1361 numa_node_id()); 1362 1363 for_each_online_node(node) { 1364 init_list(malloc_sizes[INDEX_AC].cs_cachep, 1365 &initkmem_list3[SIZE_AC + node], node); 1366 1367 if (INDEX_AC != INDEX_L3) { 1368 init_list(malloc_sizes[INDEX_L3].cs_cachep, 1369 &initkmem_list3[SIZE_L3 + node], 1370 node); 1371 } 1372 } 1373 } 1374 1375 /* 6) resize the head arrays to their final sizes */ 1376 { 1377 struct kmem_cache *cachep; 1378 mutex_lock(&cache_chain_mutex); 1379 list_for_each_entry(cachep, &cache_chain, next) 1380 enable_cpucache(cachep); 1381 mutex_unlock(&cache_chain_mutex); 1382 } 1383 1384 /* Done! */ 1385 g_cpucache_up = FULL; 1386 1387 /* 1388 * Register a cpu startup notifier callback that initializes 1389 * cpu_cache_get for all new cpus 1390 */ 1391 register_cpu_notifier(&cpucache_notifier); 1392 1393 /* 1394 * The reap timers are started later, with a module init call: That part 1395 * of the kernel is not yet operational. 1396 */ 1397} 1398 1399static int __init cpucache_init(void) 1400{ 1401 int cpu; 1402 1403 /* 1404 * Register the timers that return unneeded pages to the page allocator 1405 */ 1406 for_each_online_cpu(cpu) 1407 start_cpu_timer(cpu); 1408 return 0; 1409} 1410__initcall(cpucache_init); 1411 1412/* 1413 * Interface to system's page allocator. No need to hold the cache-lock. 1414 * 1415 * If we requested dmaable memory, we will get it. Even if we 1416 * did not request dmaable memory, we might get it, but that 1417 * would be relatively rare and ignorable. 1418 */ 1419static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) 1420{ 1421 struct page *page; 1422 void *addr; 1423 int i; 1424 1425 flags |= cachep->gfpflags; 1426 page = alloc_pages_node(nodeid, flags, cachep->gfporder); 1427 if (!page) 1428 return NULL; 1429 addr = page_address(page); 1430 1431 i = (1 << cachep->gfporder); 1432 if (cachep->flags & SLAB_RECLAIM_ACCOUNT) 1433 atomic_add(i, &slab_reclaim_pages); 1434 add_page_state(nr_slab, i); 1435 while (i--) { 1436 __SetPageSlab(page); 1437 page++; 1438 } 1439 return addr; 1440} 1441 1442/* 1443 * Interface to system's page release. 1444 */ 1445static void kmem_freepages(struct kmem_cache *cachep, void *addr) 1446{ 1447 unsigned long i = (1 << cachep->gfporder); 1448 struct page *page = virt_to_page(addr); 1449 const unsigned long nr_freed = i; 1450 1451 while (i--) { 1452 BUG_ON(!PageSlab(page)); 1453 __ClearPageSlab(page); 1454 page++; 1455 } 1456 sub_page_state(nr_slab, nr_freed); 1457 if (current->reclaim_state) 1458 current->reclaim_state->reclaimed_slab += nr_freed; 1459 free_pages((unsigned long)addr, cachep->gfporder); 1460 if (cachep->flags & SLAB_RECLAIM_ACCOUNT) 1461 atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages); 1462} 1463 1464static void kmem_rcu_free(struct rcu_head *head) 1465{ 1466 struct slab_rcu *slab_rcu = (struct slab_rcu *)head; 1467 struct kmem_cache *cachep = slab_rcu->cachep; 1468 1469 kmem_freepages(cachep, slab_rcu->addr); 1470 if (OFF_SLAB(cachep)) 1471 kmem_cache_free(cachep->slabp_cache, slab_rcu); 1472} 1473 1474#if DEBUG 1475 1476#ifdef CONFIG_DEBUG_PAGEALLOC 1477static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, 1478 unsigned long caller) 1479{ 1480 int size = obj_size(cachep); 1481 1482 addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)]; 1483 1484 if (size < 5 * sizeof(unsigned long)) 1485 return; 1486 1487 *addr++ = 0x12345678; 1488 *addr++ = caller; 1489 *addr++ = smp_processor_id(); 1490 size -= 3 * sizeof(unsigned long); 1491 { 1492 unsigned long *sptr = &caller; 1493 unsigned long svalue; 1494 1495 while (!kstack_end(sptr)) { 1496 svalue = *sptr++; 1497 if (kernel_text_address(svalue)) { 1498 *addr++ = svalue; 1499 size -= sizeof(unsigned long); 1500 if (size <= sizeof(unsigned long)) 1501 break; 1502 } 1503 } 1504 1505 } 1506 *addr++ = 0x87654321; 1507} 1508#endif 1509 1510static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) 1511{ 1512 int size = obj_size(cachep); 1513 addr = &((char *)addr)[obj_offset(cachep)]; 1514 1515 memset(addr, val, size); 1516 *(unsigned char *)(addr + size - 1) = POISON_END; 1517} 1518 1519static void dump_line(char *data, int offset, int limit) 1520{ 1521 int i; 1522 printk(KERN_ERR "%03x:", offset); 1523 for (i = 0; i < limit; i++) 1524 printk(" %02x", (unsigned char)data[offset + i]); 1525 printk("\n"); 1526} 1527#endif 1528 1529#if DEBUG 1530 1531static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines) 1532{ 1533 int i, size; 1534 char *realobj; 1535 1536 if (cachep->flags & SLAB_RED_ZONE) { 1537 printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n", 1538 *dbg_redzone1(cachep, objp), 1539 *dbg_redzone2(cachep, objp)); 1540 } 1541 1542 if (cachep->flags & SLAB_STORE_USER) { 1543 printk(KERN_ERR "Last user: [<%p>]", 1544 *dbg_userword(cachep, objp)); 1545 print_symbol("(%s)", 1546 (unsigned long)*dbg_userword(cachep, objp)); 1547 printk("\n"); 1548 } 1549 realobj = (char *)objp + obj_offset(cachep); 1550 size = obj_size(cachep); 1551 for (i = 0; i < size && lines; i += 16, lines--) { 1552 int limit; 1553 limit = 16; 1554 if (i + limit > size) 1555 limit = size - i; 1556 dump_line(realobj, i, limit); 1557 } 1558} 1559 1560static void check_poison_obj(struct kmem_cache *cachep, void *objp) 1561{ 1562 char *realobj; 1563 int size, i; 1564 int lines = 0; 1565 1566 realobj = (char *)objp + obj_offset(cachep); 1567 size = obj_size(cachep); 1568 1569 for (i = 0; i < size; i++) { 1570 char exp = POISON_FREE; 1571 if (i == size - 1) 1572 exp = POISON_END; 1573 if (realobj[i] != exp) { 1574 int limit; 1575 /* Mismatch ! */ 1576 /* Print header */ 1577 if (lines == 0) { 1578 printk(KERN_ERR 1579 "Slab corruption: start=%p, len=%d\n", 1580 realobj, size); 1581 print_objinfo(cachep, objp, 0); 1582 } 1583 /* Hexdump the affected line */ 1584 i = (i / 16) * 16; 1585 limit = 16; 1586 if (i + limit > size) 1587 limit = size - i; 1588 dump_line(realobj, i, limit); 1589 i += 16; 1590 lines++; 1591 /* Limit to 5 lines */ 1592 if (lines > 5) 1593 break; 1594 } 1595 } 1596 if (lines != 0) { 1597 /* Print some data about the neighboring objects, if they 1598 * exist: 1599 */ 1600 struct slab *slabp = virt_to_slab(objp); 1601 unsigned int objnr; 1602 1603 objnr = obj_to_index(cachep, slabp, objp); 1604 if (objnr) { 1605 objp = index_to_obj(cachep, slabp, objnr - 1); 1606 realobj = (char *)objp + obj_offset(cachep); 1607 printk(KERN_ERR "Prev obj: start=%p, len=%d\n", 1608 realobj, size); 1609 print_objinfo(cachep, objp, 2); 1610 } 1611 if (objnr + 1 < cachep->num) { 1612 objp = index_to_obj(cachep, slabp, objnr + 1); 1613 realobj = (char *)objp + obj_offset(cachep); 1614 printk(KERN_ERR "Next obj: start=%p, len=%d\n", 1615 realobj, size); 1616 print_objinfo(cachep, objp, 2); 1617 } 1618 } 1619} 1620#endif 1621 1622#if DEBUG 1623/** 1624 * slab_destroy_objs - destroy a slab and its objects 1625 * @cachep: cache pointer being destroyed 1626 * @slabp: slab pointer being destroyed 1627 * 1628 * Call the registered destructor for each object in a slab that is being 1629 * destroyed. 1630 */ 1631static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) 1632{ 1633 int i; 1634 for (i = 0; i < cachep->num; i++) { 1635 void *objp = index_to_obj(cachep, slabp, i); 1636 1637 if (cachep->flags & SLAB_POISON) { 1638#ifdef CONFIG_DEBUG_PAGEALLOC 1639 if (cachep->buffer_size % PAGE_SIZE == 0 && 1640 OFF_SLAB(cachep)) 1641 kernel_map_pages(virt_to_page(objp), 1642 cachep->buffer_size / PAGE_SIZE, 1); 1643 else 1644 check_poison_obj(cachep, objp); 1645#else 1646 check_poison_obj(cachep, objp); 1647#endif 1648 } 1649 if (cachep->flags & SLAB_RED_ZONE) { 1650 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) 1651 slab_error(cachep, "start of a freed object " 1652 "was overwritten"); 1653 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) 1654 slab_error(cachep, "end of a freed object " 1655 "was overwritten"); 1656 } 1657 if (cachep->dtor && !(cachep->flags & SLAB_POISON)) 1658 (cachep->dtor) (objp + obj_offset(cachep), cachep, 0); 1659 } 1660} 1661#else 1662static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) 1663{ 1664 if (cachep->dtor) { 1665 int i; 1666 for (i = 0; i < cachep->num; i++) { 1667 void *objp = index_to_obj(cachep, slabp, i); 1668 (cachep->dtor) (objp, cachep, 0); 1669 } 1670 } 1671} 1672#endif 1673 1674/** 1675 * slab_destroy - destroy and release all objects in a slab 1676 * @cachep: cache pointer being destroyed 1677 * @slabp: slab pointer being destroyed 1678 * 1679 * Destroy all the objs in a slab, and release the mem back to the system. 1680 * Before calling the slab must have been unlinked from the cache. The 1681 * cache-lock is not held/needed. 1682 */ 1683static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) 1684{ 1685 void *addr = slabp->s_mem - slabp->colouroff; 1686 1687 slab_destroy_objs(cachep, slabp); 1688 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { 1689 struct slab_rcu *slab_rcu; 1690 1691 slab_rcu = (struct slab_rcu *)slabp; 1692 slab_rcu->cachep = cachep; 1693 slab_rcu->addr = addr; 1694 call_rcu(&slab_rcu->head, kmem_rcu_free); 1695 } else { 1696 kmem_freepages(cachep, addr); 1697 if (OFF_SLAB(cachep)) 1698 kmem_cache_free(cachep->slabp_cache, slabp); 1699 } 1700} 1701 1702/* 1703 * For setting up all the kmem_list3s for cache whose buffer_size is same as 1704 * size of kmem_list3. 1705 */ 1706static void set_up_list3s(struct kmem_cache *cachep, int index) 1707{ 1708 int node; 1709 1710 for_each_online_node(node) { 1711 cachep->nodelists[node] = &initkmem_list3[index + node]; 1712 cachep->nodelists[node]->next_reap = jiffies + 1713 REAPTIMEOUT_LIST3 + 1714 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 1715 } 1716} 1717 1718/** 1719 * calculate_slab_order - calculate size (page order) of slabs 1720 * @cachep: pointer to the cache that is being created 1721 * @size: size of objects to be created in this cache. 1722 * @align: required alignment for the objects. 1723 * @flags: slab allocation flags 1724 * 1725 * Also calculates the number of objects per slab. 1726 * 1727 * This could be made much more intelligent. For now, try to avoid using 1728 * high order pages for slabs. When the gfp() functions are more friendly 1729 * towards high-order requests, this should be changed. 1730 */ 1731static size_t calculate_slab_order(struct kmem_cache *cachep, 1732 size_t size, size_t align, unsigned long flags) 1733{ 1734 size_t left_over = 0; 1735 int gfporder; 1736 1737 for (gfporder = 0; gfporder <= MAX_GFP_ORDER; gfporder++) { 1738 unsigned int num; 1739 size_t remainder; 1740 1741 cache_estimate(gfporder, size, align, flags, &remainder, &num); 1742 if (!num) 1743 continue; 1744 1745 /* More than offslab_limit objects will cause problems */ 1746 if ((flags & CFLGS_OFF_SLAB) && num > offslab_limit) 1747 break; 1748 1749 /* Found something acceptable - save it away */ 1750 cachep->num = num; 1751 cachep->gfporder = gfporder; 1752 left_over = remainder; 1753 1754 /* 1755 * A VFS-reclaimable slab tends to have most allocations 1756 * as GFP_NOFS and we really don't want to have to be allocating 1757 * higher-order pages when we are unable to shrink dcache. 1758 */ 1759 if (flags & SLAB_RECLAIM_ACCOUNT) 1760 break; 1761 1762 /* 1763 * Large number of objects is good, but very large slabs are 1764 * currently bad for the gfp()s. 1765 */ 1766 if (gfporder >= slab_break_gfp_order) 1767 break; 1768 1769 /* 1770 * Acceptable internal fragmentation? 1771 */ 1772 if (left_over * 8 <= (PAGE_SIZE << gfporder)) 1773 break; 1774 } 1775 return left_over; 1776} 1777 1778static void setup_cpu_cache(struct kmem_cache *cachep) 1779{ 1780 if (g_cpucache_up == FULL) { 1781 enable_cpucache(cachep); 1782 return; 1783 } 1784 if (g_cpucache_up == NONE) { 1785 /* 1786 * Note: the first kmem_cache_create must create the cache 1787 * that's used by kmalloc(24), otherwise the creation of 1788 * further caches will BUG(). 1789 */ 1790 cachep->array[smp_processor_id()] = &initarray_generic.cache; 1791 1792 /* 1793 * If the cache that's used by kmalloc(sizeof(kmem_list3)) is 1794 * the first cache, then we need to set up all its list3s, 1795 * otherwise the creation of further caches will BUG(). 1796 */ 1797 set_up_list3s(cachep, SIZE_AC); 1798 if (INDEX_AC == INDEX_L3) 1799 g_cpucache_up = PARTIAL_L3; 1800 else 1801 g_cpucache_up = PARTIAL_AC; 1802 } else { 1803 cachep->array[smp_processor_id()] = 1804 kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); 1805 1806 if (g_cpucache_up == PARTIAL_AC) { 1807 set_up_list3s(cachep, SIZE_L3); 1808 g_cpucache_up = PARTIAL_L3; 1809 } else { 1810 int node; 1811 for_each_online_node(node) { 1812 cachep->nodelists[node] = 1813 kmalloc_node(sizeof(struct kmem_list3), 1814 GFP_KERNEL, node); 1815 BUG_ON(!cachep->nodelists[node]); 1816 kmem_list3_init(cachep->nodelists[node]); 1817 } 1818 } 1819 } 1820 cachep->nodelists[numa_node_id()]->next_reap = 1821 jiffies + REAPTIMEOUT_LIST3 + 1822 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 1823 1824 cpu_cache_get(cachep)->avail = 0; 1825 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; 1826 cpu_cache_get(cachep)->batchcount = 1; 1827 cpu_cache_get(cachep)->touched = 0; 1828 cachep->batchcount = 1; 1829 cachep->limit = BOOT_CPUCACHE_ENTRIES; 1830} 1831 1832/** 1833 * kmem_cache_create - Create a cache. 1834 * @name: A string which is used in /proc/slabinfo to identify this cache. 1835 * @size: The size of objects to be created in this cache. 1836 * @align: The required alignment for the objects. 1837 * @flags: SLAB flags 1838 * @ctor: A constructor for the objects. 1839 * @dtor: A destructor for the objects. 1840 * 1841 * Returns a ptr to the cache on success, NULL on failure. 1842 * Cannot be called within a int, but can be interrupted. 1843 * The @ctor is run when new pages are allocated by the cache 1844 * and the @dtor is run before the pages are handed back. 1845 * 1846 * @name must be valid until the cache is destroyed. This implies that 1847 * the module calling this has to destroy the cache before getting unloaded. 1848 * 1849 * The flags are 1850 * 1851 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) 1852 * to catch references to uninitialised memory. 1853 * 1854 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check 1855 * for buffer overruns. 1856 * 1857 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware 1858 * cacheline. This can be beneficial if you're counting cycles as closely 1859 * as davem. 1860 */ 1861struct kmem_cache * 1862kmem_cache_create (const char *name, size_t size, size_t align, 1863 unsigned long flags, 1864 void (*ctor)(void*, struct kmem_cache *, unsigned long), 1865 void (*dtor)(void*, struct kmem_cache *, unsigned long)) 1866{ 1867 size_t left_over, slab_size, ralign; 1868 struct kmem_cache *cachep = NULL; 1869 struct list_head *p; 1870 1871 /* 1872 * Sanity checks... these are all serious usage bugs. 1873 */ 1874 if (!name || in_interrupt() || (size < BYTES_PER_WORD) || 1875 (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { 1876 printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__, 1877 name); 1878 BUG(); 1879 } 1880 1881 /* 1882 * Prevent CPUs from coming and going. 1883 * lock_cpu_hotplug() nests outside cache_chain_mutex 1884 */ 1885 lock_cpu_hotplug(); 1886 1887 mutex_lock(&cache_chain_mutex); 1888 1889 list_for_each(p, &cache_chain) { 1890 struct kmem_cache *pc = list_entry(p, struct kmem_cache, next); 1891 mm_segment_t old_fs = get_fs(); 1892 char tmp; 1893 int res; 1894 1895 /* 1896 * This happens when the module gets unloaded and doesn't 1897 * destroy its slab cache and no-one else reuses the vmalloc 1898 * area of the module. Print a warning. 1899 */ 1900 set_fs(KERNEL_DS); 1901 res = __get_user(tmp, pc->name); 1902 set_fs(old_fs); 1903 if (res) { 1904 printk("SLAB: cache with size %d has lost its name\n", 1905 pc->buffer_size); 1906 continue; 1907 } 1908 1909 if (!strcmp(pc->name, name)) { 1910 printk("kmem_cache_create: duplicate cache %s\n", name); 1911 dump_stack(); 1912 goto oops; 1913 } 1914 } 1915 1916#if DEBUG 1917 WARN_ON(strchr(name, ' ')); /* It confuses parsers */ 1918 if ((flags & SLAB_DEBUG_INITIAL) && !ctor) { 1919 /* No constructor, but inital state check requested */ 1920 printk(KERN_ERR "%s: No con, but init state check " 1921 "requested - %s\n", __FUNCTION__, name); 1922 flags &= ~SLAB_DEBUG_INITIAL; 1923 } 1924#if FORCED_DEBUG 1925 /* 1926 * Enable redzoning and last user accounting, except for caches with 1927 * large objects, if the increased size would increase the object size 1928 * above the next power of two: caches with object sizes just above a 1929 * power of two have a significant amount of internal fragmentation. 1930 */ 1931 if (size < 4096 || fls(size - 1) == fls(size-1 + 3 * BYTES_PER_WORD)) 1932 flags |= SLAB_RED_ZONE | SLAB_STORE_USER; 1933 if (!(flags & SLAB_DESTROY_BY_RCU)) 1934 flags |= SLAB_POISON; 1935#endif 1936 if (flags & SLAB_DESTROY_BY_RCU) 1937 BUG_ON(flags & SLAB_POISON); 1938#endif 1939 if (flags & SLAB_DESTROY_BY_RCU) 1940 BUG_ON(dtor); 1941 1942 /* 1943 * Always checks flags, a caller might be expecting debug support which 1944 * isn't available. 1945 */ 1946 if (flags & ~CREATE_MASK) 1947 BUG(); 1948 1949 /* 1950 * Check that size is in terms of words. This is needed to avoid 1951 * unaligned accesses for some archs when redzoning is used, and makes 1952 * sure any on-slab bufctl's are also correctly aligned. 1953 */ 1954 if (size & (BYTES_PER_WORD - 1)) { 1955 size += (BYTES_PER_WORD - 1); 1956 size &= ~(BYTES_PER_WORD - 1); 1957 } 1958 1959 /* calculate the final buffer alignment: */ 1960 1961 /* 1) arch recommendation: can be overridden for debug */ 1962 if (flags & SLAB_HWCACHE_ALIGN) { 1963 /* 1964 * Default alignment: as specified by the arch code. Except if 1965 * an object is really small, then squeeze multiple objects into 1966 * one cacheline. 1967 */ 1968 ralign = cache_line_size(); 1969 while (size <= ralign / 2) 1970 ralign /= 2; 1971 } else { 1972 ralign = BYTES_PER_WORD; 1973 } 1974 /* 2) arch mandated alignment: disables debug if necessary */ 1975 if (ralign < ARCH_SLAB_MINALIGN) { 1976 ralign = ARCH_SLAB_MINALIGN; 1977 if (ralign > BYTES_PER_WORD) 1978 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); 1979 } 1980 /* 3) caller mandated alignment: disables debug if necessary */ 1981 if (ralign < align) { 1982 ralign = align; 1983 if (ralign > BYTES_PER_WORD) 1984 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); 1985 } 1986 /* 1987 * 4) Store it. Note that the debug code below can reduce 1988 * the alignment to BYTES_PER_WORD. 1989 */ 1990 align = ralign; 1991 1992 /* Get cache's description obj. */ 1993 cachep = kmem_cache_zalloc(&cache_cache, SLAB_KERNEL); 1994 if (!cachep) 1995 goto oops; 1996 1997#if DEBUG 1998 cachep->obj_size = size; 1999 2000 if (flags & SLAB_RED_ZONE) { 2001 /* redzoning only works with word aligned caches */ 2002 align = BYTES_PER_WORD; 2003 2004 /* add space for red zone words */ 2005 cachep->obj_offset += BYTES_PER_WORD; 2006 size += 2 * BYTES_PER_WORD; 2007 } 2008 if (flags & SLAB_STORE_USER) { 2009 /* user store requires word alignment and 2010 * one word storage behind the end of the real 2011 * object. 2012 */ 2013 align = BYTES_PER_WORD; 2014 size += BYTES_PER_WORD; 2015 } 2016#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) 2017 if (size >= malloc_sizes[INDEX_L3 + 1].cs_size 2018 && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) { 2019 cachep->obj_offset += PAGE_SIZE - size; 2020 size = PAGE_SIZE; 2021 } 2022#endif 2023#endif 2024 2025 /* Determine if the slab management is 'on' or 'off' slab. */ 2026 if (size >= (PAGE_SIZE >> 3)) 2027 /* 2028 * Size is large, assume best to place the slab management obj 2029 * off-slab (should allow better packing of objs). 2030 */ 2031 flags |= CFLGS_OFF_SLAB; 2032 2033 size = ALIGN(size, align); 2034 2035 left_over = calculate_slab_order(cachep, size, align, flags); 2036 2037 if (!cachep->num) { 2038 printk("kmem_cache_create: couldn't create cache %s.\n", name); 2039 kmem_cache_free(&cache_cache, cachep); 2040 cachep = NULL; 2041 goto oops; 2042 } 2043 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) 2044 + sizeof(struct slab), align); 2045 2046 /* 2047 * If the slab has been placed off-slab, and we have enough space then 2048 * move it on-slab. This is at the expense of any extra colouring. 2049 */ 2050 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) { 2051 flags &= ~CFLGS_OFF_SLAB; 2052 left_over -= slab_size; 2053 } 2054 2055 if (flags & CFLGS_OFF_SLAB) { 2056 /* really off slab. No need for manual alignment */ 2057 slab_size = 2058 cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); 2059 } 2060 2061 cachep->colour_off = cache_line_size(); 2062 /* Offset must be a multiple of the alignment. */ 2063 if (cachep->colour_off < align) 2064 cachep->colour_off = align; 2065 cachep->colour = left_over / cachep->colour_off; 2066 cachep->slab_size = slab_size; 2067 cachep->flags = flags; 2068 cachep->gfpflags = 0; 2069 if (flags & SLAB_CACHE_DMA) 2070 cachep->gfpflags |= GFP_DMA; 2071 cachep->buffer_size = size; 2072 2073 if (flags & CFLGS_OFF_SLAB) 2074 cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); 2075 cachep->ctor = ctor; 2076 cachep->dtor = dtor; 2077 cachep->name = name; 2078 2079 2080 setup_cpu_cache(cachep); 2081 2082 /* cache setup completed, link it into the list */ 2083 list_add(&cachep->next, &cache_chain); 2084oops: 2085 if (!cachep && (flags & SLAB_PANIC)) 2086 panic("kmem_cache_create(): failed to create slab `%s'\n", 2087 name); 2088 mutex_unlock(&cache_chain_mutex); 2089 unlock_cpu_hotplug(); 2090 return cachep; 2091} 2092EXPORT_SYMBOL(kmem_cache_create); 2093 2094#if DEBUG 2095static void check_irq_off(void) 2096{ 2097 BUG_ON(!irqs_disabled()); 2098} 2099 2100static void check_irq_on(void) 2101{ 2102 BUG_ON(irqs_disabled()); 2103} 2104 2105static void check_spinlock_acquired(struct kmem_cache *cachep) 2106{ 2107#ifdef CONFIG_SMP 2108 check_irq_off(); 2109 assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock); 2110#endif 2111} 2112 2113static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) 2114{ 2115#ifdef CONFIG_SMP 2116 check_irq_off(); 2117 assert_spin_locked(&cachep->nodelists[node]->list_lock); 2118#endif 2119} 2120 2121#else 2122#define check_irq_off() do { } while(0) 2123#define check_irq_on() do { } while(0) 2124#define check_spinlock_acquired(x) do { } while(0) 2125#define check_spinlock_acquired_node(x, y) do { } while(0) 2126#endif 2127 2128static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, 2129 struct array_cache *ac, 2130 int force, int node); 2131 2132static void do_drain(void *arg) 2133{ 2134 struct kmem_cache *cachep = arg; 2135 struct array_cache *ac; 2136 int node = numa_node_id(); 2137 2138 check_irq_off(); 2139 ac = cpu_cache_get(cachep); 2140 spin_lock(&cachep->nodelists[node]->list_lock); 2141 free_block(cachep, ac->entry, ac->avail, node); 2142 spin_unlock(&cachep->nodelists[node]->list_lock); 2143 ac->avail = 0; 2144} 2145 2146static void drain_cpu_caches(struct kmem_cache *cachep) 2147{ 2148 struct kmem_list3 *l3; 2149 int node; 2150 2151 on_each_cpu(do_drain, cachep, 1, 1); 2152 check_irq_on(); 2153 for_each_online_node(node) { 2154 l3 = cachep->nodelists[node]; 2155 if (l3) { 2156 drain_array(cachep, l3, l3->shared, 1, node); 2157 if (l3->alien) 2158 drain_alien_cache(cachep, l3->alien); 2159 } 2160 } 2161} 2162 2163static int __node_shrink(struct kmem_cache *cachep, int node) 2164{ 2165 struct slab *slabp; 2166 struct kmem_list3 *l3 = cachep->nodelists[node]; 2167 int ret; 2168 2169 for (;;) { 2170 struct list_head *p; 2171 2172 p = l3->slabs_free.prev; 2173 if (p == &l3->slabs_free) 2174 break; 2175 2176 slabp = list_entry(l3->slabs_free.prev, struct slab, list); 2177#if DEBUG 2178 if (slabp->inuse) 2179 BUG(); 2180#endif 2181 list_del(&slabp->list); 2182 2183 l3->free_objects -= cachep->num; 2184 spin_unlock_irq(&l3->list_lock); 2185 slab_destroy(cachep, slabp); 2186 spin_lock_irq(&l3->list_lock); 2187 } 2188 ret = !list_empty(&l3->slabs_full) || !list_empty(&l3->slabs_partial); 2189 return ret; 2190} 2191 2192static int __cache_shrink(struct kmem_cache *cachep) 2193{ 2194 int ret = 0, i = 0; 2195 struct kmem_list3 *l3; 2196 2197 drain_cpu_caches(cachep); 2198 2199 check_irq_on(); 2200 for_each_online_node(i) { 2201 l3 = cachep->nodelists[i]; 2202 if (l3) { 2203 spin_lock_irq(&l3->list_lock); 2204 ret += __node_shrink(cachep, i); 2205 spin_unlock_irq(&l3->list_lock); 2206 } 2207 } 2208 return (ret ? 1 : 0); 2209} 2210 2211/** 2212 * kmem_cache_shrink - Shrink a cache. 2213 * @cachep: The cache to shrink. 2214 * 2215 * Releases as many slabs as possible for a cache. 2216 * To help debugging, a zero exit status indicates all slabs were released. 2217 */ 2218int kmem_cache_shrink(struct kmem_cache *cachep) 2219{ 2220 if (!cachep || in_interrupt()) 2221 BUG(); 2222 2223 return __cache_shrink(cachep); 2224} 2225EXPORT_SYMBOL(kmem_cache_shrink); 2226 2227/** 2228 * kmem_cache_destroy - delete a cache 2229 * @cachep: the cache to destroy 2230 * 2231 * Remove a struct kmem_cache object from the slab cache. 2232 * Returns 0 on success. 2233 * 2234 * It is expected this function will be called by a module when it is 2235 * unloaded. This will remove the cache completely, and avoid a duplicate 2236 * cache being allocated each time a module is loaded and unloaded, if the 2237 * module doesn't have persistent in-kernel storage across loads and unloads. 2238 * 2239 * The cache must be empty before calling this function. 2240 * 2241 * The caller must guarantee that noone will allocate memory from the cache 2242 * during the kmem_cache_destroy(). 2243 */ 2244int kmem_cache_destroy(struct kmem_cache *cachep) 2245{ 2246 int i; 2247 struct kmem_list3 *l3; 2248 2249 if (!cachep || in_interrupt()) 2250 BUG(); 2251 2252 /* Don't let CPUs to come and go */ 2253 lock_cpu_hotplug(); 2254 2255 /* Find the cache in the chain of caches. */ 2256 mutex_lock(&cache_chain_mutex); 2257 /* 2258 * the chain is never empty, cache_cache is never destroyed 2259 */ 2260 list_del(&cachep->next); 2261 mutex_unlock(&cache_chain_mutex); 2262 2263 if (__cache_shrink(cachep)) { 2264 slab_error(cachep, "Can't free all objects"); 2265 mutex_lock(&cache_chain_mutex); 2266 list_add(&cachep->next, &cache_chain); 2267 mutex_unlock(&cache_chain_mutex); 2268 unlock_cpu_hotplug(); 2269 return 1; 2270 } 2271 2272 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) 2273 synchronize_rcu(); 2274 2275 for_each_online_cpu(i) 2276 kfree(cachep->array[i]); 2277 2278 /* NUMA: free the list3 structures */ 2279 for_each_online_node(i) { 2280 l3 = cachep->nodelists[i]; 2281 if (l3) { 2282 kfree(l3->shared); 2283 free_alien_cache(l3->alien); 2284 kfree(l3); 2285 } 2286 } 2287 kmem_cache_free(&cache_cache, cachep); 2288 unlock_cpu_hotplug(); 2289 return 0; 2290} 2291EXPORT_SYMBOL(kmem_cache_destroy); 2292 2293/* Get the memory for a slab management obj. */ 2294static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, 2295 int colour_off, gfp_t local_flags) 2296{ 2297 struct slab *slabp; 2298 2299 if (OFF_SLAB(cachep)) { 2300 /* Slab management obj is off-slab. */ 2301 slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags); 2302 if (!slabp) 2303 return NULL; 2304 } else { 2305 slabp = objp + colour_off; 2306 colour_off += cachep->slab_size; 2307 } 2308 slabp->inuse = 0; 2309 slabp->colouroff = colour_off; 2310 slabp->s_mem = objp + colour_off; 2311 return slabp; 2312} 2313 2314static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) 2315{ 2316 return (kmem_bufctl_t *) (slabp + 1); 2317} 2318 2319static void cache_init_objs(struct kmem_cache *cachep, 2320 struct slab *slabp, unsigned long ctor_flags) 2321{ 2322 int i; 2323 2324 for (i = 0; i < cachep->num; i++) { 2325 void *objp = index_to_obj(cachep, slabp, i); 2326#if DEBUG 2327 /* need to poison the objs? */ 2328 if (cachep->flags & SLAB_POISON) 2329 poison_obj(cachep, objp, POISON_FREE); 2330 if (cachep->flags & SLAB_STORE_USER) 2331 *dbg_userword(cachep, objp) = NULL; 2332 2333 if (cachep->flags & SLAB_RED_ZONE) { 2334 *dbg_redzone1(cachep, objp) = RED_INACTIVE; 2335 *dbg_redzone2(cachep, objp) = RED_INACTIVE; 2336 } 2337 /* 2338 * Constructors are not allowed to allocate memory from the same 2339 * cache which they are a constructor for. Otherwise, deadlock. 2340 * They must also be threaded. 2341 */ 2342 if (cachep->ctor && !(cachep->flags & SLAB_POISON)) 2343 cachep->ctor(objp + obj_offset(cachep), cachep, 2344 ctor_flags); 2345 2346 if (cachep->flags & SLAB_RED_ZONE) { 2347 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) 2348 slab_error(cachep, "constructor overwrote the" 2349 " end of an object"); 2350 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) 2351 slab_error(cachep, "constructor overwrote the" 2352 " start of an object"); 2353 } 2354 if ((cachep->buffer_size % PAGE_SIZE) == 0 && 2355 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) 2356 kernel_map_pages(virt_to_page(objp), 2357 cachep->buffer_size / PAGE_SIZE, 0); 2358#else 2359 if (cachep->ctor) 2360 cachep->ctor(objp, cachep, ctor_flags); 2361#endif 2362 slab_bufctl(slabp)[i] = i + 1; 2363 } 2364 slab_bufctl(slabp)[i - 1] = BUFCTL_END; 2365 slabp->free = 0; 2366} 2367 2368static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) 2369{ 2370 if (flags & SLAB_DMA) 2371 BUG_ON(!(cachep->gfpflags & GFP_DMA)); 2372 else 2373 BUG_ON(cachep->gfpflags & GFP_DMA); 2374} 2375 2376static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, 2377 int nodeid) 2378{ 2379 void *objp = index_to_obj(cachep, slabp, slabp->free); 2380 kmem_bufctl_t next; 2381 2382 slabp->inuse++; 2383 next = slab_bufctl(slabp)[slabp->free]; 2384#if DEBUG 2385 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; 2386 WARN_ON(slabp->nodeid != nodeid); 2387#endif 2388 slabp->free = next; 2389 2390 return objp; 2391} 2392 2393static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, 2394 void *objp, int nodeid) 2395{ 2396 unsigned int objnr = obj_to_index(cachep, slabp, objp); 2397 2398#if DEBUG 2399 /* Verify that the slab belongs to the intended node */ 2400 WARN_ON(slabp->nodeid != nodeid); 2401 2402 if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) { 2403 printk(KERN_ERR "slab: double free detected in cache " 2404 "'%s', objp %p\n", cachep->name, objp); 2405 BUG(); 2406 } 2407#endif 2408 slab_bufctl(slabp)[objnr] = slabp->free; 2409 slabp->free = objnr; 2410 slabp->inuse--; 2411} 2412 2413static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp, 2414 void *objp) 2415{ 2416 int i; 2417 struct page *page; 2418 2419 /* Nasty!!!!!! I hope this is OK. */ 2420 page = virt_to_page(objp); 2421 2422 i = 1; 2423 if (likely(!PageCompound(page))) 2424 i <<= cachep->gfporder; 2425 do { 2426 page_set_cache(page, cachep); 2427 page_set_slab(page, slabp); 2428 page++; 2429 } while (--i); 2430} 2431 2432/* 2433 * Grow (by 1) the number of slabs within a cache. This is called by 2434 * kmem_cache_alloc() when there are no active objs left in a cache. 2435 */ 2436static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) 2437{ 2438 struct slab *slabp; 2439 void *objp; 2440 size_t offset; 2441 gfp_t local_flags; 2442 unsigned long ctor_flags; 2443 struct kmem_list3 *l3; 2444 2445 /* 2446 * Be lazy and only check for valid flags here, keeping it out of the 2447 * critical path in kmem_cache_alloc(). 2448 */ 2449 if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)) 2450 BUG(); 2451 if (flags & SLAB_NO_GROW) 2452 return 0; 2453 2454 ctor_flags = SLAB_CTOR_CONSTRUCTOR; 2455 local_flags = (flags & SLAB_LEVEL_MASK); 2456 if (!(local_flags & __GFP_WAIT)) 2457 /* 2458 * Not allowed to sleep. Need to tell a constructor about 2459 * this - it might need to know... 2460 */ 2461 ctor_flags |= SLAB_CTOR_ATOMIC; 2462 2463 /* Take the l3 list lock to change the colour_next on this node */ 2464 check_irq_off(); 2465 l3 = cachep->nodelists[nodeid]; 2466 spin_lock(&l3->list_lock); 2467 2468 /* Get colour for the slab, and cal the next value. */ 2469 offset = l3->colour_next; 2470 l3->colour_next++; 2471 if (l3->colour_next >= cachep->colour) 2472 l3->colour_next = 0; 2473 spin_unlock(&l3->list_lock); 2474 2475 offset *= cachep->colour_off; 2476 2477 if (local_flags & __GFP_WAIT) 2478 local_irq_enable(); 2479 2480 /* 2481 * The test for missing atomic flag is performed here, rather than 2482 * the more obvious place, simply to reduce the critical path length 2483 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they 2484 * will eventually be caught here (where it matters). 2485 */ 2486 kmem_flagcheck(cachep, flags); 2487 2488 /* 2489 * Get mem for the objs. Attempt to allocate a physical page from 2490 * 'nodeid'. 2491 */ 2492 objp = kmem_getpages(cachep, flags, nodeid); 2493 if (!objp) 2494 goto failed; 2495 2496 /* Get slab management. */ 2497 slabp = alloc_slabmgmt(cachep, objp, offset, local_flags); 2498 if (!slabp) 2499 goto opps1; 2500 2501 slabp->nodeid = nodeid; 2502 set_slab_attr(cachep, slabp, objp); 2503 2504 cache_init_objs(cachep, slabp, ctor_flags); 2505 2506 if (local_flags & __GFP_WAIT) 2507 local_irq_disable(); 2508 check_irq_off(); 2509 spin_lock(&l3->list_lock); 2510 2511 /* Make slab active. */ 2512 list_add_tail(&slabp->list, &(l3->slabs_free)); 2513 STATS_INC_GROWN(cachep); 2514 l3->free_objects += cachep->num; 2515 spin_unlock(&l3->list_lock); 2516 return 1; 2517opps1: 2518 kmem_freepages(cachep, objp); 2519failed: 2520 if (local_flags & __GFP_WAIT) 2521 local_irq_disable(); 2522 return 0; 2523} 2524 2525#if DEBUG 2526 2527/* 2528 * Perform extra freeing checks: 2529 * - detect bad pointers. 2530 * - POISON/RED_ZONE checking 2531 * - destructor calls, for caches with POISON+dtor 2532 */ 2533static void kfree_debugcheck(const void *objp) 2534{ 2535 struct page *page; 2536 2537 if (!virt_addr_valid(objp)) { 2538 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n", 2539 (unsigned long)objp); 2540 BUG(); 2541 } 2542 page = virt_to_page(objp); 2543 if (!PageSlab(page)) { 2544 printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n", 2545 (unsigned long)objp); 2546 BUG(); 2547 } 2548} 2549 2550static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, 2551 void *caller) 2552{ 2553 struct page *page; 2554 unsigned int objnr; 2555 struct slab *slabp; 2556 2557 objp -= obj_offset(cachep); 2558 kfree_debugcheck(objp); 2559 page = virt_to_page(objp); 2560 2561 if (page_get_cache(page) != cachep) { 2562 printk(KERN_ERR "mismatch in kmem_cache_free: expected " 2563 "cache %p, got %p\n", 2564 page_get_cache(page), cachep); 2565 printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); 2566 printk(KERN_ERR "%p is %s.\n", page_get_cache(page), 2567 page_get_cache(page)->name); 2568 WARN_ON(1); 2569 } 2570 slabp = page_get_slab(page); 2571 2572 if (cachep->flags & SLAB_RED_ZONE) { 2573 if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || 2574 *dbg_redzone2(cachep, objp) != RED_ACTIVE) { 2575 slab_error(cachep, "double free, or memory outside" 2576 " object was overwritten"); 2577 printk(KERN_ERR "%p: redzone 1:0x%lx, " 2578 "redzone 2:0x%lx.\n", 2579 objp, *dbg_redzone1(cachep, objp), 2580 *dbg_redzone2(cachep, objp)); 2581 } 2582 *dbg_redzone1(cachep, objp) = RED_INACTIVE; 2583 *dbg_redzone2(cachep, objp) = RED_INACTIVE; 2584 } 2585 if (cachep->flags & SLAB_STORE_USER) 2586 *dbg_userword(cachep, objp) = caller; 2587 2588 objnr = obj_to_index(cachep, slabp, objp); 2589 2590 BUG_ON(objnr >= cachep->num); 2591 BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); 2592 2593 if (cachep->flags & SLAB_DEBUG_INITIAL) { 2594 /* 2595 * Need to call the slab's constructor so the caller can 2596 * perform a verify of its state (debugging). Called without 2597 * the cache-lock held. 2598 */ 2599 cachep->ctor(objp + obj_offset(cachep), 2600 cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY); 2601 } 2602 if (cachep->flags & SLAB_POISON && cachep->dtor) { 2603 /* we want to cache poison the object, 2604 * call the destruction callback 2605 */ 2606 cachep->dtor(objp + obj_offset(cachep), cachep, 0); 2607 } 2608#ifdef CONFIG_DEBUG_SLAB_LEAK 2609 slab_bufctl(slabp)[objnr] = BUFCTL_FREE; 2610#endif 2611 if (cachep->flags & SLAB_POISON) { 2612#ifdef CONFIG_DEBUG_PAGEALLOC 2613 if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { 2614 store_stackinfo(cachep, objp, (unsigned long)caller); 2615 kernel_map_pages(virt_to_page(objp), 2616 cachep->buffer_size / PAGE_SIZE, 0); 2617 } else { 2618 poison_obj(cachep, objp, POISON_FREE); 2619 } 2620#else 2621 poison_obj(cachep, objp, POISON_FREE); 2622#endif 2623 } 2624 return objp; 2625} 2626 2627static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) 2628{ 2629 kmem_bufctl_t i; 2630 int entries = 0; 2631 2632 /* Check slab's freelist to see if this obj is there. */ 2633 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { 2634 entries++; 2635 if (entries > cachep->num || i >= cachep->num) 2636 goto bad; 2637 } 2638 if (entries != cachep->num - slabp->inuse) { 2639bad: 2640 printk(KERN_ERR "slab: Internal list corruption detected in " 2641 "cache '%s'(%d), slabp %p(%d). Hexdump:\n", 2642 cachep->name, cachep->num, slabp, slabp->inuse); 2643 for (i = 0; 2644 i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); 2645 i++) { 2646 if (i % 16 == 0) 2647 printk("\n%03x:", i); 2648 printk(" %02x", ((unsigned char *)slabp)[i]); 2649 } 2650 printk("\n"); 2651 BUG(); 2652 } 2653} 2654#else 2655#define kfree_debugcheck(x) do { } while(0) 2656#define cache_free_debugcheck(x,objp,z) (objp) 2657#define check_slabp(x,y) do { } while(0) 2658#endif 2659 2660static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) 2661{ 2662 int batchcount; 2663 struct kmem_list3 *l3; 2664 struct array_cache *ac; 2665 2666 check_irq_off(); 2667 ac = cpu_cache_get(cachep); 2668retry: 2669 batchcount = ac->batchcount; 2670 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { 2671 /* 2672 * If there was little recent activity on this cache, then 2673 * perform only a partial refill. Otherwise we could generate 2674 * refill bouncing. 2675 */ 2676 batchcount = BATCHREFILL_LIMIT; 2677 } 2678 l3 = cachep->nodelists[numa_node_id()]; 2679 2680 BUG_ON(ac->avail > 0 || !l3); 2681 spin_lock(&l3->list_lock); 2682 2683 if (l3->shared) { 2684 struct array_cache *shared_array = l3->shared; 2685 if (shared_array->avail) { 2686 if (batchcount > shared_array->avail) 2687 batchcount = shared_array->avail; 2688 shared_array->avail -= batchcount; 2689 ac->avail = batchcount; 2690 memcpy(ac->entry, 2691 &(shared_array->entry[shared_array->avail]), 2692 sizeof(void *) * batchcount); 2693 shared_array->touched = 1; 2694 goto alloc_done; 2695 } 2696 } 2697 while (batchcount > 0) { 2698 struct list_head *entry; 2699 struct slab *slabp; 2700 /* Get slab alloc is to come from. */ 2701 entry = l3->slabs_partial.next; 2702 if (entry == &l3->slabs_partial) { 2703 l3->free_touched = 1; 2704 entry = l3->slabs_free.next; 2705 if (entry == &l3->slabs_free) 2706 goto must_grow; 2707 } 2708 2709 slabp = list_entry(entry, struct slab, list); 2710 check_slabp(cachep, slabp); 2711 check_spinlock_acquired(cachep); 2712 while (slabp->inuse < cachep->num && batchcount--) { 2713 STATS_INC_ALLOCED(cachep); 2714 STATS_INC_ACTIVE(cachep); 2715 STATS_SET_HIGH(cachep); 2716 2717 ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, 2718 numa_node_id()); 2719 } 2720 check_slabp(cachep, slabp); 2721 2722 /* move slabp to correct slabp list: */ 2723 list_del(&slabp->list); 2724 if (slabp->free == BUFCTL_END) 2725 list_add(&slabp->list, &l3->slabs_full); 2726 else 2727 list_add(&slabp->list, &l3->slabs_partial); 2728 } 2729 2730must_grow: 2731 l3->free_objects -= ac->avail; 2732alloc_done: 2733 spin_unlock(&l3->list_lock); 2734 2735 if (unlikely(!ac->avail)) { 2736 int x; 2737 x = cache_grow(cachep, flags, numa_node_id()); 2738 2739 /* cache_grow can reenable interrupts, then ac could change. */ 2740 ac = cpu_cache_get(cachep); 2741 if (!x && ac->avail == 0) /* no objects in sight? abort */ 2742 return NULL; 2743 2744 if (!ac->avail) /* objects refilled by interrupt? */ 2745 goto retry; 2746 } 2747 ac->touched = 1; 2748 return ac->entry[--ac->avail]; 2749} 2750 2751static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, 2752 gfp_t flags) 2753{ 2754 might_sleep_if(flags & __GFP_WAIT); 2755#if DEBUG 2756 kmem_flagcheck(cachep, flags); 2757#endif 2758} 2759 2760#if DEBUG 2761static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, 2762 gfp_t flags, void *objp, void *caller) 2763{ 2764 if (!objp) 2765 return objp; 2766 if (cachep->flags & SLAB_POISON) { 2767#ifdef CONFIG_DEBUG_PAGEALLOC 2768 if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) 2769 kernel_map_pages(virt_to_page(objp), 2770 cachep->buffer_size / PAGE_SIZE, 1); 2771 else 2772 check_poison_obj(cachep, objp); 2773#else 2774 check_poison_obj(cachep, objp); 2775#endif 2776 poison_obj(cachep, objp, POISON_INUSE); 2777 } 2778 if (cachep->flags & SLAB_STORE_USER) 2779 *dbg_userword(cachep, objp) = caller; 2780 2781 if (cachep->flags & SLAB_RED_ZONE) { 2782 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || 2783 *dbg_redzone2(cachep, objp) != RED_INACTIVE) { 2784 slab_error(cachep, "double free, or memory outside" 2785 " object was overwritten"); 2786 printk(KERN_ERR 2787 "%p: redzone 1:0x%lx, redzone 2:0x%lx\n", 2788 objp, *dbg_redzone1(cachep, objp), 2789 *dbg_redzone2(cachep, objp)); 2790 } 2791 *dbg_redzone1(cachep, objp) = RED_ACTIVE; 2792 *dbg_redzone2(cachep, objp) = RED_ACTIVE; 2793 } 2794#ifdef CONFIG_DEBUG_SLAB_LEAK 2795 { 2796 struct slab *slabp; 2797 unsigned objnr; 2798 2799 slabp = page_get_slab(virt_to_page(objp)); 2800 objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; 2801 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE; 2802 } 2803#endif 2804 objp += obj_offset(cachep); 2805 if (cachep->ctor && cachep->flags & SLAB_POISON) { 2806 unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR; 2807 2808 if (!(flags & __GFP_WAIT)) 2809 ctor_flags |= SLAB_CTOR_ATOMIC; 2810 2811 cachep->ctor(objp, cachep, ctor_flags); 2812 } 2813 return objp; 2814} 2815#else 2816#define cache_alloc_debugcheck_after(a,b,objp,d) (objp) 2817#endif 2818 2819static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) 2820{ 2821 void *objp; 2822 struct array_cache *ac; 2823 2824#ifdef CONFIG_NUMA 2825 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { 2826 objp = alternate_node_alloc(cachep, flags); 2827 if (objp != NULL) 2828 return objp; 2829 } 2830#endif 2831 2832 check_irq_off(); 2833 ac = cpu_cache_get(cachep); 2834 if (likely(ac->avail)) { 2835 STATS_INC_ALLOCHIT(cachep); 2836 ac->touched = 1; 2837 objp = ac->entry[--ac->avail]; 2838 } else { 2839 STATS_INC_ALLOCMISS(cachep); 2840 objp = cache_alloc_refill(cachep, flags); 2841 } 2842 return objp; 2843} 2844 2845static __always_inline void *__cache_alloc(struct kmem_cache *cachep, 2846 gfp_t flags, void *caller) 2847{ 2848 unsigned long save_flags; 2849 void *objp; 2850 2851 cache_alloc_debugcheck_before(cachep, flags); 2852 2853 local_irq_save(save_flags); 2854 objp = ____cache_alloc(cachep, flags); 2855 local_irq_restore(save_flags); 2856 objp = cache_alloc_debugcheck_after(cachep, flags, objp, 2857 caller); 2858 prefetchw(objp); 2859 return objp; 2860} 2861 2862#ifdef CONFIG_NUMA 2863/* 2864 * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY. 2865 * 2866 * If we are in_interrupt, then process context, including cpusets and 2867 * mempolicy, may not apply and should not be used for allocation policy. 2868 */ 2869static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) 2870{ 2871 int nid_alloc, nid_here; 2872 2873 if (in_interrupt()) 2874 return NULL; 2875 nid_alloc = nid_here = numa_node_id(); 2876 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) 2877 nid_alloc = cpuset_mem_spread_node(); 2878 else if (current->mempolicy) 2879 nid_alloc = slab_node(current->mempolicy); 2880 if (nid_alloc != nid_here) 2881 return __cache_alloc_node(cachep, flags, nid_alloc); 2882 return NULL; 2883} 2884 2885/* 2886 * A interface to enable slab creation on nodeid 2887 */ 2888static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, 2889 int nodeid) 2890{ 2891 struct list_head *entry; 2892 struct slab *slabp; 2893 struct kmem_list3 *l3; 2894 void *obj; 2895 int x; 2896 2897 l3 = cachep->nodelists[nodeid]; 2898 BUG_ON(!l3); 2899 2900retry: 2901 check_irq_off(); 2902 spin_lock(&l3->list_lock); 2903 entry = l3->slabs_partial.next; 2904 if (entry == &l3->slabs_partial) { 2905 l3->free_touched = 1; 2906 entry = l3->slabs_free.next; 2907 if (entry == &l3->slabs_free) 2908 goto must_grow; 2909 } 2910 2911 slabp = list_entry(entry, struct slab, list); 2912 check_spinlock_acquired_node(cachep, nodeid); 2913 check_slabp(cachep, slabp); 2914 2915 STATS_INC_NODEALLOCS(cachep); 2916 STATS_INC_ACTIVE(cachep); 2917 STATS_SET_HIGH(cachep); 2918 2919 BUG_ON(slabp->inuse == cachep->num); 2920 2921 obj = slab_get_obj(cachep, slabp, nodeid); 2922 check_slabp(cachep, slabp); 2923 l3->free_objects--; 2924 /* move slabp to correct slabp list: */ 2925 list_del(&slabp->list); 2926 2927 if (slabp->free == BUFCTL_END) 2928 list_add(&slabp->list, &l3->slabs_full); 2929 else 2930 list_add(&slabp->list, &l3->slabs_partial); 2931 2932 spin_unlock(&l3->list_lock); 2933 goto done; 2934 2935must_grow: 2936 spin_unlock(&l3->list_lock); 2937 x = cache_grow(cachep, flags, nodeid); 2938 2939 if (!x) 2940 return NULL; 2941 2942 goto retry; 2943done: 2944 return obj; 2945} 2946#endif 2947 2948/* 2949 * Caller needs to acquire correct kmem_list's list_lock 2950 */ 2951static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, 2952 int node) 2953{ 2954 int i; 2955 struct kmem_list3 *l3; 2956 2957 for (i = 0; i < nr_objects; i++) { 2958 void *objp = objpp[i]; 2959 struct slab *slabp; 2960 2961 slabp = virt_to_slab(objp); 2962 l3 = cachep->nodelists[node]; 2963 list_del(&slabp->list); 2964 check_spinlock_acquired_node(cachep, node); 2965 check_slabp(cachep, slabp); 2966 slab_put_obj(cachep, slabp, objp, node); 2967 STATS_DEC_ACTIVE(cachep); 2968 l3->free_objects++; 2969 check_slabp(cachep, slabp); 2970 2971 /* fixup slab chains */ 2972 if (slabp->inuse == 0) { 2973 if (l3->free_objects > l3->free_limit) { 2974 l3->free_objects -= cachep->num; 2975 slab_destroy(cachep, slabp); 2976 } else { 2977 list_add(&slabp->list, &l3->slabs_free); 2978 } 2979 } else { 2980 /* Unconditionally move a slab to the end of the 2981 * partial list on free - maximum time for the 2982 * other objects to be freed, too. 2983 */ 2984 list_add_tail(&slabp->list, &l3->slabs_partial); 2985 } 2986 } 2987} 2988 2989static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) 2990{ 2991 int batchcount; 2992 struct kmem_list3 *l3; 2993 int node = numa_node_id(); 2994 2995 batchcount = ac->batchcount; 2996#if DEBUG 2997 BUG_ON(!batchcount || batchcount > ac->avail); 2998#endif 2999 check_irq_off(); 3000 l3 = cachep->nodelists[node]; 3001 spin_lock(&l3->list_lock); 3002 if (l3->shared) { 3003 struct array_cache *shared_array = l3->shared; 3004 int max = shared_array->limit - shared_array->avail; 3005 if (max) { 3006 if (batchcount > max) 3007 batchcount = max; 3008 memcpy(&(shared_array->entry[shared_array->avail]), 3009 ac->entry, sizeof(void *) * batchcount); 3010 shared_array->avail += batchcount; 3011 goto free_done; 3012 } 3013 } 3014 3015 free_block(cachep, ac->entry, batchcount, node); 3016free_done: 3017#if STATS 3018 { 3019 int i = 0; 3020 struct list_head *p; 3021 3022 p = l3->slabs_free.next; 3023 while (p != &(l3->slabs_free)) { 3024 struct slab *slabp; 3025 3026 slabp = list_entry(p, struct slab, list); 3027 BUG_ON(slabp->inuse); 3028 3029 i++; 3030 p = p->next; 3031 } 3032 STATS_SET_FREEABLE(cachep, i); 3033 } 3034#endif 3035 spin_unlock(&l3->list_lock); 3036 ac->avail -= batchcount; 3037 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); 3038} 3039 3040/* 3041 * Release an obj back to its cache. If the obj has a constructed state, it must 3042 * be in this state _before_ it is released. Called with disabled ints. 3043 */ 3044static inline void __cache_free(struct kmem_cache *cachep, void *objp) 3045{ 3046 struct array_cache *ac = cpu_cache_get(cachep); 3047 3048 check_irq_off(); 3049 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); 3050 3051 /* Make sure we are not freeing a object from another 3052 * node to the array cache on this cpu. 3053 */ 3054#ifdef CONFIG_NUMA 3055 { 3056 struct slab *slabp; 3057 slabp = virt_to_slab(objp); 3058 if (unlikely(slabp->nodeid != numa_node_id())) { 3059 struct array_cache *alien = NULL; 3060 int nodeid = slabp->nodeid; 3061 struct kmem_list3 *l3; 3062 3063 l3 = cachep->nodelists[numa_node_id()]; 3064 STATS_INC_NODEFREES(cachep); 3065 if (l3->alien && l3->alien[nodeid]) { 3066 alien = l3->alien[nodeid]; 3067 spin_lock(&alien->lock); 3068 if (unlikely(alien->avail == alien->limit)) 3069 __drain_alien_cache(cachep, 3070 alien, nodeid); 3071 alien->entry[alien->avail++] = objp; 3072 spin_unlock(&alien->lock); 3073 } else { 3074 spin_lock(&(cachep->nodelists[nodeid])-> 3075 list_lock); 3076 free_block(cachep, &objp, 1, nodeid); 3077 spin_unlock(&(cachep->nodelists[nodeid])-> 3078 list_lock); 3079 } 3080 return; 3081 } 3082 } 3083#endif 3084 if (likely(ac->avail < ac->limit)) { 3085 STATS_INC_FREEHIT(cachep); 3086 ac->entry[ac->avail++] = objp; 3087 return; 3088 } else { 3089 STATS_INC_FREEMISS(cachep); 3090 cache_flusharray(cachep, ac); 3091 ac->entry[ac->avail++] = objp; 3092 } 3093} 3094 3095/** 3096 * kmem_cache_alloc - Allocate an object 3097 * @cachep: The cache to allocate from. 3098 * @flags: See kmalloc(). 3099 * 3100 * Allocate an object from this cache. The flags are only relevant 3101 * if the cache has no available objects. 3102 */ 3103void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) 3104{ 3105 return __cache_alloc(cachep, flags, __builtin_return_address(0)); 3106} 3107EXPORT_SYMBOL(kmem_cache_alloc); 3108 3109/** 3110 * kmem_cache_alloc - Allocate an object. The memory is set to zero. 3111 * @cache: The cache to allocate from. 3112 * @flags: See kmalloc(). 3113 * 3114 * Allocate an object from this cache and set the allocated memory to zero. 3115 * The flags are only relevant if the cache has no available objects. 3116 */ 3117void *kmem_cache_zalloc(struct kmem_cache *cache, gfp_t flags) 3118{ 3119 void *ret = __cache_alloc(cache, flags, __builtin_return_address(0)); 3120 if (ret) 3121 memset(ret, 0, obj_size(cache)); 3122 return ret; 3123} 3124EXPORT_SYMBOL(kmem_cache_zalloc); 3125 3126/** 3127 * kmem_ptr_validate - check if an untrusted pointer might 3128 * be a slab entry. 3129 * @cachep: the cache we're checking against 3130 * @ptr: pointer to validate 3131 * 3132 * This verifies that the untrusted pointer looks sane: 3133 * it is _not_ a guarantee that the pointer is actually 3134 * part of the slab cache in question, but it at least 3135 * validates that the pointer can be dereferenced and 3136 * looks half-way sane. 3137 * 3138 * Currently only used for dentry validation. 3139 */ 3140int fastcall kmem_ptr_validate(struct kmem_cache *cachep, void *ptr) 3141{ 3142 unsigned long addr = (unsigned long)ptr; 3143 unsigned long min_addr = PAGE_OFFSET; 3144 unsigned long align_mask = BYTES_PER_WORD - 1; 3145 unsigned long size = cachep->buffer_size; 3146 struct page *page; 3147 3148 if (unlikely(addr < min_addr)) 3149 goto out; 3150 if (unlikely(addr > (unsigned long)high_memory - size)) 3151 goto out; 3152 if (unlikely(addr & align_mask)) 3153 goto out; 3154 if (unlikely(!kern_addr_valid(addr))) 3155 goto out; 3156 if (unlikely(!kern_addr_valid(addr + size - 1))) 3157 goto out; 3158 page = virt_to_page(ptr); 3159 if (unlikely(!PageSlab(page))) 3160 goto out; 3161 if (unlikely(page_get_cache(page) != cachep)) 3162 goto out; 3163 return 1; 3164out: 3165 return 0; 3166} 3167 3168#ifdef CONFIG_NUMA 3169/** 3170 * kmem_cache_alloc_node - Allocate an object on the specified node 3171 * @cachep: The cache to allocate from. 3172 * @flags: See kmalloc(). 3173 * @nodeid: node number of the target node. 3174 * 3175 * Identical to kmem_cache_alloc, except that this function is slow 3176 * and can sleep. And it will allocate memory on the given node, which 3177 * can improve the performance for cpu bound structures. 3178 * New and improved: it will now make sure that the object gets 3179 * put on the correct node list so that there is no false sharing. 3180 */ 3181void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) 3182{ 3183 unsigned long save_flags; 3184 void *ptr; 3185 3186 cache_alloc_debugcheck_before(cachep, flags); 3187 local_irq_save(save_flags); 3188 3189 if (nodeid == -1 || nodeid == numa_node_id() || 3190 !cachep->nodelists[nodeid]) 3191 ptr = ____cache_alloc(cachep, flags); 3192 else 3193 ptr = __cache_alloc_node(cachep, flags, nodeid); 3194 local_irq_restore(save_flags); 3195 3196 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, 3197 __builtin_return_address(0)); 3198 3199 return ptr; 3200} 3201EXPORT_SYMBOL(kmem_cache_alloc_node); 3202 3203void *kmalloc_node(size_t size, gfp_t flags, int node) 3204{ 3205 struct kmem_cache *cachep; 3206 3207 cachep = kmem_find_general_cachep(size, flags); 3208 if (unlikely(cachep == NULL)) 3209 return NULL; 3210 return kmem_cache_alloc_node(cachep, flags, node); 3211} 3212EXPORT_SYMBOL(kmalloc_node); 3213#endif 3214 3215/** 3216 * kmalloc - allocate memory 3217 * @size: how many bytes of memory are required. 3218 * @flags: the type of memory to allocate. 3219 * @caller: function caller for debug tracking of the caller 3220 * 3221 * kmalloc is the normal method of allocating memory 3222 * in the kernel. 3223 * 3224 * The @flags argument may be one of: 3225 * 3226 * %GFP_USER - Allocate memory on behalf of user. May sleep. 3227 * 3228 * %GFP_KERNEL - Allocate normal kernel ram. May sleep. 3229 * 3230 * %GFP_ATOMIC - Allocation will not sleep. Use inside interrupt handlers. 3231 * 3232 * Additionally, the %GFP_DMA flag may be set to indicate the memory 3233 * must be suitable for DMA. This can mean different things on different 3234 * platforms. For example, on i386, it means that the memory must come 3235 * from the first 16MB. 3236 */ 3237static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, 3238 void *caller) 3239{ 3240 struct kmem_cache *cachep; 3241 3242 /* If you want to save a few bytes .text space: replace 3243 * __ with kmem_. 3244 * Then kmalloc uses the uninlined functions instead of the inline 3245 * functions. 3246 */ 3247 cachep = __find_general_cachep(size, flags); 3248 if (unlikely(cachep == NULL)) 3249 return NULL; 3250 return __cache_alloc(cachep, flags, caller); 3251} 3252 3253 3254void *__kmalloc(size_t size, gfp_t flags) 3255{ 3256#ifndef CONFIG_DEBUG_SLAB 3257 return __do_kmalloc(size, flags, NULL); 3258#else 3259 return __do_kmalloc(size, flags, __builtin_return_address(0)); 3260#endif 3261} 3262EXPORT_SYMBOL(__kmalloc); 3263 3264#ifdef CONFIG_DEBUG_SLAB 3265void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) 3266{ 3267 return __do_kmalloc(size, flags, caller); 3268} 3269EXPORT_SYMBOL(__kmalloc_track_caller); 3270#endif 3271 3272#ifdef CONFIG_SMP 3273/** 3274 * __alloc_percpu - allocate one copy of the object for every present 3275 * cpu in the system, zeroing them. 3276 * Objects should be dereferenced using the per_cpu_ptr macro only. 3277 * 3278 * @size: how many bytes of memory are required. 3279 */ 3280void *__alloc_percpu(size_t size) 3281{ 3282 int i; 3283 struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL); 3284 3285 if (!pdata) 3286 return NULL; 3287 3288 /* 3289 * Cannot use for_each_online_cpu since a cpu may come online 3290 * and we have no way of figuring out how to fix the array 3291 * that we have allocated then.... 3292 */ 3293 for_each_cpu(i) { 3294 int node = cpu_to_node(i); 3295 3296 if (node_online(node)) 3297 pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node); 3298 else 3299 pdata->ptrs[i] = kmalloc(size, GFP_KERNEL); 3300 3301 if (!pdata->ptrs[i]) 3302 goto unwind_oom; 3303 memset(pdata->ptrs[i], 0, size); 3304 } 3305 3306 /* Catch derefs w/o wrappers */ 3307 return (void *)(~(unsigned long)pdata); 3308 3309unwind_oom: 3310 while (--i >= 0) { 3311 if (!cpu_possible(i)) 3312 continue; 3313 kfree(pdata->ptrs[i]); 3314 } 3315 kfree(pdata); 3316 return NULL; 3317} 3318EXPORT_SYMBOL(__alloc_percpu); 3319#endif 3320 3321/** 3322 * kmem_cache_free - Deallocate an object 3323 * @cachep: The cache the allocation was from. 3324 * @objp: The previously allocated object. 3325 * 3326 * Free an object which was previously allocated from this 3327 * cache. 3328 */ 3329void kmem_cache_free(struct kmem_cache *cachep, void *objp) 3330{ 3331 unsigned long flags; 3332 3333 local_irq_save(flags); 3334 __cache_free(cachep, objp); 3335 local_irq_restore(flags); 3336} 3337EXPORT_SYMBOL(kmem_cache_free); 3338 3339/** 3340 * kfree - free previously allocated memory 3341 * @objp: pointer returned by kmalloc. 3342 * 3343 * If @objp is NULL, no operation is performed. 3344 * 3345 * Don't free memory not originally allocated by kmalloc() 3346 * or you will run into trouble. 3347 */ 3348void kfree(const void *objp) 3349{ 3350 struct kmem_cache *c; 3351 unsigned long flags; 3352 3353 if (unlikely(!objp)) 3354 return; 3355 local_irq_save(flags); 3356 kfree_debugcheck(objp); 3357 c = virt_to_cache(objp); 3358 mutex_debug_check_no_locks_freed(objp, obj_size(c)); 3359 __cache_free(c, (void *)objp); 3360 local_irq_restore(flags); 3361} 3362EXPORT_SYMBOL(kfree); 3363 3364#ifdef CONFIG_SMP 3365/** 3366 * free_percpu - free previously allocated percpu memory 3367 * @objp: pointer returned by alloc_percpu. 3368 * 3369 * Don't free memory not originally allocated by alloc_percpu() 3370 * The complemented objp is to check for that. 3371 */ 3372void free_percpu(const void *objp) 3373{ 3374 int i; 3375 struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp); 3376 3377 /* 3378 * We allocate for all cpus so we cannot use for online cpu here. 3379 */ 3380 for_each_cpu(i) 3381 kfree(p->ptrs[i]); 3382 kfree(p); 3383} 3384EXPORT_SYMBOL(free_percpu); 3385#endif 3386 3387unsigned int kmem_cache_size(struct kmem_cache *cachep) 3388{ 3389 return obj_size(cachep); 3390} 3391EXPORT_SYMBOL(kmem_cache_size); 3392 3393const char *kmem_cache_name(struct kmem_cache *cachep) 3394{ 3395 return cachep->name; 3396} 3397EXPORT_SYMBOL_GPL(kmem_cache_name); 3398 3399/* 3400 * This initializes kmem_list3 for all nodes. 3401 */ 3402static int alloc_kmemlist(struct kmem_cache *cachep) 3403{ 3404 int node; 3405 struct kmem_list3 *l3; 3406 int err = 0; 3407 3408 for_each_online_node(node) { 3409 struct array_cache *nc = NULL, *new; 3410 struct array_cache **new_alien = NULL; 3411#ifdef CONFIG_NUMA 3412 new_alien = alloc_alien_cache(node, cachep->limit); 3413 if (!new_alien) 3414 goto fail; 3415#endif 3416 new = alloc_arraycache(node, cachep->shared*cachep->batchcount, 3417 0xbaadf00d); 3418 if (!new) 3419 goto fail; 3420 l3 = cachep->nodelists[node]; 3421 if (l3) { 3422 spin_lock_irq(&l3->list_lock); 3423 3424 nc = cachep->nodelists[node]->shared; 3425 if (nc) 3426 free_block(cachep, nc->entry, nc->avail, node); 3427 3428 l3->shared = new; 3429 if (!cachep->nodelists[node]->alien) { 3430 l3->alien = new_alien; 3431 new_alien = NULL; 3432 } 3433 l3->free_limit = (1 + nr_cpus_node(node)) * 3434 cachep->batchcount + cachep->num; 3435 spin_unlock_irq(&l3->list_lock); 3436 kfree(nc); 3437 free_alien_cache(new_alien); 3438 continue; 3439 } 3440 l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); 3441 if (!l3) 3442 goto fail; 3443 3444 kmem_list3_init(l3); 3445 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + 3446 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 3447 l3->shared = new; 3448 l3->alien = new_alien; 3449 l3->free_limit = (1 + nr_cpus_node(node)) * 3450 cachep->batchcount + cachep->num; 3451 cachep->nodelists[node] = l3; 3452 } 3453 return err; 3454fail: 3455 err = -ENOMEM; 3456 return err; 3457} 3458 3459struct ccupdate_struct { 3460 struct kmem_cache *cachep; 3461 struct array_cache *new[NR_CPUS]; 3462}; 3463 3464static void do_ccupdate_local(void *info) 3465{ 3466 struct ccupdate_struct *new = info; 3467 struct array_cache *old; 3468 3469 check_irq_off(); 3470 old = cpu_cache_get(new->cachep); 3471 3472 new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()]; 3473 new->new[smp_processor_id()] = old; 3474} 3475 3476/* Always called with the cache_chain_mutex held */ 3477static int do_tune_cpucache(struct kmem_cache *cachep, int limit, 3478 int batchcount, int shared) 3479{ 3480 struct ccupdate_struct new; 3481 int i, err; 3482 3483 memset(&new.new, 0, sizeof(new.new)); 3484 for_each_online_cpu(i) { 3485 new.new[i] = alloc_arraycache(cpu_to_node(i), limit, 3486 batchcount); 3487 if (!new.new[i]) { 3488 for (i--; i >= 0; i--) 3489 kfree(new.new[i]); 3490 return -ENOMEM; 3491 } 3492 } 3493 new.cachep = cachep; 3494 3495 on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1); 3496 3497 check_irq_on(); 3498 cachep->batchcount = batchcount; 3499 cachep->limit = limit; 3500 cachep->shared = shared; 3501 3502 for_each_online_cpu(i) { 3503 struct array_cache *ccold = new.new[i]; 3504 if (!ccold) 3505 continue; 3506 spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); 3507 free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i)); 3508 spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); 3509 kfree(ccold); 3510 } 3511 3512 err = alloc_kmemlist(cachep); 3513 if (err) { 3514 printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n", 3515 cachep->name, -err); 3516 BUG(); 3517 } 3518 return 0; 3519} 3520 3521/* Called with cache_chain_mutex held always */ 3522static void enable_cpucache(struct kmem_cache *cachep) 3523{ 3524 int err; 3525 int limit, shared; 3526 3527 /* 3528 * The head array serves three purposes: 3529 * - create a LIFO ordering, i.e. return objects that are cache-warm 3530 * - reduce the number of spinlock operations. 3531 * - reduce the number of linked list operations on the slab and 3532 * bufctl chains: array operations are cheaper. 3533 * The numbers are guessed, we should auto-tune as described by 3534 * Bonwick. 3535 */ 3536 if (cachep->buffer_size > 131072) 3537 limit = 1; 3538 else if (cachep->buffer_size > PAGE_SIZE) 3539 limit = 8; 3540 else if (cachep->buffer_size > 1024) 3541 limit = 24; 3542 else if (cachep->buffer_size > 256) 3543 limit = 54; 3544 else 3545 limit = 120; 3546 3547 /* 3548 * CPU bound tasks (e.g. network routing) can exhibit cpu bound 3549 * allocation behaviour: Most allocs on one cpu, most free operations 3550 * on another cpu. For these cases, an efficient object passing between 3551 * cpus is necessary. This is provided by a shared array. The array 3552 * replaces Bonwick's magazine layer. 3553 * On uniprocessor, it's functionally equivalent (but less efficient) 3554 * to a larger limit. Thus disabled by default. 3555 */ 3556 shared = 0; 3557#ifdef CONFIG_SMP 3558 if (cachep->buffer_size <= PAGE_SIZE) 3559 shared = 8; 3560#endif 3561 3562#if DEBUG 3563 /* 3564 * With debugging enabled, large batchcount lead to excessively long 3565 * periods with disabled local interrupts. Limit the batchcount 3566 */ 3567 if (limit > 32) 3568 limit = 32; 3569#endif 3570 err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared); 3571 if (err) 3572 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", 3573 cachep->name, -err); 3574} 3575 3576/* 3577 * Drain an array if it contains any elements taking the l3 lock only if 3578 * necessary. Note that the l3 listlock also protects the array_cache 3579 * if drain_array() is used on the shared array. 3580 */ 3581void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, 3582 struct array_cache *ac, int force, int node) 3583{ 3584 int tofree; 3585 3586 if (!ac || !ac->avail) 3587 return; 3588 if (ac->touched && !force) { 3589 ac->touched = 0; 3590 } else { 3591 spin_lock_irq(&l3->list_lock); 3592 if (ac->avail) { 3593 tofree = force ? ac->avail : (ac->limit + 4) / 5; 3594 if (tofree > ac->avail) 3595 tofree = (ac->avail + 1) / 2; 3596 free_block(cachep, ac->entry, tofree, node); 3597 ac->avail -= tofree; 3598 memmove(ac->entry, &(ac->entry[tofree]), 3599 sizeof(void *) * ac->avail); 3600 } 3601 spin_unlock_irq(&l3->list_lock); 3602 } 3603} 3604 3605/** 3606 * cache_reap - Reclaim memory from caches. 3607 * @unused: unused parameter 3608 * 3609 * Called from workqueue/eventd every few seconds. 3610 * Purpose: 3611 * - clear the per-cpu caches for this CPU. 3612 * - return freeable pages to the main free memory pool. 3613 * 3614 * If we cannot acquire the cache chain mutex then just give up - we'll try 3615 * again on the next iteration. 3616 */ 3617static void cache_reap(void *unused) 3618{ 3619 struct list_head *walk; 3620 struct kmem_list3 *l3; 3621 int node = numa_node_id(); 3622 3623 if (!mutex_trylock(&cache_chain_mutex)) { 3624 /* Give up. Setup the next iteration. */ 3625 schedule_delayed_work(&__get_cpu_var(reap_work), 3626 REAPTIMEOUT_CPUC); 3627 return; 3628 } 3629 3630 list_for_each(walk, &cache_chain) { 3631 struct kmem_cache *searchp; 3632 struct list_head *p; 3633 int tofree; 3634 struct slab *slabp; 3635 3636 searchp = list_entry(walk, struct kmem_cache, next); 3637 check_irq_on(); 3638 3639 /* 3640 * We only take the l3 lock if absolutely necessary and we 3641 * have established with reasonable certainty that 3642 * we can do some work if the lock was obtained. 3643 */ 3644 l3 = searchp->nodelists[node]; 3645 3646 reap_alien(searchp, l3); 3647 3648 drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); 3649 3650 /* 3651 * These are racy checks but it does not matter 3652 * if we skip one check or scan twice. 3653 */ 3654 if (time_after(l3->next_reap, jiffies)) 3655 goto next; 3656 3657 l3->next_reap = jiffies + REAPTIMEOUT_LIST3; 3658 3659 drain_array(searchp, l3, l3->shared, 0, node); 3660 3661 if (l3->free_touched) { 3662 l3->free_touched = 0; 3663 goto next; 3664 } 3665 3666 tofree = (l3->free_limit + 5 * searchp->num - 1) / 3667 (5 * searchp->num); 3668 do { 3669 /* 3670 * Do not lock if there are no free blocks. 3671 */ 3672 if (list_empty(&l3->slabs_free)) 3673 break; 3674 3675 spin_lock_irq(&l3->list_lock); 3676 p = l3->slabs_free.next; 3677 if (p == &(l3->slabs_free)) { 3678 spin_unlock_irq(&l3->list_lock); 3679 break; 3680 } 3681 3682 slabp = list_entry(p, struct slab, list); 3683 BUG_ON(slabp->inuse); 3684 list_del(&slabp->list); 3685 STATS_INC_REAPED(searchp); 3686 3687 /* 3688 * Safe to drop the lock. The slab is no longer linked 3689 * to the cache. searchp cannot disappear, we hold 3690 * cache_chain_lock 3691 */ 3692 l3->free_objects -= searchp->num; 3693 spin_unlock_irq(&l3->list_lock); 3694 slab_destroy(searchp, slabp); 3695 } while (--tofree > 0); 3696next: 3697 cond_resched(); 3698 } 3699 check_irq_on(); 3700 mutex_unlock(&cache_chain_mutex); 3701 next_reap_node(); 3702 /* Set up the next iteration */ 3703 schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); 3704} 3705 3706#ifdef CONFIG_PROC_FS 3707 3708static void print_slabinfo_header(struct seq_file *m) 3709{ 3710 /* 3711 * Output format version, so at least we can change it 3712 * without _too_ many complaints. 3713 */ 3714#if STATS 3715 seq_puts(m, "slabinfo - version: 2.1 (statistics)\n"); 3716#else 3717 seq_puts(m, "slabinfo - version: 2.1\n"); 3718#endif 3719 seq_puts(m, "# name <active_objs> <num_objs> <objsize> " 3720 "<objperslab> <pagesperslab>"); 3721 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); 3722 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); 3723#if STATS 3724 seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> " 3725 "<error> <maxfreeable> <nodeallocs> <remotefrees>"); 3726 seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>"); 3727#endif 3728 seq_putc(m, '\n'); 3729} 3730 3731static void *s_start(struct seq_file *m, loff_t *pos) 3732{ 3733 loff_t n = *pos; 3734 struct list_head *p; 3735 3736 mutex_lock(&cache_chain_mutex); 3737 if (!n) 3738 print_slabinfo_header(m); 3739 p = cache_chain.next; 3740 while (n--) { 3741 p = p->next; 3742 if (p == &cache_chain) 3743 return NULL; 3744 } 3745 return list_entry(p, struct kmem_cache, next); 3746} 3747 3748static void *s_next(struct seq_file *m, void *p, loff_t *pos) 3749{ 3750 struct kmem_cache *cachep = p; 3751 ++*pos; 3752 return cachep->next.next == &cache_chain ? 3753 NULL : list_entry(cachep->next.next, struct kmem_cache, next); 3754} 3755 3756static void s_stop(struct seq_file *m, void *p) 3757{ 3758 mutex_unlock(&cache_chain_mutex); 3759} 3760 3761static int s_show(struct seq_file *m, void *p) 3762{ 3763 struct kmem_cache *cachep = p; 3764 struct list_head *q; 3765 struct slab *slabp; 3766 unsigned long active_objs; 3767 unsigned long num_objs; 3768 unsigned long active_slabs = 0; 3769 unsigned long num_slabs, free_objects = 0, shared_avail = 0; 3770 const char *name; 3771 char *error = NULL; 3772 int node; 3773 struct kmem_list3 *l3; 3774 3775 active_objs = 0; 3776 num_slabs = 0; 3777 for_each_online_node(node) { 3778 l3 = cachep->nodelists[node]; 3779 if (!l3) 3780 continue; 3781 3782 check_irq_on(); 3783 spin_lock_irq(&l3->list_lock); 3784 3785 list_for_each(q, &l3->slabs_full) { 3786 slabp = list_entry(q, struct slab, list); 3787 if (slabp->inuse != cachep->num && !error) 3788 error = "slabs_full accounting error"; 3789 active_objs += cachep->num; 3790 active_slabs++; 3791 } 3792 list_for_each(q, &l3->slabs_partial) { 3793 slabp = list_entry(q, struct slab, list); 3794 if (slabp->inuse == cachep->num && !error) 3795 error = "slabs_partial inuse accounting error"; 3796 if (!slabp->inuse && !error) 3797 error = "slabs_partial/inuse accounting error"; 3798 active_objs += slabp->inuse; 3799 active_slabs++; 3800 } 3801 list_for_each(q, &l3->slabs_free) { 3802 slabp = list_entry(q, struct slab, list); 3803 if (slabp->inuse && !error) 3804 error = "slabs_free/inuse accounting error"; 3805 num_slabs++; 3806 } 3807 free_objects += l3->free_objects; 3808 if (l3->shared) 3809 shared_avail += l3->shared->avail; 3810 3811 spin_unlock_irq(&l3->list_lock); 3812 } 3813 num_slabs += active_slabs; 3814 num_objs = num_slabs * cachep->num; 3815 if (num_objs - active_objs != free_objects && !error) 3816 error = "free_objects accounting error"; 3817 3818 name = cachep->name; 3819 if (error) 3820 printk(KERN_ERR "slab: cache %s error: %s\n", name, error); 3821 3822 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", 3823 name, active_objs, num_objs, cachep->buffer_size, 3824 cachep->num, (1 << cachep->gfporder)); 3825 seq_printf(m, " : tunables %4u %4u %4u", 3826 cachep->limit, cachep->batchcount, cachep->shared); 3827 seq_printf(m, " : slabdata %6lu %6lu %6lu", 3828 active_slabs, num_slabs, shared_avail); 3829#if STATS 3830 { /* list3 stats */ 3831 unsigned long high = cachep->high_mark; 3832 unsigned long allocs = cachep->num_allocations; 3833 unsigned long grown = cachep->grown; 3834 unsigned long reaped = cachep->reaped; 3835 unsigned long errors = cachep->errors; 3836 unsigned long max_freeable = cachep->max_freeable; 3837 unsigned long node_allocs = cachep->node_allocs; 3838 unsigned long node_frees = cachep->node_frees; 3839 3840 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ 3841 %4lu %4lu %4lu %4lu", allocs, high, grown, 3842 reaped, errors, max_freeable, node_allocs, 3843 node_frees); 3844 } 3845 /* cpu stats */ 3846 { 3847 unsigned long allochit = atomic_read(&cachep->allochit); 3848 unsigned long allocmiss = atomic_read(&cachep->allocmiss); 3849 unsigned long freehit = atomic_read(&cachep->freehit); 3850 unsigned long freemiss = atomic_read(&cachep->freemiss); 3851 3852 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu", 3853 allochit, allocmiss, freehit, freemiss); 3854 } 3855#endif 3856 seq_putc(m, '\n'); 3857 return 0; 3858} 3859 3860/* 3861 * slabinfo_op - iterator that generates /proc/slabinfo 3862 * 3863 * Output layout: 3864 * cache-name 3865 * num-active-objs 3866 * total-objs 3867 * object size 3868 * num-active-slabs 3869 * total-slabs 3870 * num-pages-per-slab 3871 * + further values on SMP and with statistics enabled 3872 */ 3873 3874struct seq_operations slabinfo_op = { 3875 .start = s_start, 3876 .next = s_next, 3877 .stop = s_stop, 3878 .show = s_show, 3879}; 3880 3881#define MAX_SLABINFO_WRITE 128 3882/** 3883 * slabinfo_write - Tuning for the slab allocator 3884 * @file: unused 3885 * @buffer: user buffer 3886 * @count: data length 3887 * @ppos: unused 3888 */ 3889ssize_t slabinfo_write(struct file *file, const char __user * buffer, 3890 size_t count, loff_t *ppos) 3891{ 3892 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp; 3893 int limit, batchcount, shared, res; 3894 struct list_head *p; 3895 3896 if (count > MAX_SLABINFO_WRITE) 3897 return -EINVAL; 3898 if (copy_from_user(&kbuf, buffer, count)) 3899 return -EFAULT; 3900 kbuf[MAX_SLABINFO_WRITE] = '\0'; 3901 3902 tmp = strchr(kbuf, ' '); 3903 if (!tmp) 3904 return -EINVAL; 3905 *tmp = '\0'; 3906 tmp++; 3907 if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3) 3908 return -EINVAL; 3909 3910 /* Find the cache in the chain of caches. */ 3911 mutex_lock(&cache_chain_mutex); 3912 res = -EINVAL; 3913 list_for_each(p, &cache_chain) { 3914 struct kmem_cache *cachep; 3915 3916 cachep = list_entry(p, struct kmem_cache, next); 3917 if (!strcmp(cachep->name, kbuf)) { 3918 if (limit < 1 || batchcount < 1 || 3919 batchcount > limit || shared < 0) { 3920 res = 0; 3921 } else { 3922 res = do_tune_cpucache(cachep, limit, 3923 batchcount, shared); 3924 } 3925 break; 3926 } 3927 } 3928 mutex_unlock(&cache_chain_mutex); 3929 if (res >= 0) 3930 res = count; 3931 return res; 3932} 3933 3934#ifdef CONFIG_DEBUG_SLAB_LEAK 3935 3936static void *leaks_start(struct seq_file *m, loff_t *pos) 3937{ 3938 loff_t n = *pos; 3939 struct list_head *p; 3940 3941 mutex_lock(&cache_chain_mutex); 3942 p = cache_chain.next; 3943 while (n--) { 3944 p = p->next; 3945 if (p == &cache_chain) 3946 return NULL; 3947 } 3948 return list_entry(p, struct kmem_cache, next); 3949} 3950 3951static inline int add_caller(unsigned long *n, unsigned long v) 3952{ 3953 unsigned long *p; 3954 int l; 3955 if (!v) 3956 return 1; 3957 l = n[1]; 3958 p = n + 2; 3959 while (l) { 3960 int i = l/2; 3961 unsigned long *q = p + 2 * i; 3962 if (*q == v) { 3963 q[1]++; 3964 return 1; 3965 } 3966 if (*q > v) { 3967 l = i; 3968 } else { 3969 p = q + 2; 3970 l -= i + 1; 3971 } 3972 } 3973 if (++n[1] == n[0]) 3974 return 0; 3975 memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n)); 3976 p[0] = v; 3977 p[1] = 1; 3978 return 1; 3979} 3980 3981static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s) 3982{ 3983 void *p; 3984 int i; 3985 if (n[0] == n[1]) 3986 return; 3987 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) { 3988 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) 3989 continue; 3990 if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) 3991 return; 3992 } 3993} 3994 3995static void show_symbol(struct seq_file *m, unsigned long address) 3996{ 3997#ifdef CONFIG_KALLSYMS 3998 char *modname; 3999 const char *name; 4000 unsigned long offset, size; 4001 char namebuf[KSYM_NAME_LEN+1]; 4002 4003 name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); 4004 4005 if (name) { 4006 seq_printf(m, "%s+%#lx/%#lx", name, offset, size); 4007 if (modname) 4008 seq_printf(m, " [%s]", modname); 4009 return; 4010 } 4011#endif 4012 seq_printf(m, "%p", (void *)address); 4013} 4014 4015static int leaks_show(struct seq_file *m, void *p) 4016{ 4017 struct kmem_cache *cachep = p; 4018 struct list_head *q; 4019 struct slab *slabp; 4020 struct kmem_list3 *l3; 4021 const char *name; 4022 unsigned long *n = m->private; 4023 int node; 4024 int i; 4025 4026 if (!(cachep->flags & SLAB_STORE_USER)) 4027 return 0; 4028 if (!(cachep->flags & SLAB_RED_ZONE)) 4029 return 0; 4030 4031 /* OK, we can do it */ 4032 4033 n[1] = 0; 4034 4035 for_each_online_node(node) { 4036 l3 = cachep->nodelists[node]; 4037 if (!l3) 4038 continue; 4039 4040 check_irq_on(); 4041 spin_lock_irq(&l3->list_lock); 4042 4043 list_for_each(q, &l3->slabs_full) { 4044 slabp = list_entry(q, struct slab, list); 4045 handle_slab(n, cachep, slabp); 4046 } 4047 list_for_each(q, &l3->slabs_partial) { 4048 slabp = list_entry(q, struct slab, list); 4049 handle_slab(n, cachep, slabp); 4050 } 4051 spin_unlock_irq(&l3->list_lock); 4052 } 4053 name = cachep->name; 4054 if (n[0] == n[1]) { 4055 /* Increase the buffer size */ 4056 mutex_unlock(&cache_chain_mutex); 4057 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL); 4058 if (!m->private) { 4059 /* Too bad, we are really out */ 4060 m->private = n; 4061 mutex_lock(&cache_chain_mutex); 4062 return -ENOMEM; 4063 } 4064 *(unsigned long *)m->private = n[0] * 2; 4065 kfree(n); 4066 mutex_lock(&cache_chain_mutex); 4067 /* Now make sure this entry will be retried */ 4068 m->count = m->size; 4069 return 0; 4070 } 4071 for (i = 0; i < n[1]; i++) { 4072 seq_printf(m, "%s: %lu ", name, n[2*i+3]); 4073 show_symbol(m, n[2*i+2]); 4074 seq_putc(m, '\n'); 4075 } 4076 return 0; 4077} 4078 4079struct seq_operations slabstats_op = { 4080 .start = leaks_start, 4081 .next = s_next, 4082 .stop = s_stop, 4083 .show = leaks_show, 4084}; 4085#endif 4086#endif 4087 4088/** 4089 * ksize - get the actual amount of memory allocated for a given object 4090 * @objp: Pointer to the object 4091 * 4092 * kmalloc may internally round up allocations and return more memory 4093 * than requested. ksize() can be used to determine the actual amount of 4094 * memory allocated. The caller may use this additional memory, even though 4095 * a smaller amount of memory was initially specified with the kmalloc call. 4096 * The caller must guarantee that objp points to a valid object previously 4097 * allocated with either kmalloc() or kmem_cache_alloc(). The object 4098 * must not be freed during the duration of the call. 4099 */ 4100unsigned int ksize(const void *objp) 4101{ 4102 if (unlikely(objp == NULL)) 4103 return 0; 4104 4105 return obj_size(virt_to_cache(objp)); 4106} 4107