tcache.c revision 1dcb4f86b23a5760f5a717ace716360b63b33fad
#define	JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_internal.h"
#ifdef JEMALLOC_TCACHE
/******************************************************************************/
/* Data. */

bool	opt_tcache = true;
ssize_t	opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
ssize_t	opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;

tcache_bin_info_t	*tcache_bin_info;
static unsigned		stack_nelms; /* Total stack elms per tcache. */

/* Map of thread-specific caches. */
#ifndef NO_TLS
__thread tcache_t	*tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
#endif

/*
 * Same contents as tcache, but initialized such that the TSD destructor is
 * called when a thread exits, so that the cache can be cleaned up.
 */
pthread_key_t		tcache_tsd;

size_t			nhbins;
size_t			tcache_maxclass;
unsigned		tcache_gc_incr;

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static void	tcache_thread_cleanup(void *arg);

/******************************************************************************/

void *
tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
{
	void *ret;

	arena_tcache_fill_small(tcache->arena, tbin, binind
#ifdef JEMALLOC_PROF
	    , tcache->prof_accumbytes
#endif
	    );
#ifdef JEMALLOC_PROF
	tcache->prof_accumbytes = 0;
#endif
	ret = tcache_alloc_easy(tbin);

	return (ret);
}

void
tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
    , tcache_t *tcache
#endif
    )
{
	void *ptr;
	unsigned i, nflush, ndeferred;
#ifdef JEMALLOC_STATS
	bool merged_stats = false;
#endif

	assert(binind < nbins);
	assert(rem <= tbin->ncached);

	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena bin associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
		    tbin->avail[0]);
		arena_t *arena = chunk->arena;
		arena_bin_t *bin = &arena->bins[binind];

#ifdef JEMALLOC_PROF
		if (arena == tcache->arena) {
			malloc_mutex_lock(&arena->lock);
			arena_prof_accum(arena, tcache->prof_accumbytes);
			malloc_mutex_unlock(&arena->lock);
			tcache->prof_accumbytes = 0;
		}
#endif

		malloc_mutex_lock(&bin->lock);
#ifdef JEMALLOC_STATS
		if (arena == tcache->arena) {
			assert(merged_stats == false);
			merged_stats = true;
			bin->stats.nflushes++;
			bin->stats.nrequests += tbin->tstats.nrequests;
			tbin->tstats.nrequests = 0;
		}
#endif
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = tbin->avail[i];
			assert(ptr != NULL);
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena) {
				size_t pageind = ((uintptr_t)ptr -
				    (uintptr_t)chunk) >> PAGE_SHIFT;
				arena_chunk_map_t *mapelm =
				    &chunk->map[pageind-map_bias];
				arena_dalloc_bin(arena, chunk, ptr, mapelm);
			} else {
				/*
				 * This object was allocated via a different
				 * arena bin than the one that is currently
				 * locked.  Stash the object, so that it can be
				 * handled in a future pass.
				 */
				tbin->avail[ndeferred] = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(&bin->lock);
	}
#ifdef JEMALLOC_STATS
	if (merged_stats == false) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		arena_bin_t *bin = &tcache->arena->bins[binind];
		malloc_mutex_lock(&bin->lock);
		bin->stats.nflushes++;
		bin->stats.nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(&bin->lock);
	}
#endif

	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
	    rem * sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

void
tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
    , tcache_t *tcache
#endif
    )
{
	void *ptr;
	unsigned i, nflush, ndeferred;
#ifdef JEMALLOC_STATS
	bool merged_stats = false;
#endif

	assert(binind < nhbins);
	assert(rem <= tbin->ncached);

	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
		    tbin->avail[0]);
		arena_t *arena = chunk->arena;

		malloc_mutex_lock(&arena->lock);
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
		if (arena == tcache->arena) {
#endif
#ifdef JEMALLOC_PROF
			arena_prof_accum(arena, tcache->prof_accumbytes);
			tcache->prof_accumbytes = 0;
#endif
#ifdef JEMALLOC_STATS
			merged_stats = true;
			arena->stats.nrequests_large += tbin->tstats.nrequests;
			arena->stats.lstats[binind - nbins].nrequests +=
			    tbin->tstats.nrequests;
			tbin->tstats.nrequests = 0;
#endif
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
		}
#endif
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = tbin->avail[i];
			assert(ptr != NULL);
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena)
				arena_dalloc_large(arena, chunk, ptr);
			else {
				/*
				 * This object was allocated via a different
				 * arena than the one that is currently locked.
				 * Stash the object, so that it can be handled
				 * in a future pass.
				 */
				tbin->avail[ndeferred] = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(&arena->lock);
	}
#ifdef JEMALLOC_STATS
	if (merged_stats == false) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		arena_t *arena = tcache->arena;
		malloc_mutex_lock(&arena->lock);
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		arena->stats.lstats[binind - nbins].nrequests +=
		    tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(&arena->lock);
	}
#endif

	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
	    rem * sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

tcache_t *
tcache_create(arena_t *arena)
{
	tcache_t *tcache;
	size_t size, stack_offset;
	unsigned i;

	size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
	/* Naturally align the pointer stacks. */
	size = PTR_CEILING(size);
	stack_offset = size;
	size += stack_nelms * sizeof(void *);
	/*
	 * Round up to the nearest multiple of the cacheline size, in order to
	 * avoid the possibility of false cacheline sharing.
	 *
	 * That this works relies on the same logic as in ipalloc(), but we
	 * cannot directly call ipalloc() here due to tcache bootstrapping
	 * issues.
	 */
	size = (size + CACHELINE_MASK) & (-CACHELINE);

	if (size <= small_maxclass)
		tcache = (tcache_t *)arena_malloc_small(arena, size, true);
	else if (size <= tcache_maxclass)
		tcache = (tcache_t *)arena_malloc_large(arena, size, true);
	else
		tcache = (tcache_t *)icalloc(size);

	if (tcache == NULL)
		return (NULL);

#ifdef JEMALLOC_STATS
	/* Link into list of extant tcaches. */
	malloc_mutex_lock(&arena->lock);
	ql_elm_new(tcache, link);
	ql_tail_insert(&arena->tcache_ql, tcache, link);
	malloc_mutex_unlock(&arena->lock);
#endif

	tcache->arena = arena;
	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
	for (i = 0; i < nhbins; i++) {
		tcache->tbins[i].lg_fill_div = 1;
		tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
		    (uintptr_t)stack_offset);
		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
	}

	TCACHE_SET(tcache);

	return (tcache);
}

void
tcache_destroy(tcache_t *tcache)
{
	unsigned i;
	size_t tcache_size;

#ifdef JEMALLOC_STATS
	/* Unlink from list of extant tcaches. */
	malloc_mutex_lock(&tcache->arena->lock);
	ql_remove(&tcache->arena->tcache_ql, tcache, link);
	malloc_mutex_unlock(&tcache->arena->lock);
	tcache_stats_merge(tcache, tcache->arena);
#endif

	for (i = 0; i < nbins; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_small(tbin, i, 0
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
		    , tcache
#endif
		    );

#ifdef JEMALLOC_STATS
		if (tbin->tstats.nrequests != 0) {
			arena_t *arena = tcache->arena;
			arena_bin_t *bin = &arena->bins[i];
			malloc_mutex_lock(&bin->lock);
			bin->stats.nrequests += tbin->tstats.nrequests;
			malloc_mutex_unlock(&bin->lock);
		}
#endif
	}

	for (; i < nhbins; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_large(tbin, i, 0
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
		    , tcache
#endif
		    );

#ifdef JEMALLOC_STATS
		if (tbin->tstats.nrequests != 0) {
			arena_t *arena = tcache->arena;
			malloc_mutex_lock(&arena->lock);
			arena->stats.nrequests_large += tbin->tstats.nrequests;
			arena->stats.lstats[i - nbins].nrequests +=
			    tbin->tstats.nrequests;
			malloc_mutex_unlock(&arena->lock);
		}
#endif
	}

#ifdef JEMALLOC_PROF
	if (tcache->prof_accumbytes > 0) {
		malloc_mutex_lock(&tcache->arena->lock);
		arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
		malloc_mutex_unlock(&tcache->arena->lock);
	}
#endif

	tcache_size = arena_salloc(tcache);
	if (tcache_size <= small_maxclass) {
		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
		arena_t *arena = chunk->arena;
		size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
		    PAGE_SHIFT;
		arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias];
		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
		    (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) <<
		    PAGE_SHIFT));
		arena_bin_t *bin = run->bin;

		malloc_mutex_lock(&bin->lock);
		arena_dalloc_bin(arena, chunk, tcache, mapelm);
		malloc_mutex_unlock(&bin->lock);
	} else if (tcache_size <= tcache_maxclass) {
		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
		arena_t *arena = chunk->arena;

		malloc_mutex_lock(&arena->lock);
		arena_dalloc_large(arena, chunk, tcache);
		malloc_mutex_unlock(&arena->lock);
	} else
		idalloc(tcache);
}

static void
tcache_thread_cleanup(void *arg)
{
	tcache_t *tcache = (tcache_t *)arg;

	if (tcache == (void *)(uintptr_t)1) {
		/*
		 * The previous time this destructor was called, we set the key
		 * to 1 so that other destructors wouldn't cause re-creation of
		 * the tcache.  This time, do nothing, so that the destructor
		 * will not be called again.
		 */
	} else if (tcache == (void *)(uintptr_t)2) {
		/*
		 * Another destructor called an allocator function after this
		 * destructor was called.  Reset tcache to 1 in order to
		 * receive another callback.
		 */
		TCACHE_SET((uintptr_t)1);
	} else if (tcache != NULL) {
		assert(tcache != (void *)(uintptr_t)1);
		tcache_destroy(tcache);
		TCACHE_SET((uintptr_t)1);
	}
}

#ifdef JEMALLOC_STATS
void
tcache_stats_merge(tcache_t *tcache, arena_t *arena)
{
	unsigned i;

	/* Merge and reset tcache stats. */
	for (i = 0; i < nbins; i++) {
		arena_bin_t *bin = &arena->bins[i];
		tcache_bin_t *tbin = &tcache->tbins[i];
		malloc_mutex_lock(&bin->lock);
		bin->stats.nrequests += tbin->tstats.nrequests;
		malloc_mutex_unlock(&bin->lock);
		tbin->tstats.nrequests = 0;
	}

	for (; i < nhbins; i++) {
		malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins];
		tcache_bin_t *tbin = &tcache->tbins[i];
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		lstats->nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
	}
}
#endif

bool
tcache_boot(void)
{

	if (opt_tcache) {
		unsigned i;

		/*
		 * If necessary, clamp opt_lg_tcache_max, now that
		 * small_maxclass and arena_maxclass are known.
		 */
		if (opt_lg_tcache_max < 0 || (1U <<
		    opt_lg_tcache_max) < small_maxclass)
			tcache_maxclass = small_maxclass;
		else if ((1U << opt_lg_tcache_max) > arena_maxclass)
			tcache_maxclass = arena_maxclass;
		else
			tcache_maxclass = (1U << opt_lg_tcache_max);

		nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);

		/* Initialize tcache_bin_info. */
		tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
		    sizeof(tcache_bin_info_t));
		if (tcache_bin_info == NULL)
			return (true);
		stack_nelms = 0;
		for (i = 0; i < nbins; i++) {
			if ((arena_bin_info[i].nregs << 1) <=
			    TCACHE_NSLOTS_SMALL_MAX) {
				tcache_bin_info[i].ncached_max =
				    (arena_bin_info[i].nregs << 1);
			} else {
				tcache_bin_info[i].ncached_max =
				    TCACHE_NSLOTS_SMALL_MAX;
			}
			stack_nelms += tcache_bin_info[i].ncached_max;
		}
		for (; i < nhbins; i++) {
			tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
			stack_nelms += tcache_bin_info[i].ncached_max;
		}

		/* Compute incremental GC event threshold. */
		if (opt_lg_tcache_gc_sweep >= 0) {
			tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
			    nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
			    0) ? 0 : 1);
		} else
			tcache_gc_incr = 0;

		if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
		    0) {
			malloc_write(
			    "<jemalloc>: Error in pthread_key_create()\n");
			abort();
		}
	}

	return (false);
}
/******************************************************************************/
#endif /* JEMALLOC_TCACHE */