tcache.c revision 4507f34628dfae26e6b0a6faa13e5f9a49600616
#define	JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_internal.h"

/******************************************************************************/
/* Data. */

bool	opt_tcache = true;
ssize_t	opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;

tcache_bin_info_t	*tcache_bin_info;
static unsigned		stack_nelms; /* Total stack elms per tcache. */

/* Map of thread-specific caches. */
#ifndef NO_TLS
__thread tcache_t	*tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
#endif

/*
 * Same contents as tcache, but initialized such that the TSD destructor is
 * called when a thread exits, so that the cache can be cleaned up.
 */
pthread_key_t		tcache_tsd;

size_t	nhbins;
size_t	tcache_maxclass;

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static void	tcache_thread_cleanup(void *arg);

/******************************************************************************/

void *
tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
{
	void *ret;

	arena_tcache_fill_small(tcache->arena, tbin, binind,
	    config_prof ? tcache->prof_accumbytes : 0);
	if (config_prof)
		tcache->prof_accumbytes = 0;
	ret = tcache_alloc_easy(tbin);

	return (ret);
}
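
/*
 * Flush objects of size class binind from tbin until only rem remain.  Each
 * pass of the outer loop locks the bin, in the arena that owns the first
 * unflushed object, frees every object belonging to that arena, and compacts
 * the remaining pointers to the front of avail so that a later pass (under a
 * different lock) can handle them.  Per-thread stats and prof bytes are
 * merged into the thread's own arena when the loop visits it, or merged
 * explicitly afterwards if it never did.  tcache_bin_flush_large() below
 * follows the same pattern, but locks the owning arena as a whole rather
 * than a single bin.
 */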
void
tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
    tcache_t *tcache)
{
	void *ptr;
	unsigned i, nflush, ndeferred;
	bool merged_stats = false;

	assert(binind < NBINS);
	assert(rem <= tbin->ncached);

	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena bin associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
		    tbin->avail[0]);
		arena_t *arena = chunk->arena;
		arena_bin_t *bin = &arena->bins[binind];

		if (config_prof && arena == tcache->arena) {
			malloc_mutex_lock(&arena->lock);
			arena_prof_accum(arena, tcache->prof_accumbytes);
			malloc_mutex_unlock(&arena->lock);
			tcache->prof_accumbytes = 0;
		}

		malloc_mutex_lock(&bin->lock);
		if (config_stats && arena == tcache->arena) {
			assert(merged_stats == false);
			merged_stats = true;
			bin->stats.nflushes++;
			bin->stats.nrequests += tbin->tstats.nrequests;
			tbin->tstats.nrequests = 0;
		}
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = tbin->avail[i];
			assert(ptr != NULL);
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena) {
				size_t pageind = ((uintptr_t)ptr -
				    (uintptr_t)chunk) >> PAGE_SHIFT;
				arena_chunk_map_t *mapelm =
				    &chunk->map[pageind-map_bias];
				arena_dalloc_bin(arena, chunk, ptr, mapelm);
			} else {
				/*
				 * This object was allocated via a different
				 * arena bin than the one that is currently
				 * locked.  Stash the object, so that it can be
				 * handled in a future pass.
				 */
				tbin->avail[ndeferred] = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(&bin->lock);
	}
	if (config_stats && merged_stats == false) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		arena_bin_t *bin = &tcache->arena->bins[binind];
		malloc_mutex_lock(&bin->lock);
		bin->stats.nflushes++;
		bin->stats.nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(&bin->lock);
	}

	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
	    rem * sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

void
tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
    tcache_t *tcache)
{
	void *ptr;
	unsigned i, nflush, ndeferred;
	bool merged_stats = false;

	assert(binind < nhbins);
	assert(rem <= tbin->ncached);

	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
		    tbin->avail[0]);
		arena_t *arena = chunk->arena;

		malloc_mutex_lock(&arena->lock);
		if ((config_prof || config_stats) && arena == tcache->arena) {
			if (config_prof) {
				arena_prof_accum(arena,
				    tcache->prof_accumbytes);
				tcache->prof_accumbytes = 0;
			}
			if (config_stats) {
				merged_stats = true;
				arena->stats.nrequests_large +=
				    tbin->tstats.nrequests;
				arena->stats.lstats[binind - NBINS].nrequests +=
				    tbin->tstats.nrequests;
				tbin->tstats.nrequests = 0;
			}
		}
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = tbin->avail[i];
			assert(ptr != NULL);
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena)
				arena_dalloc_large(arena, chunk, ptr);
			else {
				/*
				 * This object was allocated via a different
				 * arena than the one that is currently locked.
				 * Stash the object, so that it can be handled
				 * in a future pass.
				 */
				tbin->avail[ndeferred] = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(&arena->lock);
	}
	if (config_stats && merged_stats == false) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		arena_t *arena = tcache->arena;
		malloc_mutex_lock(&arena->lock);
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		arena->stats.lstats[binind - NBINS].nrequests +=
		    tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(&arena->lock);
	}

	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
	    rem * sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}
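
/*
 * Each tcache is a single contiguous allocation: the tcache_t header
 * (including nhbins tcache_bin_t slots) is followed by the pointer stacks for
 * all bins, and each bin's avail pointer is aimed at its slice of that stack
 * region.  The total size is rounded up to a multiple of the cacheline size
 * to avoid false sharing; e.g., assuming CACHELINE is 64 (and CACHELINE_MASK
 * is 63), a 100-byte layout rounds up to 128 bytes via
 * (size + CACHELINE_MASK) & (-CACHELINE).
 */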
tcache_t *
tcache_create(arena_t *arena)
{
	tcache_t *tcache;
	size_t size, stack_offset;
	unsigned i;

	size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
	/* Naturally align the pointer stacks. */
	size = PTR_CEILING(size);
	stack_offset = size;
	size += stack_nelms * sizeof(void *);
	/*
	 * Round up to the nearest multiple of the cacheline size, in order to
	 * avoid the possibility of false cacheline sharing.
	 *
	 * That this works relies on the same logic as in ipalloc(), but we
	 * cannot directly call ipalloc() here due to tcache bootstrapping
	 * issues.
	 */
	size = (size + CACHELINE_MASK) & (-CACHELINE);

	if (size <= SMALL_MAXCLASS)
		tcache = (tcache_t *)arena_malloc_small(arena, size, true);
	else if (size <= tcache_maxclass)
		tcache = (tcache_t *)arena_malloc_large(arena, size, true);
	else
		tcache = (tcache_t *)icalloc(size);

	if (tcache == NULL)
		return (NULL);

	if (config_stats) {
		/* Link into list of extant tcaches. */
		malloc_mutex_lock(&arena->lock);
		ql_elm_new(tcache, link);
		ql_tail_insert(&arena->tcache_ql, tcache, link);
		malloc_mutex_unlock(&arena->lock);
	}

	tcache->arena = arena;
	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
	for (i = 0; i < nhbins; i++) {
		tcache->tbins[i].lg_fill_div = 1;
		tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
		    (uintptr_t)stack_offset);
		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
	}

	TCACHE_SET(tcache);

	return (tcache);
}

void
tcache_destroy(tcache_t *tcache)
{
	unsigned i;
	size_t tcache_size;

	if (config_stats) {
		/* Unlink from list of extant tcaches. */
		malloc_mutex_lock(&tcache->arena->lock);
		ql_remove(&tcache->arena->tcache_ql, tcache, link);
		malloc_mutex_unlock(&tcache->arena->lock);
		tcache_stats_merge(tcache, tcache->arena);
	}

	for (i = 0; i < NBINS; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_small(tbin, i, 0, tcache);

		if (config_stats && tbin->tstats.nrequests != 0) {
			arena_t *arena = tcache->arena;
			arena_bin_t *bin = &arena->bins[i];
			malloc_mutex_lock(&bin->lock);
			bin->stats.nrequests += tbin->tstats.nrequests;
			malloc_mutex_unlock(&bin->lock);
		}
	}

	for (; i < nhbins; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_large(tbin, i, 0, tcache);

		if (config_stats && tbin->tstats.nrequests != 0) {
			arena_t *arena = tcache->arena;
			malloc_mutex_lock(&arena->lock);
			arena->stats.nrequests_large += tbin->tstats.nrequests;
			arena->stats.lstats[i - NBINS].nrequests +=
			    tbin->tstats.nrequests;
			malloc_mutex_unlock(&arena->lock);
		}
	}

	if (config_prof && tcache->prof_accumbytes > 0) {
		malloc_mutex_lock(&tcache->arena->lock);
		arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
		malloc_mutex_unlock(&tcache->arena->lock);
	}

	tcache_size = arena_salloc(tcache);
	if (tcache_size <= SMALL_MAXCLASS) {
		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
		arena_t *arena = chunk->arena;
		size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
		    PAGE_SHIFT;
		arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias];
		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
		    (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) <<
		    PAGE_SHIFT));
		arena_bin_t *bin = run->bin;

		malloc_mutex_lock(&bin->lock);
		arena_dalloc_bin(arena, chunk, tcache, mapelm);
		malloc_mutex_unlock(&bin->lock);
	} else if (tcache_size <= tcache_maxclass) {
		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
		arena_t *arena = chunk->arena;

		malloc_mutex_lock(&arena->lock);
		arena_dalloc_large(arena, chunk, tcache);
		malloc_mutex_unlock(&arena->lock);
	} else
		idalloc(tcache);
}
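
/*
 * pthread TSD destructor.  The stored value doubles as cleanup state:
 *   - an actual tcache pointer: destroy the tcache, then store 1 so that
 *     allocator calls made by destructors that run later do not re-create it;
 *   - 2: another destructor called an allocator function after this
 *     destructor ran; store 1 again in order to receive another callback;
 *   - 1: cleanup already happened and nothing has used the allocator since,
 *     so do nothing and let the destructor chain terminate.
 */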
static void
tcache_thread_cleanup(void *arg)
{
	tcache_t *tcache = (tcache_t *)arg;

	if (tcache == (void *)(uintptr_t)1) {
		/*
		 * The previous time this destructor was called, we set the key
		 * to 1 so that other destructors wouldn't cause re-creation of
		 * the tcache.  This time, do nothing, so that the destructor
		 * will not be called again.
		 */
	} else if (tcache == (void *)(uintptr_t)2) {
		/*
		 * Another destructor called an allocator function after this
		 * destructor was called.  Reset tcache to 1 in order to
		 * receive another callback.
		 */
		TCACHE_SET((uintptr_t)1);
	} else if (tcache != NULL) {
		assert(tcache != (void *)(uintptr_t)1);
		tcache_destroy(tcache);
		TCACHE_SET((uintptr_t)1);
	}
}

void
tcache_stats_merge(tcache_t *tcache, arena_t *arena)
{
	unsigned i;

	/* Merge and reset tcache stats. */
	for (i = 0; i < NBINS; i++) {
		arena_bin_t *bin = &arena->bins[i];
		tcache_bin_t *tbin = &tcache->tbins[i];
		malloc_mutex_lock(&bin->lock);
		bin->stats.nrequests += tbin->tstats.nrequests;
		malloc_mutex_unlock(&bin->lock);
		tbin->tstats.nrequests = 0;
	}

	for (; i < nhbins; i++) {
		malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS];
		tcache_bin_t *tbin = &tcache->tbins[i];
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		lstats->nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
	}
}

bool
tcache_boot(void)
{

	if (opt_tcache) {
		unsigned i;

		/*
		 * If necessary, clamp opt_lg_tcache_max, now that
		 * SMALL_MAXCLASS and arena_maxclass are known.
		 * XXX Can this be done earlier?
		 */
		if (opt_lg_tcache_max < 0 || (1U <<
		    opt_lg_tcache_max) < SMALL_MAXCLASS)
			tcache_maxclass = SMALL_MAXCLASS;
		else if ((1U << opt_lg_tcache_max) > arena_maxclass)
			tcache_maxclass = arena_maxclass;
		else
			tcache_maxclass = (1U << opt_lg_tcache_max);

		nhbins = NBINS + (tcache_maxclass >> PAGE_SHIFT);

		/* Initialize tcache_bin_info. */
		tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
		    sizeof(tcache_bin_info_t));
		if (tcache_bin_info == NULL)
			return (true);
		stack_nelms = 0;
		for (i = 0; i < NBINS; i++) {
			if ((arena_bin_info[i].nregs << 1) <=
			    TCACHE_NSLOTS_SMALL_MAX) {
				tcache_bin_info[i].ncached_max =
				    (arena_bin_info[i].nregs << 1);
			} else {
				tcache_bin_info[i].ncached_max =
				    TCACHE_NSLOTS_SMALL_MAX;
			}
			stack_nelms += tcache_bin_info[i].ncached_max;
		}
		for (; i < nhbins; i++) {
			tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
			stack_nelms += tcache_bin_info[i].ncached_max;
		}

		if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
		    0) {
			malloc_write(
			    "<jemalloc>: Error in pthread_key_create()\n");
			abort();
		}
	}

	return (false);
}