arena.h revision 88393cb0eb9a046000d20809809d4adac11957ab
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

/*
 * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
 * as small as possible such that this setting is still honored, without
 * violating other constraints.  The goal is to make runs as small as possible
 * without exceeding a per run external fragmentation threshold.
 *
 * We use binary fixed point math for overhead computations, where the binary
 * point is implicitly RUN_BFP bits to the left.
 *
 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
 * honored for some/all object sizes, since when heap profiling is enabled
 * there is one pointer of header overhead per object (plus a constant).  This
 * constraint is relaxed (ignored) for runs that are so small that the
 * per-region overhead is greater than:
 *
 *   (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP)))
 */
#define	RUN_BFP			12
/*                                    \/   Implicit binary fixed point. */
#define	RUN_MAX_OVRHD		0x0000003dU
#define	RUN_MAX_OVRHD_RELAX	0x00001800U
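
/*
 * Worked illustration (one plausible reading of the constants above, not a
 * statement of the exact sizing algorithm): with the binary point RUN_BFP
 * bits to the left, RUN_MAX_OVRHD == 0x3d == 61 corresponds to a target of
 * roughly 61/2^12 ~= 1.5% external fragmentation per run, so a candidate run
 * size is acceptable once
 *
 *   (wasted_space << RUN_BFP) <= RUN_MAX_OVRHD * run_size
 *
 * where wasted_space is the header/pad space in the run that is not usable
 * for regions.
 */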

/* Maximum number of regions in one run. */
#define	LG_RUN_MAXREGS		11
#define	RUN_MAXREGS		(1U << LG_RUN_MAXREGS)

/*
 * Minimum redzone size.  Redzones may be larger than this if necessary to
 * preserve region alignment.
 */
#define	REDZONE_MINSIZE		16

/*
 * The minimum ratio of active:dirty pages per arena is computed as:
 *
 *   (nactive >> opt_lg_dirty_mult) >= ndirty
 *
 * So, supposing that opt_lg_dirty_mult is 3, there can be no less than 8 times
 * as many active pages as dirty pages.
 */
#define	LG_DIRTY_MULT_DEFAULT	3

typedef struct arena_chunk_map_s arena_chunk_map_t;
typedef struct arena_chunk_s arena_chunk_t;
typedef struct arena_run_s arena_run_t;
typedef struct arena_bin_info_s arena_bin_info_t;
typedef struct arena_bin_s arena_bin_t;
typedef struct arena_s arena_t;

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

/* Each element of the chunk map corresponds to one page within the chunk. */
struct arena_chunk_map_s {
#ifndef JEMALLOC_PROF
	/*
	 * Overlay prof_ctx in order to allow it to be referenced by dead code.
	 * Such antics aren't warranted for per arena data structures, but
	 * chunk map overhead accounts for a percentage of memory, rather than
	 * being just a fixed cost.
	 */
	union {
#endif
	union {
		/*
		 * Linkage for run trees.  There are two disjoint uses:
		 *
		 * 1) arena_t's runs_avail tree.
		 * 2) arena_run_t conceptually uses this linkage for in-use
		 *    non-full runs, rather than directly embedding linkage.
		 */
		rb_node(arena_chunk_map_t)	rb_link;
		/*
		 * List of runs currently in purgatory.  arena_chunk_purge()
		 * temporarily allocates runs that contain dirty pages while
		 * purging, so that other threads cannot use the runs while the
		 * purging thread is operating without the arena lock held.
		 */
		ql_elm(arena_chunk_map_t)	ql_link;
	} u;

	/* Profile counters, used for large object runs. */
	prof_ctx_t			*prof_ctx;
#ifndef JEMALLOC_PROF
	}; /* union { ... }; */
#endif

	/*
	 * Run address (or size) and various flags are stored together.  The
	 * bit layout looks like (assuming 32-bit system):
	 *
	 *   ???????? ???????? ????nnnn nnnndula
	 *
	 * ? : Unallocated: Run address for first/last pages, unset for
	 *     internal pages.
	 *     Small: Run page offset.
	 *     Large: Run size for first page, unset for trailing pages.
	 * n : binind for small size class, BININD_INVALID for large size
	 *     class.
	 * d : dirty?
	 * u : unzeroed?
	 * l : large?
	 * a : allocated?
	 *
	 * Following are example bit patterns for the three types of runs.
	 *
	 * p : run page offset
	 * s : run size
	 * n : binind for size class; large objects set these to BININD_INVALID
	 *     except for promoted allocations (see prof_promote)
	 * x : don't care
	 * - : 0
	 * + : 1
	 * [DULA] : bit set
	 * [dula] : bit unset
	 *
	 *   Unallocated (clean):
	 *     ssssssss ssssssss ssss++++ ++++du-a
	 *     xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx
	 *     ssssssss ssssssss ssss++++ ++++dU-a
	 *
	 *   Unallocated (dirty):
	 *     ssssssss ssssssss ssss++++ ++++D--a
	 *     xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
	 *     ssssssss ssssssss ssss++++ ++++D--a
	 *
	 *   Small:
	 *     pppppppp pppppppp ppppnnnn nnnnd--A
	 *     pppppppp pppppppp ppppnnnn nnnn---A
	 *     pppppppp pppppppp ppppnnnn nnnnd--A
	 *
	 *   Large:
	 *     ssssssss ssssssss ssss++++ ++++D-LA
	 *     xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
	 *     -------- -------- ----++++ ++++D-LA
	 *
	 *   Large (sampled, size <= PAGE):
	 *     ssssssss ssssssss ssssnnnn nnnnD-LA
	 *
	 *   Large (not sampled, size == PAGE):
	 *     ssssssss ssssssss ssss++++ ++++D-LA
	 */
	size_t				bits;
#define	CHUNK_MAP_BININD_SHIFT	4
#define	BININD_INVALID		((size_t)0xffU)
/*     CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */
#define	CHUNK_MAP_BININD_MASK	((size_t)0xff0U)
#define	CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK
#define	CHUNK_MAP_FLAGS_MASK	((size_t)0xcU)
#define	CHUNK_MAP_DIRTY		((size_t)0x8U)
#define	CHUNK_MAP_UNZEROED	((size_t)0x4U)
#define	CHUNK_MAP_LARGE		((size_t)0x2U)
#define	CHUNK_MAP_ALLOCATED	((size_t)0x1U)
#define	CHUNK_MAP_KEY		CHUNK_MAP_ALLOCATED
};
typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
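
/*
 * Worked example of the encoding described above (illustrative only): the
 * first page of a small run at page offset 0, bound to bin index 2, dirty and
 * allocated, carries
 *
 *   bits = (0 << LG_PAGE) | ((size_t)2 << CHUNK_MAP_BININD_SHIFT) |
 *       CHUNK_MAP_DIRTY | CHUNK_MAP_ALLOCATED,  i.e. 0x29.
 *
 * Decoding reverses the packing, e.g.
 *
 *   binind = (bits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;  (2)
 *
 * and (bits & CHUNK_MAP_LARGE) == 0 together with
 * (bits & CHUNK_MAP_ALLOCATED) != 0 identifies a small allocation.  The
 * arena_mapbits_*_get()/arena_mapbits_*_set() inlines later in this header
 * perform these accesses.
 */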

/* Arena chunk header. */
struct arena_chunk_s {
	/* Arena that owns the chunk. */
	arena_t			*arena;

	/* Linkage for tree of arena chunks that contain dirty runs. */
	rb_node(arena_chunk_t)	dirty_link;

	/* Number of dirty pages. */
	size_t			ndirty;

	/* Number of available runs. */
	size_t			nruns_avail;

	/*
	 * Number of available run adjacencies.  Clean and dirty available runs
	 * are not coalesced, which causes virtual memory fragmentation.  The
	 * ratio of (nruns_avail-nruns_adjac):nruns_adjac is used for tracking
	 * this fragmentation.
	 */
	size_t			nruns_adjac;

	/*
	 * Map of pages within chunk that keeps track of free/large/small.  The
	 * first map_bias entries are omitted, since the chunk header does not
	 * need to be tracked in the map.  This omission saves a header page
	 * for common chunk sizes (e.g. 4 MiB).
	 */
	arena_chunk_map_t	map[1]; /* Dynamically sized. */
};
typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;

struct arena_run_s {
	/* Bin this run is associated with. */
	arena_bin_t	*bin;

	/* Index of next region that has never been allocated, or nregs. */
	uint32_t	nextind;

	/* Number of free regions in run. */
	unsigned	nfree;
};

/*
 * Read-only information associated with each element of arena_t's bins array
 * is stored separately, partly to reduce memory usage (only one copy, rather
 * than one per arena), but mainly to avoid false cacheline sharing.
 *
 * Each run has the following layout:
 *
 *               /--------------------\
 *               | arena_run_t header |
 *               | ...                |
 * bitmap_offset | bitmap             |
 *               | ...                |
 *   ctx0_offset | ctx map            |
 *               | ...                |
 *               |--------------------|
 *               | redzone            |
 *   reg0_offset | region 0           |
 *               | redzone            |
 *               |--------------------| \
 *               | redzone            | |
 *               | region 1           |  > reg_interval
 *               | redzone            | /
 *               |--------------------|
 *               | ...                |
 *               | ...                |
 *               | ...                |
 *               |--------------------|
 *               | redzone            |
 *               | region nregs-1     |
 *               | redzone            |
 *               |--------------------|
 *               | alignment pad?     |
 *               \--------------------/
 *
 * reg_interval has at least the same minimum alignment as reg_size; this
 * preserves the alignment constraint that sa2u() depends on.  Alignment pad is
 * either 0 or redzone_size; it is present only if needed to align reg0_offset.
 */
struct arena_bin_info_s {
	/* Size of regions in a run for this bin's size class. */
	size_t		reg_size;

	/* Redzone size. */
	size_t		redzone_size;

	/* Interval between regions (reg_size + (redzone_size << 1)). */
	size_t		reg_interval;

	/* Total size of a run for this bin's size class. */
	size_t		run_size;

	/* Total number of regions in a run for this bin's size class. */
	uint32_t	nregs;

	/*
	 * Offset of first bitmap_t element in a run header for this bin's size
	 * class.
	 */
	uint32_t	bitmap_offset;

	/*
	 * Metadata used to manipulate bitmaps for runs associated with this
	 * bin.
	 */
	bitmap_info_t	bitmap_info;

	/*
	 * Offset of first (prof_ctx_t *) in a run header for this bin's size
	 * class, or 0 if (config_prof == false || opt_prof == false).
	 */
	uint32_t	ctx0_offset;

	/* Offset of first region in a run for this bin's size class. */
	uint32_t	reg0_offset;
};
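
/*
 * The layout above implies a simple address relationship between a run and
 * its regions; an illustrative sketch (the fast path for the inverse
 * computation is arena_run_regind(), defined later in this header):
 *
 *   region(run, regind) = (void *)((uintptr_t)run + bin_info->reg0_offset +
 *       (size_t)regind * bin_info->reg_interval)
 *   regind(run, ptr)    = ((uintptr_t)ptr - (uintptr_t)run -
 *       bin_info->reg0_offset) / bin_info->reg_interval
 *
 * For example, for a hypothetical bin with reg_size == 32, redzone_size == 16
 * and reg0_offset == 64, reg_interval == 32 + (16 << 1) == 64, so region 3
 * starts at run + 64 + 3*64 == run + 256.
 */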

struct arena_bin_s {
	/*
	 * All operations on runcur, runs, and stats require that lock be
	 * locked.  Run allocation/deallocation are protected by the arena
	 * lock, which may be acquired while holding one or more bin locks,
	 * but not vice versa.
	 */
	malloc_mutex_t		lock;

	/*
	 * Current run being used to service allocations of this bin's size
	 * class.
	 */
	arena_run_t		*runcur;

	/*
	 * Tree of non-full runs.  This tree is used when looking for an
	 * existing run when runcur is no longer usable.  We choose the
	 * non-full run that is lowest in memory; this policy tends to keep
	 * objects packed well, and it can also help reduce the number of
	 * almost-empty chunks.
	 */
	arena_run_tree_t	runs;

	/* Bin statistics. */
	malloc_bin_stats_t	stats;
};

struct arena_s {
	/* This arena's index within the arenas array. */
	unsigned		ind;

	/*
	 * Number of threads currently assigned to this arena.  This field is
	 * protected by arenas_lock.
	 */
	unsigned		nthreads;

	/*
	 * There are three classes of arena operations from a locking
	 * perspective:
	 * 1) Thread assignment (modifies nthreads) is protected by
	 *    arenas_lock.
	 * 2) Bin-related operations are protected by bin locks.
	 * 3) Chunk- and run-related operations are protected by this mutex.
	 */
	malloc_mutex_t		lock;

	arena_stats_t		stats;
	/*
	 * List of tcaches for extant threads associated with this arena.
	 * Stats from these are merged incrementally, and at exit.
	 */
	ql_head(tcache_t)	tcache_ql;

	uint64_t		prof_accumbytes;

	dss_prec_t		dss_prec;

	/* Tree of dirty-page-containing chunks this arena manages. */
	arena_chunk_tree_t	chunks_dirty;

	/*
	 * In order to avoid rapid chunk allocation/deallocation when an arena
	 * oscillates right on the cusp of needing a new chunk, cache the most
	 * recently freed chunk.  The spare is left in the arena's chunk trees
	 * until it is deleted.
	 *
	 * There is one spare chunk per arena, rather than one spare total, in
	 * order to avoid interactions between multiple threads that could make
	 * a single spare inadequate.
	 */
	arena_chunk_t		*spare;

	/* Number of pages in active runs. */
	size_t			nactive;

	/*
	 * Current count of pages within unused runs that are potentially
	 * dirty, and for which madvise(... MADV_DONTNEED) has not been called.
	 * By tracking this, we can institute a limit on how much dirty unused
	 * memory is mapped for each arena.
	 */
	size_t			ndirty;

	/*
	 * Approximate number of pages being purged.  It is possible for
	 * multiple threads to purge dirty pages concurrently, and they use
	 * npurgatory to indicate the total number of pages all threads are
	 * attempting to purge.
	 */
	size_t			npurgatory;

	/*
	 * Size/address-ordered tree of this arena's available runs.  The tree
	 * is used for first-best-fit run allocation.
	 */
	arena_avail_tree_t	runs_avail;

	/* bins is used to store trees of free regions. */
	arena_bin_t		bins[NBINS];
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern ssize_t	opt_lg_dirty_mult;
/*
 * small_size2bin is a compact lookup table that rounds request sizes up to
 * size classes.  In order to reduce cache footprint, the table is compressed,
 * and all accesses are via the SMALL_SIZE2BIN macro.
 */
extern uint8_t const	small_size2bin[];
#define	SMALL_SIZE2BIN(s)	(small_size2bin[(s-1) >> LG_TINY_MIN])

extern arena_bin_info_t	arena_bin_info[NBINS];
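
/*
 * Usage sketch for the macro above: a small request size s is rounded up to
 * its size class by indexing the table with (s-1) >> LG_TINY_MIN, e.g.
 *
 *   size_t binind = SMALL_SIZE2BIN(size);
 *   size_t usize = arena_bin_info[binind].reg_size;	/* Rounded-up size. */
 *
 * The table is indexed at (1 << LG_TINY_MIN)-byte granularity, so all sizes
 * that share an (s-1) >> LG_TINY_MIN value necessarily map to the same bin.
 */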

/* Number of large size classes. */
#define	nlclasses (chunk_npages - map_bias)

void	arena_purge_all(arena_t *arena);
void	arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
    size_t binind, uint64_t prof_accumbytes);
void	arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info,
    bool zero);
void	arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
void	*arena_malloc_small(arena_t *arena, size_t size, bool zero);
void	*arena_malloc_large(arena_t *arena, size_t size, bool zero);
void	*arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero);
void	arena_prof_promoted(const void *ptr, size_t size);
void	arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    arena_chunk_map_t *mapelm);
void	arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    size_t pageind, arena_chunk_map_t *mapelm);
void	arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    size_t pageind);
void	arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk,
    void *ptr);
void	arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
void	*arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
    size_t extra, bool zero);
void	*arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size,
    size_t extra, size_t alignment, bool zero, bool try_tcache_alloc,
    bool try_tcache_dalloc);
dss_prec_t	arena_dss_prec_get(arena_t *arena);
void	arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
void	arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive,
    size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
    malloc_large_stats_t *lstats);
bool	arena_new(arena_t *arena, unsigned ind);
void	arena_boot(void);
void	arena_prefork(arena_t *arena);
void	arena_postfork_parent(arena_t *arena);
void	arena_postfork_child(arena_t *arena);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
arena_chunk_map_t	*arena_mapp_get(arena_chunk_t *chunk, size_t pageind);
size_t	*arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_unallocated_size_get(arena_chunk_t *chunk,
    size_t pageind);
size_t	arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind);
void	arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind,
    size_t size, size_t flags);
void	arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
    size_t size);
void	arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind,
    size_t size, size_t flags);
void	arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
    size_t binind);
void	arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind,
    size_t runind, size_t binind, size_t flags);
void	arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
    size_t unzeroed);
void	arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes);
void	arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes);
void	arena_prof_accum(arena_t *arena, uint64_t accumbytes);
size_t	arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
size_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
unsigned	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
    const void *ptr);
prof_ctx_t	*arena_prof_ctx_get(const void *ptr);
void	arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
void	*arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
size_t	arena_salloc(const void *ptr, bool demote);
void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    bool try_tcache);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
# ifdef JEMALLOC_ARENA_INLINE_A
JEMALLOC_ALWAYS_INLINE arena_chunk_map_t *
arena_mapp_get(arena_chunk_t *chunk, size_t pageind)
{

	assert(pageind >= map_bias);
	assert(pageind < chunk_npages);

	return (&chunk->map[pageind-map_bias]);
}

JEMALLOC_ALWAYS_INLINE size_t *
arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind)
{

	return (&arena_mapp_get(chunk, pageind)->bits);
}

JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_get(arena_chunk_t *chunk, size_t pageind)
{

	return (*arena_mapbitsp_get(chunk, pageind));
}

JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;

	mapbits = arena_mapbits_get(chunk, pageind);
	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
	return (mapbits & ~PAGE_MASK);
}

JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;

	mapbits = arena_mapbits_get(chunk, pageind);
	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
	    (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED));
	return (mapbits & ~PAGE_MASK);
}

JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;

	mapbits = arena_mapbits_get(chunk, pageind);
	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
	    CHUNK_MAP_ALLOCATED);
	return (mapbits >> LG_PAGE);
}

JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;
	size_t binind;

	mapbits = arena_mapbits_get(chunk, pageind);
	binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
	assert(binind < NBINS || binind == BININD_INVALID);
	return (binind);
}

JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;

	mapbits = arena_mapbits_get(chunk, pageind);
	return (mapbits & CHUNK_MAP_DIRTY);
}

JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;

	mapbits = arena_mapbits_get(chunk, pageind);
	return (mapbits & CHUNK_MAP_UNZEROED);
}

JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;

	mapbits = arena_mapbits_get(chunk, pageind);
	return (mapbits & CHUNK_MAP_LARGE);
}

JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;

	mapbits = arena_mapbits_get(chunk, pageind);
	return (mapbits & CHUNK_MAP_ALLOCATED);
}

JEMALLOC_ALWAYS_INLINE void
arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
    size_t flags)
{
	size_t *mapbitsp;

	mapbitsp = arena_mapbitsp_get(chunk, pageind);
	assert((size & PAGE_MASK) == 0);
	assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0);
	assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags);
	*mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags;
}

JEMALLOC_ALWAYS_INLINE void
arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
    size_t size)
{
	size_t *mapbitsp;

	mapbitsp = arena_mapbitsp_get(chunk, pageind);
	assert((size & PAGE_MASK) == 0);
	assert((*mapbitsp & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
	*mapbitsp = size | (*mapbitsp & PAGE_MASK);
}

JEMALLOC_ALWAYS_INLINE void
arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size,
    size_t flags)
{
	size_t *mapbitsp;
	size_t unzeroed;

	mapbitsp = arena_mapbitsp_get(chunk, pageind);
	assert((size & PAGE_MASK) == 0);
	assert((flags & CHUNK_MAP_DIRTY) == flags);
	unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
	*mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | unzeroed |
	    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
}

JEMALLOC_ALWAYS_INLINE void
arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
    size_t binind)
{
	size_t *mapbitsp;

	assert(binind <= BININD_INVALID);
	mapbitsp = arena_mapbitsp_get(chunk, pageind);
	assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE);
	*mapbitsp = (*mapbitsp & ~CHUNK_MAP_BININD_MASK) | (binind <<
	    CHUNK_MAP_BININD_SHIFT);
}

JEMALLOC_ALWAYS_INLINE void
arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind,
    size_t binind, size_t flags)
{
	size_t *mapbitsp;
	size_t unzeroed;

	assert(binind < BININD_INVALID);
	mapbitsp = arena_mapbitsp_get(chunk, pageind);
	assert(pageind - runind >= map_bias);
	assert((flags & CHUNK_MAP_DIRTY) == flags);
	unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
	*mapbitsp = (runind << LG_PAGE) | (binind << CHUNK_MAP_BININD_SHIFT) |
	    flags | unzeroed | CHUNK_MAP_ALLOCATED;
}

JEMALLOC_ALWAYS_INLINE void
arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
    size_t unzeroed)
{
	size_t *mapbitsp;

	mapbitsp = arena_mapbitsp_get(chunk, pageind);
	*mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed;
}

JEMALLOC_INLINE void
arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes)
{

	cassert(config_prof);
	assert(prof_interval != 0);

	arena->prof_accumbytes += accumbytes;
	if (arena->prof_accumbytes >= prof_interval) {
		prof_idump();
		arena->prof_accumbytes -= prof_interval;
	}
}

JEMALLOC_INLINE void
arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes)
{

	cassert(config_prof);

	if (prof_interval == 0)
		return;
	arena_prof_accum_impl(arena, accumbytes);
}

JEMALLOC_INLINE void
arena_prof_accum(arena_t *arena, uint64_t accumbytes)
{

	cassert(config_prof);

	if (prof_interval == 0)
		return;
	malloc_mutex_lock(&arena->lock);
	arena_prof_accum_impl(arena, accumbytes);
	malloc_mutex_unlock(&arena->lock);
}

JEMALLOC_ALWAYS_INLINE size_t
arena_ptr_small_binind_get(const void *ptr, size_t mapbits)
{
	size_t binind;

	binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;

	if (config_debug) {
		arena_chunk_t *chunk;
		arena_t *arena;
		size_t pageind;
		size_t actual_mapbits;
		arena_run_t *run;
		arena_bin_t *bin;
		size_t actual_binind;
		arena_bin_info_t *bin_info;

		assert(binind != BININD_INVALID);
		assert(binind < NBINS);
		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
		arena = chunk->arena;
		pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
		actual_mapbits = arena_mapbits_get(chunk, pageind);
		assert(mapbits == actual_mapbits);
		assert(arena_mapbits_large_get(chunk, pageind) == 0);
		assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
		run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
		    (actual_mapbits >> LG_PAGE)) << LG_PAGE));
		bin = run->bin;
		actual_binind = bin - arena->bins;
		assert(binind == actual_binind);
		bin_info = &arena_bin_info[actual_binind];
		assert(((uintptr_t)ptr - ((uintptr_t)run +
		    (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval
		    == 0);
	}

	return (binind);
}
# endif /* JEMALLOC_ARENA_INLINE_A */

# ifdef JEMALLOC_ARENA_INLINE_B
JEMALLOC_INLINE size_t
arena_bin_index(arena_t *arena, arena_bin_t *bin)
{
	size_t binind = bin - arena->bins;
	assert(binind < NBINS);
	return (binind);
}

JEMALLOC_INLINE unsigned
arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
{
	unsigned shift, diff, regind;
	size_t interval;

	/*
	 * Freeing a pointer lower than region zero can cause assertion
	 * failure.
	 */
	assert((uintptr_t)ptr >= (uintptr_t)run +
	    (uintptr_t)bin_info->reg0_offset);

	/*
	 * Avoid doing division with a variable divisor if possible.  Using
	 * actual division here can reduce allocator throughput by over 20%!
	 */
	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
	    bin_info->reg0_offset);

	/* Rescale (factor powers of 2 out of the numerator and denominator). */
	interval = bin_info->reg_interval;
	shift = ffs(interval) - 1;
	diff >>= shift;
	interval >>= shift;

	if (interval == 1) {
		/* The divisor was a power of 2. */
		regind = diff;
	} else {
		/*
		 * To divide by a number D that is not a power of two we
		 * multiply by (2^21 / D) and then right shift by 21 positions.
		 *
		 *   X / D
		 *
		 * becomes
		 *
		 *   (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT
		 *
		 * We can omit the first three elements, because we never
		 * divide by 0, and 1 and 2 are both powers of two, which are
		 * handled above.
		 */
#define	SIZE_INV_SHIFT	((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
#define	SIZE_INV(s)	(((1U << SIZE_INV_SHIFT) / (s)) + 1)
		static const unsigned interval_invs[] = {
		    SIZE_INV(3),
		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
		};

		if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) +
		    2)) {
			regind = (diff * interval_invs[interval - 3]) >>
			    SIZE_INV_SHIFT;
		} else
			regind = diff / interval;
#undef SIZE_INV
#undef SIZE_INV_SHIFT
	}
	assert(diff == regind * interval);
	assert(regind < bin_info->nregs);

	return (regind);
}
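
/*
 * Worked example for the inverse-multiplication trick above, assuming 32-bit
 * unsigned (so SIZE_INV_SHIFT == 32 - 11 == 21) and a hypothetical bin with
 * reg_interval == 48 and reg0_offset == 0:
 *
 *   ptr - run == 1536 (region 32)
 *   shift == ffs(48) - 1 == 4, so diff == 1536 >> 4 == 96, interval == 3
 *   SIZE_INV(3) == ((1U << 21) / 3) + 1 == 699051
 *   regind == (96 * 699051) >> 21 == 67108896 >> 21 == 32
 *
 * which matches 1536 / 48.  Rounding the inverse up by one and tying
 * SIZE_INV_SHIFT to LG_RUN_MAXREGS keeps the result exact for the diff values
 * that can occur within a run (regind < RUN_MAXREGS), as the asserts above
 * verify.
 */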

JEMALLOC_INLINE prof_ctx_t *
arena_prof_ctx_get(const void *ptr)
{
	prof_ctx_t *ret;
	arena_chunk_t *chunk;
	size_t pageind, mapbits;

	cassert(config_prof);
	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
	mapbits = arena_mapbits_get(chunk, pageind);
	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
		if (prof_promote)
			ret = (prof_ctx_t *)(uintptr_t)1U;
		else {
			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
			    (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
			    LG_PAGE));
			size_t binind = arena_ptr_small_binind_get(ptr,
			    mapbits);
			arena_bin_info_t *bin_info = &arena_bin_info[binind];
			unsigned regind;

			regind = arena_run_regind(run, bin_info, ptr);
			ret = *(prof_ctx_t **)((uintptr_t)run +
			    bin_info->ctx0_offset + (regind *
			    sizeof(prof_ctx_t *)));
		}
	} else
		ret = arena_mapp_get(chunk, pageind)->prof_ctx;

	return (ret);
}

JEMALLOC_INLINE void
arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
	arena_chunk_t *chunk;
	size_t pageind, mapbits;

	cassert(config_prof);
	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
	mapbits = arena_mapbits_get(chunk, pageind);
	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
		if (prof_promote == false) {
			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
			    (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
			    LG_PAGE));
			size_t binind;
			arena_bin_info_t *bin_info;
			unsigned regind;

			binind = arena_ptr_small_binind_get(ptr, mapbits);
			bin_info = &arena_bin_info[binind];
			regind = arena_run_regind(run, bin_info, ptr);

			*((prof_ctx_t **)((uintptr_t)run +
			    bin_info->ctx0_offset + (regind *
			    sizeof(prof_ctx_t *)))) = ctx;
		} else
			assert((uintptr_t)ctx == (uintptr_t)1U);
	} else
		arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
}

JEMALLOC_ALWAYS_INLINE void *
arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache)
{
	tcache_t *tcache;

	assert(size != 0);
	assert(size <= arena_maxclass);

	if (size <= SMALL_MAXCLASS) {
		if (try_tcache && (tcache = tcache_get(true)) != NULL)
			return (tcache_alloc_small(tcache, size, zero));
		else {
			return (arena_malloc_small(choose_arena(arena), size,
			    zero));
		}
	} else {
		/*
		 * Initialize tcache after checking size in order to avoid
		 * infinite recursion during tcache initialization.
		 */
		if (try_tcache && size <= tcache_maxclass && (tcache =
		    tcache_get(true)) != NULL)
			return (tcache_alloc_large(tcache, size, zero));
		else {
			return (arena_malloc_large(choose_arena(arena), size,
			    zero));
		}
	}
}

/* Return the size of the allocation pointed to by ptr. */
JEMALLOC_ALWAYS_INLINE size_t
arena_salloc(const void *ptr, bool demote)
{
	size_t ret;
	arena_chunk_t *chunk;
	size_t pageind, binind;

	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
	assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
	binind = arena_mapbits_binind_get(chunk, pageind);
	if (binind == BININD_INVALID || (config_prof && demote == false &&
	    prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) {
		/*
		 * Large allocation.  In the common case (demote == true), and
		 * as this is an inline function, most callers will only end up
		 * looking at binind to determine that ptr is a small
		 * allocation.
		 */
		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
		ret = arena_mapbits_large_size_get(chunk, pageind);
		assert(ret != 0);
		assert(pageind + (ret>>LG_PAGE) <= chunk_npages);
		assert(ret == PAGE || arena_mapbits_large_size_get(chunk,
		    pageind+(ret>>LG_PAGE)-1) == 0);
		assert(binind == arena_mapbits_binind_get(chunk,
		    pageind+(ret>>LG_PAGE)-1));
		assert(arena_mapbits_dirty_get(chunk, pageind) ==
		    arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1));
	} else {
		/*
		 * Small allocation (possibly promoted to a large object due to
		 * prof_promote).
		 */
		assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
		    arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
		    pageind)) == binind);
		ret = arena_bin_info[binind].reg_size;
	}

	return (ret);
}

JEMALLOC_ALWAYS_INLINE void
arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache)
{
	size_t pageind, mapbits;
	tcache_t *tcache;

	assert(arena != NULL);
	assert(chunk->arena == arena);
	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);

	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
	mapbits = arena_mapbits_get(chunk, pageind);
	assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
		/* Small allocation. */
		if (try_tcache && (tcache = tcache_get(false)) != NULL) {
			size_t binind;

			binind = arena_ptr_small_binind_get(ptr, mapbits);
			tcache_dalloc_small(tcache, ptr, binind);
		} else
			arena_dalloc_small(arena, chunk, ptr, pageind);
	} else {
		size_t size = arena_mapbits_large_size_get(chunk, pageind);

		assert(((uintptr_t)ptr & PAGE_MASK) == 0);

		if (try_tcache && size <= tcache_maxclass && (tcache =
		    tcache_get(false)) != NULL) {
			tcache_dalloc_large(tcache, ptr, size);
		} else
			arena_dalloc_large(arena, chunk, ptr);
	}
}
# endif /* JEMALLOC_ARENA_INLINE_B */
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/