arena.h revision d8ceef6c5558fdab8f9448376ae065a9e5ffcbdd
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

/*
 * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
 * as small as possible such that this setting is still honored, without
 * violating other constraints.  The goal is to make runs as small as possible
 * without exceeding a per run external fragmentation threshold.
 *
 * We use binary fixed point math for overhead computations, where the binary
 * point is implicitly RUN_BFP bits to the left.
 *
 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
 * honored for some/all object sizes, since when heap profiling is enabled
 * there is one pointer of header overhead per object (plus a constant).  This
 * constraint is relaxed (ignored) for runs that are so small that the
 * per-region overhead is greater than:
 *
 *   (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP)))
 */
#define	RUN_BFP			12
/*                                    \/   Implicit binary fixed point. */
#define	RUN_MAX_OVRHD		0x0000003dU
#define	RUN_MAX_OVRHD_RELAX	0x00001800U
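/*
 * Worked example of the fixed point convention above: with RUN_BFP == 12,
 * RUN_MAX_OVRHD == 0x3d (61) represents 61/2^12, i.e. a maximum desired
 * header overhead of roughly 1.5% of a run's size.
 */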

/* Maximum number of regions in one run. */
#define	LG_RUN_MAXREGS		11
#define	RUN_MAXREGS		(1U << LG_RUN_MAXREGS)

/*
 * Minimum redzone size.  Redzones may be larger than this if necessary to
 * preserve region alignment.
 */
#define	REDZONE_MINSIZE		16

/*
 * The minimum ratio of active:dirty pages per arena is computed as:
 *
 *   (nactive >> opt_lg_dirty_mult) >= ndirty
 *
 * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32
 * times as many active pages as dirty pages.
 */
#define	LG_DIRTY_MULT_DEFAULT	5

typedef struct arena_chunk_map_s arena_chunk_map_t;
typedef struct arena_chunk_s arena_chunk_t;
typedef struct arena_run_s arena_run_t;
typedef struct arena_bin_info_s arena_bin_info_t;
typedef struct arena_bin_s arena_bin_t;
typedef struct arena_s arena_t;

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

/* Each element of the chunk map corresponds to one page within the chunk. */
struct arena_chunk_map_s {
#ifndef JEMALLOC_PROF
	/*
	 * Overlay prof_ctx in order to allow it to be referenced by dead code.
	 * Such antics aren't warranted for per arena data structures, but
	 * chunk map overhead accounts for a percentage of memory, rather than
	 * being just a fixed cost.
	 */
	union {
#endif
	union {
		/*
		 * Linkage for run trees.  There are two disjoint uses:
		 *
		 * 1) arena_t's runs_avail_{clean,dirty} trees.
		 * 2) arena_run_t conceptually uses this linkage for in-use
		 *    non-full runs, rather than directly embedding linkage.
		 */
		rb_node(arena_chunk_map_t)	rb_link;
		/*
		 * List of runs currently in purgatory.  arena_chunk_purge()
		 * temporarily allocates runs that contain dirty pages while
		 * purging, so that other threads cannot use the runs while the
		 * purging thread is operating without the arena lock held.
		 */
		ql_elm(arena_chunk_map_t)	ql_link;
	} u;

	/* Profile counters, used for large object runs. */
	prof_ctx_t	*prof_ctx;
#ifndef JEMALLOC_PROF
	}; /* union { ... }; */
#endif

	/*
	 * Run address (or size) and various flags are stored together.  The
	 * bit layout looks like (assuming 32-bit system):
	 *
	 *   ???????? ???????? ????nnnn nnnndula
	 *
	 * ? : Unallocated: Run address for first/last pages, unset for
	 *                  internal pages.
	 *     Small: Run page offset.
	 *     Large: Run size for first page, unset for trailing pages.
	 * n : binind for small size class, BININD_INVALID for large size
	 *     class.
	 * d : dirty?
	 * u : unzeroed?
	 * l : large?
	 * a : allocated?
	 *
	 * Following are example bit patterns for the three types of runs.
	 *
	 * p : run page offset
	 * s : run size
	 * n : binind for size class; large objects set these to BININD_INVALID
	 *     except for promoted allocations (see prof_promote)
	 * x : don't care
	 * - : 0
	 * + : 1
	 * [DULA] : bit set
	 * [dula] : bit unset
	 *
	 *   Unallocated (clean):
	 *     ssssssss ssssssss ssss++++ ++++du-a
	 *     xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx
	 *     ssssssss ssssssss ssss++++ ++++dU-a
	 *
	 *   Unallocated (dirty):
	 *     ssssssss ssssssss ssss++++ ++++D--a
	 *     xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
	 *     ssssssss ssssssss ssss++++ ++++D--a
	 *
	 *   Small:
	 *     pppppppp pppppppp ppppnnnn nnnnd--A
	 *     pppppppp pppppppp ppppnnnn nnnn---A
	 *     pppppppp pppppppp ppppnnnn nnnnd--A
	 *
	 *   Large:
	 *     ssssssss ssssssss ssss++++ ++++D-LA
	 *     xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
	 *     -------- -------- ----++++ ++++D-LA
	 *
	 *   Large (sampled, size <= PAGE):
	 *     ssssssss ssssssss ssssnnnn nnnnD-LA
	 *
	 *   Large (not sampled, size == PAGE):
	 *     ssssssss ssssssss ssss++++ ++++D-LA
	 */
	size_t				bits;
#define	CHUNK_MAP_BININD_SHIFT	4
#define	BININD_INVALID		((size_t)0xffU)
/*     CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */
#define	CHUNK_MAP_BININD_MASK	((size_t)0xff0U)
#define	CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK
#define	CHUNK_MAP_FLAGS_MASK	((size_t)0xcU)
#define	CHUNK_MAP_DIRTY		((size_t)0x8U)
#define	CHUNK_MAP_UNZEROED	((size_t)0x4U)
#define	CHUNK_MAP_LARGE		((size_t)0x2U)
#define	CHUNK_MAP_ALLOCATED	((size_t)0x1U)
#define	CHUNK_MAP_KEY		CHUNK_MAP_ALLOCATED
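/*
 * Concrete encoding example (matching arena_mapbits_small_set() and
 * arena_mapbits_large_set() below): a page at run page offset runind within a
 * small run for bin binind stores
 *
 *   (runind << LG_PAGE) | (binind << CHUNK_MAP_BININD_SHIFT) | flags |
 *       unzeroed | CHUNK_MAP_ALLOCATED
 *
 * whereas the first page of a large run stores
 *
 *   size | CHUNK_MAP_BININD_INVALID | flags | unzeroed | CHUNK_MAP_LARGE |
 *       CHUNK_MAP_ALLOCATED
 */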
};
typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;

/* Arena chunk header. */
struct arena_chunk_s {
	/* Arena that owns the chunk. */
	arena_t			*arena;

	/* Linkage for the arena's chunks_dirty list. */
	ql_elm(arena_chunk_t)	link_dirty;

	/*
	 * True if the chunk is currently in the chunks_dirty list, due to
	 * having at some point contained one or more dirty pages.  Removal
	 * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
	 */
	bool			dirtied;

	/* Number of dirty pages. */
	size_t			ndirty;

	/*
	 * Map of pages within chunk that keeps track of free/large/small.  The
	 * first map_bias entries are omitted, since the chunk header does not
	 * need to be tracked in the map.  This omission saves a header page
	 * for common chunk sizes (e.g. 4 MiB).
	 */
	arena_chunk_map_t	map[1]; /* Dynamically sized. */
};
typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;

struct arena_run_s {
	/* Bin this run is associated with. */
	arena_bin_t	*bin;

	/* Index of next region that has never been allocated, or nregs. */
	uint32_t	nextind;

	/* Number of free regions in run. */
	unsigned	nfree;
};

/*
 * Read-only information associated with each element of arena_t's bins array
 * is stored separately, partly to reduce memory usage (only one copy, rather
 * than one per arena), but mainly to avoid false cacheline sharing.
 *
 * Each run has the following layout:
 *
 *               /--------------------\
 *               | arena_run_t header |
 *               | ...                |
 * bitmap_offset | bitmap             |
 *               | ...                |
 *   ctx0_offset | ctx map            |
 *               | ...                |
 *               |--------------------|
 *               | redzone            |
 *   reg0_offset | region 0           |
 *               | redzone            |
 *               |--------------------| \
 *               | redzone            | |
 *               | region 1           |  > reg_interval
 *               | redzone            | /
 *               |--------------------|
 *               | ...                |
 *               | ...                |
 *               | ...                |
 *               |--------------------|
 *               | redzone            |
 *               | region nregs-1     |
 *               | redzone            |
 *               |--------------------|
 *               | alignment pad?     |
 *               \--------------------/
 *
 * reg_interval has at least the same minimum alignment as reg_size; this
 * preserves the alignment constraint that sa2u() depends on.  Alignment pad is
 * either 0 or redzone_size; it is present only if needed to align reg0_offset.
 */
struct arena_bin_info_s {
	/* Size of regions in a run for this bin's size class. */
	size_t		reg_size;

	/* Redzone size. */
	size_t		redzone_size;

	/* Interval between regions (reg_size + (redzone_size << 1)). */
	size_t		reg_interval;

	/* Total size of a run for this bin's size class. */
	size_t		run_size;

	/* Total number of regions in a run for this bin's size class. */
	uint32_t	nregs;

	/*
	 * Offset of first bitmap_t element in a run header for this bin's size
	 * class.
	 */
	uint32_t	bitmap_offset;

	/*
	 * Metadata used to manipulate bitmaps for runs associated with this
	 * bin.
	 */
	bitmap_info_t	bitmap_info;

	/*
	 * Offset of first (prof_ctx_t *) in a run header for this bin's size
	 * class, or 0 if (config_prof == false || opt_prof == false).
	 */
	uint32_t	ctx0_offset;

	/* Offset of first region in a run for this bin's size class. */
	uint32_t	reg0_offset;
};

struct arena_bin_s {
	/*
	 * All operations on runcur, runs, and stats require that lock be
	 * locked.  Run allocation/deallocation are protected by the arena
	 * lock, which may be acquired while holding one or more bin locks,
	 * but not vice versa.
	 */
	malloc_mutex_t	lock;

	/*
	 * Current run being used to service allocations of this bin's size
	 * class.
	 */
	arena_run_t	*runcur;

	/*
	 * Tree of non-full runs.  This tree is used when looking for an
	 * existing run when runcur is no longer usable.  We choose the
	 * non-full run that is lowest in memory; this policy tends to keep
	 * objects packed well, and it can also help reduce the number of
	 * almost-empty chunks.
	 */
	arena_run_tree_t runs;

	/* Bin statistics. */
	malloc_bin_stats_t stats;
};

struct arena_s {
	/* This arena's index within the arenas array. */
	unsigned	ind;

	/*
	 * Number of threads currently assigned to this arena.  This field is
	 * protected by arenas_lock.
	 */
	unsigned	nthreads;

	/*
	 * There are three classes of arena operations from a locking
	 * perspective:
	 * 1) Thread assignment (modifies nthreads) is protected by
	 *    arenas_lock.
	 * 2) Bin-related operations are protected by bin locks.
	 * 3) Chunk- and run-related operations are protected by this mutex.
	 */
	malloc_mutex_t		lock;

	arena_stats_t		stats;
	/*
	 * List of tcaches for extant threads associated with this arena.
	 * Stats from these are merged incrementally, and at exit.
	 */
	ql_head(tcache_t)	tcache_ql;

	uint64_t		prof_accumbytes;

	/* List of dirty-page-containing chunks this arena manages. */
	ql_head(arena_chunk_t)	chunks_dirty;

	/*
	 * In order to avoid rapid chunk allocation/deallocation when an arena
	 * oscillates right on the cusp of needing a new chunk, cache the most
	 * recently freed chunk.  The spare is left in the arena's chunk trees
	 * until it is deleted.
	 *
	 * There is one spare chunk per arena, rather than one spare total, in
	 * order to avoid interactions between multiple threads that could make
	 * a single spare inadequate.
	 */
	arena_chunk_t		*spare;

	/* Number of pages in active runs. */
	size_t			nactive;

	/*
	 * Current count of pages within unused runs that are potentially
	 * dirty, and for which madvise(... MADV_DONTNEED) has not been called.
	 * By tracking this, we can institute a limit on how much dirty unused
	 * memory is mapped for each arena.
	 */
	size_t			ndirty;

	/*
	 * Approximate number of pages being purged.  It is possible for
	 * multiple threads to purge dirty pages concurrently, and they use
	 * npurgatory to indicate the total number of pages all threads are
	 * attempting to purge.
	 */
	size_t			npurgatory;

	/*
	 * Size/address-ordered trees of this arena's available runs.  The
	 * trees are used for first-best-fit run allocation.  The dirty tree
	 * contains runs with dirty pages (i.e. very likely to have been
	 * touched and therefore have associated physical pages), whereas the
	 * clean tree contains runs with pages that either have no associated
	 * physical pages, or have pages that the kernel may recycle at any
	 * time due to previous madvise(2) calls.  The dirty tree is used in
	 * preference to the clean tree for allocations, because using dirty
	 * pages reduces the amount of dirty purging necessary to keep the
	 * active:dirty page ratio below the purge threshold.
	 */
	arena_avail_tree_t	runs_avail_clean;
	arena_avail_tree_t	runs_avail_dirty;

	/* bins is used to store trees of free regions. */
	arena_bin_t		bins[NBINS];
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern ssize_t	opt_lg_dirty_mult;
/*
 * small_size2bin is a compact lookup table that rounds request sizes up to
 * size classes.  In order to reduce cache footprint, the table is compressed,
 * and all accesses are via the SMALL_SIZE2BIN macro.
 */
extern uint8_t const	small_size2bin[];
#define	SMALL_SIZE2BIN(s)	(small_size2bin[(s-1) >> LG_TINY_MIN])
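/*
 * For example, assuming LG_TINY_MIN == 3 (8-byte granularity), a request of
 * size 17 maps to small_size2bin[(17-1) >> 3] == small_size2bin[2], which
 * holds the bin index of the size class that a 17-byte request rounds up to.
 */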

extern arena_bin_info_t	arena_bin_info[NBINS];

/* Number of large size classes. */
#define	nlclasses	(chunk_npages - map_bias)

void	arena_purge_all(arena_t *arena);
void	arena_prof_accum(arena_t *arena, uint64_t accumbytes);
void	arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
    size_t binind, uint64_t prof_accumbytes);
void	arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info,
    bool zero);
void	arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
void	*arena_malloc_small(arena_t *arena, size_t size, bool zero);
void	*arena_malloc_large(arena_t *arena, size_t size, bool zero);
void	*arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero);
void	arena_prof_promoted(const void *ptr, size_t size);
void	arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    arena_chunk_map_t *mapelm);
void	arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    size_t pageind, arena_chunk_map_t *mapelm);
void	arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    size_t pageind);
void	arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk,
    void *ptr);
void	arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
void	arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
    arena_stats_t *astats, malloc_bin_stats_t *bstats,
    malloc_large_stats_t *lstats);
void	*arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
    size_t extra, bool zero);
void	*arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
    size_t alignment, bool zero, bool try_tcache);
bool	arena_new(arena_t *arena, unsigned ind);
void	arena_boot(void);
void	arena_prefork(arena_t *arena);
void	arena_postfork_parent(arena_t *arena);
void	arena_postfork_child(arena_t *arena);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
arena_chunk_map_t	*arena_mapp_get(arena_chunk_t *chunk, size_t pageind);
size_t	*arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_unallocated_size_get(arena_chunk_t *chunk,
    size_t pageind);
size_t	arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind);
size_t	arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind);
void	arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind,
    size_t size, size_t flags);
void	arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
    size_t size);
void	arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind,
    size_t size, size_t flags);
void	arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
    size_t binind);
void	arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind,
    size_t runind, size_t binind, size_t flags);
void	arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
    size_t unzeroed);
size_t	arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
size_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
unsigned	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
    const void *ptr);
prof_ctx_t	*arena_prof_ctx_get(const void *ptr);
void	arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
void	*arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
size_t	arena_salloc(const void *ptr, bool demote);
void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    bool try_tcache);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
# ifdef JEMALLOC_ARENA_INLINE_A
JEMALLOC_INLINE arena_chunk_map_t *
arena_mapp_get(arena_chunk_t *chunk, size_t pageind)
{

	assert(pageind >= map_bias);
	assert(pageind < chunk_npages);

	return (&chunk->map[pageind-map_bias]);
}

JEMALLOC_INLINE size_t *
arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind)
{

	return (&arena_mapp_get(chunk, pageind)->bits);
}

JEMALLOC_INLINE size_t
arena_mapbits_get(arena_chunk_t *chunk, size_t pageind)
{

	return (*arena_mapbitsp_get(chunk, pageind));
}

JEMALLOC_INLINE size_t
arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;

	mapbits = arena_mapbits_get(chunk, pageind);
	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
	return (mapbits & ~PAGE_MASK);
}

JEMALLOC_INLINE size_t
arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;

	mapbits = arena_mapbits_get(chunk, pageind);
	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
	    (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED));
	return (mapbits & ~PAGE_MASK);
}

JEMALLOC_INLINE size_t
arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;

	mapbits = arena_mapbits_get(chunk, pageind);
	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
	    CHUNK_MAP_ALLOCATED);
	return (mapbits >> LG_PAGE);
}

JEMALLOC_INLINE size_t
arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;
	size_t binind;

	mapbits = arena_mapbits_get(chunk, pageind);
	binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
	assert(binind < NBINS || binind == BININD_INVALID);
	return (binind);
}

JEMALLOC_INLINE size_t
arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;

	mapbits = arena_mapbits_get(chunk, pageind);
	return (mapbits & CHUNK_MAP_DIRTY);
}

JEMALLOC_INLINE size_t
arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;

	mapbits = arena_mapbits_get(chunk, pageind);
	return (mapbits & CHUNK_MAP_UNZEROED);
}

JEMALLOC_INLINE size_t
arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;

	mapbits = arena_mapbits_get(chunk, pageind);
	return (mapbits & CHUNK_MAP_LARGE);
}

JEMALLOC_INLINE size_t
arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind)
{
	size_t mapbits;

	mapbits = arena_mapbits_get(chunk, pageind);
	return (mapbits & CHUNK_MAP_ALLOCATED);
}

JEMALLOC_INLINE void
arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
    size_t flags)
{
	size_t *mapbitsp;

	mapbitsp = arena_mapbitsp_get(chunk, pageind);
	assert((size & PAGE_MASK) == 0);
	assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0);
	assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags);
	*mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags;
}

JEMALLOC_INLINE void
arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
    size_t size)
{
	size_t *mapbitsp;

	mapbitsp = arena_mapbitsp_get(chunk, pageind);
	assert((size & PAGE_MASK) == 0);
	assert((*mapbitsp & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
	*mapbitsp = size | (*mapbitsp & PAGE_MASK);
}

JEMALLOC_INLINE void
arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size,
    size_t flags)
{
	size_t *mapbitsp;
	size_t unzeroed;

	mapbitsp = arena_mapbitsp_get(chunk, pageind);
	assert((size & PAGE_MASK) == 0);
	assert((flags & CHUNK_MAP_DIRTY) == flags);
	unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
	*mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | unzeroed |
	    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
}

JEMALLOC_INLINE void
arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
    size_t binind)
{
	size_t *mapbitsp;

	assert(binind <= BININD_INVALID);
	mapbitsp = arena_mapbitsp_get(chunk, pageind);
	assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE);
	*mapbitsp = (*mapbitsp & ~CHUNK_MAP_BININD_MASK) | (binind <<
	    CHUNK_MAP_BININD_SHIFT);
}

JEMALLOC_INLINE void
arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind,
    size_t binind, size_t flags)
{
	size_t *mapbitsp;
	size_t unzeroed;

	assert(binind < BININD_INVALID);
	mapbitsp = arena_mapbitsp_get(chunk, pageind);
	assert(pageind - runind >= map_bias);
	assert((flags & CHUNK_MAP_DIRTY) == flags);
	unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
	*mapbitsp = (runind << LG_PAGE) | (binind << CHUNK_MAP_BININD_SHIFT) |
	    flags | unzeroed | CHUNK_MAP_ALLOCATED;
}

JEMALLOC_INLINE void
arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
    size_t unzeroed)
{
	size_t *mapbitsp;

	mapbitsp = arena_mapbitsp_get(chunk, pageind);
	*mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed;
}

JEMALLOC_INLINE size_t
arena_ptr_small_binind_get(const void *ptr, size_t mapbits)
{
	size_t binind;

	binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;

	if (config_debug) {
		arena_chunk_t *chunk;
		arena_t *arena;
		size_t pageind;
		size_t actual_mapbits;
		arena_run_t *run;
		arena_bin_t *bin;
		size_t actual_binind;
		arena_bin_info_t *bin_info;

		assert(binind != BININD_INVALID);
		assert(binind < NBINS);
		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
		arena = chunk->arena;
		pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
		actual_mapbits = arena_mapbits_get(chunk, pageind);
		assert(mapbits == actual_mapbits);
		assert(arena_mapbits_large_get(chunk, pageind) == 0);
		assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
		run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
		    (actual_mapbits >> LG_PAGE)) << LG_PAGE));
		bin = run->bin;
		actual_binind = bin - arena->bins;
		assert(binind == actual_binind);
		bin_info = &arena_bin_info[actual_binind];
		assert(((uintptr_t)ptr - ((uintptr_t)run +
		    (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval
		    == 0);
	}

	return (binind);
}
# endif /* JEMALLOC_ARENA_INLINE_A */

# ifdef JEMALLOC_ARENA_INLINE_B
JEMALLOC_INLINE size_t
arena_bin_index(arena_t *arena, arena_bin_t *bin)
{
	size_t binind = bin - arena->bins;
	assert(binind < NBINS);
	return (binind);
}

JEMALLOC_INLINE unsigned
arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
{
	unsigned shift, diff, regind;
	size_t interval;

	/*
	 * Freeing a pointer lower than region zero can cause assertion
	 * failure.
	 */
	assert((uintptr_t)ptr >= (uintptr_t)run +
	    (uintptr_t)bin_info->reg0_offset);

	/*
	 * Avoid doing division with a variable divisor if possible.  Using
	 * actual division here can reduce allocator throughput by over 20%!
	 */
	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
	    bin_info->reg0_offset);

	/* Rescale (factor powers of 2 out of the numerator and denominator). */
	interval = bin_info->reg_interval;
	shift = ffs(interval) - 1;
	diff >>= shift;
	interval >>= shift;

	if (interval == 1) {
		/* The divisor was a power of 2. */
		regind = diff;
	} else {
		/*
		 * To divide by a number D that is not a power of two we
		 * multiply by (2^21 / D) and then right shift by 21 positions.
		 *
		 *   X / D
		 *
		 * becomes
		 *
		 *   (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT
		 *
		 * We can omit the first three elements, because we never
		 * divide by 0, and 1 and 2 are both powers of two, which are
		 * handled above.
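		 *
		 * For example, on a system with 32-bit unsigned (so that
		 * SIZE_INV_SHIFT == 32 - LG_RUN_MAXREGS == 21), dividing by
		 * D == 3 uses interval_invs[0] == (2^21 / 3) + 1 == 699051;
		 * with diff == 6 this yields (6 * 699051) >> 21 == 2,
		 * matching 6 / 3.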
		 */
#define	SIZE_INV_SHIFT	((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
#define	SIZE_INV(s)	(((1U << SIZE_INV_SHIFT) / (s)) + 1)
		static const unsigned interval_invs[] = {
		    SIZE_INV(3),
		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
		};

		if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) +
		    2)) {
			regind = (diff * interval_invs[interval - 3]) >>
			    SIZE_INV_SHIFT;
		} else
			regind = diff / interval;
#undef SIZE_INV
#undef SIZE_INV_SHIFT
	}
	assert(diff == regind * interval);
	assert(regind < bin_info->nregs);

	return (regind);
}

JEMALLOC_INLINE prof_ctx_t *
arena_prof_ctx_get(const void *ptr)
{
	prof_ctx_t *ret;
	arena_chunk_t *chunk;
	size_t pageind, mapbits;

	cassert(config_prof);
	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
	mapbits = arena_mapbits_get(chunk, pageind);
	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
		if (prof_promote)
			ret = (prof_ctx_t *)(uintptr_t)1U;
		else {
			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
			    (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
			    LG_PAGE));
			size_t binind = arena_ptr_small_binind_get(ptr,
			    mapbits);
			arena_bin_info_t *bin_info = &arena_bin_info[binind];
			unsigned regind;

			regind = arena_run_regind(run, bin_info, ptr);
			ret = *(prof_ctx_t **)((uintptr_t)run +
			    bin_info->ctx0_offset + (regind *
			    sizeof(prof_ctx_t *)));
		}
	} else
		ret = arena_mapp_get(chunk, pageind)->prof_ctx;

	return (ret);
}

JEMALLOC_INLINE void
arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
	arena_chunk_t *chunk;
	size_t pageind, mapbits;

	cassert(config_prof);
	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
	mapbits = arena_mapbits_get(chunk, pageind);
	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
		if (prof_promote == false) {
			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
			    (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
			    LG_PAGE));
			size_t binind;
			arena_bin_info_t *bin_info;
			unsigned regind;

			binind = arena_ptr_small_binind_get(ptr, mapbits);
			bin_info = &arena_bin_info[binind];
			regind = arena_run_regind(run, bin_info, ptr);

			*((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset
			    + (regind * sizeof(prof_ctx_t *)))) = ctx;
		} else
			assert((uintptr_t)ctx == (uintptr_t)1U);
	} else
		arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
}

JEMALLOC_INLINE void *
arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache)
{
	tcache_t *tcache;

	assert(size != 0);
	assert(size <= arena_maxclass);

	if (size <= SMALL_MAXCLASS) {
		if (try_tcache && (tcache = tcache_get(true)) != NULL)
			return (tcache_alloc_small(tcache, size, zero));
		else {
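			/*
			 * No usable tcache; allocate directly via the arena
			 * (choose_arena() selects one when arena is NULL).
			 */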
			return (arena_malloc_small(choose_arena(arena), size,
			    zero));
		}
	} else {
		/*
		 * Initialize tcache after checking size in order to avoid
		 * infinite recursion during tcache initialization.
		 */
		if (try_tcache && size <= tcache_maxclass && (tcache =
		    tcache_get(true)) != NULL)
			return (tcache_alloc_large(tcache, size, zero));
		else {
			return (arena_malloc_large(choose_arena(arena), size,
			    zero));
		}
	}
}

/* Return the size of the allocation pointed to by ptr. */
JEMALLOC_INLINE size_t
arena_salloc(const void *ptr, bool demote)
{
	size_t ret;
	arena_chunk_t *chunk;
	size_t pageind, binind;

	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
	assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
	binind = arena_mapbits_binind_get(chunk, pageind);
	if (binind == BININD_INVALID || (config_prof && demote == false &&
	    prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) {
		/*
		 * Large allocation.  In the common case (demote == true), and
		 * as this is an inline function, most callers will only end up
		 * looking at binind to determine that ptr is a small
		 * allocation.
		 */
		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
		ret = arena_mapbits_large_size_get(chunk, pageind);
		assert(ret != 0);
		assert(pageind + (ret>>LG_PAGE) <= chunk_npages);
		assert(ret == PAGE || arena_mapbits_large_size_get(chunk,
		    pageind+(ret>>LG_PAGE)-1) == 0);
		assert(binind == arena_mapbits_binind_get(chunk,
		    pageind+(ret>>LG_PAGE)-1));
		assert(arena_mapbits_dirty_get(chunk, pageind) ==
		    arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1));
	} else {
		/*
		 * Small allocation (possibly promoted to a large object due to
		 * prof_promote).
		 */
		assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
		    arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
		    pageind)) == binind);
		ret = arena_bin_info[binind].reg_size;
	}

	return (ret);
}

JEMALLOC_INLINE void
arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache)
{
	size_t pageind, mapbits;
	tcache_t *tcache;

	assert(arena != NULL);
	assert(chunk->arena == arena);
	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);

	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
	mapbits = arena_mapbits_get(chunk, pageind);
	assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
		/* Small allocation. */
		if (try_tcache && (tcache = tcache_get(false)) != NULL) {
			size_t binind;

			binind = arena_ptr_small_binind_get(ptr, mapbits);
			tcache_dalloc_small(tcache, ptr, binind);
		} else
			arena_dalloc_small(arena, chunk, ptr, pageind);
	} else {
		size_t size = arena_mapbits_large_size_get(chunk, pageind);

		assert(((uintptr_t)ptr & PAGE_MASK) == 0);

		if (try_tcache && size <= tcache_maxclass && (tcache =
		    tcache_get(false)) != NULL) {
			tcache_dalloc_large(tcache, ptr, size);
		} else
			arena_dalloc_large(arena, chunk, ptr);
	}
}
# endif /* JEMALLOC_ARENA_INLINE_B */
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/