arena.h revision 7372b15a31c63ac5cb9ed8aeabc2a0a3c005e8bf
1/******************************************************************************/
2#ifdef JEMALLOC_H_TYPES
3
4/*
5 * Subpages are an artificially designated partitioning of pages.  Their only
6 * purpose is to support subpage-spaced size classes.
7 *
8 * There must be at least 4 subpages per page, due to the way size classes are
9 * handled.
10 */
/* Base-2 logarithm of the subpage size. */
#define	LG_SUBPAGE		8
/* Subpage size in bytes (1 << LG_SUBPAGE), typed as size_t. */
#define	SUBPAGE			((size_t)1 << LG_SUBPAGE)
/* Mask that selects the offset-within-subpage bits. */
#define	SUBPAGE_MASK		(SUBPAGE - 1)

/* Round s up to a subpage multiple; s is returned as is if already one. */
#define	SUBPAGE_CEILING(s)	(((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)

/* Smallest size class to support, i.e. (1 << LG_SIZEOF_PTR) bytes. */
#define	LG_TINY_MIN		LG_SIZEOF_PTR
#define	TINY_MIN		(1U << LG_TINY_MIN)

/*
 * Default lg of the largest quantum-spaced size class.  Classes up to this
 * size are multiples of the quantum and need not be powers of 2; above this
 * size, allocations are rounded up to the nearest power of 2.
 */
#define	LG_QSPACE_MAX_DEFAULT	7

/*
 * Default lg of the largest cacheline-spaced size class.  Classes up to this
 * size are multiples of the cacheline and need not be powers of 2; above this
 * size, allocations are rounded up to the nearest power of 2.
 */
#define	LG_CSPACE_MAX_DEFAULT	9
36
37/*
38 * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
39 * as small as possible such that this setting is still honored, without
40 * violating other constraints.  The goal is to make runs as small as possible
41 * without exceeding a per run external fragmentation threshold.
42 *
43 * We use binary fixed point math for overhead computations, where the binary
44 * point is implicitly RUN_BFP bits to the left.
45 *
46 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
47 * honored for some/all object sizes, since when heap profiling is enabled
48 * there is one pointer of header overhead per object (plus a constant).  This
49 * constraint is relaxed (ignored) for runs that are so small that the
50 * per-region overhead is greater than:
51 *
 *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
53 */
/* Number of bits to the right of the implicit binary point. */
#define	RUN_BFP			12
/*                                    \/   Implicit binary fixed point. */
#define	RUN_MAX_OVRHD		0x0000003dU
#define	RUN_MAX_OVRHD_RELAX	0x00001800U

/* A single run holds at most (1 << LG_RUN_MAXREGS) regions. */
#define	LG_RUN_MAXREGS		11
#define	RUN_MAXREGS		(1U << LG_RUN_MAXREGS)
62
63/*
64 * The minimum ratio of active:dirty pages per arena is computed as:
65 *
66 *   (nactive >> opt_lg_dirty_mult) >= ndirty
67 *
68 * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32
69 * times as many active pages as dirty pages.
70 */
/*
 * Default lg of the minimum active:dirty page ratio; 5 means at least 32
 * active pages per dirty page.
 */
#define	LG_DIRTY_MULT_DEFAULT	5

/* Short names for the arena structures, usable before they are defined. */
typedef struct arena_chunk_map_s	arena_chunk_map_t;
typedef struct arena_chunk_s		arena_chunk_t;
typedef struct arena_run_s		arena_run_t;
typedef struct arena_bin_info_s		arena_bin_info_t;
typedef struct arena_bin_s		arena_bin_t;
typedef struct arena_s			arena_t;
79
80#endif /* JEMALLOC_H_TYPES */
81/******************************************************************************/
82#ifdef JEMALLOC_H_STRUCTS
83
84/* Each element of the chunk map corresponds to one page within the chunk. */
85struct arena_chunk_map_s {
86#ifndef JEMALLOC_PROF
87	/*
88	 * Overlay prof_ctx in order to allow it to be referenced by dead code.
89	 * Such antics aren't warranted for per arena data structures, but
90	 * chunk map overhead accounts for a percentage of memory, rather than
91	 * being just a fixed cost.
92	 */
93	union {
94#endif
95	union {
96		/*
97		 * Linkage for run trees.  There are two disjoint uses:
98		 *
99		 * 1) arena_t's runs_avail_{clean,dirty} trees.
100		 * 2) arena_run_t conceptually uses this linkage for in-use
101		 *    non-full runs, rather than directly embedding linkage.
102		 */
103		rb_node(arena_chunk_map_t)	rb_link;
104		/*
105		 * List of runs currently in purgatory.  arena_chunk_purge()
106		 * temporarily allocates runs that contain dirty pages while
107		 * purging, so that other threads cannot use the runs while the
108		 * purging thread is operating without the arena lock held.
109		 */
110		ql_elm(arena_chunk_map_t)	ql_link;
111	}				u;
112
113	/* Profile counters, used for large object runs. */
114	prof_ctx_t			*prof_ctx;
115#ifndef JEMALLOC_PROF
116	}; /* union { ... }; */
117#endif
118
119	/*
120	 * Run address (or size) and various flags are stored together.  The bit
121	 * layout looks like (assuming 32-bit system):
122	 *
123	 *   ???????? ???????? ????---- ----dula
124	 *
125	 * ? : Unallocated: Run address for first/last pages, unset for internal
126	 *                  pages.
127	 *     Small: Run page offset.
128	 *     Large: Run size for first page, unset for trailing pages.
129	 * - : Unused.
130	 * d : dirty?
131	 * u : unzeroed?
132	 * l : large?
133	 * a : allocated?
134	 *
135	 * Following are example bit patterns for the three types of runs.
136	 *
137	 * p : run page offset
138	 * s : run size
139	 * c : (binind+1) for size class (used only if prof_promote is true)
140	 * x : don't care
141	 * - : 0
142	 * + : 1
143	 * [DULA] : bit set
144	 * [dula] : bit unset
145	 *
146	 *   Unallocated (clean):
147	 *     ssssssss ssssssss ssss---- ----du-a
148	 *     xxxxxxxx xxxxxxxx xxxx---- -----Uxx
149	 *     ssssssss ssssssss ssss---- ----dU-a
150	 *
151	 *   Unallocated (dirty):
152	 *     ssssssss ssssssss ssss---- ----D--a
153	 *     xxxxxxxx xxxxxxxx xxxx---- ----xxxx
154	 *     ssssssss ssssssss ssss---- ----D--a
155	 *
156	 *   Small:
157	 *     pppppppp pppppppp pppp---- ----d--A
158	 *     pppppppp pppppppp pppp---- -------A
159	 *     pppppppp pppppppp pppp---- ----d--A
160	 *
161	 *   Large:
162	 *     ssssssss ssssssss ssss---- ----D-LA
163	 *     xxxxxxxx xxxxxxxx xxxx---- ----xxxx
164	 *     -------- -------- -------- ----D-LA
165	 *
166	 *   Large (sampled, size <= PAGE_SIZE):
167	 *     ssssssss ssssssss sssscccc ccccD-LA
168	 *
169	 *   Large (not sampled, size == PAGE_SIZE):
170	 *     ssssssss ssssssss ssss---- ----D-LA
171	 */
172	size_t				bits;
173#define	CHUNK_MAP_CLASS_SHIFT	4
174#define	CHUNK_MAP_CLASS_MASK	((size_t)0xff0U)
175#define	CHUNK_MAP_FLAGS_MASK	((size_t)0xfU)
176#define	CHUNK_MAP_DIRTY		((size_t)0x8U)
177#define	CHUNK_MAP_UNZEROED	((size_t)0x4U)
178#define	CHUNK_MAP_LARGE		((size_t)0x2U)
179#define	CHUNK_MAP_ALLOCATED	((size_t)0x1U)
180#define	CHUNK_MAP_KEY		CHUNK_MAP_ALLOCATED
181};
182typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
183typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
184
185/* Arena chunk header. */
186struct arena_chunk_s {
187	/* Arena that owns the chunk. */
188	arena_t		*arena;
189
190	/* Linkage for the arena's chunks_dirty list. */
191	ql_elm(arena_chunk_t) link_dirty;
192
193	/*
194	 * True if the chunk is currently in the chunks_dirty list, due to
195	 * having at some point contained one or more dirty pages.  Removal
196	 * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
197	 */
198	bool		dirtied;
199
200	/* Number of dirty pages. */
201	size_t		ndirty;
202
203	/*
204	 * Map of pages within chunk that keeps track of free/large/small.  The
205	 * first map_bias entries are omitted, since the chunk header does not
206	 * need to be tracked in the map.  This omission saves a header page
207	 * for common chunk sizes (e.g. 4 MiB).
208	 */
209	arena_chunk_map_t map[1]; /* Dynamically sized. */
210};
211typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
212
213struct arena_run_s {
214	uint32_t	magic;
215#  define ARENA_RUN_MAGIC 0x384adf93
216
217	/* Bin this run is associated with. */
218	arena_bin_t	*bin;
219
220	/* Index of next region that has never been allocated, or nregs. */
221	uint32_t	nextind;
222
223	/* Number of free regions in run. */
224	unsigned	nfree;
225};
226
227/*
228 * Read-only information associated with each element of arena_t's bins array
229 * is stored separately, partly to reduce memory usage (only one copy, rather
230 * than one per arena), but mainly to avoid false cacheline sharing.
231 */
232struct arena_bin_info_s {
233	/* Size of regions in a run for this bin's size class. */
234	size_t		reg_size;
235
236	/* Total size of a run for this bin's size class. */
237	size_t		run_size;
238
239	/* Total number of regions in a run for this bin's size class. */
240	uint32_t	nregs;
241
242	/*
243	 * Offset of first bitmap_t element in a run header for this bin's size
244	 * class.
245	 */
246	uint32_t	bitmap_offset;
247
248	/*
249	 * Metadata used to manipulate bitmaps for runs associated with this
250	 * bin.
251	 */
252	bitmap_info_t	bitmap_info;
253
254	/*
255	 * Offset of first (prof_ctx_t *) in a run header for this bin's size
256	 * class, or 0 if (config_prof == false || opt_prof == false).
257	 */
258	uint32_t	ctx0_offset;
259
260	/* Offset of first region in a run for this bin's size class. */
261	uint32_t	reg0_offset;
262};
263
264struct arena_bin_s {
265	/*
266	 * All operations on runcur, runs, and stats require that lock be
267	 * locked.  Run allocation/deallocation are protected by the arena lock,
268	 * which may be acquired while holding one or more bin locks, but not
269	 * vise versa.
270	 */
271	malloc_mutex_t	lock;
272
273	/*
274	 * Current run being used to service allocations of this bin's size
275	 * class.
276	 */
277	arena_run_t	*runcur;
278
279	/*
280	 * Tree of non-full runs.  This tree is used when looking for an
281	 * existing run when runcur is no longer usable.  We choose the
282	 * non-full run that is lowest in memory; this policy tends to keep
283	 * objects packed well, and it can also help reduce the number of
284	 * almost-empty chunks.
285	 */
286	arena_run_tree_t runs;
287
288	/* Bin statistics. */
289	malloc_bin_stats_t stats;
290};
291
292struct arena_s {
293	uint32_t		magic;
294#  define ARENA_MAGIC 0x947d3d24
295
296	/* This arena's index within the arenas array. */
297	unsigned		ind;
298
299	/*
300	 * Number of threads currently assigned to this arena.  This field is
301	 * protected by arenas_lock.
302	 */
303	unsigned		nthreads;
304
305	/*
306	 * There are three classes of arena operations from a locking
307	 * perspective:
308	 * 1) Thread asssignment (modifies nthreads) is protected by
309	 *    arenas_lock.
310	 * 2) Bin-related operations are protected by bin locks.
311	 * 3) Chunk- and run-related operations are protected by this mutex.
312	 */
313	malloc_mutex_t		lock;
314
315	arena_stats_t		stats;
316	/*
317	 * List of tcaches for extant threads associated with this arena.
318	 * Stats from these are merged incrementally, and at exit.
319	 */
320	ql_head(tcache_t)	tcache_ql;
321
322	uint64_t		prof_accumbytes;
323
324	/* List of dirty-page-containing chunks this arena manages. */
325	ql_head(arena_chunk_t)	chunks_dirty;
326
327	/*
328	 * In order to avoid rapid chunk allocation/deallocation when an arena
329	 * oscillates right on the cusp of needing a new chunk, cache the most
330	 * recently freed chunk.  The spare is left in the arena's chunk trees
331	 * until it is deleted.
332	 *
333	 * There is one spare chunk per arena, rather than one spare total, in
334	 * order to avoid interactions between multiple threads that could make
335	 * a single spare inadequate.
336	 */
337	arena_chunk_t		*spare;
338
339	/* Number of pages in active runs. */
340	size_t			nactive;
341
342	/*
343	 * Current count of pages within unused runs that are potentially
344	 * dirty, and for which madvise(... MADV_DONTNEED) has not been called.
345	 * By tracking this, we can institute a limit on how much dirty unused
346	 * memory is mapped for each arena.
347	 */
348	size_t			ndirty;
349
350	/*
351	 * Approximate number of pages being purged.  It is possible for
352	 * multiple threads to purge dirty pages concurrently, and they use
353	 * npurgatory to indicate the total number of pages all threads are
354	 * attempting to purge.
355	 */
356	size_t			npurgatory;
357
358	/*
359	 * Size/address-ordered trees of this arena's available runs.  The trees
360	 * are used for first-best-fit run allocation.  The dirty tree contains
361	 * runs with dirty pages (i.e. very likely to have been touched and
362	 * therefore have associated physical pages), whereas the clean tree
363	 * contains runs with pages that either have no associated physical
364	 * pages, or have pages that the kernel may recycle at any time due to
365	 * previous madvise(2) calls.  The dirty tree is used in preference to
366	 * the clean tree for allocations, because using dirty pages reduces
367	 * the amount of dirty purging necessary to keep the active:dirty page
368	 * ratio below the purge threshold.
369	 */
370	arena_avail_tree_t	runs_avail_clean;
371	arena_avail_tree_t	runs_avail_dirty;
372
373	/*
374	 * bins is used to store trees of free regions of the following sizes,
375	 * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and
376	 * default MALLOC_CONF.
377	 *
378	 *   bins[i] |   size |
379	 *   --------+--------+
380	 *        0  |      8 |
381	 *   --------+--------+
382	 *        1  |     16 |
383	 *        2  |     32 |
384	 *        3  |     48 |
385	 *           :        :
386	 *        6  |     96 |
387	 *        7  |    112 |
388	 *        8  |    128 |
389	 *   --------+--------+
390	 *        9  |    192 |
391	 *       10  |    256 |
392	 *       11  |    320 |
393	 *       12  |    384 |
394	 *       13  |    448 |
395	 *       14  |    512 |
396	 *   --------+--------+
397	 *       15  |    768 |
398	 *       16  |   1024 |
399	 *       17  |   1280 |
400	 *           :        :
401	 *       25  |   3328 |
402	 *       26  |   3584 |
403	 *       27  |   3840 |
404	 *   --------+--------+
405	 */
406	arena_bin_t		bins[1]; /* Dynamically sized. */
407};
408
409#endif /* JEMALLOC_H_STRUCTS */
410/******************************************************************************/
411#ifdef JEMALLOC_H_EXTERNS
412
extern size_t	opt_lg_qspace_max;
extern size_t	opt_lg_cspace_max;
extern ssize_t	opt_lg_dirty_mult;
/*
 * small_size2bin is a compact lookup table that rounds request sizes up to
 * size classes.  In order to reduce cache footprint, the table is compressed,
 * and all accesses are via the SMALL_SIZE2BIN macro.
 *
 * SMALL_SIZE2BIN(s) reads the table entry at index ((s)-1) >> LG_TINY_MIN, so
 * s must be at least 1.  The argument is parenthesized so that expressions
 * whose operators bind less tightly than '-' (e.g. "c ? a : b" or "a | b")
 * are decremented as a whole rather than having only their last operand
 * decremented.
 */
extern uint8_t const	*small_size2bin;
#define	SMALL_SIZE2BIN(s)	(small_size2bin[((s)-1) >> LG_TINY_MIN])
423
424extern arena_bin_info_t	*arena_bin_info;
425
426/* Various bin-related settings. */
427#ifdef JEMALLOC_TINY		/* Number of (2^n)-spaced tiny bins. */
428#  define		ntbins	((unsigned)(LG_QUANTUM - LG_TINY_MIN))
429#else
430#  define		ntbins	0
431#endif
432extern unsigned		nqbins; /* Number of quantum-spaced bins. */
433extern unsigned		ncbins; /* Number of cacheline-spaced bins. */
434extern unsigned		nsbins; /* Number of subpage-spaced bins. */
435extern unsigned		nbins;
436#ifdef JEMALLOC_TINY
437#  define		tspace_max	((size_t)(QUANTUM >> 1))
438#endif
439#define			qspace_min	QUANTUM
440extern size_t		qspace_max;
441extern size_t		cspace_min;
442extern size_t		cspace_max;
443extern size_t		sspace_min;
444extern size_t		sspace_max;
445#define			small_maxclass	sspace_max
446
447#define			nlclasses (chunk_npages - map_bias)
448
449void	arena_purge_all(arena_t *arena);
450void	arena_prof_accum(arena_t *arena, uint64_t accumbytes);
451void	arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
452    size_t binind, uint64_t prof_accumbytes);
453void	*arena_malloc_small(arena_t *arena, size_t size, bool zero);
454void	*arena_malloc_large(arena_t *arena, size_t size, bool zero);
455void	*arena_malloc(size_t size, bool zero);
456void	*arena_palloc(arena_t *arena, size_t size, size_t alloc_size,
457    size_t alignment, bool zero);
458size_t	arena_salloc(const void *ptr);
459void	arena_prof_promoted(const void *ptr, size_t size);
460size_t	arena_salloc_demote(const void *ptr);
461void	arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
462    arena_chunk_map_t *mapelm);
463void	arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
464void	arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
465    arena_stats_t *astats, malloc_bin_stats_t *bstats,
466    malloc_large_stats_t *lstats);
467void	*arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
468    size_t extra, bool zero);
469void	*arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
470    size_t alignment, bool zero);
471bool	arena_new(arena_t *arena, unsigned ind);
472bool	arena_boot(void);
473
474#endif /* JEMALLOC_H_EXTERNS */
475/******************************************************************************/
476#ifdef JEMALLOC_H_INLINES
477
478#ifndef JEMALLOC_ENABLE_INLINE
479size_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
480unsigned	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
481    const void *ptr);
482prof_ctx_t	*arena_prof_ctx_get(const void *ptr);
483void	arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
484void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
485#endif
486
487#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
488JEMALLOC_INLINE size_t
489arena_bin_index(arena_t *arena, arena_bin_t *bin)
490{
491	size_t binind = bin - arena->bins;
492	assert(binind < nbins);
493	return (binind);
494}
495
496JEMALLOC_INLINE unsigned
497arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
498{
499	unsigned shift, diff, regind;
500	size_t size;
501
502	assert(run->magic == ARENA_RUN_MAGIC);
503	/*
504	 * Freeing a pointer lower than region zero can cause assertion
505	 * failure.
506	 */
507	assert((uintptr_t)ptr >= (uintptr_t)run +
508	    (uintptr_t)bin_info->reg0_offset);
509
510	/*
511	 * Avoid doing division with a variable divisor if possible.  Using
512	 * actual division here can reduce allocator throughput by over 20%!
513	 */
514	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
515	    bin_info->reg0_offset);
516
517	/* Rescale (factor powers of 2 out of the numerator and denominator). */
518	size = bin_info->reg_size;
519	shift = ffs(size) - 1;
520	diff >>= shift;
521	size >>= shift;
522
523	if (size == 1) {
524		/* The divisor was a power of 2. */
525		regind = diff;
526	} else {
527		/*
528		 * To divide by a number D that is not a power of two we
529		 * multiply by (2^21 / D) and then right shift by 21 positions.
530		 *
531		 *   X / D
532		 *
533		 * becomes
534		 *
535		 *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
536		 *
537		 * We can omit the first three elements, because we never
538		 * divide by 0, and 1 and 2 are both powers of two, which are
539		 * handled above.
540		 */
541#define	SIZE_INV_SHIFT	((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
542#define	SIZE_INV(s)	(((1U << SIZE_INV_SHIFT) / (s)) + 1)
543		static const unsigned size_invs[] = {
544		    SIZE_INV(3),
545		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
546		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
547		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
548		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
549		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
550		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
551		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
552		};
553
554		if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
555			regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
556		else
557			regind = diff / size;
558#undef SIZE_INV
559#undef SIZE_INV_SHIFT
560	}
561	assert(diff == regind * size);
562	assert(regind < bin_info->nregs);
563
564	return (regind);
565}
566
567JEMALLOC_INLINE prof_ctx_t *
568arena_prof_ctx_get(const void *ptr)
569{
570	prof_ctx_t *ret;
571	arena_chunk_t *chunk;
572	size_t pageind, mapbits;
573
574	cassert(config_prof);
575	assert(ptr != NULL);
576	assert(CHUNK_ADDR2BASE(ptr) != ptr);
577
578	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
579	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
580	mapbits = chunk->map[pageind-map_bias].bits;
581	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
582	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
583		if (prof_promote)
584			ret = (prof_ctx_t *)(uintptr_t)1U;
585		else {
586			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
587			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
588			    PAGE_SHIFT));
589			size_t binind = arena_bin_index(chunk->arena, run->bin);
590			arena_bin_info_t *bin_info = &arena_bin_info[binind];
591			unsigned regind;
592
593			assert(run->magic == ARENA_RUN_MAGIC);
594			regind = arena_run_regind(run, bin_info, ptr);
595			ret = *(prof_ctx_t **)((uintptr_t)run +
596			    bin_info->ctx0_offset + (regind *
597			    sizeof(prof_ctx_t *)));
598		}
599	} else
600		ret = chunk->map[pageind-map_bias].prof_ctx;
601
602	return (ret);
603}
604
605JEMALLOC_INLINE void
606arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
607{
608	arena_chunk_t *chunk;
609	size_t pageind, mapbits;
610
611	cassert(config_prof);
612	assert(ptr != NULL);
613	assert(CHUNK_ADDR2BASE(ptr) != ptr);
614
615	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
616	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
617	mapbits = chunk->map[pageind-map_bias].bits;
618	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
619	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
620		if (prof_promote == false) {
621			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
622			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
623			    PAGE_SHIFT));
624			arena_bin_t *bin = run->bin;
625			size_t binind;
626			arena_bin_info_t *bin_info;
627			unsigned regind;
628
629			assert(run->magic == ARENA_RUN_MAGIC);
630			binind = arena_bin_index(chunk->arena, bin);
631			bin_info = &arena_bin_info[binind];
632			regind = arena_run_regind(run, bin_info, ptr);
633
634			*((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset
635			    + (regind * sizeof(prof_ctx_t *)))) = ctx;
636		} else
637			assert((uintptr_t)ctx == (uintptr_t)1U);
638	} else
639		chunk->map[pageind-map_bias].prof_ctx = ctx;
640}
641
642JEMALLOC_INLINE void
643arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
644{
645	size_t pageind;
646	arena_chunk_map_t *mapelm;
647
648	assert(arena != NULL);
649	assert(arena->magic == ARENA_MAGIC);
650	assert(chunk->arena == arena);
651	assert(ptr != NULL);
652	assert(CHUNK_ADDR2BASE(ptr) != ptr);
653
654	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
655	mapelm = &chunk->map[pageind-map_bias];
656	assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
657	if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
658		/* Small allocation. */
659		tcache_t *tcache;
660
661		if (config_tcache && (tcache = tcache_get()) != NULL)
662			tcache_dalloc_small(tcache, ptr);
663		else {
664			arena_run_t *run;
665			arena_bin_t *bin;
666
667			run = (arena_run_t *)((uintptr_t)chunk +
668			    (uintptr_t)((pageind - (mapelm->bits >>
669			    PAGE_SHIFT)) << PAGE_SHIFT));
670			assert(run->magic == ARENA_RUN_MAGIC);
671			bin = run->bin;
672			if (config_debug) {
673				size_t binind = arena_bin_index(arena, bin);
674				UNUSED arena_bin_info_t *bin_info =
675				    &arena_bin_info[binind];
676				assert(((uintptr_t)ptr - ((uintptr_t)run +
677				    (uintptr_t)bin_info->reg0_offset)) %
678				    bin_info->reg_size == 0);
679			}
680			malloc_mutex_lock(&bin->lock);
681			arena_dalloc_bin(arena, chunk, ptr, mapelm);
682			malloc_mutex_unlock(&bin->lock);
683		}
684	} else {
685		if (config_tcache) {
686			size_t size = mapelm->bits & ~PAGE_MASK;
687
688			assert(((uintptr_t)ptr & PAGE_MASK) == 0);
689			if (size <= tcache_maxclass) {
690				tcache_t *tcache;
691
692				if ((tcache = tcache_get()) != NULL)
693					tcache_dalloc_large(tcache, ptr, size);
694				else {
695					malloc_mutex_lock(&arena->lock);
696					arena_dalloc_large(arena, chunk, ptr);
697					malloc_mutex_unlock(&arena->lock);
698				}
699			} else {
700				malloc_mutex_lock(&arena->lock);
701				arena_dalloc_large(arena, chunk, ptr);
702				malloc_mutex_unlock(&arena->lock);
703			}
704		} else {
705			assert(((uintptr_t)ptr & PAGE_MASK) == 0);
706			malloc_mutex_lock(&arena->lock);
707			arena_dalloc_large(arena, chunk, ptr);
708			malloc_mutex_unlock(&arena->lock);
709		}
710	}
711}
712#endif
713
714#endif /* JEMALLOC_H_INLINES */
715/******************************************************************************/
716