arena.c revision b718cf77e9917f6ae1995c2e2b219ff4219c9f46
1#define	JEMALLOC_ARENA_C_
2#include "jemalloc/internal/jemalloc_internal.h"
3
4/******************************************************************************/
5/* Data. */
6
7ssize_t		opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
8arena_bin_info_t	arena_bin_info[NBINS];
9
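/*
 * Bin index to size lookup table for small size classes.  SIZE_CLASSES
 * expands SC() once per size class; B2S_bin_yes()/B2S_bin_no() keep only the
 * bin-backed (small) classes, each contributing its size
 * (1<<lg_grp) + (ndelta<<lg_delta).
 */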
10JEMALLOC_ALIGNED(CACHELINE)
11const uint32_t	small_bin2size_tab[NBINS] = {
12#define	B2S_bin_yes(size) \
13	size,
14#define	B2S_bin_no(size)
15#define	SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
16	B2S_bin_##bin((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))
17	SIZE_CLASSES
18#undef B2S_bin_yes
19#undef B2S_bin_no
20#undef SC
21};
22
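/*
 * Size to bin index lookup table.  Each size class with lg_delta_lookup == d
 * expands to 2^(d-3) copies of its bin index via the S2B_*() macros, so the
 * table can be indexed at 8-byte (2^3) granularity.
 */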
23JEMALLOC_ALIGNED(CACHELINE)
24const uint8_t	small_size2bin_tab[] = {
25#define	S2B_3(i)	i,
26#define	S2B_4(i)	S2B_3(i) S2B_3(i)
27#define	S2B_5(i)	S2B_4(i) S2B_4(i)
28#define	S2B_6(i)	S2B_5(i) S2B_5(i)
29#define	S2B_7(i)	S2B_6(i) S2B_6(i)
30#define	S2B_8(i)	S2B_7(i) S2B_7(i)
31#define	S2B_9(i)	S2B_8(i) S2B_8(i)
32#define	S2B_no(i)
33#define	SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
34	S2B_##lg_delta_lookup(index)
35	SIZE_CLASSES
36#undef S2B_3
37#undef S2B_4
38#undef S2B_5
39#undef S2B_6
40#undef S2B_7
41#undef S2B_8
42#undef S2B_9
43#undef S2B_no
44#undef SC
45};
46
47/******************************************************************************/
48/*
49 * Function prototypes for static functions that are referenced prior to
50 * definition.
51 */
52
53static void	arena_purge(arena_t *arena, bool all);
54static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty,
55    bool cleaned);
56static void	arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
57    arena_run_t *run, arena_bin_t *bin);
58static void	arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk,
59    arena_run_t *run, arena_bin_t *bin);
60
61/******************************************************************************/
62
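/*
 * Translate a map_misc element back to the index of the page it describes,
 * based on the element's offset within the chunk header.
 */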
63JEMALLOC_INLINE_C size_t
64arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm)
65{
66	size_t offset = CHUNK_ADDR2OFFSET(miscelm);
67
68	return ((offset - map_misc_offset) / sizeof(arena_chunk_map_misc_t) +
69	    map_bias);
70}
71
72JEMALLOC_INLINE_C size_t
73arena_miscelm_to_bits(arena_chunk_map_misc_t *miscelm)
74{
75	arena_chunk_t *chunk = CHUNK_ADDR2BASE(miscelm);
76	size_t pageind = arena_miscelm_to_pageind(miscelm);
77
78	return (arena_mapbits_get(chunk, pageind));
79}
80
81static inline int
82arena_run_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b)
83{
84	uintptr_t a_miscelm = (uintptr_t)a;
85	uintptr_t b_miscelm = (uintptr_t)b;
86
87	assert(a != NULL);
88	assert(b != NULL);
89
90	return ((a_miscelm > b_miscelm) - (a_miscelm < b_miscelm));
91}
92
93/* Generate red-black tree functions. */
94rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t,
95    rb_link, arena_run_comp)
96
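/*
 * Order available runs primarily by size and secondarily by address.  Search
 * keys (CHUNK_MAP_KEY) compare lower than real elements of equal size, so an
 * nsearch() returns the lowest-addressed run of the best-fit size.
 */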
97static inline int
98arena_avail_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b)
99{
100	int ret;
101	size_t a_size;
102	size_t b_size = arena_miscelm_to_bits(b) & ~PAGE_MASK;
103	uintptr_t a_miscelm = (uintptr_t)a;
104	uintptr_t b_miscelm = (uintptr_t)b;
105
106	if (a_miscelm & CHUNK_MAP_KEY)
107		a_size = a_miscelm & ~PAGE_MASK;
108	else
109		a_size = arena_miscelm_to_bits(a) & ~PAGE_MASK;
110
111	ret = (a_size > b_size) - (a_size < b_size);
112	if (ret == 0) {
113		if (!(a_miscelm & CHUNK_MAP_KEY))
114			ret = (a_miscelm > b_miscelm) - (a_miscelm < b_miscelm);
115		else {
116			/*
117			 * Treat keys as if they are lower than anything else.
118			 */
119			ret = -1;
120		}
121	}
122
123	return (ret);
124}
125
126/* Generate red-black tree functions. */
127rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t,
128    arena_chunk_map_misc_t, rb_link, arena_avail_comp)
129
130static void
131arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
132    size_t npages)
133{
134
135	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
136	    LG_PAGE));
137	arena_avail_tree_insert(&arena->runs_avail, arena_miscelm_get(chunk,
138	    pageind));
139}
140
141static void
142arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
143    size_t npages)
144{
145
146	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
147	    LG_PAGE));
148	arena_avail_tree_remove(&arena->runs_avail, arena_miscelm_get(chunk,
149	    pageind));
150}
151
152static void
153arena_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
154    size_t npages)
155{
156	arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
157	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
158	    LG_PAGE));
159	assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY);
160	assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) ==
161	    CHUNK_MAP_DIRTY);
162	ql_elm_new(miscelm, dr_link);
163	ql_tail_insert(&arena->runs_dirty, miscelm, dr_link);
164	arena->ndirty += npages;
165}
166
167static void
168arena_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
169    size_t npages)
170{
171	arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
172	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
173	    LG_PAGE));
174	assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY);
175	assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) ==
176	    CHUNK_MAP_DIRTY);
177	ql_remove(&arena->runs_dirty, miscelm, dr_link);
178	arena->ndirty -= npages;
179}
180
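/*
 * Allocate one region from run: take the first free region according to the
 * run's bitmap and return its address, computed as reg0_offset plus
 * regind * reg_interval.
 */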
181static inline void *
182arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info)
183{
184	void *ret;
185	unsigned regind;
186	bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
187	    (uintptr_t)bin_info->bitmap_offset);
188
189	assert(run->nfree > 0);
190	assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false);
191
192	regind = bitmap_sfu(bitmap, &bin_info->bitmap_info);
193	ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset +
194	    (uintptr_t)(bin_info->reg_interval * regind));
195	run->nfree--;
196	if (regind == run->nextind)
197		run->nextind++;
198	assert(regind < run->nextind);
199	return (ret);
200}
201
202static inline void
203arena_run_reg_dalloc(arena_run_t *run, void *ptr)
204{
205	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
206	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
207	size_t mapbits = arena_mapbits_get(chunk, pageind);
208	size_t binind = arena_ptr_small_binind_get(ptr, mapbits);
209	arena_bin_info_t *bin_info = &arena_bin_info[binind];
210	unsigned regind = arena_run_regind(run, bin_info, ptr);
211	bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
212	    (uintptr_t)bin_info->bitmap_offset);
213
214	assert(run->nfree < bin_info->nregs);
215	/* Freeing an interior pointer can cause assertion failure. */
216	assert(((uintptr_t)ptr - ((uintptr_t)run +
217	    (uintptr_t)bin_info->reg0_offset)) %
218	    (uintptr_t)bin_info->reg_interval == 0);
219	assert((uintptr_t)ptr >= (uintptr_t)run +
220	    (uintptr_t)bin_info->reg0_offset);
221	/* Freeing an unallocated pointer can cause assertion failure. */
222	assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind));
223
224	bitmap_unset(bitmap, &bin_info->bitmap_info, regind);
225	run->nfree++;
226}
227
228static inline void
229arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages)
230{
231
232	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
233	    (run_ind << LG_PAGE)), (npages << LG_PAGE));
234	memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0,
235	    (npages << LG_PAGE));
236}
237
238static inline void
239arena_run_page_mark_zeroed(arena_chunk_t *chunk, size_t run_ind)
240{
241
242	JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind
243	    << LG_PAGE)), PAGE);
244}
245
246static inline void
247arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
248{
249	size_t i;
250	UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE));
251
252	arena_run_page_mark_zeroed(chunk, run_ind);
253	for (i = 0; i < PAGE / sizeof(size_t); i++)
254		assert(p[i] == 0);
255}
256
257static void
258arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages)
259{
260
261	if (config_stats) {
262		ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + add_pages
263		    - sub_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive <<
264		    LG_PAGE);
265		if (cactive_diff != 0)
266			stats_cactive_add(cactive_diff);
267	}
268}
269
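/*
 * Remove a run that is about to be (partially) allocated from runs_avail (and
 * runs_dirty if dirty), charge the allocated pages to the arena, and re-insert
 * any trailing remainder as a smaller unallocated run.
 */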
270static void
271arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind,
272    size_t flag_dirty, size_t need_pages)
273{
274	size_t total_pages, rem_pages;
275
276	total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >>
277	    LG_PAGE;
278	assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) ==
279	    flag_dirty);
280	assert(need_pages <= total_pages);
281	rem_pages = total_pages - need_pages;
282
283	arena_avail_remove(arena, chunk, run_ind, total_pages);
284	if (flag_dirty != 0)
285		arena_dirty_remove(arena, chunk, run_ind, total_pages);
286	arena_cactive_update(arena, need_pages, 0);
287	arena->nactive += need_pages;
288
289	/* Keep track of trailing unused pages for later use. */
290	if (rem_pages > 0) {
291		if (flag_dirty != 0) {
292			arena_mapbits_unallocated_set(chunk,
293			    run_ind+need_pages, (rem_pages << LG_PAGE),
294			    flag_dirty);
295			arena_mapbits_unallocated_set(chunk,
296			    run_ind+total_pages-1, (rem_pages << LG_PAGE),
297			    flag_dirty);
298			arena_dirty_insert(arena, chunk, run_ind+need_pages,
299			    rem_pages);
300		} else {
301			arena_mapbits_unallocated_set(chunk, run_ind+need_pages,
302			    (rem_pages << LG_PAGE),
303			    arena_mapbits_unzeroed_get(chunk,
304			    run_ind+need_pages));
305			arena_mapbits_unallocated_set(chunk,
306			    run_ind+total_pages-1, (rem_pages << LG_PAGE),
307			    arena_mapbits_unzeroed_get(chunk,
308			    run_ind+total_pages-1));
309		}
310		arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages);
311	}
312}
313
314static void
315arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size,
316    bool remove, bool zero)
317{
318	arena_chunk_t *chunk;
319	size_t flag_dirty, run_ind, need_pages, i;
320
321	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
322	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
323	flag_dirty = arena_mapbits_dirty_get(chunk, run_ind);
324	need_pages = (size >> LG_PAGE);
325	assert(need_pages > 0);
326
327	if (remove) {
328		arena_run_split_remove(arena, chunk, run_ind, flag_dirty,
329		    need_pages);
330	}
331
332	if (zero) {
333		if (flag_dirty == 0) {
334			/*
335			 * The run is clean, so some pages may be zeroed (i.e.
336			 * never before touched).
337			 */
338			for (i = 0; i < need_pages; i++) {
339				if (arena_mapbits_unzeroed_get(chunk, run_ind+i)
340				    != 0)
341					arena_run_zero(chunk, run_ind+i, 1);
342				else if (config_debug) {
343					arena_run_page_validate_zeroed(chunk,
344					    run_ind+i);
345				} else {
346					arena_run_page_mark_zeroed(chunk,
347					    run_ind+i);
348				}
349			}
350		} else {
351			/* The run is dirty, so all pages must be zeroed. */
352			arena_run_zero(chunk, run_ind, need_pages);
353		}
354	} else {
355		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
356		    (run_ind << LG_PAGE)), (need_pages << LG_PAGE));
357	}
358
359	/*
360	 * Set the last element first, in case the run only contains one page
361	 * (i.e. both statements set the same element).
362	 */
363	arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, flag_dirty);
364	arena_mapbits_large_set(chunk, run_ind, size, flag_dirty);
365}
366
367static void
368arena_run_split_large(arena_t *arena, arena_run_t *run, size_t size, bool zero)
369{
370
371	arena_run_split_large_helper(arena, run, size, true, zero);
372}
373
374static void
375arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero)
376{
377
378	arena_run_split_large_helper(arena, run, size, false, zero);
379}
380
381static void
382arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size,
383    size_t binind)
384{
385	arena_chunk_t *chunk;
386	size_t flag_dirty, run_ind, need_pages, i;
387
388	assert(binind != BININD_INVALID);
389
390	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
391	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
392	flag_dirty = arena_mapbits_dirty_get(chunk, run_ind);
393	need_pages = (size >> LG_PAGE);
394	assert(need_pages > 0);
395
396	arena_run_split_remove(arena, chunk, run_ind, flag_dirty, need_pages);
397
398	/*
399	 * Propagate the dirty and unzeroed flags to the allocated small run,
400	 * so that arena_dalloc_bin_run() has the ability to conditionally trim
401	 * clean pages.
402	 */
403	arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty);
404	/*
405	 * The first page will always be dirtied during small run
406	 * initialization, so a validation failure here would not actually
407	 * cause an observable failure.
408	 */
409	if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk,
410	    run_ind) == 0)
411		arena_run_page_validate_zeroed(chunk, run_ind);
412	for (i = 1; i < need_pages - 1; i++) {
413		arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0);
414		if (config_debug && flag_dirty == 0 &&
415		    arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0)
416			arena_run_page_validate_zeroed(chunk, run_ind+i);
417	}
418	arena_mapbits_small_set(chunk, run_ind+need_pages-1, need_pages-1,
419	    binind, flag_dirty);
420	if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk,
421	    run_ind+need_pages-1) == 0)
422		arena_run_page_validate_zeroed(chunk, run_ind+need_pages-1);
423	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
424	    (run_ind << LG_PAGE)), (need_pages << LG_PAGE));
425}
426
427static arena_chunk_t *
428arena_chunk_init_spare(arena_t *arena)
429{
430	arena_chunk_t *chunk;
431
432	assert(arena->spare != NULL);
433
434	chunk = arena->spare;
435	arena->spare = NULL;
436
437	assert(arena_mapbits_allocated_get(chunk, map_bias) == 0);
438	assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0);
439	assert(arena_mapbits_unallocated_size_get(chunk, map_bias) ==
440	    arena_maxclass);
441	assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) ==
442	    arena_maxclass);
443	assert(arena_mapbits_dirty_get(chunk, map_bias) ==
444	    arena_mapbits_dirty_get(chunk, chunk_npages-1));
445
446	return (chunk);
447}
448
449static arena_chunk_t *
450arena_chunk_alloc_internal(arena_t *arena, size_t size, size_t alignment,
451    bool *zero)
452{
453	arena_chunk_t *chunk;
454	chunk_alloc_t *chunk_alloc;
455	chunk_dalloc_t *chunk_dalloc;
456
457	chunk_alloc = arena->chunk_alloc;
458	chunk_dalloc = arena->chunk_dalloc;
459	malloc_mutex_unlock(&arena->lock);
460	chunk = (arena_chunk_t *)chunk_alloc_arena(chunk_alloc, chunk_dalloc,
461	    arena->ind, size, alignment, zero);
462	malloc_mutex_lock(&arena->lock);
463	if (config_stats && chunk != NULL)
464		arena->stats.mapped += chunksize;
465
466	return (chunk);
467}
468
469void *
470arena_chunk_alloc_huge(arena_t *arena, size_t size, size_t alignment,
471    bool *zero)
472{
473	void *ret;
474	chunk_alloc_t *chunk_alloc;
475	chunk_dalloc_t *chunk_dalloc;
476
477	malloc_mutex_lock(&arena->lock);
478	chunk_alloc = arena->chunk_alloc;
479	chunk_dalloc = arena->chunk_dalloc;
480	if (config_stats) {
481		/* Optimistically update stats prior to unlocking. */
482		arena->stats.mapped += size;
483		arena->stats.allocated_huge += size;
484		arena->stats.nmalloc_huge++;
485		arena->stats.nrequests_huge++;
486	}
487	arena->nactive += (size >> LG_PAGE);
488	malloc_mutex_unlock(&arena->lock);
489
490	ret = chunk_alloc_arena(chunk_alloc, chunk_dalloc, arena->ind,
491	    size, alignment, zero);
492	if (config_stats) {
493		if (ret != NULL)
494			stats_cactive_add(size);
495		else {
496			/* Revert optimistic stats updates. */
497			malloc_mutex_lock(&arena->lock);
498			arena->stats.mapped -= size;
499			arena->stats.allocated_huge -= size;
500			arena->stats.nmalloc_huge--;
501			malloc_mutex_unlock(&arena->lock);
502		}
503	}
504
505	return (ret);
506}
507
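/*
 * Allocate and initialize a brand new chunk: record the owning arena and set
 * up the page map so that the usable pages form one maximal unallocated run,
 * with the unzeroed flags reflecting whether the chunk allocator returned
 * zeroed memory.
 */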
508static arena_chunk_t *
509arena_chunk_init_hard(arena_t *arena)
510{
511	arena_chunk_t *chunk;
512	bool zero;
513	size_t unzeroed, i;
514
515	assert(arena->spare == NULL);
516
517	zero = false;
518	chunk = arena_chunk_alloc_internal(arena, chunksize, chunksize, &zero);
519	if (chunk == NULL)
520		return (NULL);
521
522	chunk->arena = arena;
523
524	/*
525	 * Initialize the map to contain one maximal free untouched run.  Mark
526	 * the pages as zeroed iff chunk_alloc() returned a zeroed chunk.
527	 */
528	unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED;
529	arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass,
530	    unzeroed);
531	/*
532	 * There is no need to initialize the internal page map entries unless
533	 * the chunk is not zeroed.
534	 */
535	if (zero == false) {
536		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(
537		    (void *)arena_bitselm_get(chunk, map_bias+1),
538		    (size_t)((uintptr_t) arena_bitselm_get(chunk,
539		    chunk_npages-1) - (uintptr_t)arena_bitselm_get(chunk,
540		    map_bias+1)));
541		for (i = map_bias+1; i < chunk_npages-1; i++)
542			arena_mapbits_unzeroed_set(chunk, i, unzeroed);
543	} else {
544		JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void
545		    *)arena_bitselm_get(chunk, map_bias+1), (size_t)((uintptr_t)
546		    arena_bitselm_get(chunk, chunk_npages-1) -
547		    (uintptr_t)arena_bitselm_get(chunk, map_bias+1)));
548		if (config_debug) {
549			for (i = map_bias+1; i < chunk_npages-1; i++) {
550				assert(arena_mapbits_unzeroed_get(chunk, i) ==
551				    unzeroed);
552			}
553		}
554	}
555	arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxclass,
556	    unzeroed);
557
558	return (chunk);
559}
560
561static arena_chunk_t *
562arena_chunk_alloc(arena_t *arena)
563{
564	arena_chunk_t *chunk;
565
566	if (arena->spare != NULL)
567		chunk = arena_chunk_init_spare(arena);
568	else {
569		chunk = arena_chunk_init_hard(arena);
570		if (chunk == NULL)
571			return (NULL);
572	}
573
574	/* Insert the run into the runs_avail tree. */
575	arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias);
576
577	return (chunk);
578}
579
580static void
581arena_chunk_dalloc_internal(arena_t *arena, arena_chunk_t *chunk)
582{
583	chunk_dalloc_t *chunk_dalloc;
584
585	chunk_dalloc = arena->chunk_dalloc;
586	malloc_mutex_unlock(&arena->lock);
587	chunk_dalloc((void *)chunk, chunksize, arena->ind);
588	malloc_mutex_lock(&arena->lock);
589	if (config_stats)
590		arena->stats.mapped -= chunksize;
591}
592
593void
594arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t size)
595{
596	chunk_dalloc_t *chunk_dalloc;
597
598	malloc_mutex_lock(&arena->lock);
599	chunk_dalloc = arena->chunk_dalloc;
600	if (config_stats) {
601		arena->stats.mapped -= size;
602		arena->stats.allocated_huge -= size;
603		arena->stats.ndalloc_huge++;
604		stats_cactive_sub(size);
605	}
606	arena->nactive -= (size >> LG_PAGE);
607	malloc_mutex_unlock(&arena->lock);
608	chunk_dalloc(chunk, size, arena->ind);
609}
610
611static void
612arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk)
613{
614
615	assert(arena_mapbits_allocated_get(chunk, map_bias) == 0);
616	assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0);
617	assert(arena_mapbits_unallocated_size_get(chunk, map_bias) ==
618	    arena_maxclass);
619	assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) ==
620	    arena_maxclass);
621	assert(arena_mapbits_dirty_get(chunk, map_bias) ==
622	    arena_mapbits_dirty_get(chunk, chunk_npages-1));
623
624	/*
625	 * Remove run from the runs_avail tree, so that the arena does not use
626	 * it.
627	 */
628	arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias);
629
630	if (arena->spare != NULL) {
631		arena_chunk_t *spare = arena->spare;
632
633		arena->spare = chunk;
634		if (arena_mapbits_dirty_get(spare, map_bias) != 0) {
635			arena_dirty_remove(arena, spare, map_bias,
636			    chunk_npages-map_bias);
637		}
638		arena_chunk_dalloc_internal(arena, spare);
639	} else
640		arena->spare = chunk;
641}
642
643static arena_run_t *
644arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero)
645{
646	arena_run_t *run;
647	arena_chunk_map_misc_t *miscelm;
648	arena_chunk_map_misc_t *key;
649
650	key = (arena_chunk_map_misc_t *)(size | CHUNK_MAP_KEY);
651	miscelm = arena_avail_tree_nsearch(&arena->runs_avail, key);
652	if (miscelm != NULL) {
653		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(miscelm);
654		size_t pageind = arena_miscelm_to_pageind(miscelm);
655
656		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
657		    LG_PAGE));
658		arena_run_split_large(arena, run, size, zero);
659		return (run);
660	}
661
662	return (NULL);
663}
664
665static arena_run_t *
666arena_run_alloc_large(arena_t *arena, size_t size, bool zero)
667{
668	arena_chunk_t *chunk;
669	arena_run_t *run;
670
671	assert(size <= arena_maxclass);
672	assert((size & PAGE_MASK) == 0);
673
674	/* Search the arena's chunks for the lowest best fit. */
675	run = arena_run_alloc_large_helper(arena, size, zero);
676	if (run != NULL)
677		return (run);
678
679	/*
680	 * No usable runs.  Create a new chunk from which to allocate the run.
681	 */
682	chunk = arena_chunk_alloc(arena);
683	if (chunk != NULL) {
684		run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE));
685		arena_run_split_large(arena, run, size, zero);
686		return (run);
687	}
688
689	/*
690	 * arena_chunk_alloc() failed, but another thread may have made
691	 * sufficient memory available while this one dropped arena->lock in
692	 * arena_chunk_alloc(), so search one more time.
693	 */
694	return (arena_run_alloc_large_helper(arena, size, zero));
695}
696
697static arena_run_t *
698arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind)
699{
700	arena_run_t *run;
701	arena_chunk_map_misc_t *miscelm;
702	arena_chunk_map_misc_t *key;
703
704	key = (arena_chunk_map_misc_t *)(size | CHUNK_MAP_KEY);
705	miscelm = arena_avail_tree_nsearch(&arena->runs_avail, key);
706	if (miscelm != NULL) {
707		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(miscelm);
708		size_t pageind = arena_miscelm_to_pageind(miscelm);
709
710		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
711		    LG_PAGE));
712		arena_run_split_small(arena, run, size, binind);
713		return (run);
714	}
715
716	return (NULL);
717}
718
719static arena_run_t *
720arena_run_alloc_small(arena_t *arena, size_t size, size_t binind)
721{
722	arena_chunk_t *chunk;
723	arena_run_t *run;
724
725	assert(size <= arena_maxclass);
726	assert((size & PAGE_MASK) == 0);
727	assert(binind != BININD_INVALID);
728
729	/* Search the arena's chunks for the lowest best fit. */
730	run = arena_run_alloc_small_helper(arena, size, binind);
731	if (run != NULL)
732		return (run);
733
734	/*
735	 * No usable runs.  Create a new chunk from which to allocate the run.
736	 */
737	chunk = arena_chunk_alloc(arena);
738	if (chunk != NULL) {
739		run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE));
740		arena_run_split_small(arena, run, size, binind);
741		return (run);
742	}
743
744	/*
745	 * arena_chunk_alloc() failed, but another thread may have made
746	 * sufficient memory available while this one dropped arena->lock in
747	 * arena_chunk_alloc(), so search one more time.
748	 */
749	return (arena_run_alloc_small_helper(arena, size, binind));
750}
751
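/*
 * Purge dirty pages once they exceed the active:dirty ratio implied by
 * opt_lg_dirty_mult.  For example, with the default lg_dirty_mult of 3 (an
 * 8:1 ratio), an arena with 4096 active pages starts purging once more than
 * 512 dirty pages accumulate.
 */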
752static inline void
753arena_maybe_purge(arena_t *arena)
754{
755	size_t threshold;
756
757	/* Don't purge if the option is disabled. */
758	if (opt_lg_dirty_mult < 0)
759		return;
760	threshold = (arena->nactive >> opt_lg_dirty_mult);
761	/*
762	 * Don't purge unless the number of purgeable pages exceeds the
763	 * threshold.
764	 */
765	if (arena->ndirty <= threshold)
766		return;
767
768	arena_purge(arena, false);
769}
770
771static size_t
772arena_dirty_count(arena_t *arena)
773{
774	size_t ndirty = 0;
775	arena_chunk_map_misc_t *miscelm;
776	arena_chunk_t *chunk;
777	size_t pageind, npages;
778
779	ql_foreach(miscelm, &arena->runs_dirty, dr_link) {
780		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
781		pageind = arena_miscelm_to_pageind(miscelm);
782		assert(arena_mapbits_allocated_get(chunk, pageind) == 0);
783		assert(arena_mapbits_large_get(chunk, pageind) == 0);
784		assert(arena_mapbits_dirty_get(chunk, pageind) != 0);
785		npages = arena_mapbits_unallocated_size_get(chunk, pageind) >>
786		    LG_PAGE;
787		ndirty += npages;
788	}
789
790	return (ndirty);
791}
792
793static size_t
794arena_compute_npurge(arena_t *arena, bool all)
795{
796	size_t npurge;
797
798	/*
799	 * Compute the minimum number of pages that this thread should try to
800	 * purge.
801	 */
802	if (all == false) {
803		size_t threshold = (arena->nactive >> opt_lg_dirty_mult);
804
805		npurge = arena->ndirty - threshold;
806	} else
807		npurge = arena->ndirty;
808
809	return (npurge);
810}
811
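/*
 * Move dirty runs from runs_dirty onto the caller's purge list, temporarily
 * allocating each one so that no other thread can reuse it while the arena
 * lock is dropped during purging.  Stop once at least npurge pages have been
 * stashed, unless a full purge was requested.
 */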
812static size_t
813arena_stash_dirty(arena_t *arena, bool all, size_t npurge,
814    arena_chunk_miscelms_t *miscelms)
815{
816	arena_chunk_map_misc_t *miscelm;
817	size_t nstashed = 0;
818
819	/* Add at least npurge pages to purge_list. */
820	for (miscelm = ql_first(&arena->runs_dirty); miscelm != NULL;
821	    miscelm = ql_first(&arena->runs_dirty)) {
822		arena_chunk_t *chunk =
823		    (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
824		size_t pageind = arena_miscelm_to_pageind(miscelm);
825		size_t run_size = arena_mapbits_unallocated_size_get(chunk,
826		    pageind);
827		size_t npages = run_size >> LG_PAGE;
828		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
829		    (uintptr_t)(pageind << LG_PAGE));
830
831		assert(pageind + npages <= chunk_npages);
832		assert(arena_mapbits_dirty_get(chunk, pageind) ==
833		    arena_mapbits_dirty_get(chunk, pageind+npages-1));
834
835		/*
836		 * If purging the spare chunk's run, make it available prior to
837		 * allocation.
838		 */
839		if (chunk == arena->spare)
840			arena_chunk_alloc(arena);
841
842		/* Temporarily allocate the free dirty run. */
843		arena_run_split_large(arena, run, run_size, false);
844		/* Append to purge_list for later processing. */
845		ql_elm_new(miscelm, dr_link);
846		ql_tail_insert(miscelms, miscelm, dr_link);
847
848		nstashed += npages;
849
850		if (all == false && nstashed >= npurge)
851			break;
852	}
853
854	return (nstashed);
855}
856
857static size_t
858arena_purge_stashed(arena_t *arena, arena_chunk_miscelms_t *miscelms)
859{
860	size_t npurged, nmadvise;
861	arena_chunk_map_misc_t *miscelm;
862
863	if (config_stats)
864		nmadvise = 0;
865	npurged = 0;
866
867	malloc_mutex_unlock(&arena->lock);
868
869	ql_foreach(miscelm, miscelms, dr_link) {
870		arena_chunk_t *chunk;
871		size_t pageind, run_size, npages, flag_unzeroed, i;
872		bool unzeroed;
873
874		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
875		pageind = arena_miscelm_to_pageind(miscelm);
876		run_size = arena_mapbits_large_size_get(chunk, pageind);
877		npages = run_size >> LG_PAGE;
878
879		assert(pageind + npages <= chunk_npages);
880		unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind <<
881		    LG_PAGE)), run_size);
882		flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0;
883
884		/*
885		 * Set the unzeroed flag for all pages, now that pages_purge()
886		 * has returned whether the pages were zeroed as a side effect
887		 * of purging.  This chunk map modification is safe even though
888		 * the arena mutex isn't currently owned by this thread,
889		 * because the run is marked as allocated, thus protecting it
890		 * from being modified by any other thread.  As long as these
891		 * writes don't perturb the first and last elements'
892		 * CHUNK_MAP_ALLOCATED bits, behavior is well defined.
893		 */
894		for (i = 0; i < npages; i++) {
895			arena_mapbits_unzeroed_set(chunk, pageind+i,
896			    flag_unzeroed);
897		}
898
899		npurged += npages;
900		if (config_stats)
901			nmadvise++;
902	}
903
904	malloc_mutex_lock(&arena->lock);
905
906	if (config_stats) {
907		arena->stats.nmadvise += nmadvise;
908		arena->stats.purged += npurged;
909	}
910
911	return (npurged);
912}
913
914static void
915arena_unstash_purged(arena_t *arena, arena_chunk_miscelms_t *miscelms)
916{
917	arena_chunk_map_misc_t *miscelm;
918
919	/* Deallocate runs. */
920	for (miscelm = ql_first(miscelms); miscelm != NULL;
921	    miscelm = ql_first(miscelms)) {
922		arena_chunk_t *chunk =
923		    (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
924		size_t pageind = arena_miscelm_to_pageind(miscelm);
925		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
926		    (uintptr_t)(pageind << LG_PAGE));
927		ql_remove(miscelms, miscelm, dr_link);
928		arena_run_dalloc(arena, run, false, true);
929	}
930}
931
932static void
933arena_purge(arena_t *arena, bool all)
934{
935	size_t npurge, npurgeable, npurged;
936	arena_chunk_miscelms_t purge_list;
937
938	if (config_debug) {
939		size_t ndirty = arena_dirty_count(arena);
940		assert(ndirty == arena->ndirty);
941	}
942	assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty || all);
943
944	if (config_stats)
945		arena->stats.npurge++;
946
947	npurge = arena_compute_npurge(arena, all);
948	ql_new(&purge_list);
949	npurgeable = arena_stash_dirty(arena, all, npurge, &purge_list);
950	assert(npurgeable >= npurge);
951	npurged = arena_purge_stashed(arena, &purge_list);
952	assert(npurged == npurgeable);
953	arena_unstash_purged(arena, &purge_list);
954}
955
956void
957arena_purge_all(arena_t *arena)
958{
959
960	malloc_mutex_lock(&arena->lock);
961	arena_purge(arena, true);
962	malloc_mutex_unlock(&arena->lock);
963}
964
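/*
 * Coalesce the run described by (*p_run_ind, *p_run_pages) with any adjacent
 * unallocated runs of matching dirtiness, updating the size and position in
 * place; the caller re-inserts the coalesced run afterward.
 */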
965static void
966arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size,
967    size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty)
968{
969	size_t size = *p_size;
970	size_t run_ind = *p_run_ind;
971	size_t run_pages = *p_run_pages;
972
973	/* Try to coalesce forward. */
974	if (run_ind + run_pages < chunk_npages &&
975	    arena_mapbits_allocated_get(chunk, run_ind+run_pages) == 0 &&
976	    arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty) {
977		size_t nrun_size = arena_mapbits_unallocated_size_get(chunk,
978		    run_ind+run_pages);
979		size_t nrun_pages = nrun_size >> LG_PAGE;
980
981		/*
982		 * Remove successor from runs_avail; the coalesced run is
983		 * inserted later.
984		 */
985		assert(arena_mapbits_unallocated_size_get(chunk,
986		    run_ind+run_pages+nrun_pages-1) == nrun_size);
987		assert(arena_mapbits_dirty_get(chunk,
988		    run_ind+run_pages+nrun_pages-1) == flag_dirty);
989		arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages);
990
991		/* If the successor is dirty, remove it from runs_dirty. */
992		if (flag_dirty != 0) {
993			arena_dirty_remove(arena, chunk, run_ind+run_pages,
994			    nrun_pages);
995		}
996
997		size += nrun_size;
998		run_pages += nrun_pages;
999
1000		arena_mapbits_unallocated_size_set(chunk, run_ind, size);
1001		arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1,
1002		    size);
1003	}
1004
1005	/* Try to coalesce backward. */
1006	if (run_ind > map_bias && arena_mapbits_allocated_get(chunk,
1007	    run_ind-1) == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) ==
1008	    flag_dirty) {
1009		size_t prun_size = arena_mapbits_unallocated_size_get(chunk,
1010		    run_ind-1);
1011		size_t prun_pages = prun_size >> LG_PAGE;
1012
1013		run_ind -= prun_pages;
1014
1015		/*
1016		 * Remove predecessor from runs_avail; the coalesced run is
1017		 * inserted later.
1018		 */
1019		assert(arena_mapbits_unallocated_size_get(chunk, run_ind) ==
1020		    prun_size);
1021		assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty);
1022		arena_avail_remove(arena, chunk, run_ind, prun_pages);
1023
1024		/* If the predecessor is dirty, remove it from runs_dirty. */
1025		if (flag_dirty != 0)
1026			arena_dirty_remove(arena, chunk, run_ind, prun_pages);
1027
1028		size += prun_size;
1029		run_pages += prun_pages;
1030
1031		arena_mapbits_unallocated_size_set(chunk, run_ind, size);
1032		arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1,
1033		    size);
1034	}
1035
1036	*p_size = size;
1037	*p_run_ind = run_ind;
1038	*p_run_pages = run_pages;
1039}
1040
1041static void
1042arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned)
1043{
1044	arena_chunk_t *chunk;
1045	size_t size, run_ind, run_pages, flag_dirty;
1046
1047	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1048	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
1049	assert(run_ind >= map_bias);
1050	assert(run_ind < chunk_npages);
1051	if (arena_mapbits_large_get(chunk, run_ind) != 0) {
1052		size = arena_mapbits_large_size_get(chunk, run_ind);
1053		assert(size == PAGE ||
1054		    arena_mapbits_large_size_get(chunk,
1055		    run_ind+(size>>LG_PAGE)-1) == 0);
1056	} else {
1057		size_t binind = arena_bin_index(arena, run->bin);
1058		arena_bin_info_t *bin_info = &arena_bin_info[binind];
1059		size = bin_info->run_size;
1060	}
1061	run_pages = (size >> LG_PAGE);
1062	arena_cactive_update(arena, 0, run_pages);
1063	arena->nactive -= run_pages;
1064
1065	/*
1066	 * The run is dirty if the caller claims to have dirtied it, as well as
1067	 * if it was already dirty before being allocated and the caller
1068	 * doesn't claim to have cleaned it.
1069	 */
1070	assert(arena_mapbits_dirty_get(chunk, run_ind) ==
1071	    arena_mapbits_dirty_get(chunk, run_ind+run_pages-1));
1072	if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0)
1073		dirty = true;
1074	flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0;
1075
1076	/* Mark pages as unallocated in the chunk map. */
1077	if (dirty) {
1078		arena_mapbits_unallocated_set(chunk, run_ind, size,
1079		    CHUNK_MAP_DIRTY);
1080		arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size,
1081		    CHUNK_MAP_DIRTY);
1082	} else {
1083		arena_mapbits_unallocated_set(chunk, run_ind, size,
1084		    arena_mapbits_unzeroed_get(chunk, run_ind));
1085		arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size,
1086		    arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1));
1087	}
1088
1089	arena_run_coalesce(arena, chunk, &size, &run_ind, &run_pages,
1090	    flag_dirty);
1091
1092	/* Insert into runs_avail, now that coalescing is complete. */
1093	assert(arena_mapbits_unallocated_size_get(chunk, run_ind) ==
1094	    arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1));
1095	assert(arena_mapbits_dirty_get(chunk, run_ind) ==
1096	    arena_mapbits_dirty_get(chunk, run_ind+run_pages-1));
1097	arena_avail_insert(arena, chunk, run_ind, run_pages);
1098
1099	if (dirty)
1100		arena_dirty_insert(arena, chunk, run_ind, run_pages);
1101
1102	/* Deallocate chunk if it is now completely unused. */
1103	if (size == arena_maxclass) {
1104		assert(run_ind == map_bias);
1105		assert(run_pages == (arena_maxclass >> LG_PAGE));
1106		arena_chunk_dalloc(arena, chunk);
1107	}
1108
1109	/*
1110	 * It is okay to do dirty page processing here even if the chunk was
1111	 * deallocated above, since in that case it is the spare.  Waiting
1112	 * until after possible chunk deallocation to do dirty processing
1113	 * allows for an old spare to be fully deallocated, thus decreasing the
1114	 * chances of spuriously crossing the dirty page purging threshold.
1115	 */
1116	if (dirty)
1117		arena_maybe_purge(arena);
1118}
1119
1120static void
1121arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
1122    size_t oldsize, size_t newsize)
1123{
1124	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
1125	size_t head_npages = (oldsize - newsize) >> LG_PAGE;
1126	size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind);
1127
1128	assert(oldsize > newsize);
1129
1130	/*
1131	 * Update the chunk map so that arena_run_dalloc() can treat the
1132	 * leading run as separately allocated.  Set the last element of each
1133	 * run first, in case of single-page runs.
1134	 */
1135	assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize);
1136	arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty);
1137	arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty);
1138
1139	if (config_debug) {
1140		UNUSED size_t tail_npages = newsize >> LG_PAGE;
1141		assert(arena_mapbits_large_size_get(chunk,
1142		    pageind+head_npages+tail_npages-1) == 0);
1143		assert(arena_mapbits_dirty_get(chunk,
1144		    pageind+head_npages+tail_npages-1) == flag_dirty);
1145	}
1146	arena_mapbits_large_set(chunk, pageind+head_npages, newsize,
1147	    flag_dirty);
1148
1149	arena_run_dalloc(arena, run, false, false);
1150}
1151
1152static void
1153arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
1154    size_t oldsize, size_t newsize, bool dirty)
1155{
1156	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
1157	size_t head_npages = newsize >> LG_PAGE;
1158	size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind);
1159
1160	assert(oldsize > newsize);
1161
1162	/*
1163	 * Update the chunk map so that arena_run_dalloc() can treat the
1164	 * trailing run as separately allocated.  Set the last element of each
1165	 * run first, in case of single-page runs.
1166	 */
1167	assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize);
1168	arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty);
1169	arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty);
1170
1171	if (config_debug) {
1172		UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE;
1173		assert(arena_mapbits_large_size_get(chunk,
1174		    pageind+head_npages+tail_npages-1) == 0);
1175		assert(arena_mapbits_dirty_get(chunk,
1176		    pageind+head_npages+tail_npages-1) == flag_dirty);
1177	}
1178	arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize,
1179	    flag_dirty);
1180
1181	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
1182	    dirty, false);
1183}
1184
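/* Return the lowest non-full run tracked by bin, or NULL if there is none. */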
1185static arena_run_t *
1186arena_bin_runs_first(arena_bin_t *bin)
1187{
1188	arena_chunk_map_misc_t *miscelm = arena_run_tree_first(&bin->runs);
1189	if (miscelm != NULL) {
1190		arena_chunk_t *chunk;
1191		size_t pageind;
1192		arena_run_t *run;
1193
1194		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
1195		pageind = arena_miscelm_to_pageind(miscelm);
1196		run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
1197		    arena_mapbits_small_runind_get(chunk, pageind)) <<
1198		    LG_PAGE));
1199		return (run);
1200	}
1201
1202	return (NULL);
1203}
1204
1205static void
1206arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run)
1207{
1208	arena_chunk_t *chunk = CHUNK_ADDR2BASE(run);
1209	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
1210	arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
1211
1212	assert(arena_run_tree_search(&bin->runs, miscelm) == NULL);
1213
1214	arena_run_tree_insert(&bin->runs, miscelm);
1215}
1216
1217static void
1218arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run)
1219{
1220	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1221	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
1222	arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind);
1223
1224	assert(arena_run_tree_search(&bin->runs, miscelm) != NULL);
1225
1226	arena_run_tree_remove(&bin->runs, miscelm);
1227}
1228
1229static arena_run_t *
1230arena_bin_nonfull_run_tryget(arena_bin_t *bin)
1231{
1232	arena_run_t *run = arena_bin_runs_first(bin);
1233	if (run != NULL) {
1234		arena_bin_runs_remove(bin, run);
1235		if (config_stats)
1236			bin->stats.reruns++;
1237	}
1238	return (run);
1239}
1240
1241static arena_run_t *
1242arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
1243{
1244	arena_run_t *run;
1245	size_t binind;
1246	arena_bin_info_t *bin_info;
1247
1248	/* Look for a usable run. */
1249	run = arena_bin_nonfull_run_tryget(bin);
1250	if (run != NULL)
1251		return (run);
1252	/* No existing runs have any space available. */
1253
1254	binind = arena_bin_index(arena, bin);
1255	bin_info = &arena_bin_info[binind];
1256
1257	/* Allocate a new run. */
1258	malloc_mutex_unlock(&bin->lock);
1259	/******************************/
1260	malloc_mutex_lock(&arena->lock);
1261	run = arena_run_alloc_small(arena, bin_info->run_size, binind);
1262	if (run != NULL) {
1263		bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
1264		    (uintptr_t)bin_info->bitmap_offset);
1265
1266		/* Initialize run internals. */
1267		run->bin = bin;
1268		run->nextind = 0;
1269		run->nfree = bin_info->nregs;
1270		bitmap_init(bitmap, &bin_info->bitmap_info);
1271	}
1272	malloc_mutex_unlock(&arena->lock);
1273	/********************************/
1274	malloc_mutex_lock(&bin->lock);
1275	if (run != NULL) {
1276		if (config_stats) {
1277			bin->stats.nruns++;
1278			bin->stats.curruns++;
1279		}
1280		return (run);
1281	}
1282
1283	/*
1284	 * arena_run_alloc_small() failed, but another thread may have made
1285	 * sufficient memory available while this one dropped bin->lock above,
1286	 * so search one more time.
1287	 */
1288	run = arena_bin_nonfull_run_tryget(bin);
1289	if (run != NULL)
1290		return (run);
1291
1292	return (NULL);
1293}
1294
1295/* Re-fill bin->runcur, then call arena_run_reg_alloc(). */
1296static void *
1297arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
1298{
1299	void *ret;
1300	size_t binind;
1301	arena_bin_info_t *bin_info;
1302	arena_run_t *run;
1303
1304	binind = arena_bin_index(arena, bin);
1305	bin_info = &arena_bin_info[binind];
1306	bin->runcur = NULL;
1307	run = arena_bin_nonfull_run_get(arena, bin);
1308	if (bin->runcur != NULL && bin->runcur->nfree > 0) {
1309		/*
1310		 * Another thread updated runcur while this one ran without the
1311		 * bin lock in arena_bin_nonfull_run_get().
1312		 */
1313		assert(bin->runcur->nfree > 0);
1314		ret = arena_run_reg_alloc(bin->runcur, bin_info);
1315		if (run != NULL) {
1316			arena_chunk_t *chunk;
1317
1318			/*
1319			 * arena_run_alloc_small() may have allocated run, or
1320			 * it may have pulled run from the bin's run tree.
1321			 * Therefore it is unsafe to make any assumptions about
1322			 * how run has previously been used, and
1323			 * arena_bin_lower_run() must be called, as if a region
1324			 * were just deallocated from the run.
1325			 */
1326			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1327			if (run->nfree == bin_info->nregs)
1328				arena_dalloc_bin_run(arena, chunk, run, bin);
1329			else
1330				arena_bin_lower_run(arena, chunk, run, bin);
1331		}
1332		return (ret);
1333	}
1334
1335	if (run == NULL)
1336		return (NULL);
1337
1338	bin->runcur = run;
1339
1340	assert(bin->runcur->nfree > 0);
1341
1342	return (arena_run_reg_alloc(bin->runcur, bin_info));
1343}
1344
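/*
 * Fill tbin with regions from the bin's current run, falling back to
 * arena_bin_malloc_hard(), and insert them so that the lowest regions are
 * handed out first.
 */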
1345void
1346arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind,
1347    uint64_t prof_accumbytes)
1348{
1349	unsigned i, nfill;
1350	arena_bin_t *bin;
1351	arena_run_t *run;
1352	void *ptr;
1353
1354	assert(tbin->ncached == 0);
1355
1356	if (config_prof && arena_prof_accum(arena, prof_accumbytes))
1357		prof_idump();
1358	bin = &arena->bins[binind];
1359	malloc_mutex_lock(&bin->lock);
1360	for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
1361	    tbin->lg_fill_div); i < nfill; i++) {
1362		if ((run = bin->runcur) != NULL && run->nfree > 0)
1363			ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]);
1364		else
1365			ptr = arena_bin_malloc_hard(arena, bin);
1366		if (ptr == NULL)
1367			break;
1368		if (config_fill && opt_junk) {
1369			arena_alloc_junk_small(ptr, &arena_bin_info[binind],
1370			    true);
1371		}
1372		/* Insert such that low regions get used first. */
1373		tbin->avail[nfill - 1 - i] = ptr;
1374	}
1375	if (config_stats) {
1376		bin->stats.allocated += i * arena_bin_info[binind].reg_size;
1377		bin->stats.nmalloc += i;
1378		bin->stats.nrequests += tbin->tstats.nrequests;
1379		bin->stats.nfills++;
1380		tbin->tstats.nrequests = 0;
1381	}
1382	malloc_mutex_unlock(&bin->lock);
1383	tbin->ncached = i;
1384}
1385
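/*
 * Write the 0xa5 junk pattern on allocation.  When the region must be zeroed,
 * only the surrounding redzones are junked; otherwise the entire reg_interval
 * (region plus redzones) is filled.
 */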
1386void
1387arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero)
1388{
1389
1390	if (zero) {
1391		size_t redzone_size = bin_info->redzone_size;
1392		memset((void *)((uintptr_t)ptr - redzone_size), 0xa5,
1393		    redzone_size);
1394		memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5,
1395		    redzone_size);
1396	} else {
1397		memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5,
1398		    bin_info->reg_interval);
1399	}
1400}
1401
1402#ifdef JEMALLOC_JET
1403#undef arena_redzone_corruption
1404#define	arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption_impl)
1405#endif
1406static void
1407arena_redzone_corruption(void *ptr, size_t usize, bool after,
1408    size_t offset, uint8_t byte)
1409{
1410
1411	malloc_printf("<jemalloc>: Corrupt redzone %zu byte%s %s %p "
1412	    "(size %zu), byte=%#x\n", offset, (offset == 1) ? "" : "s",
1413	    after ? "after" : "before", ptr, usize, byte);
1414}
1415#ifdef JEMALLOC_JET
1416#undef arena_redzone_corruption
1417#define	arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption)
1418arena_redzone_corruption_t *arena_redzone_corruption =
1419    JEMALLOC_N(arena_redzone_corruption_impl);
1420#endif
1421
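/*
 * Verify that the redzones on both sides of ptr still contain the 0xa5 junk
 * pattern, reporting (and, if reset, repairing) any corrupted bytes.
 */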
1422static void
1423arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset)
1424{
1425	size_t size = bin_info->reg_size;
1426	size_t redzone_size = bin_info->redzone_size;
1427	size_t i;
1428	bool error = false;
1429
1430	for (i = 1; i <= redzone_size; i++) {
1431		uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i);
1432		if (*byte != 0xa5) {
1433			error = true;
1434			arena_redzone_corruption(ptr, size, false, i, *byte);
1435			if (reset)
1436				*byte = 0xa5;
1437		}
1438	}
1439	for (i = 0; i < redzone_size; i++) {
1440		uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i);
1441		if (*byte != 0xa5) {
1442			error = true;
1443			arena_redzone_corruption(ptr, size, true, i, *byte);
1444			if (reset)
1445				*byte = 0xa5;
1446		}
1447	}
1448	if (opt_abort && error)
1449		abort();
1450}
1451
1452#ifdef JEMALLOC_JET
1453#undef arena_dalloc_junk_small
1454#define	arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small_impl)
1455#endif
1456void
1457arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info)
1458{
1459	size_t redzone_size = bin_info->redzone_size;
1460
1461	arena_redzones_validate(ptr, bin_info, false);
1462	memset((void *)((uintptr_t)ptr - redzone_size), 0x5a,
1463	    bin_info->reg_interval);
1464}
1465#ifdef JEMALLOC_JET
1466#undef arena_dalloc_junk_small
1467#define	arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small)
1468arena_dalloc_junk_small_t *arena_dalloc_junk_small =
1469    JEMALLOC_N(arena_dalloc_junk_small_impl);
1470#endif
1471
1472void
1473arena_quarantine_junk_small(void *ptr, size_t usize)
1474{
1475	size_t binind;
1476	arena_bin_info_t *bin_info;
1477	cassert(config_fill);
1478	assert(opt_junk);
1479	assert(opt_quarantine);
1480	assert(usize <= SMALL_MAXCLASS);
1481
1482	binind = small_size2bin(usize);
1483	bin_info = &arena_bin_info[binind];
1484	arena_redzones_validate(ptr, bin_info, true);
1485}
1486
1487void *
1488arena_malloc_small(arena_t *arena, size_t size, bool zero)
1489{
1490	void *ret;
1491	arena_bin_t *bin;
1492	arena_run_t *run;
1493	size_t binind;
1494
1495	binind = small_size2bin(size);
1496	assert(binind < NBINS);
1497	bin = &arena->bins[binind];
1498	size = small_bin2size(binind);
1499
1500	malloc_mutex_lock(&bin->lock);
1501	if ((run = bin->runcur) != NULL && run->nfree > 0)
1502		ret = arena_run_reg_alloc(run, &arena_bin_info[binind]);
1503	else
1504		ret = arena_bin_malloc_hard(arena, bin);
1505
1506	if (ret == NULL) {
1507		malloc_mutex_unlock(&bin->lock);
1508		return (NULL);
1509	}
1510
1511	if (config_stats) {
1512		bin->stats.allocated += size;
1513		bin->stats.nmalloc++;
1514		bin->stats.nrequests++;
1515	}
1516	malloc_mutex_unlock(&bin->lock);
1517	if (config_prof && isthreaded == false && arena_prof_accum(arena, size))
1518		prof_idump();
1519
1520	if (zero == false) {
1521		if (config_fill) {
1522			if (opt_junk) {
1523				arena_alloc_junk_small(ret,
1524				    &arena_bin_info[binind], false);
1525			} else if (opt_zero)
1526				memset(ret, 0, size);
1527		}
1528		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
1529	} else {
1530		if (config_fill && opt_junk) {
1531			arena_alloc_junk_small(ret, &arena_bin_info[binind],
1532			    true);
1533		}
1534		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
1535		memset(ret, 0, size);
1536	}
1537
1538	return (ret);
1539}
1540
1541void *
1542arena_malloc_large(arena_t *arena, size_t size, bool zero)
1543{
1544	void *ret;
1545	UNUSED bool idump;
1546
1547	/* Large allocation. */
1548	size = PAGE_CEILING(size);
1549	malloc_mutex_lock(&arena->lock);
1550	ret = (void *)arena_run_alloc_large(arena, size, zero);
1551	if (ret == NULL) {
1552		malloc_mutex_unlock(&arena->lock);
1553		return (NULL);
1554	}
1555	if (config_stats) {
1556		arena->stats.nmalloc_large++;
1557		arena->stats.nrequests_large++;
1558		arena->stats.allocated_large += size;
1559		arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
1560		arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
1561		arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
1562	}
1563	if (config_prof)
1564		idump = arena_prof_accum_locked(arena, size);
1565	malloc_mutex_unlock(&arena->lock);
1566	if (config_prof && idump)
1567		prof_idump();
1568
1569	if (zero == false) {
1570		if (config_fill) {
1571			if (opt_junk)
1572				memset(ret, 0xa5, size);
1573			else if (opt_zero)
1574				memset(ret, 0, size);
1575		}
1576	}
1577
1578	return (ret);
1579}
1580
1581/* Only handles large allocations that require more than page alignment. */
1582void *
1583arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero)
1584{
1585	void *ret;
1586	size_t alloc_size, leadsize, trailsize;
1587	arena_run_t *run;
1588	arena_chunk_t *chunk;
1589
1590	assert((size & PAGE_MASK) == 0);
1591
1592	alignment = PAGE_CEILING(alignment);
1593	alloc_size = size + alignment - PAGE;
1594
1595	malloc_mutex_lock(&arena->lock);
1596	run = arena_run_alloc_large(arena, alloc_size, false);
1597	if (run == NULL) {
1598		malloc_mutex_unlock(&arena->lock);
1599		return (NULL);
1600	}
1601	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1602
1603	leadsize = ALIGNMENT_CEILING((uintptr_t)run, alignment) -
1604	    (uintptr_t)run;
1605	assert(alloc_size >= leadsize + size);
1606	trailsize = alloc_size - leadsize - size;
1607	ret = (void *)((uintptr_t)run + leadsize);
1608	if (leadsize != 0) {
1609		arena_run_trim_head(arena, chunk, run, alloc_size, alloc_size -
1610		    leadsize);
1611	}
1612	if (trailsize != 0) {
1613		arena_run_trim_tail(arena, chunk, ret, size + trailsize, size,
1614		    false);
1615	}
1616	arena_run_init_large(arena, (arena_run_t *)ret, size, zero);
1617
1618	if (config_stats) {
1619		arena->stats.nmalloc_large++;
1620		arena->stats.nrequests_large++;
1621		arena->stats.allocated_large += size;
1622		arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
1623		arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
1624		arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
1625	}
1626	malloc_mutex_unlock(&arena->lock);
1627
1628	if (config_fill && zero == false) {
1629		if (opt_junk)
1630			memset(ret, 0xa5, size);
1631		else if (opt_zero)
1632			memset(ret, 0, size);
1633	}
1634	return (ret);
1635}
1636
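/*
 * Record the original small size class of a sampled allocation that was
 * promoted to a page-sized large allocation, so that isalloc(ptr, true)
 * reports the requested size while the allocation itself remains page-sized.
 */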
1637void
1638arena_prof_promoted(const void *ptr, size_t size)
1639{
1640	arena_chunk_t *chunk;
1641	size_t pageind, binind;
1642
1643	cassert(config_prof);
1644	assert(ptr != NULL);
1645	assert(CHUNK_ADDR2BASE(ptr) != ptr);
1646	assert(isalloc(ptr, false) == PAGE);
1647	assert(isalloc(ptr, true) == PAGE);
1648	assert(size <= SMALL_MAXCLASS);
1649
1650	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
1651	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
1652	binind = small_size2bin(size);
1653	assert(binind < NBINS);
1654	arena_mapbits_large_binind_set(chunk, pageind, binind);
1655
1656	assert(isalloc(ptr, false) == PAGE);
1657	assert(isalloc(ptr, true) == size);
1658}
1659
1660static void
1661arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
1662    arena_bin_t *bin)
1663{
1664
1665	/* Dissociate run from bin. */
1666	if (run == bin->runcur)
1667		bin->runcur = NULL;
1668	else {
1669		size_t binind = arena_bin_index(chunk->arena, bin);
1670		arena_bin_info_t *bin_info = &arena_bin_info[binind];
1671
1672		if (bin_info->nregs != 1) {
1673			/*
1674			 * This block's conditional is necessary because if the
1675			 * run only contains one region, then it never gets
1676			 * inserted into the non-full runs tree.
1677			 */
1678			arena_bin_runs_remove(bin, run);
1679		}
1680	}
1681}
1682
1683static void
1684arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
1685    arena_bin_t *bin)
1686{
1687	size_t binind;
1688	arena_bin_info_t *bin_info;
1689	size_t npages, run_ind, past;
1690
1691	assert(run != bin->runcur);
1692	assert(arena_run_tree_search(&bin->runs, arena_miscelm_get(chunk,
1693	    ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)) == NULL);
1694
1695	binind = arena_bin_index(chunk->arena, run->bin);
1696	bin_info = &arena_bin_info[binind];
1697
1698	malloc_mutex_unlock(&bin->lock);
1699	/******************************/
1700	npages = bin_info->run_size >> LG_PAGE;
1701	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
1702	past = (size_t)(PAGE_CEILING((uintptr_t)run +
1703	    (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind *
1704	    bin_info->reg_interval - bin_info->redzone_size) -
1705	    (uintptr_t)chunk) >> LG_PAGE);
1706	malloc_mutex_lock(&arena->lock);
1707
1708	/*
1709	 * If the run was originally clean, and some pages were never touched,
1710	 * trim the clean pages before deallocating the dirty portion of the
1711	 * run.
1712	 */
1713	assert(arena_mapbits_dirty_get(chunk, run_ind) ==
1714	    arena_mapbits_dirty_get(chunk, run_ind+npages-1));
1715	if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind <
1716	    npages) {
1717		/* Trim clean pages.  Convert to large run beforehand. */
1718		assert(npages > 0);
1719		arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, 0);
1720		arena_mapbits_large_set(chunk, run_ind+npages-1, 0, 0);
1721		arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE),
1722		    ((past - run_ind) << LG_PAGE), false);
1723		/* npages = past - run_ind; */
1724	}
1725	arena_run_dalloc(arena, run, true, false);
1726	malloc_mutex_unlock(&arena->lock);
1727	/****************************/
1728	malloc_mutex_lock(&bin->lock);
1729	if (config_stats)
1730		bin->stats.curruns--;
1731}
1732
1733static void
1734arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
1735    arena_bin_t *bin)
1736{
1737
1738	/*
1739	 * Make sure that if bin->runcur is non-NULL, it refers to the lowest
1740	 * non-full run.  It is okay to NULL runcur out rather than proactively
1741	 * keeping it pointing at the lowest non-full run.
1742	 */
1743	if ((uintptr_t)run < (uintptr_t)bin->runcur) {
1744		/* Switch runcur. */
1745		if (bin->runcur->nfree > 0)
1746			arena_bin_runs_insert(bin, bin->runcur);
1747		bin->runcur = run;
1748		if (config_stats)
1749			bin->stats.reruns++;
1750	} else
1751		arena_bin_runs_insert(bin, run);
1752}
1753
1754void
1755arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
1756    arena_chunk_map_bits_t *bitselm)
1757{
1758	size_t pageind;
1759	arena_run_t *run;
1760	arena_bin_t *bin;
1761	arena_bin_info_t *bin_info;
1762	size_t size, binind;
1763
1764	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
1765	run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
1766	    arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE));
1767	bin = run->bin;
1768	binind = arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
1769	    pageind));
1770	bin_info = &arena_bin_info[binind];
1771	if (config_fill || config_stats)
1772		size = bin_info->reg_size;
1773
1774	if (config_fill && opt_junk)
1775		arena_dalloc_junk_small(ptr, bin_info);
1776
1777	arena_run_reg_dalloc(run, ptr);
1778	if (run->nfree == bin_info->nregs) {
1779		arena_dissociate_bin_run(chunk, run, bin);
1780		arena_dalloc_bin_run(arena, chunk, run, bin);
1781	} else if (run->nfree == 1 && run != bin->runcur)
1782		arena_bin_lower_run(arena, chunk, run, bin);
1783
1784	if (config_stats) {
1785		bin->stats.allocated -= size;
1786		bin->stats.ndalloc++;
1787	}
1788}
1789
1790void
1791arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
1792    size_t pageind, arena_chunk_map_bits_t *bitselm)
1793{
1794	arena_run_t *run;
1795	arena_bin_t *bin;
1796
1797	run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
1798	    arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE));
1799	bin = run->bin;
1800	malloc_mutex_lock(&bin->lock);
1801	arena_dalloc_bin_locked(arena, chunk, ptr, bitselm);
1802	malloc_mutex_unlock(&bin->lock);
1803}
1804
1805void
1806arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
1807    size_t pageind)
1808{
1809	arena_chunk_map_bits_t *bitselm;
1810
1811	if (config_debug) {
1812		/* arena_ptr_small_binind_get() does extra sanity checking. */
1813		assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
1814		    pageind)) != BININD_INVALID);
1815	}
1816	bitselm = arena_bitselm_get(chunk, pageind);
1817	arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm);
1818}
1819
1820#ifdef JEMALLOC_JET
1821#undef arena_dalloc_junk_large
1822#define	arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl)
1823#endif
1824static void
1825arena_dalloc_junk_large(void *ptr, size_t usize)
1826{
1827
1828	if (config_fill && opt_junk)
1829		memset(ptr, 0x5a, usize);
1830}
1831#ifdef JEMALLOC_JET
1832#undef arena_dalloc_junk_large
1833#define	arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large)
1834arena_dalloc_junk_large_t *arena_dalloc_junk_large =
1835    JEMALLOC_N(arena_dalloc_junk_large_impl);
1836#endif
1837
1838void
1839arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr)
1840{
1841
1842	if (config_fill || config_stats) {
1843		size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
1844		size_t usize = arena_mapbits_large_size_get(chunk, pageind);
1845
1846		arena_dalloc_junk_large(ptr, usize);
1847		if (config_stats) {
1848			arena->stats.ndalloc_large++;
1849			arena->stats.allocated_large -= usize;
1850			arena->stats.lstats[(usize >> LG_PAGE) - 1].ndalloc++;
1851			arena->stats.lstats[(usize >> LG_PAGE) - 1].curruns--;
1852		}
1853	}
1854
1855	arena_run_dalloc(arena, (arena_run_t *)ptr, true, false);
1856}
1857
1858void
1859arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
1860{
1861
1862	malloc_mutex_lock(&arena->lock);
1863	arena_dalloc_large_locked(arena, chunk, ptr);
1864	malloc_mutex_unlock(&arena->lock);
1865}
1866
1867static void
1868arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
1869    size_t oldsize, size_t size)
1870{
1871
1872	assert(size < oldsize);
1873
1874	/*
1875	 * Shrink the run, and make trailing pages available for other
1876	 * allocations.
1877	 */
1878	malloc_mutex_lock(&arena->lock);
1879	arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
1880	    true);
1881	if (config_stats) {
1882		arena->stats.ndalloc_large++;
1883		arena->stats.allocated_large -= oldsize;
1884		arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++;
1885		arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--;
1886
1887		arena->stats.nmalloc_large++;
1888		arena->stats.nrequests_large++;
1889		arena->stats.allocated_large += size;
1890		arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
1891		arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
1892		arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
1893	}
1894	malloc_mutex_unlock(&arena->lock);
1895}
1896
1897static bool
1898arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
1899    size_t oldsize, size_t size, size_t extra, bool zero)
1900{
1901	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
1902	size_t npages = oldsize >> LG_PAGE;
1903	size_t followsize;
1904
1905	assert(oldsize == arena_mapbits_large_size_get(chunk, pageind));
1906
1907	/* Try to extend the run. */
1908	assert(size + extra > oldsize);
1909	malloc_mutex_lock(&arena->lock);
1910	if (pageind + npages < chunk_npages &&
1911	    arena_mapbits_allocated_get(chunk, pageind+npages) == 0 &&
1912	    (followsize = arena_mapbits_unallocated_size_get(chunk,
1913	    pageind+npages)) >= size - oldsize) {
1914		/*
1915		 * The next run is available and sufficiently large.  Split the
1916		 * following run, then merge the first part with the existing
1917		 * allocation.
1918		 */
1919		size_t flag_dirty;
1920		size_t splitsize = (oldsize + followsize <= size + extra)
1921		    ? followsize : size + extra - oldsize;
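		/*
		 * Worked example (all sizes assumed, 4 KiB pages): growing a
		 * 2-page run (oldsize == 8192) toward size + extra == 20480
		 * when the neighboring free run offers followsize == 32768
		 * gives splitsize == 12288, i.e. only the three pages actually
		 * needed are split off.  If instead followsize were 4096 (with
		 * size == 12288, so the request is still satisfiable), the
		 * entire neighbor would be consumed and the extra portion
		 * simply not provided; extra is best-effort only.
		 */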
1922		arena_run_split_large(arena, (arena_run_t *)((uintptr_t)chunk +
1923		    ((pageind+npages) << LG_PAGE)), splitsize, zero);
1924
1925		size = oldsize + splitsize;
1926		npages = size >> LG_PAGE;
1927
1928		/*
1929		 * Mark the extended run as dirty if either portion of the run
1930		 * was dirty before allocation.  This is rather pedantic,
1931		 * because there's not actually any sequence of events that
1932		 * could cause the resulting run to be passed to
1933		 * arena_run_dalloc() with the dirty argument set to false
1934		 * (which is when dirty flag consistency would really matter).
1935		 */
1936		flag_dirty = arena_mapbits_dirty_get(chunk, pageind) |
1937		    arena_mapbits_dirty_get(chunk, pageind+npages-1);
1938		arena_mapbits_large_set(chunk, pageind, size, flag_dirty);
1939		arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty);
1940
1941		if (config_stats) {
1942			arena->stats.ndalloc_large++;
1943			arena->stats.allocated_large -= oldsize;
1944			arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++;
1945			arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--;
1946
1947			arena->stats.nmalloc_large++;
1948			arena->stats.nrequests_large++;
1949			arena->stats.allocated_large += size;
1950			arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
1951			arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
1952			arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
1953		}
1954		malloc_mutex_unlock(&arena->lock);
1955		return (false);
1956	}
1957	malloc_mutex_unlock(&arena->lock);
1958
1959	return (true);
1960}
1961
1962#ifdef JEMALLOC_JET
1963#undef arena_ralloc_junk_large
1964#define	arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large_impl)
1965#endif
1966static void
1967arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize)
1968{
1969
1970	if (config_fill && opt_junk) {
1971		memset((void *)((uintptr_t)ptr + usize), 0x5a,
1972		    old_usize - usize);
1973	}
1974}
1975#ifdef JEMALLOC_JET
1976#undef arena_ralloc_junk_large
1977#define	arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large)
1978arena_ralloc_junk_large_t *arena_ralloc_junk_large =
1979    JEMALLOC_N(arena_ralloc_junk_large_impl);
1980#endif
1981
1982/*
1983 * Try to resize a large allocation, in order to avoid copying.  This will
1984 * always fail when growing an object if the following run is already in use.
1985 */
1986static bool
1987arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra,
1988    bool zero)
1989{
1990	size_t psize;
1991
1992	psize = PAGE_CEILING(size + extra);
1993	if (psize == oldsize) {
1994		/* Same size class. */
1995		return (false);
1996	} else {
1997		arena_chunk_t *chunk;
1998		arena_t *arena;
1999
2000		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2001		arena = chunk->arena;
2002
2003		if (psize < oldsize) {
2004			/* Fill before shrinking in order to avoid a race. */
2005			arena_ralloc_junk_large(ptr, oldsize, psize);
2006			arena_ralloc_large_shrink(arena, chunk, ptr, oldsize,
2007			    psize);
2008			return (false);
2009		} else {
2010			bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
2011			    oldsize, PAGE_CEILING(size),
2012			    psize - PAGE_CEILING(size), zero);
2013			if (config_fill && ret == false && zero == false) {
2014				if (opt_junk) {
2015					memset((void *)((uintptr_t)ptr +
2016					    oldsize), 0xa5, isalloc(ptr,
2017					    config_prof) - oldsize);
2018				} else if (opt_zero) {
2019					memset((void *)((uintptr_t)ptr +
2020					    oldsize), 0, isalloc(ptr,
2021					    config_prof) - oldsize);
2022				}
2023			}
2024			return (ret);
2025		}
2026	}
2027}
2028
2029bool
2030arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra,
2031    bool zero)
2032{
2033
2034	/*
2035	 * Avoid moving the allocation if the size class can be left the same.
2036	 */
2037	if (oldsize <= arena_maxclass) {
2038		if (oldsize <= SMALL_MAXCLASS) {
2039			assert(arena_bin_info[small_size2bin(oldsize)].reg_size
2040			    == oldsize);
2041			if ((size + extra <= SMALL_MAXCLASS &&
2042			    small_size2bin(size + extra) ==
2043			    small_size2bin(oldsize)) || (size <= oldsize &&
2044			    size + extra >= oldsize))
2045				return (false);
2046		} else {
2047			assert(size <= arena_maxclass);
2048			if (size + extra > SMALL_MAXCLASS) {
2049				if (arena_ralloc_large(ptr, oldsize, size,
2050				    extra, zero) == false)
2051					return (false);
2052			}
2053		}
2054	}
2055
2056	/* Reallocation would require a move. */
2057	return (true);
2058}
2059
2060void *
2061arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size,
2062    size_t extra, size_t alignment, bool zero, bool try_tcache_alloc,
2063    bool try_tcache_dalloc)
2064{
2065	void *ret;
2066	size_t copysize;
2067
2068	/* Try to avoid moving the allocation. */
2069	if (arena_ralloc_no_move(ptr, oldsize, size, extra, zero) == false)
2070		return (ptr);
2071
2072	/*
2073	 * size and oldsize are different enough that we need to move the
2074	 * object.  In that case, fall back to allocating new space and
2075	 * copying.
2076	 */
2077	if (alignment != 0) {
2078		size_t usize = sa2u(size + extra, alignment);
2079		if (usize == 0)
2080			return (NULL);
2081		ret = ipalloct(usize, alignment, zero, try_tcache_alloc, arena);
2082	} else
2083		ret = arena_malloc(arena, size + extra, zero, try_tcache_alloc);
2084
2085	if (ret == NULL) {
2086		if (extra == 0)
2087			return (NULL);
2088		/* Try again, this time without extra. */
2089		if (alignment != 0) {
2090			size_t usize = sa2u(size, alignment);
2091			if (usize == 0)
2092				return (NULL);
2093			ret = ipalloct(usize, alignment, zero, try_tcache_alloc,
2094			    arena);
2095		} else
2096			ret = arena_malloc(arena, size, zero, try_tcache_alloc);
2097
2098		if (ret == NULL)
2099			return (NULL);
2100	}
2101
2102	/* Junk/zero-filling were already done by ipalloc()/arena_malloc(). */
2103
2104	/*
2105	 * Copy at most size bytes (not size+extra), since the caller has no
2106	 * expectation that the extra bytes will be reliably preserved.
2107	 */
2108	copysize = (size < oldsize) ? size : oldsize;
2109	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize);
2110	memcpy(ret, ptr, copysize);
2111	iqalloc(ptr, try_tcache_dalloc);
2112	return (ret);
2113}
2114
2115dss_prec_t
2116arena_dss_prec_get(arena_t *arena)
2117{
2118	dss_prec_t ret;
2119
2120	malloc_mutex_lock(&arena->lock);
2121	ret = arena->dss_prec;
2122	malloc_mutex_unlock(&arena->lock);
2123	return (ret);
2124}
2125
2126bool
2127arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec)
2128{
2129
2130	if (have_dss == false)
2131		return (dss_prec != dss_prec_disabled);
2132	malloc_mutex_lock(&arena->lock);
2133	arena->dss_prec = dss_prec;
2134	malloc_mutex_unlock(&arena->lock);
2135	return (false);
2136}
2137
2138void
2139arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive,
2140    size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
2141    malloc_large_stats_t *lstats)
2142{
2143	unsigned i;
2144
2145	malloc_mutex_lock(&arena->lock);
2146	*dss = dss_prec_names[arena->dss_prec];
2147	*nactive += arena->nactive;
2148	*ndirty += arena->ndirty;
2149
2150	astats->mapped += arena->stats.mapped;
2151	astats->npurge += arena->stats.npurge;
2152	astats->nmadvise += arena->stats.nmadvise;
2153	astats->purged += arena->stats.purged;
2154	astats->allocated_large += arena->stats.allocated_large;
2155	astats->nmalloc_large += arena->stats.nmalloc_large;
2156	astats->ndalloc_large += arena->stats.ndalloc_large;
2157	astats->nrequests_large += arena->stats.nrequests_large;
2158	astats->allocated_huge += arena->stats.allocated_huge;
2159	astats->nmalloc_huge += arena->stats.nmalloc_huge;
2160	astats->ndalloc_huge += arena->stats.ndalloc_huge;
2161	astats->nrequests_huge += arena->stats.nrequests_huge;
2162
2163	for (i = 0; i < nlclasses; i++) {
2164		lstats[i].nmalloc += arena->stats.lstats[i].nmalloc;
2165		lstats[i].ndalloc += arena->stats.lstats[i].ndalloc;
2166		lstats[i].nrequests += arena->stats.lstats[i].nrequests;
2167		lstats[i].curruns += arena->stats.lstats[i].curruns;
2168	}
2169	malloc_mutex_unlock(&arena->lock);
2170
2171	for (i = 0; i < NBINS; i++) {
2172		arena_bin_t *bin = &arena->bins[i];
2173
2174		malloc_mutex_lock(&bin->lock);
2175		bstats[i].allocated += bin->stats.allocated;
2176		bstats[i].nmalloc += bin->stats.nmalloc;
2177		bstats[i].ndalloc += bin->stats.ndalloc;
2178		bstats[i].nrequests += bin->stats.nrequests;
2179		if (config_tcache) {
2180			bstats[i].nfills += bin->stats.nfills;
2181			bstats[i].nflushes += bin->stats.nflushes;
2182		}
2183		bstats[i].nruns += bin->stats.nruns;
2184		bstats[i].reruns += bin->stats.reruns;
2185		bstats[i].curruns += bin->stats.curruns;
2186		malloc_mutex_unlock(&bin->lock);
2187	}
2188}
2189
2190bool
2191arena_new(arena_t *arena, unsigned ind)
2192{
2193	unsigned i;
2194	arena_bin_t *bin;
2195
2196	arena->ind = ind;
2197	arena->nthreads = 0;
2198	arena->chunk_alloc = chunk_alloc_default;
2199	arena->chunk_dalloc = chunk_dalloc_default;
2200
2201	if (malloc_mutex_init(&arena->lock))
2202		return (true);
2203
2204	if (config_stats) {
2205		memset(&arena->stats, 0, sizeof(arena_stats_t));
2206		arena->stats.lstats =
2207		    (malloc_large_stats_t *)base_alloc(nlclasses *
2208		    sizeof(malloc_large_stats_t));
2209		if (arena->stats.lstats == NULL)
2210			return (true);
2211		memset(arena->stats.lstats, 0, nlclasses *
2212		    sizeof(malloc_large_stats_t));
2213		if (config_tcache)
2214			ql_new(&arena->tcache_ql);
2215	}
2216
2217	if (config_prof)
2218		arena->prof_accumbytes = 0;
2219
2220	arena->dss_prec = chunk_dss_prec_get();
2221
2222	arena->spare = NULL;
2223
2224	arena->nactive = 0;
2225	arena->ndirty = 0;
2226
2227	arena_avail_tree_new(&arena->runs_avail);
2228	ql_new(&arena->runs_dirty);
2229
2230	/* Initialize bins. */
2231	for (i = 0; i < NBINS; i++) {
2232		bin = &arena->bins[i];
2233		if (malloc_mutex_init(&bin->lock))
2234			return (true);
2235		bin->runcur = NULL;
2236		arena_run_tree_new(&bin->runs);
2237		if (config_stats)
2238			memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2239	}
2240
2241	return (false);
2242}
2243
2244/*
2245 * Calculate bin_info->run_size such that it meets the following constraints:
2246 *
2247 *   *) bin_info->run_size >= min_run_size
2248 *   *) bin_info->run_size <= arena_maxclass
2249 *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2250 *   *) bin_info->nregs <= RUN_MAXREGS
2251 *
2252 * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also
2253 * calculated here, since these settings are all interdependent.
2254 */
2255static size_t
2256bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
2257{
2258	size_t pad_size;
2259	size_t try_run_size, good_run_size;
2260	uint32_t try_nregs, good_nregs;
2261	uint32_t try_hdr_size, good_hdr_size;
2262	uint32_t try_bitmap_offset, good_bitmap_offset;
2263	uint32_t try_redzone0_offset, good_redzone0_offset;
2264
2265	assert(min_run_size >= PAGE);
2266	assert(min_run_size <= arena_maxclass);
2267
2268	/*
2269	 * Determine redzone size based on minimum alignment and minimum
2270	 * redzone size.  Add padding to the end of the run if it is needed to
2271	 * align the regions.  The padding allows each redzone to be half the
2272	 * minimum alignment; without the padding, each redzone would have to
2273	 * be twice as large in order to maintain alignment.
2274	 */
2275	if (config_fill && opt_redzone) {
2276		size_t align_min = ZU(1) << (jemalloc_ffs(bin_info->reg_size) -
2277		    1);
2278		if (align_min <= REDZONE_MINSIZE) {
2279			bin_info->redzone_size = REDZONE_MINSIZE;
2280			pad_size = 0;
2281		} else {
2282			bin_info->redzone_size = align_min >> 1;
2283			pad_size = bin_info->redzone_size;
2284		}
2285	} else {
2286		bin_info->redzone_size = 0;
2287		pad_size = 0;
2288	}
2289	bin_info->reg_interval = bin_info->reg_size +
2290	    (bin_info->redzone_size << 1);
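	/*
	 * Example (assuming REDZONE_MINSIZE is 16): for reg_size == 192 the
	 * minimum alignment is 64 (the lowest set bit), so redzone_size == 32,
	 * pad_size == 32, and reg_interval == 192 + 2*32 == 256.  Each region
	 * remains 64-byte aligned while each redzone is only 32 bytes; the
	 * trailing 32-byte pad supplies the other half of the alignment once
	 * per run rather than once per region.
	 */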
2291
2292	/*
2293	 * Calculate known-valid settings before entering the run_size
2294	 * expansion loop, so that the first part of the loop always copies
2295	 * valid settings.
2296	 *
2297	 * The do..while loop iteratively reduces the number of regions until
2298	 * the run header and the regions no longer overlap.  A closed formula
2299	 * would be quite messy, since there is an interdependency between the
2300	 * header's mask length and the number of regions.
2301	 */
2302	try_run_size = min_run_size;
2303	try_nregs = ((try_run_size - sizeof(arena_run_t)) /
2304	    bin_info->reg_interval)
2305	    + 1; /* Counter-act try_nregs-- in loop. */
2306	if (try_nregs > RUN_MAXREGS) {
2307		try_nregs = RUN_MAXREGS
2308		    + 1; /* Counter-act try_nregs-- in loop. */
2309	}
2310	do {
2311		try_nregs--;
2312		try_hdr_size = sizeof(arena_run_t);
2313		/* Pad to a long boundary. */
2314		try_hdr_size = LONG_CEILING(try_hdr_size);
2315		try_bitmap_offset = try_hdr_size;
2316		/* Add space for bitmap. */
2317		try_hdr_size += bitmap_size(try_nregs);
2318		try_redzone0_offset = try_run_size - (try_nregs *
2319		    bin_info->reg_interval) - pad_size;
2320	} while (try_hdr_size > try_redzone0_offset);
2321
2322	/* run_size expansion loop. */
2323	do {
2324		/*
2325		 * Copy valid settings before trying more aggressive settings.
2326		 */
2327		good_run_size = try_run_size;
2328		good_nregs = try_nregs;
2329		good_hdr_size = try_hdr_size;
2330		good_bitmap_offset = try_bitmap_offset;
2331		good_redzone0_offset = try_redzone0_offset;
2332
2333		/* Try more aggressive settings. */
2334		try_run_size += PAGE;
2335		try_nregs = ((try_run_size - sizeof(arena_run_t) - pad_size) /
2336		    bin_info->reg_interval)
2337		    + 1; /* Counter-act try_nregs-- in loop. */
2338		if (try_nregs > RUN_MAXREGS) {
2339			try_nregs = RUN_MAXREGS
2340			    + 1; /* Counter-act try_nregs-- in loop. */
2341		}
2342		do {
2343			try_nregs--;
2344			try_hdr_size = sizeof(arena_run_t);
2345			/* Pad to a long boundary. */
2346			try_hdr_size = LONG_CEILING(try_hdr_size);
2347			try_bitmap_offset = try_hdr_size;
2348			/* Add space for bitmap. */
2349			try_hdr_size += bitmap_size(try_nregs);
2350			try_redzone0_offset = try_run_size - (try_nregs *
2351			    bin_info->reg_interval) - pad_size;
2352		} while (try_hdr_size > try_redzone0_offset);
2353	} while (try_run_size <= arena_maxclass
2354	    && RUN_MAX_OVRHD * (bin_info->reg_interval << 3) >
2355	    RUN_MAX_OVRHD_RELAX
2356	    && (try_redzone0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
2357	    && try_nregs < RUN_MAXREGS);
2358
2359	assert(good_hdr_size <= good_redzone0_offset);
2360
2361	/* Copy final settings. */
2362	bin_info->run_size = good_run_size;
2363	bin_info->nregs = good_nregs;
2364	bin_info->bitmap_offset = good_bitmap_offset;
2365	bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size;
2366
2367	assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs
2368	    * bin_info->reg_interval) + pad_size == bin_info->run_size);
2369
2370	return (good_run_size);
2371}
2372
2373static void
2374bin_info_init(void)
2375{
2376	arena_bin_info_t *bin_info;
2377	size_t prev_run_size = PAGE;
2378
2379#define	BIN_INFO_INIT_bin_yes(index, size) \
2380	bin_info = &arena_bin_info[index];				\
2381	bin_info->reg_size = size;					\
2382	prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\
2383	bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
2384#define	BIN_INFO_INIT_bin_no(index, size)
2385#define	SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup)	\
2386	BIN_INFO_INIT_bin_##bin(index, (ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))
2387	SIZE_CLASSES
2388#undef BIN_INFO_INIT_bin_yes
2389#undef BIN_INFO_INIT_bin_no
2390#undef SC
2391}
2392
2393void
2394arena_boot(void)
2395{
2396	size_t header_size;
2397	unsigned i;
2398
2399	/*
2400	 * Compute the header size such that it is large enough to contain the
2401	 * page map.  The page map is biased to omit entries for the header
2402	 * itself, so some iteration is necessary to compute the map bias.
2403	 *
2404	 * 1) Compute safe header_size and map_bias values that include enough
2405	 *    space for an unbiased page map.
2406	 * 2) Refine map_bias based on (1) to omit the header pages in the page
2407	 *    map.  The resulting map_bias may be one too small.
2408	 * 3) Refine map_bias based on (2).  The result will be >= the result
2409	 *    from (2), and will always be correct.
2410	 */
2411	map_bias = 0;
2412	for (i = 0; i < 3; i++) {
2413		header_size = offsetof(arena_chunk_t, map_bits) +
2414		    ((sizeof(arena_chunk_map_bits_t) +
2415		    sizeof(arena_chunk_map_misc_t)) * (chunk_npages-map_bias));
2416		map_bias = (header_size >> LG_PAGE) + ((header_size & PAGE_MASK)
2417		    != 0);
2418	}
2419	assert(map_bias > 0);
2420
2421	map_misc_offset = offsetof(arena_chunk_t, map_bits) +
2422	    sizeof(arena_chunk_map_bits_t) * (chunk_npages-map_bias);
2423
2424	arena_maxclass = chunksize - (map_bias << LG_PAGE);
2425
2426	bin_info_init();
2427}
2428
2429void
2430arena_prefork(arena_t *arena)
2431{
2432	unsigned i;
2433
2434	malloc_mutex_prefork(&arena->lock);
2435	for (i = 0; i < NBINS; i++)
2436		malloc_mutex_prefork(&arena->bins[i].lock);
2437}
2438
2439void
2440arena_postfork_parent(arena_t *arena)
2441{
2442	unsigned i;
2443
2444	for (i = 0; i < NBINS; i++)
2445		malloc_mutex_postfork_parent(&arena->bins[i].lock);
2446	malloc_mutex_postfork_parent(&arena->lock);
2447}
2448
2449void
2450arena_postfork_child(arena_t *arena)
2451{
2452	unsigned i;
2453
2454	for (i = 0; i < NBINS; i++)
2455		malloc_mutex_postfork_child(&arena->bins[i].lock);
2456	malloc_mutex_postfork_child(&arena->lock);
2457}
2458