#define	JEMALLOC_PROF_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/

#ifdef JEMALLOC_PROF_LIBUNWIND
#define	UNW_LOCAL_ONLY
#include <libunwind.h>
#endif

#ifdef JEMALLOC_PROF_LIBGCC
#include <unwind.h>
#endif

/******************************************************************************/
/* Data. */

bool		opt_prof = false;
bool		opt_prof_active = true;
bool		opt_prof_thread_active_init = true;
size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool		opt_prof_gdump = false;
bool		opt_prof_final = false;
bool		opt_prof_leak = false;
bool		opt_prof_accum = false;
char		opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

/*
 * Initialized as opt_prof_active, and accessed via
 * prof_active_[gs]et{_unlocked,}().
 */
bool			prof_active;
static malloc_mutex_t	prof_active_mtx;

/*
 * Initialized as opt_prof_thread_active_init, and accessed via
 * prof_thread_active_init_[gs]et().
 */
static bool		prof_thread_active_init;
static malloc_mutex_t	prof_thread_active_init_mtx;

/*
 * Initialized as opt_prof_gdump, and accessed via
 * prof_gdump_[gs]et{_unlocked,}().
 */
bool			prof_gdump_val;
static malloc_mutex_t	prof_gdump_mtx;

uint64_t	prof_interval = 0;

size_t		lg_prof_sample;

/*
 * Table of mutexes that are shared among gctx's.  These are leaf locks, so
 * there is no problem with using them for more than one gctx at the same time.
 * The primary motivation for this sharing, though, is that gctx's are
 * ephemeral, and destroying mutexes causes complications for systems that
 * allocate when creating/destroying mutexes.
 */
static malloc_mutex_t	*gctx_locks;
static unsigned		cum_gctxs; /* Atomic counter. */

/*
 * Table of mutexes that are shared among tdata's.  No operations require
 * holding multiple tdata locks, so there is no problem with using them for more
 * than one tdata at the same time, even though a gctx lock may be acquired
 * while holding a tdata lock.
 */
static malloc_mutex_t	*tdata_locks;

/*
 * Global hash of (prof_bt_t *)-->(prof_gctx_t *).  This is the master data
 * structure that knows about all backtraces currently captured.
 */
static ckh_t		bt2gctx;
static malloc_mutex_t	bt2gctx_mtx;

/*
 * Tree of all extant prof_tdata_t structures, regardless of state,
 * {attached,detached,expired}.
 */
static prof_tdata_tree_t	tdatas;
static malloc_mutex_t	tdatas_mtx;

static uint64_t		next_thr_uid;
static malloc_mutex_t	next_thr_uid_mtx;

static malloc_mutex_t	prof_dump_seq_mtx;
static uint64_t		prof_dump_seq;
static uint64_t		prof_dump_iseq;
static uint64_t		prof_dump_mseq;
static uint64_t		prof_dump_useq;

/*
 * This buffer is rather large for stack allocation, so use a single buffer for
 * all profile dumps.
 */
static malloc_mutex_t	prof_dump_mtx;
static char		prof_dump_buf[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PROF_DUMP_BUFSIZE
#else
    1
#endif
];
static size_t		prof_dump_buf_end;
static int		prof_dump_fd;

/* Do not dump any profiles until bootstrapping is complete. */
static bool		prof_booted = false;

/******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 */

static bool	prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx);
static void	prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
static bool	prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
    bool even_if_attached);
static void	prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata,
    bool even_if_attached);
static char	*prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name);

/******************************************************************************/
/* Red-black trees. */

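/*
 * tctx's are totally ordered by (thr_uid, thr_discrim, tctx_uid); each stage
 * of the comparison uses the branchless (a > b) - (a < b) idiom to map the
 * result onto {-1, 0, 1}.
 */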
JEMALLOC_INLINE_C int
prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b)
{
	uint64_t a_thr_uid = a->thr_uid;
	uint64_t b_thr_uid = b->thr_uid;
	int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
	if (ret == 0) {
		uint64_t a_thr_discrim = a->thr_discrim;
		uint64_t b_thr_discrim = b->thr_discrim;
		ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim <
		    b_thr_discrim);
		if (ret == 0) {
			uint64_t a_tctx_uid = a->tctx_uid;
			uint64_t b_tctx_uid = b->tctx_uid;
			ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid <
			    b_tctx_uid);
		}
	}
	return (ret);
}

rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t,
    tctx_link, prof_tctx_comp)

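/*
 * gctx's are ordered by backtrace content: first memcmp() over the common
 * prefix of the two instruction pointer vectors, with vector length as the
 * tie breaker.
 */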
JEMALLOC_INLINE_C int
prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b)
{
	unsigned a_len = a->bt.len;
	unsigned b_len = b->bt.len;
	unsigned comp_len = (a_len < b_len) ? a_len : b_len;
	int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
	if (ret == 0)
		ret = (a_len > b_len) - (a_len < b_len);
	return (ret);
}

rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link,
    prof_gctx_comp)

JEMALLOC_INLINE_C int
prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b)
{
	int ret;
	uint64_t a_uid = a->thr_uid;
	uint64_t b_uid = b->thr_uid;

	ret = ((a_uid > b_uid) - (a_uid < b_uid));
	if (ret == 0) {
		uint64_t a_discrim = a->thr_discrim;
		uint64_t b_discrim = b->thr_discrim;

		ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim));
	}
	return (ret);
}

rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
    prof_tdata_comp)

/******************************************************************************/

void
prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	if (updated) {
		/*
		 * Compute a new sample threshold.  This isn't very important in
		 * practice, because this function is rarely executed, so the
		 * potential for sample bias is minimal except in contrived
		 * programs.
		 */
		tdata = prof_tdata_get(tsd, true);
		if (tdata != NULL)
			prof_sample_threshold_update(tdata);
	}

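	/*
	 * tctx values of NULL and (prof_tctx_t *)1U are sentinels rather than
	 * real contexts; only genuine pointers carry state that may need to
	 * be torn down.
	 */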
	if ((uintptr_t)tctx > (uintptr_t)1U) {
		malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
		tctx->prepared = false;
		if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx))
			prof_tctx_destroy(tsd, tctx);
		else
			malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
	}
}

void
prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx)
{

	prof_tctx_set(tsdn, ptr, usize, tctx);

	malloc_mutex_lock(tsdn, tctx->tdata->lock);
	tctx->cnts.curobjs++;
	tctx->cnts.curbytes += usize;
	if (opt_prof_accum) {
		tctx->cnts.accumobjs++;
		tctx->cnts.accumbytes += usize;
	}
	tctx->prepared = false;
	malloc_mutex_unlock(tsdn, tctx->tdata->lock);
}

void
prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx)
{

	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
	assert(tctx->cnts.curobjs > 0);
	assert(tctx->cnts.curbytes >= usize);
	tctx->cnts.curobjs--;
	tctx->cnts.curbytes -= usize;

	if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx))
		prof_tctx_destroy(tsd, tctx);
	else
		malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
}

void
bt_init(prof_bt_t *bt, void **vec)
{

	cassert(config_prof);

	bt->vec = vec;
	bt->len = 0;
}

JEMALLOC_INLINE_C void
prof_enter(tsd_t *tsd, prof_tdata_t *tdata)
{

	cassert(config_prof);
	assert(tdata == prof_tdata_get(tsd, false));

	if (tdata != NULL) {
		assert(!tdata->enq);
		tdata->enq = true;
	}

	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
}

JEMALLOC_INLINE_C void
prof_leave(tsd_t *tsd, prof_tdata_t *tdata)
{

	cassert(config_prof);
	assert(tdata == prof_tdata_get(tsd, false));

	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);

	if (tdata != NULL) {
		bool idump, gdump;

		assert(tdata->enq);
		tdata->enq = false;
		idump = tdata->enq_idump;
		tdata->enq_idump = false;
		gdump = tdata->enq_gdump;
		tdata->enq_gdump = false;

		if (idump)
			prof_idump(tsd_tsdn(tsd));
		if (gdump)
			prof_gdump(tsd_tsdn(tsd));
	}
}

#ifdef JEMALLOC_PROF_LIBUNWIND
void
prof_backtrace(prof_bt_t *bt)
{
	int nframes;

	cassert(config_prof);
	assert(bt->len == 0);
	assert(bt->vec != NULL);

	nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
	if (nframes <= 0)
		return;
	bt->len = nframes;
}
#elif (defined(JEMALLOC_PROF_LIBGCC))
static _Unwind_Reason_Code
prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
{

	cassert(config_prof);

	return (_URC_NO_REASON);
}

static _Unwind_Reason_Code
prof_unwind_callback(struct _Unwind_Context *context, void *arg)
{
	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
	void *ip;

	cassert(config_prof);

	ip = (void *)_Unwind_GetIP(context);
	if (ip == NULL)
		return (_URC_END_OF_STACK);
	data->bt->vec[data->bt->len] = ip;
	data->bt->len++;
	if (data->bt->len == data->max)
		return (_URC_END_OF_STACK);

	return (_URC_NO_REASON);
}

void
prof_backtrace(prof_bt_t *bt)
{
	prof_unwind_data_t data = {bt, PROF_BT_MAX};

	cassert(config_prof);

	_Unwind_Backtrace(prof_unwind_callback, &data);
}
#elif (defined(JEMALLOC_PROF_GCC))
void
prof_backtrace(prof_bt_t *bt)
{
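	/*
	 * __builtin_frame_address() and __builtin_return_address() require
	 * compile-time constant arguments, so the frame walk cannot be a
	 * loop; instead BT_FRAME is instantiated once per frame, up to
	 * PROF_BT_MAX (128) frames.
	 */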
#define	BT_FRAME(i)							\
	if ((i) < PROF_BT_MAX) {					\
		void *p;						\
		if (__builtin_frame_address(i) == 0)			\
			return;						\
		p = __builtin_return_address(i);			\
		if (p == NULL)						\
			return;						\
		bt->vec[(i)] = p;					\
		bt->len = (i) + 1;					\
	} else								\
		return;

	cassert(config_prof);

	BT_FRAME(0)
	BT_FRAME(1)
	BT_FRAME(2)
	BT_FRAME(3)
	BT_FRAME(4)
	BT_FRAME(5)
	BT_FRAME(6)
	BT_FRAME(7)
	BT_FRAME(8)
	BT_FRAME(9)

	BT_FRAME(10)
	BT_FRAME(11)
	BT_FRAME(12)
	BT_FRAME(13)
	BT_FRAME(14)
	BT_FRAME(15)
	BT_FRAME(16)
	BT_FRAME(17)
	BT_FRAME(18)
	BT_FRAME(19)

	BT_FRAME(20)
	BT_FRAME(21)
	BT_FRAME(22)
	BT_FRAME(23)
	BT_FRAME(24)
	BT_FRAME(25)
	BT_FRAME(26)
	BT_FRAME(27)
	BT_FRAME(28)
	BT_FRAME(29)

	BT_FRAME(30)
	BT_FRAME(31)
	BT_FRAME(32)
	BT_FRAME(33)
	BT_FRAME(34)
	BT_FRAME(35)
	BT_FRAME(36)
	BT_FRAME(37)
	BT_FRAME(38)
	BT_FRAME(39)

	BT_FRAME(40)
	BT_FRAME(41)
	BT_FRAME(42)
	BT_FRAME(43)
	BT_FRAME(44)
	BT_FRAME(45)
	BT_FRAME(46)
	BT_FRAME(47)
	BT_FRAME(48)
	BT_FRAME(49)

	BT_FRAME(50)
	BT_FRAME(51)
	BT_FRAME(52)
	BT_FRAME(53)
	BT_FRAME(54)
	BT_FRAME(55)
	BT_FRAME(56)
	BT_FRAME(57)
	BT_FRAME(58)
	BT_FRAME(59)

	BT_FRAME(60)
	BT_FRAME(61)
	BT_FRAME(62)
	BT_FRAME(63)
	BT_FRAME(64)
	BT_FRAME(65)
	BT_FRAME(66)
	BT_FRAME(67)
	BT_FRAME(68)
	BT_FRAME(69)

	BT_FRAME(70)
	BT_FRAME(71)
	BT_FRAME(72)
	BT_FRAME(73)
	BT_FRAME(74)
	BT_FRAME(75)
	BT_FRAME(76)
	BT_FRAME(77)
	BT_FRAME(78)
	BT_FRAME(79)

	BT_FRAME(80)
	BT_FRAME(81)
	BT_FRAME(82)
	BT_FRAME(83)
	BT_FRAME(84)
	BT_FRAME(85)
	BT_FRAME(86)
	BT_FRAME(87)
	BT_FRAME(88)
	BT_FRAME(89)

	BT_FRAME(90)
	BT_FRAME(91)
	BT_FRAME(92)
	BT_FRAME(93)
	BT_FRAME(94)
	BT_FRAME(95)
	BT_FRAME(96)
	BT_FRAME(97)
	BT_FRAME(98)
	BT_FRAME(99)

	BT_FRAME(100)
	BT_FRAME(101)
	BT_FRAME(102)
	BT_FRAME(103)
	BT_FRAME(104)
	BT_FRAME(105)
	BT_FRAME(106)
	BT_FRAME(107)
	BT_FRAME(108)
	BT_FRAME(109)

	BT_FRAME(110)
	BT_FRAME(111)
	BT_FRAME(112)
	BT_FRAME(113)
	BT_FRAME(114)
	BT_FRAME(115)
	BT_FRAME(116)
	BT_FRAME(117)
	BT_FRAME(118)
	BT_FRAME(119)

	BT_FRAME(120)
	BT_FRAME(121)
	BT_FRAME(122)
	BT_FRAME(123)
	BT_FRAME(124)
	BT_FRAME(125)
	BT_FRAME(126)
	BT_FRAME(127)
#undef BT_FRAME
}
#else
void
prof_backtrace(prof_bt_t *bt)
{

	cassert(config_prof);
	not_reached();
}
#endif

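/*
 * Assign gctx and tdata locks from their respective shared tables: gctx
 * locks are handed out round-robin via an atomic counter, and tdata locks
 * are chosen by thr_uid modulo the table size, so contention spreads evenly.
 */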
static malloc_mutex_t *
prof_gctx_mutex_choose(void)
{
	unsigned ngctxs = atomic_add_u(&cum_gctxs, 1);

	return (&gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]);
}

static malloc_mutex_t *
prof_tdata_mutex_choose(uint64_t thr_uid)
{

	return (&tdata_locks[thr_uid % PROF_NTDATA_LOCKS]);
}

static prof_gctx_t *
prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt)
{
	/*
	 * Create a single allocation that has space for vec of length bt->len.
	 */
	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
	    size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
	    true);
	if (gctx == NULL)
		return (NULL);
	gctx->lock = prof_gctx_mutex_choose();
	/*
	 * Set nlimbo to 1, in order to avoid a race condition with
	 * prof_tctx_destroy()/prof_gctx_try_destroy().
	 */
	gctx->nlimbo = 1;
	tctx_tree_new(&gctx->tctxs);
	/* Duplicate bt. */
	memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *));
	gctx->bt.vec = gctx->vec;
	gctx->bt.len = bt->len;
	return (gctx);
}

static void
prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx,
    prof_tdata_t *tdata)
{

	cassert(config_prof);

	/*
	 * Check that gctx is still unused by any thread cache before destroying
	 * it.  prof_lookup() increments gctx->nlimbo in order to avoid a race
	 * condition with this function, as does prof_tctx_destroy() in order to
	 * avoid a race between the main body of prof_tctx_destroy() and entry
	 * into this function.
	 */
	prof_enter(tsd, tdata_self);
	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
	assert(gctx->nlimbo != 0);
	if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
		/* Remove gctx from bt2gctx. */
		if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL))
			not_reached();
		prof_leave(tsd, tdata_self);
		/* Destroy gctx. */
		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
		idalloctm(tsd_tsdn(tsd), gctx, NULL, true, true);
	} else {
		/*
		 * Compensate for increment in prof_tctx_destroy() or
		 * prof_lookup().
		 */
		gctx->nlimbo--;
		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
		prof_leave(tsd, tdata_self);
	}
}

static bool
prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx)
{

	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);

	if (opt_prof_accum)
		return (false);
	if (tctx->cnts.curobjs != 0)
		return (false);
	if (tctx->prepared)
		return (false);
	return (true);
}

static bool
prof_gctx_should_destroy(prof_gctx_t *gctx)
{

	if (opt_prof_accum)
		return (false);
	if (!tctx_tree_empty(&gctx->tctxs))
		return (false);
	if (gctx->nlimbo != 0)
		return (false);
	return (true);
}

static void
prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx)
{
	prof_tdata_t *tdata = tctx->tdata;
	prof_gctx_t *gctx = tctx->gctx;
	bool destroy_tdata, destroy_tctx, destroy_gctx;

	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);

	assert(tctx->cnts.curobjs == 0);
	assert(tctx->cnts.curbytes == 0);
	assert(!opt_prof_accum);
	assert(tctx->cnts.accumobjs == 0);
	assert(tctx->cnts.accumbytes == 0);

	ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
	destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false);
	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);

	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
	switch (tctx->state) {
	case prof_tctx_state_nominal:
		tctx_tree_remove(&gctx->tctxs, tctx);
		destroy_tctx = true;
		if (prof_gctx_should_destroy(gctx)) {
			/*
			 * Increment gctx->nlimbo in order to keep another
			 * thread from winning the race to destroy gctx while
			 * this one has gctx->lock dropped.  Without this, it
			 * would be possible for another thread to:
			 *
			 * 1) Sample an allocation associated with gctx.
			 * 2) Deallocate the sampled object.
			 * 3) Successfully prof_gctx_try_destroy(gctx).
			 *
			 * The result would be that gctx no longer exists by the
			 * time this thread accesses it in
			 * prof_gctx_try_destroy().
			 */
			gctx->nlimbo++;
			destroy_gctx = true;
		} else
			destroy_gctx = false;
		break;
	case prof_tctx_state_dumping:
		/*
		 * A dumping thread needs tctx to remain valid until dumping
		 * has finished.  Change state such that the dumping thread will
		 * complete destruction during a late dump iteration phase.
		 */
		tctx->state = prof_tctx_state_purgatory;
		destroy_tctx = false;
		destroy_gctx = false;
		break;
	default:
		not_reached();
		destroy_tctx = false;
		destroy_gctx = false;
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
	if (destroy_gctx) {
		prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx,
		    tdata);
	}

	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);

	if (destroy_tdata)
		prof_tdata_destroy(tsd, tdata, false);

	if (destroy_tctx)
		idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true);
}

static bool
prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
    void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx)
{
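	/*
	 * ckh stores keys and values as void *; these unions convert between
	 * the typed pointers and void * without casting through incompatible
	 * pointer types.
	 */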
	union {
		prof_gctx_t	*p;
		void		*v;
	} gctx;
	union {
		prof_bt_t	*p;
		void		*v;
	} btkey;
	bool new_gctx;

	prof_enter(tsd, tdata);
	if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
		/* bt has never been seen before.  Insert it. */
		gctx.p = prof_gctx_create(tsd_tsdn(tsd), bt);
		if (gctx.v == NULL) {
			prof_leave(tsd, tdata);
			return (true);
		}
		btkey.p = &gctx.p->bt;
		if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
			/* OOM. */
			prof_leave(tsd, tdata);
			idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, true);
			return (true);
		}
		new_gctx = true;
	} else {
		/*
		 * Increment nlimbo, in order to avoid a race condition with
		 * prof_tctx_destroy()/prof_gctx_try_destroy().
		 */
		malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock);
		gctx.p->nlimbo++;
		malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock);
		new_gctx = false;
	}
	prof_leave(tsd, tdata);

	*p_btkey = btkey.v;
	*p_gctx = gctx.p;
	*p_new_gctx = new_gctx;
	return (false);
}

prof_tctx_t *
prof_lookup(tsd_t *tsd, prof_bt_t *bt)
{
	union {
		prof_tctx_t	*p;
		void		*v;
	} ret;
	prof_tdata_t *tdata;
	bool not_found;

	cassert(config_prof);

	tdata = prof_tdata_get(tsd, false);
	if (tdata == NULL)
		return (NULL);

	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
	not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
	if (!not_found) /* Note double negative! */
		ret.p->prepared = true;
	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
	if (not_found) {
		void *btkey;
		prof_gctx_t *gctx;
		bool new_gctx, error;

		/*
		 * This thread's cache lacks bt.  Look for it in the global
		 * cache.
		 */
		if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
		    &new_gctx))
			return (NULL);

		/* Link a prof_tctx_t into gctx for this thread. */
		ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
		    size2index(sizeof(prof_tctx_t)), false, NULL, true,
		    arena_ichoose(tsd, NULL), true);
		if (ret.p == NULL) {
			if (new_gctx)
				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
			return (NULL);
		}
		ret.p->tdata = tdata;
		ret.p->thr_uid = tdata->thr_uid;
		ret.p->thr_discrim = tdata->thr_discrim;
		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
		ret.p->gctx = gctx;
		ret.p->tctx_uid = tdata->tctx_uid_next++;
		ret.p->prepared = true;
		ret.p->state = prof_tctx_state_initializing;
		malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
		error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
		if (error) {
			if (new_gctx)
				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
			idalloctm(tsd_tsdn(tsd), ret.v, NULL, true, true);
			return (NULL);
		}
		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
		ret.p->state = prof_tctx_state_nominal;
		tctx_tree_insert(&gctx->tctxs, ret.p);
		gctx->nlimbo--;
		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
	}

	return (ret.p);
}

/*
 * The bodies of this function and prof_leakcheck() are compiled out unless heap
 * profiling is enabled, so that it is possible to compile jemalloc with
 * floating point support completely disabled.  Avoiding floating point code is
 * important on memory-constrained systems, but it also enables a workaround for
 * versions of glibc that don't properly save/restore floating point registers
 * during dynamic lazy symbol loading (which internally calls into whatever
 * malloc implementation happens to be integrated into the application).  Note
 * that some compilers (e.g. gcc 4.8) may use floating point registers for
 * fast memory moves, so jemalloc must be compiled with such optimizations
 * disabled (e.g. -mno-sse) in order for the workaround to be complete.
 */
void
prof_sample_threshold_update(prof_tdata_t *tdata)
{
#ifdef JEMALLOC_PROF
	uint64_t r;
	double u;

	if (!config_prof)
		return;

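	/* A zero threshold makes every allocation a sample point. */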
	if (lg_prof_sample == 0) {
		tdata->bytes_until_sample = 0;
		return;
	}

	/*
	 * Compute sample interval as a geometrically distributed random
	 * variable with mean (2^lg_prof_sample).
	 *
	 *                             __        __
	 *                             |  log(u)  |                     1
	 * tdata->bytes_until_sample = | -------- |, where p = ---------------
	 *                             | log(1-p) |             lg_prof_sample
	 *                                                     2
	 *
	 * For more information on the math, see:
	 *
	 *   Non-Uniform Random Variate Generation
	 *   Luc Devroye
	 *   Springer-Verlag, New York, 1986
	 *   pp 500
	 *   (http://luc.devroye.org/rnbookindex.html)
	 */
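	/*
	 * r is uniform over [0..2^53), so dividing by 2^53
	 * (9007199254740992) yields u uniform over [0..1).
	 */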
	r = prng_lg_range_u64(&tdata->prng_state, 53);
	u = (double)r * (1.0/9007199254740992.0L);
	tdata->bytes_until_sample = (uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
	    + (uint64_t)1U;
#endif
}

#ifdef JEMALLOC_JET
static prof_tdata_t *
prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
{
	size_t *tdata_count = (size_t *)arg;

	(*tdata_count)++;

	return (NULL);
}

size_t
prof_tdata_count(void)
{
	size_t tdata_count = 0;
	tsdn_t *tsdn;

	tsdn = tsdn_fetch();
	malloc_mutex_lock(tsdn, &tdatas_mtx);
	tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
	    (void *)&tdata_count);
	malloc_mutex_unlock(tsdn, &tdatas_mtx);

	return (tdata_count);
}
#endif

#ifdef JEMALLOC_JET
size_t
prof_bt_count(void)
{
	size_t bt_count;
	tsd_t *tsd;
	prof_tdata_t *tdata;

	tsd = tsd_fetch();
	tdata = prof_tdata_get(tsd, false);
	if (tdata == NULL)
		return (0);

	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
	bt_count = ckh_count(&bt2gctx);
	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);

	return (bt_count);
}
#endif

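/*
 * Under JEMALLOC_JET, prof_dump_open is exported through a function pointer
 * so that tests can interpose a custom implementation.
 */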
#ifdef JEMALLOC_JET
#undef prof_dump_open
#define	prof_dump_open JEMALLOC_N(prof_dump_open_impl)
#endif
static int
prof_dump_open(bool propagate_err, const char *filename)
{
	int fd;

	fd = creat(filename, 0644);
	if (fd == -1 && !propagate_err) {
		malloc_printf("<jemalloc>: creat(\"%s\", 0644) failed\n",
		    filename);
		if (opt_abort)
			abort();
	}

	return (fd);
}
#ifdef JEMALLOC_JET
#undef prof_dump_open
#define	prof_dump_open JEMALLOC_N(prof_dump_open)
prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl);
#endif

static bool
prof_dump_flush(bool propagate_err)
{
	bool ret = false;
	ssize_t err;

	cassert(config_prof);

	err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
	if (err == -1) {
		if (!propagate_err) {
			malloc_write("<jemalloc>: write() failed during heap "
			    "profile flush\n");
			if (opt_abort)
				abort();
		}
		ret = true;
	}
	prof_dump_buf_end = 0;

	return (ret);
}

static bool
prof_dump_close(bool propagate_err)
{
	bool ret;

	assert(prof_dump_fd != -1);
	ret = prof_dump_flush(propagate_err);
	close(prof_dump_fd);
	prof_dump_fd = -1;

	return (ret);
}

static bool
prof_dump_write(bool propagate_err, const char *s)
{
	size_t i, slen, n;

	cassert(config_prof);

	i = 0;
	slen = strlen(s);
	while (i < slen) {
		/* Flush the buffer if it is full. */
		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
			if (prof_dump_flush(propagate_err) && propagate_err)
				return (true);

		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
			/* Finish writing. */
			n = slen - i;
		} else {
			/* Write as much of s as will fit. */
			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
		}
		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
		prof_dump_buf_end += n;
		i += n;
	}

	return (false);
}

JEMALLOC_FORMAT_PRINTF(2, 3)
static bool
prof_dump_printf(bool propagate_err, const char *format, ...)
{
	bool ret;
	va_list ap;
	char buf[PROF_PRINTF_BUFSIZE];

	va_start(ap, format);
	malloc_vsnprintf(buf, sizeof(buf), format, ap);
	va_end(ap);
	ret = prof_dump_write(propagate_err, buf);

	return (ret);
}

static void
prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata)
{

	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);

	malloc_mutex_lock(tsdn, tctx->gctx->lock);

	switch (tctx->state) {
	case prof_tctx_state_initializing:
		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
		return;
	case prof_tctx_state_nominal:
		tctx->state = prof_tctx_state_dumping;
		malloc_mutex_unlock(tsdn, tctx->gctx->lock);

		memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));

		tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
		tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
		if (opt_prof_accum) {
			tdata->cnt_summed.accumobjs +=
			    tctx->dump_cnts.accumobjs;
			tdata->cnt_summed.accumbytes +=
			    tctx->dump_cnts.accumbytes;
		}
		break;
	case prof_tctx_state_dumping:
	case prof_tctx_state_purgatory:
		not_reached();
	}
}

static void
prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx)
{

	malloc_mutex_assert_owner(tsdn, gctx->lock);

	gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
	gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
	if (opt_prof_accum) {
		gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
		gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
	}
}

static prof_tctx_t *
prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg)
{
	tsdn_t *tsdn = (tsdn_t *)arg;

	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);

	switch (tctx->state) {
	case prof_tctx_state_nominal:
		/* New since dumping started; ignore. */
		break;
	case prof_tctx_state_dumping:
	case prof_tctx_state_purgatory:
		prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
		break;
	default:
		not_reached();
	}

	return (NULL);
}

struct prof_tctx_dump_iter_arg_s {
	tsdn_t	*tsdn;
	bool	propagate_err;
};

static prof_tctx_t *
prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque)
{
	struct prof_tctx_dump_iter_arg_s *arg =
	    (struct prof_tctx_dump_iter_arg_s *)opaque;

	malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock);

	switch (tctx->state) {
	case prof_tctx_state_initializing:
	case prof_tctx_state_nominal:
		/* Not captured by this dump. */
		break;
	case prof_tctx_state_dumping:
	case prof_tctx_state_purgatory:
		if (prof_dump_printf(arg->propagate_err,
		    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": "
		    "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs,
		    tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs,
		    tctx->dump_cnts.accumbytes))
			return (tctx);
		break;
	default:
		not_reached();
	}
	return (NULL);
}

static prof_tctx_t *
prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg)
{
	tsdn_t *tsdn = (tsdn_t *)arg;
	prof_tctx_t *ret;

	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);

	switch (tctx->state) {
	case prof_tctx_state_nominal:
		/* New since dumping started; ignore. */
		break;
	case prof_tctx_state_dumping:
		tctx->state = prof_tctx_state_nominal;
		break;
	case prof_tctx_state_purgatory:
		ret = tctx;
		goto label_return;
	default:
		not_reached();
	}

	ret = NULL;
label_return:
	return (ret);
}

static void
prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs)
{

	cassert(config_prof);

	malloc_mutex_lock(tsdn, gctx->lock);

	/*
	 * Increment nlimbo so that gctx won't go away before dump.
	 * Additionally, link gctx into the dump list so that it is included in
	 * prof_dump()'s second pass.
	 */
	gctx->nlimbo++;
	gctx_tree_insert(gctxs, gctx);

	memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));

	malloc_mutex_unlock(tsdn, gctx->lock);
}

struct prof_gctx_merge_iter_arg_s {
	tsdn_t	*tsdn;
	size_t	leak_ngctx;
};

static prof_gctx_t *
prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque)
{
	struct prof_gctx_merge_iter_arg_s *arg =
	    (struct prof_gctx_merge_iter_arg_s *)opaque;

	malloc_mutex_lock(arg->tsdn, gctx->lock);
	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
	    (void *)arg->tsdn);
	if (gctx->cnt_summed.curobjs != 0)
		arg->leak_ngctx++;
	malloc_mutex_unlock(arg->tsdn, gctx->lock);

	return (NULL);
}

static void
prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs)
{
	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
	prof_gctx_t *gctx;

	/*
	 * Standard tree iteration won't work here, because as soon as we
	 * decrement gctx->nlimbo and unlock gctx, another thread can
	 * concurrently destroy it, which will corrupt the tree.  Therefore,
	 * tear down the tree one node at a time during iteration.
	 */
	while ((gctx = gctx_tree_first(gctxs)) != NULL) {
		gctx_tree_remove(gctxs, gctx);
		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
		{
			prof_tctx_t *next;

			next = NULL;
			do {
				prof_tctx_t *to_destroy =
				    tctx_tree_iter(&gctx->tctxs, next,
				    prof_tctx_finish_iter,
				    (void *)tsd_tsdn(tsd));
				if (to_destroy != NULL) {
					next = tctx_tree_next(&gctx->tctxs,
					    to_destroy);
					tctx_tree_remove(&gctx->tctxs,
					    to_destroy);
					idalloctm(tsd_tsdn(tsd), to_destroy,
					    NULL, true, true);
				} else
					next = NULL;
			} while (next != NULL);
		}
		gctx->nlimbo--;
		if (prof_gctx_should_destroy(gctx)) {
			gctx->nlimbo++;
			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
			prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
		} else
			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
	}
}

struct prof_tdata_merge_iter_arg_s {
	tsdn_t		*tsdn;
	prof_cnt_t	cnt_all;
};

static prof_tdata_t *
prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
    void *opaque)
{
	struct prof_tdata_merge_iter_arg_s *arg =
	    (struct prof_tdata_merge_iter_arg_s *)opaque;

	malloc_mutex_lock(arg->tsdn, tdata->lock);
	if (!tdata->expired) {
		size_t tabind;
		union {
			prof_tctx_t	*p;
			void		*v;
		} tctx;

		tdata->dumping = true;
		memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
		for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
		    &tctx.v);)
			prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);

		arg->cnt_all.curobjs += tdata->cnt_summed.curobjs;
		arg->cnt_all.curbytes += tdata->cnt_summed.curbytes;
		if (opt_prof_accum) {
			arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs;
			arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes;
		}
	} else
		tdata->dumping = false;
	malloc_mutex_unlock(arg->tsdn, tdata->lock);

	return (NULL);
}

static prof_tdata_t *
prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
{
	bool propagate_err = *(bool *)arg;

	if (!tdata->dumping)
		return (NULL);

	if (prof_dump_printf(propagate_err,
	    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n",
	    tdata->thr_uid, tdata->cnt_summed.curobjs,
	    tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs,
	    tdata->cnt_summed.accumbytes,
	    (tdata->thread_name != NULL) ? " " : "",
	    (tdata->thread_name != NULL) ? tdata->thread_name : ""))
		return (tdata);
	return (NULL);
}

#ifdef JEMALLOC_JET
#undef prof_dump_header
#define	prof_dump_header JEMALLOC_N(prof_dump_header_impl)
#endif
static bool
prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all)
{
	bool ret;

	if (prof_dump_printf(propagate_err,
	    "heap_v2/%"FMTu64"\n"
	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
	    ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs,
	    cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes))
		return (true);

	malloc_mutex_lock(tsdn, &tdatas_mtx);
	ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter,
	    (void *)&propagate_err) != NULL);
	malloc_mutex_unlock(tsdn, &tdatas_mtx);
	return (ret);
}
#ifdef JEMALLOC_JET
#undef prof_dump_header
#define	prof_dump_header JEMALLOC_N(prof_dump_header)
prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl);
#endif

static bool
prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx,
    const prof_bt_t *bt, prof_gctx_tree_t *gctxs)
{
	bool ret;
	unsigned i;
	struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg;

	cassert(config_prof);
	malloc_mutex_assert_owner(tsdn, gctx->lock);

	/* Avoid dumping gctx's that have no useful data. */
	if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
	    (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
		assert(gctx->cnt_summed.curobjs == 0);
		assert(gctx->cnt_summed.curbytes == 0);
		assert(gctx->cnt_summed.accumobjs == 0);
		assert(gctx->cnt_summed.accumbytes == 0);
		ret = false;
		goto label_return;
	}

	if (prof_dump_printf(propagate_err, "@")) {
		ret = true;
		goto label_return;
	}
	for (i = 0; i < bt->len; i++) {
		if (prof_dump_printf(propagate_err, " %#"FMTxPTR,
		    (uintptr_t)bt->vec[i])) {
			ret = true;
			goto label_return;
		}
	}

	if (prof_dump_printf(propagate_err,
	    "\n"
	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
	    gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes,
	    gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) {
		ret = true;
		goto label_return;
	}

	prof_tctx_dump_iter_arg.tsdn = tsdn;
	prof_tctx_dump_iter_arg.propagate_err = propagate_err;
	if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter,
	    (void *)&prof_tctx_dump_iter_arg) != NULL) {
		ret = true;
		goto label_return;
	}

	ret = false;
label_return:
	return (ret);
}

#ifndef _WIN32
JEMALLOC_FORMAT_PRINTF(1, 2)
static int
prof_open_maps(const char *format, ...)
{
	int mfd;
	va_list ap;
	char filename[PATH_MAX + 1];

	va_start(ap, format);
	malloc_vsnprintf(filename, sizeof(filename), format, ap);
	va_end(ap);
	mfd = open(filename, O_RDONLY);

	return (mfd);
}
#endif

static int
prof_getpid(void)
{

#ifdef _WIN32
	return (GetCurrentProcessId());
#else
	return (getpid());
#endif
}

static bool
prof_dump_maps(bool propagate_err)
{
	bool ret;
	int mfd;

	cassert(config_prof);
#ifdef __FreeBSD__
	mfd = prof_open_maps("/proc/curproc/map");
#elif defined(_WIN32)
	mfd = -1; /* Not implemented. */
#else
	{
		int pid = prof_getpid();

		mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid);
		if (mfd == -1)
			mfd = prof_open_maps("/proc/%d/maps", pid);
	}
#endif
	if (mfd != -1) {
		ssize_t nread;

		if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
		    propagate_err) {
			ret = true;
			goto label_return;
		}
		nread = 0;
		do {
			prof_dump_buf_end += nread;
			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
				/* Make space in prof_dump_buf before read(). */
				if (prof_dump_flush(propagate_err) &&
				    propagate_err) {
					ret = true;
					goto label_return;
				}
			}
			nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
			    PROF_DUMP_BUFSIZE - prof_dump_buf_end);
		} while (nread > 0);
	} else {
		ret = true;
		goto label_return;
	}

	ret = false;
label_return:
	if (mfd != -1)
		close(mfd);
	return (ret);
}

/*
 * See prof_sample_threshold_update() comment for why the body of this function
 * is conditionally compiled.
 */
static void
prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx,
    const char *filename)
{

#ifdef JEMALLOC_PROF
	/*
	 * Scaling is equivalent to AdjustSamples() in jeprof, but the result
	 * may differ slightly from what jeprof reports, because here we scale
	 * the summary values, whereas jeprof scales each context individually
	 * and reports the sums of the scaled values.
	 */
	if (cnt_all->curbytes != 0) {
		double sample_period = (double)((uint64_t)1 << lg_prof_sample);
		double ratio = (((double)cnt_all->curbytes) /
		    (double)cnt_all->curobjs) / sample_period;
		double scale_factor = 1.0 / (1.0 - exp(-ratio));
		uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
		    * scale_factor);
		uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
		    scale_factor);

		malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
		    " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
		    curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
		    1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
		malloc_printf(
		    "<jemalloc>: Run jeprof on \"%s\" for leak detail\n",
		    filename);
	}
#endif
}

struct prof_gctx_dump_iter_arg_s {
	tsdn_t	*tsdn;
	bool	propagate_err;
};

static prof_gctx_t *
prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque)
{
	prof_gctx_t *ret;
	struct prof_gctx_dump_iter_arg_s *arg =
	    (struct prof_gctx_dump_iter_arg_s *)opaque;

	malloc_mutex_lock(arg->tsdn, gctx->lock);

	if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt,
	    gctxs)) {
		ret = gctx;
		goto label_return;
	}

	ret = NULL;
label_return:
	malloc_mutex_unlock(arg->tsdn, gctx->lock);
	return (ret);
}

static bool
prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck)
{
	prof_tdata_t *tdata;
	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
	size_t tabind;
	union {
		prof_gctx_t	*p;
		void		*v;
	} gctx;
	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
	struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg;
	prof_gctx_tree_t gctxs;

	cassert(config_prof);

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL)
		return (true);

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
	prof_enter(tsd, tdata);

	/*
	 * Put gctx's in limbo and clear their counters in preparation for
	 * summing.
	 */
	gctx_tree_new(&gctxs);
	for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);)
		prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, &gctxs);

	/*
	 * Iterate over tdatas, and for the non-expired ones snapshot their tctx
	 * stats and merge them into the associated gctx's.
	 */
	prof_tdata_merge_iter_arg.tsdn = tsd_tsdn(tsd);
	memset(&prof_tdata_merge_iter_arg.cnt_all, 0, sizeof(prof_cnt_t));
	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
	    (void *)&prof_tdata_merge_iter_arg);
	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);

	/* Merge tctx stats into gctx's. */
	prof_gctx_merge_iter_arg.tsdn = tsd_tsdn(tsd);
	prof_gctx_merge_iter_arg.leak_ngctx = 0;
	gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter,
	    (void *)&prof_gctx_merge_iter_arg);

	prof_leave(tsd, tdata);

	/* Create dump file. */
	if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1)
		goto label_open_close_error;

	/* Dump profile header. */
	if (prof_dump_header(tsd_tsdn(tsd), propagate_err,
	    &prof_tdata_merge_iter_arg.cnt_all))
		goto label_write_error;

	/* Dump per gctx profile stats. */
	prof_gctx_dump_iter_arg.tsdn = tsd_tsdn(tsd);
	prof_gctx_dump_iter_arg.propagate_err = propagate_err;
	if (gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter,
	    (void *)&prof_gctx_dump_iter_arg) != NULL)
		goto label_write_error;

	/* Dump /proc/<pid>/maps if possible. */
	if (prof_dump_maps(propagate_err))
		goto label_write_error;

	if (prof_dump_close(propagate_err))
		goto label_open_close_error;

	prof_gctx_finish(tsd, &gctxs);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);

	if (leakcheck) {
		prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all,
		    prof_gctx_merge_iter_arg.leak_ngctx, filename);
	}
	return (false);
label_write_error:
	prof_dump_close(propagate_err);
label_open_close_error:
	prof_gctx_finish(tsd, &gctxs);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
	return (true);
}

#define	DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
#define	VSEQ_INVALID		UINT64_C(0xffffffffffffffff)
static void
prof_dump_filename(char *filename, char v, uint64_t vseq)
{

	cassert(config_prof);

	if (vseq != VSEQ_INVALID) {
		/* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"FMTu64".%c%"FMTu64".heap",
		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq);
	} else {
		/* "<prefix>.<pid>.<seq>.<v>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"FMTu64".%c.heap",
		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v);
	}
	prof_dump_seq++;
}

static void
prof_fdump(void)
{
	tsd_t *tsd;
	char filename[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);
	assert(opt_prof_final);
	assert(opt_prof_prefix[0] != '\0');

	if (!prof_booted)
		return;
	tsd = tsd_fetch();

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
	prof_dump_filename(filename, 'f', VSEQ_INVALID);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
	prof_dump(tsd, false, filename, opt_prof_leak);
}

void
prof_idump(tsdn_t *tsdn)
{
	tsd_t *tsd;
	prof_tdata_t *tdata;

	cassert(config_prof);

	if (!prof_booted || tsdn_null(tsdn))
		return;
	tsd = tsdn_tsd(tsdn);
	tdata = prof_tdata_get(tsd, false);
	if (tdata == NULL)
		return;
	if (tdata->enq) {
		tdata->enq_idump = true;
		return;
	}

	if (opt_prof_prefix[0] != '\0') {
		char filename[PATH_MAX + 1];
		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
		prof_dump_filename(filename, 'i', prof_dump_iseq);
		prof_dump_iseq++;
		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
		prof_dump(tsd, false, filename, false);
	}
}

bool
prof_mdump(tsd_t *tsd, const char *filename)
{
	char filename_buf[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (!opt_prof || !prof_booted)
		return (true);

	if (filename == NULL) {
		/* No filename specified, so automatically generate one. */
		if (opt_prof_prefix[0] == '\0')
			return (true);
		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
		prof_dump_mseq++;
		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
		filename = filename_buf;
	}
	return (prof_dump(tsd, true, filename, false));
}

void
prof_gdump(tsdn_t *tsdn)
{
	tsd_t *tsd;
	prof_tdata_t *tdata;

	cassert(config_prof);

	if (!prof_booted || tsdn_null(tsdn))
		return;
	tsd = tsdn_tsd(tsdn);
	tdata = prof_tdata_get(tsd, false);
	if (tdata == NULL)
		return;
	if (tdata->enq) {
		tdata->enq_gdump = true;
		return;
	}

	if (opt_prof_prefix[0] != '\0') {
		char filename[DUMP_FILENAME_BUFSIZE];
		malloc_mutex_lock(tsdn, &prof_dump_seq_mtx);
		prof_dump_filename(filename, 'u', prof_dump_useq);
		prof_dump_useq++;
		malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx);
		prof_dump(tsd, false, filename, false);
	}
}

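/*
 * Hash and equality functions for the backtrace-keyed cuckoo hashes
 * (bt2gctx, bt2tctx); backtraces hash and compare by their raw instruction
 * pointer vectors.
 */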
static void
prof_bt_hash(const void *key, size_t r_hash[2])
{
	prof_bt_t *bt = (prof_bt_t *)key;

	cassert(config_prof);

	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
}

static bool
prof_bt_keycomp(const void *k1, const void *k2)
{
	const prof_bt_t *bt1 = (prof_bt_t *)k1;
	const prof_bt_t *bt2 = (prof_bt_t *)k2;

	cassert(config_prof);

	if (bt1->len != bt2->len)
		return (false);
	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}

JEMALLOC_INLINE_C uint64_t
prof_thr_uid_alloc(tsdn_t *tsdn)
{
	uint64_t thr_uid;

	malloc_mutex_lock(tsdn, &next_thr_uid_mtx);
	thr_uid = next_thr_uid;
	next_thr_uid++;
	malloc_mutex_unlock(tsdn, &next_thr_uid_mtx);

	return (thr_uid);
}

static prof_tdata_t *
prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
    char *thread_name, bool active)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	/* Initialize an empty cache for this thread. */
	tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t),
	    size2index(sizeof(prof_tdata_t)), false, NULL, true,
	    arena_get(TSDN_NULL, 0, true), true);
	if (tdata == NULL)
		return (NULL);

	tdata->lock = prof_tdata_mutex_choose(thr_uid);
	tdata->thr_uid = thr_uid;
	tdata->thr_discrim = thr_discrim;
	tdata->thread_name = thread_name;
	tdata->attached = true;
	tdata->expired = false;
	tdata->tctx_uid_next = 0;

	if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
	    prof_bt_keycomp)) {
		idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true);
		return (NULL);
	}

	tdata->prng_state = (uint64_t)(uintptr_t)tdata;
	prof_sample_threshold_update(tdata);

	tdata->enq = false;
	tdata->enq_idump = false;
	tdata->enq_gdump = false;

	tdata->dumping = false;
	tdata->active = active;

	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
	tdata_tree_insert(&tdatas, tdata);
	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);

	return (tdata);
}

prof_tdata_t *
prof_tdata_init(tsd_t *tsd)
{

	return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
	    NULL, prof_thread_active_init_get(tsd_tsdn(tsd))));
}

static bool
prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached)
{

	if (tdata->attached && !even_if_attached)
		return (false);
	if (ckh_count(&tdata->bt2tctx) != 0)
		return (false);
	return (true);
}

static bool
prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
    bool even_if_attached)
{

	malloc_mutex_assert_owner(tsdn, tdata->lock);

	return (prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
}

static void
prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
    bool even_if_attached)
{

	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx);

	tdata_tree_remove(&tdatas, tdata);

	assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));

	if (tdata->thread_name != NULL)
		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true);
	ckh_delete(tsd, &tdata->bt2tctx);
	idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true);
}

static void
prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached)
{

	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
	prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
}

static void
prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata)
{
	bool destroy_tdata;

	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
	if (tdata->attached) {
		destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
		    true);
		/*
		 * Only detach if !destroy_tdata, because detaching would allow
		 * another thread to win the race to destroy tdata.
		 */
		if (!destroy_tdata)
			tdata->attached = false;
		tsd_prof_tdata_set(tsd, NULL);
	} else
		destroy_tdata = false;
	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
	if (destroy_tdata)
		prof_tdata_destroy(tsd, tdata, true);
}

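/*
 * Replace tdata with a fresh instance that keeps thr_uid but increments
 * thr_discrim, so that successive incarnations of the same thread remain
 * distinguishable in dumps.
 */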
prof_tdata_t *
prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata)
{
	uint64_t thr_uid = tdata->thr_uid;
	uint64_t thr_discrim = tdata->thr_discrim + 1;
	char *thread_name = (tdata->thread_name != NULL) ?
	    prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL;
	bool active = tdata->active;

	prof_tdata_detach(tsd, tdata);
	return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
	    active));
}

static bool
prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata)
{
	bool destroy_tdata;

	malloc_mutex_lock(tsdn, tdata->lock);
	if (!tdata->expired) {
		tdata->expired = true;
		destroy_tdata = tdata->attached ? false :
		    prof_tdata_should_destroy(tsdn, tdata, false);
	} else
		destroy_tdata = false;
	malloc_mutex_unlock(tsdn, tdata->lock);

	return (destroy_tdata);
}

static prof_tdata_t *
prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg)
{
	tsdn_t *tsdn = (tsdn_t *)arg;

	return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL);
}

void
prof_reset(tsd_t *tsd, size_t lg_sample)
{
	prof_tdata_t *next;

	assert(lg_sample < (sizeof(uint64_t) << 3));

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);

	lg_prof_sample = lg_sample;

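	/*
	 * As in prof_gctx_finish(), tree nodes may be destroyed during
	 * iteration, so advance to the successor before tearing down each
	 * expired tdata.
	 */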
1967	next = NULL;
1968	do {
1969		prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
1970		    prof_tdata_reset_iter, (void *)tsd);
1971		if (to_destroy != NULL) {
1972			next = tdata_tree_next(&tdatas, to_destroy);
1973			prof_tdata_destroy_locked(tsd, to_destroy, false);
1974		} else
1975			next = NULL;
1976	} while (next != NULL);
1977
1978	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
1979	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
1980}
1981
1982void
1983prof_tdata_cleanup(tsd_t *tsd)
1984{
1985	prof_tdata_t *tdata;
1986
1987	if (!config_prof)
1988		return;
1989
1990	tdata = tsd_prof_tdata_get(tsd);
1991	if (tdata != NULL)
1992		prof_tdata_detach(tsd, tdata);
1993}
1994
1995bool
1996prof_active_get(tsdn_t *tsdn)
1997{
1998	bool prof_active_current;
1999
2000	malloc_mutex_lock(tsdn, &prof_active_mtx);
2001	prof_active_current = prof_active;
2002	malloc_mutex_unlock(tsdn, &prof_active_mtx);
2003	return (prof_active_current);
2004}
2005
2006bool
2007prof_active_set(tsdn_t *tsdn, bool active)
2008{
2009	bool prof_active_old;
2010
2011	malloc_mutex_lock(tsdn, &prof_active_mtx);
2012	prof_active_old = prof_active;
2013	prof_active = active;
2014	malloc_mutex_unlock(tsdn, &prof_active_mtx);
2015	return (prof_active_old);
2016}
2017
2018const char *
2019prof_thread_name_get(tsd_t *tsd)
2020{
2021	prof_tdata_t *tdata;
2022
2023	tdata = prof_tdata_get(tsd, true);
2024	if (tdata == NULL)
2025		return ("");
2026	return (tdata->thread_name != NULL ? tdata->thread_name : "");
2027}
2028
2029static char *
2030prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name)
2031{
2032	char *ret;
2033	size_t size;
2034
2035	if (thread_name == NULL)
2036		return (NULL);
2037
2038	size = strlen(thread_name) + 1;
2039	if (size == 1)
2040		return ("");
2041
2042	ret = iallocztm(tsdn, size, size2index(size), false, NULL, true,
2043	    arena_get(TSDN_NULL, 0, true), true);
2044	if (ret == NULL)
2045		return (NULL);
2046	memcpy(ret, thread_name, size);
2047	return (ret);
2048}
2049
2050int
2051prof_thread_name_set(tsd_t *tsd, const char *thread_name)
2052{
2053	prof_tdata_t *tdata;
2054	unsigned i;
2055	char *s;
2056
2057	tdata = prof_tdata_get(tsd, true);
2058	if (tdata == NULL)
2059		return (EAGAIN);
2060
2061	/* Validate input. */
2062	if (thread_name == NULL)
2063		return (EFAULT);
2064	for (i = 0; thread_name[i] != '\0'; i++) {
2065		char c = thread_name[i];
2066		if (!isgraph(c) && !isblank(c))
2067			return (EFAULT);
2068	}
2069
2070	s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name);
2071	if (s == NULL)
2072		return (EAGAIN);
2073
2074	if (tdata->thread_name != NULL) {
2075		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true);
2076		tdata->thread_name = NULL;
2077	}
2078	if (strlen(s) > 0)
2079		tdata->thread_name = s;
2080	return (0);
2081}
2082
bool
prof_thread_active_get(tsd_t *tsd)
{
	prof_tdata_t *tdata;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL)
		return (false);
	return (tdata->active);
}

bool
prof_thread_active_set(tsd_t *tsd, bool active)
{
	prof_tdata_t *tdata;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL)
		return (true);
	tdata->active = active;
	return (false);
}

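/*
 * Illustrative sketch (not part of jemalloc): per-thread sampling is toggled
 * via the "thread.prof.active" mallctl:
 *
 *	bool active = false;
 *	mallctl("thread.prof.active", NULL, NULL, (void *)&active,
 *	    sizeof(bool));
 */
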
bool
prof_thread_active_init_get(tsdn_t *tsdn)
{
	bool active_init;

	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
	active_init = prof_thread_active_init;
	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
	return (active_init);
}

bool
prof_thread_active_init_set(tsdn_t *tsdn, bool active_init)
{
	bool active_init_old;

	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
	active_init_old = prof_thread_active_init;
	prof_thread_active_init = active_init;
	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
	return (active_init_old);
}

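/*
 * Illustrative sketch (not part of jemalloc): the value managed here seeds
 * tdata->active for threads created later, and is exposed as the
 * "prof.thread_active_init" mallctl:
 *
 *	bool init = false;
 *	mallctl("prof.thread_active_init", NULL, NULL, (void *)&init,
 *	    sizeof(bool));
 */
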
bool
prof_gdump_get(tsdn_t *tsdn)
{
	bool prof_gdump_current;

	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
	prof_gdump_current = prof_gdump_val;
	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
	return (prof_gdump_current);
}

bool
prof_gdump_set(tsdn_t *tsdn, bool gdump)
{
	bool prof_gdump_old;

	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
	prof_gdump_old = prof_gdump_val;
	prof_gdump_val = gdump;
	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
	return (prof_gdump_old);
}

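/*
 * Illustrative sketch (not part of jemalloc): dumping on every new
 * virtual-memory high-water mark is toggled at run time via the
 * "prof.gdump" mallctl:
 *
 *	bool gdump = true;
 *	mallctl("prof.gdump", NULL, NULL, (void *)&gdump, sizeof(bool));
 */
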
void
prof_boot0(void)
{

	cassert(config_prof);

	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
	    sizeof(PROF_PREFIX_DEFAULT));
}

void
prof_boot1(void)
{

	cassert(config_prof);

	/*
	 * opt_prof must be in its final state before any arenas are
	 * initialized, so this function must be executed early.
	 */

	if (opt_prof_leak && !opt_prof) {
		/*
		 * Enable opt_prof, but in such a way that profiles are never
		 * automatically dumped.
		 */
		opt_prof = true;
		opt_prof_gdump = false;
	} else if (opt_prof) {
		if (opt_lg_prof_interval >= 0) {
			prof_interval = (((uint64_t)1U) <<
			    opt_lg_prof_interval);
		}
	}
}

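/*
 * Illustrative sketch (not part of jemalloc): the options consulted above
 * come from MALLOC_CONF (or the other option channels), set before program
 * startup, e.g.:
 *
 *	MALLOC_CONF="prof:true,prof_leak:true,prof_final:true" ./app
 *
 * With lg_prof_interval left at its default (-1), prof_interval stays 0 and
 * interval-triggered dumps remain disabled.
 */
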
bool
prof_boot2(tsd_t *tsd)
{

	cassert(config_prof);

	if (opt_prof) {
		unsigned i;

		lg_prof_sample = opt_lg_prof_sample;

		prof_active = opt_prof_active;
		if (malloc_mutex_init(&prof_active_mtx, "prof_active",
		    WITNESS_RANK_PROF_ACTIVE))
			return (true);

		prof_gdump_val = opt_prof_gdump;
		if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump",
		    WITNESS_RANK_PROF_GDUMP))
			return (true);

		prof_thread_active_init = opt_prof_thread_active_init;
		if (malloc_mutex_init(&prof_thread_active_init_mtx,
		    "prof_thread_active_init",
		    WITNESS_RANK_PROF_THREAD_ACTIVE_INIT))
			return (true);

		if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
		    prof_bt_keycomp))
			return (true);
		if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
		    WITNESS_RANK_PROF_BT2GCTX))
			return (true);

		tdata_tree_new(&tdatas);
		if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas",
		    WITNESS_RANK_PROF_TDATAS))
			return (true);

		next_thr_uid = 0;
		if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid",
		    WITNESS_RANK_PROF_NEXT_THR_UID))
			return (true);

		if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq",
		    WITNESS_RANK_PROF_DUMP_SEQ))
			return (true);
		if (malloc_mutex_init(&prof_dump_mtx, "prof_dump",
		    WITNESS_RANK_PROF_DUMP))
			return (true);

		if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
		    atexit(prof_fdump) != 0) {
			malloc_write("<jemalloc>: Error in atexit()\n");
			if (opt_abort)
				abort();
		}

		gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
		    PROF_NCTX_LOCKS * sizeof(malloc_mutex_t));
		if (gctx_locks == NULL)
			return (true);
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			if (malloc_mutex_init(&gctx_locks[i], "prof_gctx",
			    WITNESS_RANK_PROF_GCTX))
				return (true);
		}

		tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
		    PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t));
		if (tdata_locks == NULL)
			return (true);
		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
			if (malloc_mutex_init(&tdata_locks[i], "prof_tdata",
			    WITNESS_RANK_PROF_TDATA))
				return (true);
		}
	}

#ifdef JEMALLOC_PROF_LIBGCC
	/*
	 * Cause the backtracing machinery to allocate its internal state
	 * before enabling profiling.
	 */
	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
#endif

	prof_booted = true;

	return (false);
}

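/*
 * Fork protocol: prof_prefork0() and prof_prefork1() acquire every profiling
 * mutex before fork(2), and the postfork functions release them (in reverse
 * acquisition order) in both the parent and the child, so that no prof lock
 * is left held by a thread that does not exist in the child.
 */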
void
prof_prefork0(tsdn_t *tsdn)
{

	if (opt_prof) {
		unsigned i;

		malloc_mutex_prefork(tsdn, &prof_dump_mtx);
		malloc_mutex_prefork(tsdn, &bt2gctx_mtx);
		malloc_mutex_prefork(tsdn, &tdatas_mtx);
		for (i = 0; i < PROF_NTDATA_LOCKS; i++)
			malloc_mutex_prefork(tsdn, &tdata_locks[i]);
		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_prefork(tsdn, &gctx_locks[i]);
	}
}

void
prof_prefork1(tsdn_t *tsdn)
{

	if (opt_prof) {
		malloc_mutex_prefork(tsdn, &prof_active_mtx);
		malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx);
		malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
		malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
		malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
	}
}

void
prof_postfork_parent(tsdn_t *tsdn)
{

	if (opt_prof) {
		unsigned i;

		malloc_mutex_postfork_parent(tsdn,
		    &prof_thread_active_init_mtx);
		malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_active_mtx);
		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]);
		for (i = 0; i < PROF_NTDATA_LOCKS; i++)
			malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]);
		malloc_mutex_postfork_parent(tsdn, &tdatas_mtx);
		malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx);
		malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx);
	}
}

void
prof_postfork_child(tsdn_t *tsdn)
{

	if (opt_prof) {
		unsigned i;

		malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
		malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_active_mtx);
		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_postfork_child(tsdn, &gctx_locks[i]);
		for (i = 0; i < PROF_NTDATA_LOCKS; i++)
			malloc_mutex_postfork_child(tsdn, &tdata_locks[i]);
		malloc_mutex_postfork_child(tsdn, &tdatas_mtx);
		malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx);
		malloc_mutex_postfork_child(tsdn, &prof_dump_mtx);
	}
}

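/*
 * Illustrative sketch (not part of jemalloc's public API): these hooks are
 * not called directly; jemalloc's bootstrap code registers its aggregate
 * fork handlers (which in turn call the prof_{prefork,postfork}* functions
 * above) roughly as:
 *
 *	pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent,
 *	    jemalloc_postfork_child);
 */
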
/******************************************************************************/
