prof.h revision 0050a0f7e6ea5a33c9aed769e2652afe20714194
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#define	PROF_PREFIX_DEFAULT		"jeprof"
#define	LG_PROF_SAMPLE_DEFAULT		19
#define	LG_PROF_INTERVAL_DEFAULT	-1

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define	PROF_BT_MAX			128

/* Maximum number of backtraces to store in each per thread LRU cache. */
#define	PROF_TCMAX			1024

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all ctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * prof_tdata pointers close to NULL are used to encode state information
 * needed for cleaning up during thread shutdown.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
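
/*
 * Illustrative note (not part of the original header): code that reads the
 * thread-specific prof_tdata pointer can detect all of these sentinels, plus
 * NULL, with a single comparison, as prof_tdata_get() below does:
 *
 *	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
 *		... handle NULL, REINCARNATED, or PURGATORY ...
 */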

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	nignore;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/*
	 * Profiling counters.  An allocation/deallocation pair can operate on
	 * different prof_thr_cnt_t objects that are linked into the same
	 * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
	 * negative.  In principle it is possible for the *bytes counters to
	 * overflow/underflow, but a general solution would require something
	 * like 128-bit counters; this implementation doesn't bother to solve
	 * that problem.
	 */
	int64_t		curobjs;
	int64_t		curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};

struct prof_thr_cnt_s {
	/* Linkage into prof_ctx_t's cnts_ql. */
	ql_elm(prof_thr_cnt_t)	cnts_link;

	/* Linkage into thread's LRU. */
	ql_elm(prof_thr_cnt_t)	lru_link;

	/*
	 * Associated context.  If a thread frees an object that it did not
	 * allocate, it is possible that the context is not cached in the
	 * thread's hash table, in which case it must be able to look up the
	 * context, insert a new prof_thr_cnt_t into the thread's hash table,
	 * and link it into the prof_ctx_t's cnts_ql.
	 */
	prof_ctx_t		*ctx;

	/*
	 * Threads use memory barriers to update the counters.  Since there is
	 * only ever one writer, the only challenge is for the reader to get a
	 * consistent read of the counters.
	 *
	 * The writer uses this series of operations:
	 *
	 * 1) Increment epoch to an odd number.
	 * 2) Update counters.
	 * 3) Increment epoch to an even number.
	 *
	 * The reader must ensure 1) that the epoch is even while it reads the
	 * counters, and 2) that the epoch doesn't change between the time it
	 * starts and finishes reading the counters.  (A reader sketch follows
	 * this struct definition.)
	 */
	unsigned		epoch;

	/* Profiling counters. */
	prof_cnt_t		cnts;
};
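
/*
 * Illustrative reader sketch (not part of the original header): one way to
 * take a consistent snapshot of a prof_thr_cnt_t under the epoch protocol
 * described in prof_thr_cnt_s above.  Memory barriers are omitted for
 * brevity, and "tcnt"/"snapshot" are placeholder names:
 *
 *	prof_cnt_t snapshot;
 *	unsigned epoch0;
 *	do {
 *		epoch0 = tcnt->epoch;
 *		snapshot = tcnt->cnts;
 *	} while ((epoch0 & 1U) != 0 || epoch0 != tcnt->epoch);
 *
 * The dump code in prof.c uses a retry loop of this form when summing
 * per thread counters into each ctx's cnt_summed.
 */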

struct prof_ctx_s {
	/* Associated backtrace. */
	prof_bt_t		*bt;

	/* Protects nlimbo, cnt_merged, and cnts_ql. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this ctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing per thread counters associated with this ctx.
	 *   - Preparing to destroy this ctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * ctx.
	 */
	unsigned		nlimbo;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* When threads exit, they merge their stats into cnt_merged. */
	prof_cnt_t		cnt_merged;

	/*
	 * List of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	ql_head(prof_thr_cnt_t)	cnts_ql;
};

struct prof_tdata_s {
	/*
	 * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *).  Each thread keeps a
	 * cache of backtraces, with associated thread-specific prof_thr_cnt_t
	 * objects.  Other threads may read the prof_thr_cnt_t contents, but
	 * only the owning thread ever writes them.
	 *
	 * Upon thread exit, the thread must merge all the prof_thr_cnt_t
	 * counter data into the associated prof_ctx_t objects, and unlink/free
	 * the prof_thr_cnt_t objects.
	 */
	ckh_t			bt2cnt;

	/* LRU for contents of bt2cnt. */
	ql_head(prof_thr_cnt_t)	lru_ql;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			**vec;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		threshold;
	uint64_t		accum;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;
/*
 * Even if opt_prof is true, sampling can be temporarily disabled by setting
 * opt_prof_active to false.  No locking is used when updating opt_prof_active,
 * so there are no guarantees regarding how long it will take for all threads
 * to notice state changes.
 */
extern bool	opt_prof_active;
extern size_t	opt_lg_prof_sample;   /* lg(mean bytes between samples). */
extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
extern bool	opt_prof_gdump;       /* High-water memory dumping. */
extern bool	opt_prof_final;       /* Final profile dumping. */
extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
extern bool	opt_prof_accum;       /* Report cumulative bytes. */
extern char	opt_prof_prefix[PATH_MAX + 1];

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;
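
/*
 * Worked example (illustrative, not from the original header): with
 * opt_lg_prof_interval == 30, prof_interval is 2^30 bytes (1 GiB).  Because
 * every arena counts toward the threshold independently, the gap between two
 * consecutive dumps can approach (1 GiB * narenas) in the worst case, while
 * the average gap stays near 1 GiB.
 */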

/*
 * If true, promote small sampled objects to large objects, since small run
 * headers do not have embedded profile context pointers.
 */
extern bool	prof_promote;

void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt, unsigned nignore);
prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
void	prof_idump(void);
bool	prof_mdump(const char *filename);
void	prof_gdump(void);
prof_tdata_t	*prof_tdata_init(void);
void	prof_tdata_cleanup(void *arg);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(void);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#define	PROF_ALLOC_PREP(nignore, size, ret) do {			\
	prof_tdata_t *prof_tdata;					\
	prof_bt_t bt;							\
									\
	assert(size == s2u(size));					\
									\
	prof_tdata = prof_tdata_get();					\
	if (prof_tdata == NULL) {					\
		ret = NULL;						\
		break;							\
	}								\
									\
	if (opt_prof_active == false) {					\
		/* Sampling is currently inactive, so avoid sampling. */\
		ret = (prof_thr_cnt_t *)(uintptr_t)1U;			\
	} else if (opt_lg_prof_sample == 0) {				\
		/* Don't bother with sampling logic, since sampling   */\
		/* interval is 1.                                     */\
		bt_init(&bt, prof_tdata->vec);				\
		prof_backtrace(&bt, nignore);				\
		ret = prof_lookup(&bt);					\
	} else {							\
		if (prof_tdata->threshold == 0) {			\
			/* Initialize.  Seed the prng differently for */\
			/* each thread.                               */\
			prof_tdata->prng_state =			\
			    (uint64_t)(uintptr_t)&size;			\
			prof_sample_threshold_update(prof_tdata);	\
		}							\
									\
		/* Determine whether to capture a backtrace based on  */\
		/* whether size is enough for prof_tdata->accum to    */\
		/* reach prof_tdata->threshold.  However, delay       */\
		/* updating these variables until prof_{m,re}alloc(), */\
		/* because we don't know for sure that the allocation */\
		/* will succeed.                                      */\
		/*                                                    */\
		/* Use subtraction rather than addition to avoid      */\
		/* potential integer overflow.                        */\
		if (size >= prof_tdata->threshold -			\
		    prof_tdata->accum) {				\
			bt_init(&bt, prof_tdata->vec);			\
			prof_backtrace(&bt, nignore);			\
			ret = prof_lookup(&bt);				\
		} else							\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
	}								\
} while (0)
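
/*
 * Usage sketch (illustrative; a simplified version of the prof paths in
 * jemalloc.c).  The caller supplies the number of stack frames to ignore,
 * the usable size, and an lvalue that receives NULL when no thread data is
 * available (treat as OOM), (prof_thr_cnt_t *)(uintptr_t)1U when the
 * allocation should not be sampled, or a counter object to hand to
 * prof_malloc():
 *
 *	prof_thr_cnt_t *cnt;
 *	size_t usize = s2u(size);
 *	PROF_ALLOC_PREP(1, usize, cnt);
 *	if (cnt == NULL)
 *		return (NULL);
 *	ret = imalloc(usize);
 *	if (ret == NULL)
 *		return (NULL);
 *	prof_malloc(ret, usize, cnt);
 */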

#ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)

prof_tdata_t	*prof_tdata_get(void);
void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t	*prof_ctx_get(const void *ptr);
void	prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
bool	prof_sample_accum_update(size_t size);
void	prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
void	prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
    size_t old_size, prof_ctx_t *old_ctx);
void	prof_free(const void *ptr, size_t size);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
malloc_tsd_externs(prof_tdata, prof_tdata_t *)
malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
    prof_tdata_cleanup)

JEMALLOC_INLINE prof_tdata_t *
prof_tdata_get(void)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = *prof_tdata_tsd_get();
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {
		if (prof_tdata == NULL)
			prof_tdata = prof_tdata_init();
		else
			prof_tdata = NULL;
	}

	return (prof_tdata);
}

JEMALLOC_INLINE void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
	uint64_t r;
	double u;

	cassert(config_prof);

	/*
	 * Compute sample threshold as a geometrically distributed random
	 * variable with mean (2^opt_lg_prof_sample).
	 *
	 *                         __        __
	 *                         |  log(u)  |                     1
	 * prof_tdata->threshold = | -------- |, where p = -------------------
	 *                         | log(1-p) |             opt_lg_prof_sample
	 *                                                 2
	 *
	 * For more information on the math, see:
	 *
	 *   Non-Uniform Random Variate Generation
	 *   Luc Devroye
	 *   Springer-Verlag, New York, 1986
	 *   pp 500
	 *   (http://cg.scs.carleton.ca/~luc/rnbookindex.html)
	 */
	prng64(r, 53, prof_tdata->prng_state,
	    UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
	u = (double)r * (1.0/9007199254740992.0L);
	prof_tdata->threshold = (uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
	    + (uint64_t)1U;
}
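
/*
 * Worked example (illustrative, not from the original header): with the
 * default opt_lg_prof_sample of 19, p = 2^-19, so the geometric variable
 * above has mean 2^19 = 524288; on average one backtrace is captured per
 * 512 KiB of allocated bytes.
 */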

JEMALLOC_INLINE prof_ctx_t *
prof_ctx_get(const void *ptr)
{
	prof_ctx_t *ret;
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		ret = arena_prof_ctx_get(ptr);
	} else
		ret = huge_prof_ctx_get(ptr);

	return (ret);
}

JEMALLOC_INLINE void
prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		arena_prof_ctx_set(ptr, ctx);
	} else
		huge_prof_ctx_set(ptr, ctx);
}

JEMALLOC_INLINE bool
prof_sample_accum_update(size_t size)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);
	/* Sampling logic is unnecessary if the interval is 1. */
	assert(opt_lg_prof_sample != 0);

	prof_tdata = *prof_tdata_tsd_get();
	assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);

	/* Take care to avoid integer overflow. */
	if (size >= prof_tdata->threshold - prof_tdata->accum) {
		prof_tdata->accum -= (prof_tdata->threshold - size);
		/* Compute new sample threshold. */
		prof_sample_threshold_update(prof_tdata);
		while (prof_tdata->accum >= prof_tdata->threshold) {
			prof_tdata->accum -= prof_tdata->threshold;
			prof_sample_threshold_update(prof_tdata);
		}
		return (false);
	} else {
		prof_tdata->accum += size;
		return (true);
	}
}
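
/*
 * Worked example (illustrative, not from the original header): a false
 * return means "sample this allocation".  Suppose prof_tdata->threshold ==
 * 524288 and prof_tdata->accum == 500000.  An allocation of size 30000
 * satisfies 30000 >= 524288 - 500000, so it is sampled and accum becomes
 * 500000 - (524288 - 30000) == 5712 before a new threshold is drawn; an
 * allocation of size 20000 would instead advance accum to 520000 and return
 * true (don't sample).
 */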

JEMALLOC_INLINE void
prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(size == isalloc(ptr, true));

	if (opt_lg_prof_sample != 0) {
		if (prof_sample_accum_update(size)) {
			/*
			 * Don't sample.  For malloc()-like allocation, it is
			 * always possible to tell in advance how large an
			 * object's usable size will be, so there should never
			 * be a difference between the size passed to
			 * PROF_ALLOC_PREP() and prof_malloc().
			 */
			assert((uintptr_t)cnt == (uintptr_t)1U);
		}
	}

	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, cnt->ctx);

		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += size;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += size;
		}
		/*********/
		mb_write();
		/*********/
		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
	} else
		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
}

JEMALLOC_INLINE void
prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
    size_t old_size, prof_ctx_t *old_ctx)
{
	prof_thr_cnt_t *told_cnt;

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);

	if (ptr != NULL) {
		assert(size == isalloc(ptr, true));
		if (opt_lg_prof_sample != 0) {
			if (prof_sample_accum_update(size)) {
				/*
				 * Don't sample.  The size passed to
				 * PROF_ALLOC_PREP() was larger than what
				 * actually got allocated, so a backtrace was
				 * captured for this allocation, even though
				 * its actual size was insufficient to cross
				 * the sample threshold.
				 */
				cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
			}
		}
	}

	if ((uintptr_t)old_ctx > (uintptr_t)1U) {
		told_cnt = prof_lookup(old_ctx->bt);
		if (told_cnt == NULL) {
			/*
			 * It's too late to propagate OOM for this realloc(),
			 * so operate directly on old_ctx->cnt_merged.
			 */
			malloc_mutex_lock(old_ctx->lock);
			old_ctx->cnt_merged.curobjs--;
			old_ctx->cnt_merged.curbytes -= old_size;
			malloc_mutex_unlock(old_ctx->lock);
			told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
		}
	} else
		told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;

	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, cnt->ctx);
		cnt->epoch++;
	} else
		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U) {
		told_cnt->cnts.curobjs--;
		told_cnt->cnts.curbytes -= old_size;
	}
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += size;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += size;
		}
	}
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U)
		cnt->epoch++;
	/*********/
	mb_write(); /* Not strictly necessary. */
}

JEMALLOC_INLINE void
prof_free(const void *ptr, size_t size)
{
	prof_ctx_t *ctx = prof_ctx_get(ptr);

	cassert(config_prof);

	if ((uintptr_t)ctx > (uintptr_t)1) {
		assert(size == isalloc(ptr, true));
		prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);

		if (tcnt != NULL) {
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
			tcnt->cnts.curobjs--;
			tcnt->cnts.curbytes -= size;
			/*********/
			mb_write();
			/*********/
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
		} else {
			/*
			 * OOM during free() cannot be propagated, so operate
			 * directly on ctx->cnt_merged.
			 */
			malloc_mutex_lock(ctx->lock);
			ctx->cnt_merged.curobjs--;
			ctx->cnt_merged.curbytes -= size;
			malloc_mutex_unlock(ctx->lock);
		}
	}
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/