prof.h revision 3860eac17023933180ef5dfb5bd24077cda57dfe
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#define	PROF_PREFIX_DEFAULT		"jeprof"
#define	LG_PROF_SAMPLE_DEFAULT		19
#define	LG_PROF_INTERVAL_DEFAULT	-1

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define	PROF_BT_MAX			128
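
/*
 * Illustrative sketch only (an assumption about prof.c, not part of this
 * header): the __builtin_return_address() flavor of prof_backtrace() has to
 * unroll one handler per frame, roughly as below (line continuations and
 * bounds details omitted), which is why the number of handlers and
 * PROF_BT_MAX must be kept in sync.
 *
 *   #define BT_FRAME(i)
 *       if ((i) < nignore + max) {
 *           void *p;
 *           if (__builtin_frame_address(i) == 0)
 *               return;
 *           p = __builtin_return_address(i);
 *           if (p == NULL)
 *               return;
 *           if ((i) >= nignore) {
 *               bt->vec[(i) - nignore] = p;
 *               bt->len = (i) - nignore + 1;
 *           }
 *       }
 *
 *   BT_FRAME(0)
 *   BT_FRAME(1)
 *   ...
 *   (one instance per frame, up to PROF_BT_MAX plus the ignored frames)
 */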

/* Maximum number of backtraces to store in each per thread LRU cache. */
#define	PROF_TCMAX			1024

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all ctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t	*bt;
	unsigned	nignore;
	unsigned	max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
	/*
	 * Profiling counters.  An allocation/deallocation pair can operate on
	 * different prof_thr_cnt_t objects that are linked into the same
	 * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
	 * negative.  In principle it is possible for the *bytes counters to
	 * overflow/underflow, but a general solution would require something
	 * like 128-bit counters; this implementation doesn't bother to solve
	 * that problem.
	 */
	int64_t		curobjs;
	int64_t		curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};

struct prof_thr_cnt_s {
	/* Linkage into prof_ctx_t's cnts_ql. */
	ql_elm(prof_thr_cnt_t)	cnts_link;

	/* Linkage into thread's LRU. */
	ql_elm(prof_thr_cnt_t)	lru_link;

	/*
	 * Associated context.  If a thread frees an object that it did not
	 * allocate, it is possible that the context is not cached in the
	 * thread's hash table, in which case it must be able to look up the
	 * context, insert a new prof_thr_cnt_t into the thread's hash table,
	 * and link it into the prof_ctx_t's cnts_ql.
	 */
	prof_ctx_t		*ctx;

	/*
	 * Threads use memory barriers to update the counters.  Since there is
	 * only ever one writer, the only challenge is for the reader to get a
	 * consistent read of the counters.
	 *
	 * The writer uses this series of operations:
	 *
	 * 1) Increment epoch to an odd number.
	 * 2) Update counters.
	 * 3) Increment epoch to an even number.
	 *
	 * The reader must assure 1) that the epoch is even while it reads the
	 * counters, and 2) that the epoch doesn't change between the time it
	 * starts and finishes reading the counters.
	 */
	unsigned		epoch;

	/* Profiling counters. */
	prof_cnt_t		cnts;
};
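
/*
 * Reader-side sketch (illustrative only; the actual consumer of this protocol
 * lives in prof.c): a thread that wants a consistent snapshot of cnts can use
 * a seqlock-style loop.  The local names below are made up for the example.
 *
 *   volatile unsigned *epoch = &cnt->epoch;
 *   prof_cnt_t snapshot;
 *
 *   while (true) {
 *       unsigned epoch0 = *epoch;
 *       if (epoch0 & 1U)
 *           continue;
 *       memcpy(&snapshot, &cnt->cnts, sizeof(prof_cnt_t));
 *       if (*epoch == epoch0)
 *           break;
 *   }
 *
 * The loop terminates only if the epoch was even before the copy and did not
 * change while the counters were being read, i.e. no writer intervened.
 */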

struct prof_ctx_s {
	/* Associated backtrace. */
	prof_bt_t		*bt;

	/* Protects nlimbo, cnt_merged, and cnts_ql. */
	malloc_mutex_t		*lock;

	/*
	 * Number of threads that currently cause this ctx to be in a state of
	 * limbo due to one of:
	 *   - Initializing per thread counters associated with this ctx.
	 *   - Preparing to destroy this ctx.
	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
	 * ctx.
	 */
	unsigned		nlimbo;

	/* Temporary storage for summation during dump. */
	prof_cnt_t		cnt_summed;

	/* When threads exit, they merge their stats into cnt_merged. */
	prof_cnt_t		cnt_merged;

	/*
	 * List of profile counters, one for each thread that has allocated in
	 * this context.
	 */
	ql_head(prof_thr_cnt_t)	cnts_ql;
};

struct prof_tdata_s {
	/*
	 * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *).  Each thread keeps a
	 * cache of backtraces, with associated thread-specific prof_thr_cnt_t
	 * objects.  Other threads may read the prof_thr_cnt_t contents, but no
	 * others will ever write them.
	 *
	 * Upon thread exit, the thread must merge all the prof_thr_cnt_t
	 * counter data into the associated prof_ctx_t objects, and unlink/free
	 * the prof_thr_cnt_t objects.
	 */
	ckh_t			bt2cnt;

	/* LRU for contents of bt2cnt. */
	ql_head(prof_thr_cnt_t)	lru_ql;

	/* Backtrace vector, used for calls to prof_backtrace(). */
	void			**vec;

	/* Sampling state. */
	uint64_t		prng_state;
	uint64_t		threshold;
	uint64_t		accum;

	/* State used to avoid dumping while operating on prof internals. */
	bool			enq;
	bool			enq_idump;
	bool			enq_gdump;
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool	opt_prof;
/*
 * Even if opt_prof is true, sampling can be temporarily disabled by setting
 * opt_prof_active to false.  No locking is used when updating opt_prof_active,
 * so there are no guarantees regarding how long it will take for all threads
 * to notice state changes.
 */
extern bool	opt_prof_active;
extern size_t	opt_lg_prof_sample;   /* Mean bytes between samples. */
extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
extern bool	opt_prof_gdump;       /* High-water memory dumping. */
extern bool	opt_prof_final;       /* Final profile dumping. */
extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
extern bool	opt_prof_accum;       /* Report cumulative bytes. */
extern char	opt_prof_prefix[PATH_MAX + 1];

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;
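
/*
 * Illustrative example (the figures are not defaults): with
 * lg_prof_interval:30 (prof_interval = 1 GiB) and 4 arenas, dumps occur on
 * average once per ~1 GiB allocated, but because each arena counts toward the
 * threshold independently, nearly 4 GiB can be allocated between two dumps in
 * the worst case.
 */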

/*
 * If true, promote small sampled objects to large objects, since small run
 * headers do not have embedded profile context pointers.
 */
extern bool	prof_promote;

void	bt_init(prof_bt_t *bt, void **vec);
void	prof_backtrace(prof_bt_t *bt, unsigned nignore);
prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
void	prof_idump(void);
bool	prof_mdump(const char *filename);
void	prof_gdump(void);
prof_tdata_t	*prof_tdata_init(void);
void	prof_tdata_cleanup(void *arg);
void	prof_boot0(void);
void	prof_boot1(void);
bool	prof_boot2(void);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#define	PROF_ALLOC_PREP(nignore, size, ret) do {			\
	prof_tdata_t *prof_tdata;					\
	prof_bt_t bt;							\
									\
	assert(size == s2u(size));					\
									\
	prof_tdata = prof_tdata_get();					\
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {	\
		if (prof_tdata != NULL)					\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
		else							\
			ret = NULL;					\
		break;							\
	}								\
									\
	if (opt_prof_active == false) {					\
		/* Sampling is currently inactive, so avoid sampling. */\
		ret = (prof_thr_cnt_t *)(uintptr_t)1U;			\
	} else if (opt_lg_prof_sample == 0) {				\
		/* Don't bother with sampling logic, since sampling   */\
		/* interval is 1.                                     */\
		bt_init(&bt, prof_tdata->vec);				\
		prof_backtrace(&bt, nignore);				\
		ret = prof_lookup(&bt);					\
	} else {							\
		if (prof_tdata->threshold == 0) {			\
			/* Initialize.  Seed the prng differently for */\
			/* each thread.                               */\
			prof_tdata->prng_state =			\
			    (uint64_t)(uintptr_t)&size;			\
			prof_sample_threshold_update(prof_tdata);	\
		}							\
									\
		/* Determine whether to capture a backtrace based on  */\
		/* whether size is enough for prof_tdata->accum to    */\
		/* reach prof_tdata->threshold.  However, delay       */\
		/* updating these variables until prof_{m,re}alloc(), */\
		/* because we don't know for sure that the allocation */\
		/* will succeed.                                      */\
		/*                                                    */\
		/* Use subtraction rather than addition to avoid      */\
		/* potential integer overflow.                        */\
		if (size >= prof_tdata->threshold -			\
		    prof_tdata->accum) {				\
			bt_init(&bt, prof_tdata->vec);			\
			prof_backtrace(&bt, nignore);			\
			ret = prof_lookup(&bt);				\
		} else							\
			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
	}								\
} while (0)
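
/*
 * Condensed usage sketch (assumed shape of the allocation path in jemalloc.c,
 * not part of this header): a malloc()-like caller rounds the request up to a
 * usable size, obtains a counter via PROF_ALLOC_PREP(), performs the
 * allocation, and reports it via prof_malloc().  imalloc(), SMALL_MAXCLASS,
 * and arena_prof_promoted() come from elsewhere in the tree.
 *
 *   size_t usize = s2u(size);
 *   prof_thr_cnt_t *cnt;
 *   void *p;
 *
 *   PROF_ALLOC_PREP(1, usize, cnt);
 *   if (cnt == NULL)
 *       return (NULL);
 *   if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U &&
 *       usize <= SMALL_MAXCLASS) {
 *       p = imalloc(SMALL_MAXCLASS+1);
 *       if (p != NULL)
 *           arena_prof_promoted(p, usize);
 *   } else
 *       p = imalloc(usize);
 *   if (p == NULL)
 *       return (NULL);
 *   prof_malloc(p, usize, cnt);
 *   return (p);
 */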

#ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)

prof_tdata_t	*prof_tdata_get(void);
void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t	*prof_ctx_get(const void *ptr);
void	prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
bool	prof_sample_accum_update(size_t size);
void	prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
void	prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
    size_t old_size, prof_ctx_t *old_ctx);
void	prof_free(const void *ptr, size_t size);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
malloc_tsd_externs(prof_tdata, prof_tdata_t *)
malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
    prof_tdata_cleanup)

JEMALLOC_INLINE prof_tdata_t *
prof_tdata_get(void)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = *prof_tdata_tsd_get();
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {
		if (prof_tdata == NULL)
			prof_tdata = prof_tdata_init();
	}

	return (prof_tdata);
}

JEMALLOC_INLINE void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
	uint64_t r;
	double u;

	cassert(config_prof);

	/*
	 * Compute sample threshold as a geometrically distributed random
	 * variable with mean (2^opt_lg_prof_sample).
	 *
	 *                         __        __
	 *                         |  log(u)  |                     1
	 * prof_tdata->threshold = | -------- |, where p = -------------------
	 *                         | log(1-p) |             opt_lg_prof_sample
	 *                                                 2
	 *
	 * For more information on the math, see:
	 *
	 *   Non-Uniform Random Variate Generation
	 *   Luc Devroye
	 *   Springer-Verlag, New York, 1986
	 *   pp 500
	 *   (http://cg.scs.carleton.ca/~luc/rnbookindex.html)
	 */
	prng64(r, 53, prof_tdata->prng_state,
	    UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
	u = (double)r * (1.0/9007199254740992.0L);
	prof_tdata->threshold = (uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
	    + (uint64_t)1U;
}
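
/*
 * Worked example of the formula above: with the default lg_prof_sample of 19,
 * p = 2^-19, so the threshold is geometrically distributed with mean
 * 1/p = 2^19 = 524288, i.e. on average one backtrace is captured per 512 KiB
 * of allocation activity, though individual gaps vary widely around that
 * mean.
 */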

JEMALLOC_INLINE prof_ctx_t *
prof_ctx_get(const void *ptr)
{
	prof_ctx_t *ret;
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		ret = arena_prof_ctx_get(ptr);
	} else
		ret = huge_prof_ctx_get(ptr);

	return (ret);
}

JEMALLOC_INLINE void
prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		arena_prof_ctx_set(ptr, ctx);
	} else
		huge_prof_ctx_set(ptr, ctx);
}

JEMALLOC_INLINE bool
prof_sample_accum_update(size_t size)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);
	/* Sampling logic is unnecessary if the interval is 1. */
	assert(opt_lg_prof_sample != 0);

	prof_tdata = *prof_tdata_tsd_get();
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (true);

	/* Take care to avoid integer overflow. */
	if (size >= prof_tdata->threshold - prof_tdata->accum) {
		prof_tdata->accum -= (prof_tdata->threshold - size);
		/* Compute new sample threshold. */
		prof_sample_threshold_update(prof_tdata);
		while (prof_tdata->accum >= prof_tdata->threshold) {
			prof_tdata->accum -= prof_tdata->threshold;
			prof_sample_threshold_update(prof_tdata);
		}
		return (false);
	} else {
		prof_tdata->accum += size;
		return (true);
	}
}

JEMALLOC_INLINE void
prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
{

	cassert(config_prof);
	assert(ptr != NULL);
	assert(size == isalloc(ptr, true));

	if (opt_lg_prof_sample != 0) {
		if (prof_sample_accum_update(size)) {
			/*
			 * Don't sample.  For malloc()-like allocation, it is
			 * always possible to tell in advance how large an
			 * object's usable size will be, so there should never
			 * be a difference between the size passed to
			 * PROF_ALLOC_PREP() and prof_malloc().
			 */
			assert((uintptr_t)cnt == (uintptr_t)1U);
		}
	}

	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, cnt->ctx);

		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += size;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += size;
		}
		/*********/
		mb_write();
		/*********/
		cnt->epoch++;
		/*********/
		mb_write();
		/*********/
	} else
		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
}

JEMALLOC_INLINE void
prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
    size_t old_size, prof_ctx_t *old_ctx)
{
	prof_thr_cnt_t *told_cnt;

	cassert(config_prof);
	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);

	if (ptr != NULL) {
		assert(size == isalloc(ptr, true));
		if (opt_lg_prof_sample != 0) {
			if (prof_sample_accum_update(size)) {
				/*
				 * Don't sample.  The size passed to
				 * PROF_ALLOC_PREP() was larger than what
				 * actually got allocated, so a backtrace was
				 * captured for this allocation, even though
				 * its actual size was insufficient to cross
				 * the sample threshold.
				 */
				cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
			}
		}
	}

	if ((uintptr_t)old_ctx > (uintptr_t)1U) {
		told_cnt = prof_lookup(old_ctx->bt);
		if (told_cnt == NULL) {
			/*
			 * It's too late to propagate OOM for this realloc(),
			 * so operate directly on old_ctx->cnt_merged.
			 */
			malloc_mutex_lock(old_ctx->lock);
			old_ctx->cnt_merged.curobjs--;
			old_ctx->cnt_merged.curbytes -= old_size;
			malloc_mutex_unlock(old_ctx->lock);
			told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
		}
	} else
		told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;

	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		prof_ctx_set(ptr, cnt->ctx);
		cnt->epoch++;
	} else if (ptr != NULL)
		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U) {
		told_cnt->cnts.curobjs--;
		told_cnt->cnts.curbytes -= old_size;
	}
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += size;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += size;
		}
	}
	/*********/
	mb_write();
	/*********/
	if ((uintptr_t)told_cnt > (uintptr_t)1U)
		told_cnt->epoch++;
	if ((uintptr_t)cnt > (uintptr_t)1U)
		cnt->epoch++;
	/*********/
	mb_write(); /* Not strictly necessary. */
}

JEMALLOC_INLINE void
prof_free(const void *ptr, size_t size)
{
	prof_ctx_t *ctx = prof_ctx_get(ptr);

	cassert(config_prof);

	if ((uintptr_t)ctx > (uintptr_t)1) {
		prof_thr_cnt_t *tcnt;
		assert(size == isalloc(ptr, true));
		tcnt = prof_lookup(ctx->bt);

		if (tcnt != NULL) {
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
			tcnt->cnts.curobjs--;
			tcnt->cnts.curbytes -= size;
			/*********/
			mb_write();
			/*********/
			tcnt->epoch++;
			/*********/
			mb_write();
			/*********/
		} else {
			/*
			 * OOM during free() cannot be propagated, so operate
			 * directly on ctx->cnt_merged.
			 */
			malloc_mutex_lock(ctx->lock);
			ctx->cnt_merged.curobjs--;
			ctx->cnt_merged.curbytes -= size;
			malloc_mutex_unlock(ctx->lock);
		}
	}
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
