tcache.c revision a8118233ec0369e00629fe853a5655c0dabf83d2
#define	JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_internal.h"
#ifdef JEMALLOC_TCACHE
/******************************************************************************/
/* Data. */

bool	opt_tcache = true;
ssize_t	opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
ssize_t	opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;

/* Map of thread-specific caches. */
#ifndef NO_TLS
__thread tcache_t	*tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
#endif

/*
 * Same contents as tcache_tls, but initialized such that the TSD destructor
 * is called when a thread exits, so that the cache can be cleaned up.
 */
pthread_key_t		tcache_tsd;

/* Total number of cache bins: small bins plus cached large size classes. */
size_t				nhbins;
/* Maximum cached size class, in bytes. */
size_t				tcache_maxclass;
/* Number of cache events between incremental GC passes over successive bins. */
unsigned			tcache_gc_incr;

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static void	tcache_thread_cleanup(void *arg);

/******************************************************************************/

void *
tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
{
	void *ret;

	arena_tcache_fill_small(tcache->arena, tbin, binind
#ifdef JEMALLOC_PROF
	    , tcache->prof_accumbytes
#endif
	    );
#ifdef JEMALLOC_PROF
	tcache->prof_accumbytes = 0;
#endif
	ret = tcache_alloc_easy(tbin);

	return (ret);
}
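
/*
 * Illustrative sketch (not part of this revision): tcache_alloc_small_hard()
 * is only reached when the bin's avail list is empty; the common case is the
 * easy path in the internal header.  Assuming avail is a LIFO list of cached
 * regions linked through their first words (the same representation the flush
 * loops below walk), the easy path is roughly:
 *
 *	void *
 *	tcache_alloc_easy_sketch(tcache_bin_t *tbin)
 *	{
 *		void *ret;
 *
 *		if (tbin->ncached == 0)
 *			return (NULL);
 *		tbin->ncached--;
 *		if (tbin->ncached < tbin->low_water)
 *			tbin->low_water = tbin->ncached;
 *		ret = tbin->avail;
 *		tbin->avail = *(void **)ret;
 *		return (ret);
 *	}
 */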

void
tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
    , tcache_t *tcache
#endif
    )
{
	void *flush, *deferred, *ptr;
	unsigned i, nflush, ndeferred;
	bool first_pass;
#ifdef JEMALLOC_STATS
	bool merged_stats = false;
#endif

	assert(binind < nbins);
	assert(rem <= tbin->ncached);
	assert(tbin->ncached > 0 || tbin->avail == NULL);

	for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass =
	    true; flush != NULL; flush = deferred, nflush = ndeferred) {
		/* Lock the arena bin associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
		arena_t *arena = chunk->arena;
		arena_bin_t *bin = &arena->bins[binind];

#ifdef JEMALLOC_PROF
		if (arena == tcache->arena) {
			malloc_mutex_lock(&arena->lock);
			arena_prof_accum(arena, tcache->prof_accumbytes);
			malloc_mutex_unlock(&arena->lock);
			tcache->prof_accumbytes = 0;
		}
#endif

		malloc_mutex_lock(&bin->lock);
#ifdef JEMALLOC_STATS
		if (arena == tcache->arena) {
			assert(merged_stats == false);
			merged_stats = true;
			bin->stats.nflushes++;
			bin->stats.nrequests += tbin->tstats.nrequests;
			tbin->tstats.nrequests = 0;
		}
#endif
		deferred = NULL;
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = flush;
			assert(ptr != NULL);
			flush = *(void **)ptr;
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena) {
				size_t pageind = ((uintptr_t)ptr -
				    (uintptr_t)chunk) >> PAGE_SHIFT;
				arena_chunk_map_t *mapelm =
				    &chunk->map[pageind-map_bias];
				arena_dalloc_bin(arena, chunk, ptr, mapelm);
			} else {
				/*
				 * This object was allocated via a different
				 * arena bin than the one that is currently
				 * locked.  Stash the object, so that it can be
				 * handled in a future pass.
				 */
				*(void **)ptr = deferred;
				deferred = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(&bin->lock);

		if (first_pass) {
			tbin->avail = flush;
			first_pass = false;
		}
	}
#ifdef JEMALLOC_STATS
	if (merged_stats == false) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		arena_bin_t *bin = &tcache->arena->bins[binind];
		malloc_mutex_lock(&bin->lock);
		bin->stats.nflushes++;
		bin->stats.nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(&bin->lock);
	}
#endif

	tbin->ncached = rem;
	if (tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}
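
/*
 * Usage sketch (illustrative, not taken from this revision): a periodic
 * trimming pass might flush the objects that sat unused below a bin's
 * low-water mark with something along these lines; the actual policy lives
 * in the tcache event hook in the internal header, and the exact amount
 * flushed there may differ.
 *
 *	if (tbin->low_water > 0) {
 *		tcache_bin_flush_small(tbin, binind,
 *		    tbin->ncached - tbin->low_water
 *	#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
 *		    , tcache
 *	#endif
 *		    );
 *	}
 */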

/*
 * Analogous to tcache_bin_flush_small(), but large objects are returned under
 * the owning arena's lock rather than a per-bin lock, and request stats are
 * merged into the arena-wide large-allocation counters.
 */
void
tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
    , tcache_t *tcache
#endif
    )
{
	void *flush, *deferred, *ptr;
	unsigned i, nflush, ndeferred;
	bool first_pass;

	assert(binind < nhbins);
	assert(rem <= tbin->ncached);
	assert(tbin->ncached > 0 || tbin->avail == NULL);

	for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass =
	    true; flush != NULL; flush = deferred, nflush = ndeferred) {
		/* Lock the arena associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
		arena_t *arena = chunk->arena;

		malloc_mutex_lock(&arena->lock);
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
		if (arena == tcache->arena) {
#endif
#ifdef JEMALLOC_PROF
			arena_prof_accum(arena, tcache->prof_accumbytes);
			tcache->prof_accumbytes = 0;
#endif
#ifdef JEMALLOC_STATS
			arena->stats.nrequests_large += tbin->tstats.nrequests;
			arena->stats.lstats[binind - nbins].nrequests +=
			    tbin->tstats.nrequests;
			tbin->tstats.nrequests = 0;
#endif
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
		}
#endif
		deferred = NULL;
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = flush;
			assert(ptr != NULL);
			flush = *(void **)ptr;
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena)
				arena_dalloc_large(arena, chunk, ptr);
			else {
				/*
				 * This object was allocated via a different
				 * arena than the one that is currently locked.
				 * Stash the object, so that it can be handled
				 * in a future pass.
				 */
				*(void **)ptr = deferred;
				deferred = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(&arena->lock);

		if (first_pass) {
			tbin->avail = flush;
			first_pass = false;
		}
	}

	tbin->ncached = rem;
	if (tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

/*
 * Create a thread-specific cache bound to arena and install it as the calling
 * thread's cache.
 */
tcache_t *
tcache_create(arena_t *arena)
{
	tcache_t *tcache;
	size_t size;
	unsigned i;

	size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
	/*
	 * Round up to the nearest multiple of the cacheline size, in order to
	 * avoid the possibility of false cacheline sharing.
	 *
	 * That this works relies on the same logic as in ipalloc(), but we
	 * cannot directly call ipalloc() here due to tcache bootstrapping
	 * issues.
	 */
	size = (size + CACHELINE_MASK) & (-CACHELINE);
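	/*
	 * Worked example (assuming CACHELINE == 64, hence CACHELINE_MASK ==
	 * 63): size == 200 gives (200 + 63) & -64 == 263 & ~63 == 256.  For a
	 * power-of-two cacheline, -CACHELINE equals ~CACHELINE_MASK, so this
	 * is the usual round-up-to-a-multiple idiom.
	 */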

	if (size <= small_maxclass)
		tcache = (tcache_t *)arena_malloc_small(arena, size, true);
	else
		tcache = (tcache_t *)icalloc(size);

	if (tcache == NULL)
		return (NULL);

#ifdef JEMALLOC_STATS
	/* Link into list of extant tcaches. */
	malloc_mutex_lock(&arena->lock);
	ql_elm_new(tcache, link);
	ql_tail_insert(&arena->tcache_ql, tcache, link);
	malloc_mutex_unlock(&arena->lock);
#endif

	tcache->arena = arena;
	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
	for (i = 0; i < nbins; i++) {
		if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
			tcache->tbins[i].ncached_max = (arena->bins[i].nregs <<
			    1);
		} else
			tcache->tbins[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX;
	}
	for (; i < nhbins; i++)
		tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE;

	TCACHE_SET(tcache);

	return (tcache);
}
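
/*
 * Usage sketch (illustrative, not taken from this revision): the allocation
 * fast path in the internal header lazily creates the calling thread's cache
 * on first use, roughly as follows.  TCACHE_GET() is assumed here to be the
 * companion of the TCACHE_SET() macro used above, and choose_arena() the
 * usual arena-selection helper; both are assumptions about code outside this
 * file.
 *
 *	tcache_t *tcache = TCACHE_GET();
 *	if (tcache == NULL && opt_tcache)
 *		tcache = tcache_create(choose_arena());
 */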

void
tcache_destroy(tcache_t *tcache)
{
	unsigned i;

#ifdef JEMALLOC_STATS
	/* Unlink from list of extant tcaches. */
	malloc_mutex_lock(&tcache->arena->lock);
	ql_remove(&tcache->arena->tcache_ql, tcache, link);
	malloc_mutex_unlock(&tcache->arena->lock);
	tcache_stats_merge(tcache, tcache->arena);
#endif

	for (i = 0; i < nbins; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_small(tbin, i, 0
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
		    , tcache
#endif
		    );

#ifdef JEMALLOC_STATS
		if (tbin->tstats.nrequests != 0) {
			arena_t *arena = tcache->arena;
			arena_bin_t *bin = &arena->bins[i];
			malloc_mutex_lock(&bin->lock);
			bin->stats.nrequests += tbin->tstats.nrequests;
			malloc_mutex_unlock(&bin->lock);
		}
#endif
	}

	for (; i < nhbins; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_large(tbin, i, 0
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
		    , tcache
#endif
		    );

#ifdef JEMALLOC_STATS
		if (tbin->tstats.nrequests != 0) {
			arena_t *arena = tcache->arena;
			malloc_mutex_lock(&arena->lock);
			arena->stats.nrequests_large += tbin->tstats.nrequests;
			arena->stats.lstats[i - nbins].nrequests +=
			    tbin->tstats.nrequests;
			malloc_mutex_unlock(&arena->lock);
		}
#endif
	}

#ifdef JEMALLOC_PROF
	if (tcache->prof_accumbytes > 0) {
		malloc_mutex_lock(&tcache->arena->lock);
		arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
		malloc_mutex_unlock(&tcache->arena->lock);
	}
#endif

	/*
	 * Release the tcache's own storage.  If it came from a small run (see
	 * the arena_malloc_small() call in tcache_create()), return it
	 * directly to the owning arena bin rather than going through
	 * idalloc(), since this tcache is being torn down and must not cache
	 * its own memory.
	 */
	if (arena_salloc(tcache) <= small_maxclass) {
		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
		arena_t *arena = chunk->arena;
		size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
		    PAGE_SHIFT;
		arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias];
		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
		    (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) <<
		    PAGE_SHIFT));
		arena_bin_t *bin = run->bin;

		malloc_mutex_lock(&bin->lock);
		arena_dalloc_bin(arena, chunk, tcache, mapelm);
		malloc_mutex_unlock(&bin->lock);
	} else
		idalloc(tcache);
}

static void
tcache_thread_cleanup(void *arg)
{
	tcache_t *tcache = (tcache_t *)arg;

	if (tcache == (void *)(uintptr_t)1) {
		/*
		 * The previous time this destructor was called, we set the key
		 * to 1 so that other destructors wouldn't cause re-creation of
		 * the tcache.  This time, do nothing, so that the destructor
		 * will not be called again.
		 */
	} else if (tcache == (void *)(uintptr_t)2) {
		/*
		 * Another destructor called an allocator function after this
		 * destructor was called.  Reset tcache to 1 in order to
		 * receive another callback.
		 */
		TCACHE_SET((uintptr_t)1);
	} else if (tcache != NULL) {
		assert(tcache != (void *)(uintptr_t)1);
		tcache_destroy(tcache);
		TCACHE_SET((uintptr_t)1);
	}
}

#ifdef JEMALLOC_STATS
void
tcache_stats_merge(tcache_t *tcache, arena_t *arena)
{
	unsigned i;

	/* Merge and reset tcache stats. */
	for (i = 0; i < nbins; i++) {
		arena_bin_t *bin = &arena->bins[i];
		tcache_bin_t *tbin = &tcache->tbins[i];
		malloc_mutex_lock(&bin->lock);
		bin->stats.nrequests += tbin->tstats.nrequests;
		malloc_mutex_unlock(&bin->lock);
		tbin->tstats.nrequests = 0;
	}

	for (; i < nhbins; i++) {
		malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins];
		tcache_bin_t *tbin = &tcache->tbins[i];
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		lstats->nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
	}
}
#endif

void
tcache_boot(void)
{

	if (opt_tcache) {
		/*
		 * If necessary, clamp opt_lg_tcache_max, now that
		 * small_maxclass and arena_maxclass are known.
		 */
		if (opt_lg_tcache_max < 0 || (1U <<
		    opt_lg_tcache_max) < small_maxclass)
			tcache_maxclass = small_maxclass;
		else if ((1U << opt_lg_tcache_max) > arena_maxclass)
			tcache_maxclass = arena_maxclass;
		else
			tcache_maxclass = (1U << opt_lg_tcache_max);

		nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);

		/* Compute incremental GC event threshold. */
		if (opt_lg_tcache_gc_sweep >= 0) {
			tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
			    nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
			    0) ? 0 : 1);
		} else
			tcache_gc_incr = 0;
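		/*
		 * tcache_gc_incr is the ceiling of
		 * (1 << opt_lg_tcache_gc_sweep) / nbins, so a GC sweep over
		 * all bins spans roughly 2^opt_lg_tcache_gc_sweep cache
		 * events.  For example (hypothetical values), with
		 * opt_lg_tcache_gc_sweep == 13 and nbins == 28, this yields
		 * tcache_gc_incr == ceil(8192 / 28) == 293.
		 */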

		if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
		    0) {
			malloc_write(
			    "<jemalloc>: Error in pthread_key_create()\n");
			abort();
		}
	}
}
/******************************************************************************/
#endif /* JEMALLOC_TCACHE */
