tcache.c revision 4507f34628dfae26e6b0a6faa13e5f9a49600616
#define	JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_internal.h"

/******************************************************************************/
/* Data. */

bool	opt_tcache = true;
ssize_t	opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;

tcache_bin_info_t	*tcache_bin_info;
static unsigned		stack_nelms; /* Total stack elms per tcache. */

/* Map of thread-specific caches. */
#ifndef NO_TLS
__thread tcache_t	*tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
#endif

/*
 * Same contents as tcache_tls, but initialized such that the TSD destructor is
 * called when a thread exits, so that the cache can be cleaned up.
 */
pthread_key_t		tcache_tsd;

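/*
 * Derived in tcache_boot(): nhbins is the total number of cached bin classes
 * (the small bins plus one large bin per page multiple up to tcache_maxclass),
 * and tcache_maxclass is the largest size class that the tcache will cache.
 */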
size_t				nhbins;
size_t				tcache_maxclass;

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static void	tcache_thread_cleanup(void *arg);

/******************************************************************************/

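/*
 * Slow path for small allocation: called when the thread's cache bin for
 * binind is empty.  Refill the bin from the arena via
 * arena_tcache_fill_small(), then retry the fast-path allocation; returns
 * NULL only if the refill produced no objects.
 */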
void *
tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
{
	void *ret;

	arena_tcache_fill_small(tcache->arena, tbin, binind,
	    config_prof ? tcache->prof_accumbytes : 0);
	if (config_prof)
		tcache->prof_accumbytes = 0;
	ret = tcache_alloc_easy(tbin);

	return (ret);
}

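/*
 * Flush all but the last rem objects from a small bin back to their arena
 * bins.  Each pass of the outer loop locks the arena bin that owns
 * tbin->avail[0], frees every cached object belonging to that arena, and
 * stashes objects owned by other arenas at the front of avail[] so that a
 * later pass can flush them under the correct lock.
 */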
void
tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
    tcache_t *tcache)
{
	void *ptr;
	unsigned i, nflush, ndeferred;
	bool merged_stats = false;

	assert(binind < NBINS);
	assert(rem <= tbin->ncached);

	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena bin associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
		    tbin->avail[0]);
		arena_t *arena = chunk->arena;
		arena_bin_t *bin = &arena->bins[binind];

		if (config_prof && arena == tcache->arena) {
			malloc_mutex_lock(&arena->lock);
			arena_prof_accum(arena, tcache->prof_accumbytes);
			malloc_mutex_unlock(&arena->lock);
			tcache->prof_accumbytes = 0;
		}

		malloc_mutex_lock(&bin->lock);
		if (config_stats && arena == tcache->arena) {
			assert(merged_stats == false);
			merged_stats = true;
			bin->stats.nflushes++;
			bin->stats.nrequests += tbin->tstats.nrequests;
			tbin->tstats.nrequests = 0;
		}
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = tbin->avail[i];
			assert(ptr != NULL);
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena) {
				size_t pageind = ((uintptr_t)ptr -
				    (uintptr_t)chunk) >> PAGE_SHIFT;
				arena_chunk_map_t *mapelm =
				    &chunk->map[pageind-map_bias];
				arena_dalloc_bin(arena, chunk, ptr, mapelm);
			} else {
				/*
				 * This object was allocated via a different
				 * arena bin than the one that is currently
				 * locked.  Stash the object, so that it can be
				 * handled in a future pass.
				 */
				tbin->avail[ndeferred] = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(&bin->lock);
	}
	if (config_stats && merged_stats == false) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		arena_bin_t *bin = &tcache->arena->bins[binind];
		malloc_mutex_lock(&bin->lock);
		bin->stats.nflushes++;
		bin->stats.nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(&bin->lock);
	}

	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
	    rem * sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

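/*
 * Flush all but the last rem objects from a large bin back to their arenas.
 * Mirrors tcache_bin_flush_small(), except that large objects are returned
 * directly to the owning arena under arena->lock rather than under a per-bin
 * lock.
 */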
void
tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
    tcache_t *tcache)
{
	void *ptr;
	unsigned i, nflush, ndeferred;
	bool merged_stats = false;

	assert(binind < nhbins);
	assert(rem <= tbin->ncached);

	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
		    tbin->avail[0]);
		arena_t *arena = chunk->arena;

		malloc_mutex_lock(&arena->lock);
		if ((config_prof || config_stats) && arena == tcache->arena) {
			if (config_prof) {
				arena_prof_accum(arena,
				    tcache->prof_accumbytes);
				tcache->prof_accumbytes = 0;
			}
			if (config_stats) {
				merged_stats = true;
				arena->stats.nrequests_large +=
				    tbin->tstats.nrequests;
				arena->stats.lstats[binind - NBINS].nrequests +=
				    tbin->tstats.nrequests;
				tbin->tstats.nrequests = 0;
			}
		}
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = tbin->avail[i];
			assert(ptr != NULL);
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena)
				arena_dalloc_large(arena, chunk, ptr);
			else {
				/*
				 * This object was allocated via a different
				 * arena than the one that is currently locked.
				 * Stash the object, so that it can be handled
				 * in a future pass.
				 */
				tbin->avail[ndeferred] = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(&arena->lock);
	}
	if (config_stats && merged_stats == false) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		arena_t *arena = tcache->arena;
		malloc_mutex_lock(&arena->lock);
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		arena->stats.lstats[binind - NBINS].nrequests +=
		    tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(&arena->lock);
	}

	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
	    rem * sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

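/*
 * Create a tcache bound to the given arena and install it as the calling
 * thread's cache via TCACHE_SET().  The tcache header and all of its per-bin
 * object stacks come from a single allocation: the tcache_t (with nhbins
 * tcache_bin_t slots) is followed by the pointer stacks, and the total size
 * is rounded up to a cacheline multiple.
 */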
tcache_t *
tcache_create(arena_t *arena)
{
	tcache_t *tcache;
	size_t size, stack_offset;
	unsigned i;

	size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
	/* Naturally align the pointer stacks. */
	size = PTR_CEILING(size);
	stack_offset = size;
	size += stack_nelms * sizeof(void *);
	/*
	 * Round up to the nearest multiple of the cacheline size, in order to
	 * avoid the possibility of false cacheline sharing.
	 *
	 * The correctness of this rounding relies on the same logic as in
	 * ipalloc(), but we cannot call ipalloc() directly here due to tcache
	 * bootstrapping issues.
	 */
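	/*
	 * For illustration (assuming the default 64-byte cacheline, i.e.
	 * CACHELINE == 64 and CACHELINE_MASK == 63): a combined size of 1000
	 * bytes would round up to 1024.
	 */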
	size = (size + CACHELINE_MASK) & (-CACHELINE);

	if (size <= SMALL_MAXCLASS)
		tcache = (tcache_t *)arena_malloc_small(arena, size, true);
	else if (size <= tcache_maxclass)
		tcache = (tcache_t *)arena_malloc_large(arena, size, true);
	else
		tcache = (tcache_t *)icalloc(size);

	if (tcache == NULL)
		return (NULL);

	if (config_stats) {
		/* Link into list of extant tcaches. */
		malloc_mutex_lock(&arena->lock);
		ql_elm_new(tcache, link);
		ql_tail_insert(&arena->tcache_ql, tcache, link);
		malloc_mutex_unlock(&arena->lock);
	}

	tcache->arena = arena;
	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
	for (i = 0; i < nhbins; i++) {
		tcache->tbins[i].lg_fill_div = 1;
		tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
		    (uintptr_t)stack_offset);
		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
	}

	TCACHE_SET(tcache);

	return (tcache);
}

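/*
 * Tear down a tcache: unlink it from its arena's tcache list and merge its
 * stats (when stats are enabled), flush every cached object back to the
 * arena that owns it, and deallocate the tcache itself via the path that
 * matches how tcache_create() allocated it (small run, large run, or
 * icalloc()).
 */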
void
tcache_destroy(tcache_t *tcache)
{
	unsigned i;
	size_t tcache_size;

	if (config_stats) {
		/* Unlink from list of extant tcaches. */
		malloc_mutex_lock(&tcache->arena->lock);
		ql_remove(&tcache->arena->tcache_ql, tcache, link);
		malloc_mutex_unlock(&tcache->arena->lock);
		tcache_stats_merge(tcache, tcache->arena);
	}

	for (i = 0; i < NBINS; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_small(tbin, i, 0, tcache);

		if (config_stats && tbin->tstats.nrequests != 0) {
			arena_t *arena = tcache->arena;
			arena_bin_t *bin = &arena->bins[i];
			malloc_mutex_lock(&bin->lock);
			bin->stats.nrequests += tbin->tstats.nrequests;
			malloc_mutex_unlock(&bin->lock);
		}
	}

	for (; i < nhbins; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_large(tbin, i, 0, tcache);

		if (config_stats && tbin->tstats.nrequests != 0) {
			arena_t *arena = tcache->arena;
			malloc_mutex_lock(&arena->lock);
			arena->stats.nrequests_large += tbin->tstats.nrequests;
			arena->stats.lstats[i - NBINS].nrequests +=
			    tbin->tstats.nrequests;
			malloc_mutex_unlock(&arena->lock);
		}
	}

	if (config_prof && tcache->prof_accumbytes > 0) {
		malloc_mutex_lock(&tcache->arena->lock);
		arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
		malloc_mutex_unlock(&tcache->arena->lock);
	}

	tcache_size = arena_salloc(tcache);
	if (tcache_size <= SMALL_MAXCLASS) {
		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
		arena_t *arena = chunk->arena;
		size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
		    PAGE_SHIFT;
		arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias];
		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
		    (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) <<
		    PAGE_SHIFT));
		arena_bin_t *bin = run->bin;

		malloc_mutex_lock(&bin->lock);
		arena_dalloc_bin(arena, chunk, tcache, mapelm);
		malloc_mutex_unlock(&bin->lock);
	} else if (tcache_size <= tcache_maxclass) {
		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
		arena_t *arena = chunk->arena;

		malloc_mutex_lock(&arena->lock);
		arena_dalloc_large(arena, chunk, tcache);
		malloc_mutex_unlock(&arena->lock);
	} else
		idalloc(tcache);
}

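/*
 * pthread TSD destructor.  The key value is either a live tcache pointer or
 * one of two sentinels: (uintptr_t)1 means cleanup has already run for this
 * thread, and (uintptr_t)2 means some other destructor called into the
 * allocator after cleanup, so the key is reset to 1 in order to get one more
 * destructor callback.
 */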
static void
tcache_thread_cleanup(void *arg)
{
	tcache_t *tcache = (tcache_t *)arg;

	if (tcache == (void *)(uintptr_t)1) {
		/*
		 * The previous time this destructor was called, we set the key
		 * to 1 so that other destructors wouldn't cause re-creation of
		 * the tcache.  This time, do nothing, so that the destructor
		 * will not be called again.
		 */
	} else if (tcache == (void *)(uintptr_t)2) {
		/*
		 * Another destructor called an allocator function after this
		 * destructor was called.  Reset tcache to 1 in order to
		 * receive another callback.
		 */
		TCACHE_SET((uintptr_t)1);
	} else if (tcache != NULL) {
		assert(tcache != (void *)(uintptr_t)1);
		tcache_destroy(tcache);
		TCACHE_SET((uintptr_t)1);
	}
}

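/*
 * Merge this tcache's per-bin request counters into the owning arena's stats
 * and reset them: small-bin counts go into the corresponding arena bin stats
 * (under each bin's lock), and large-bin counts go into the arena's large
 * allocation stats.
 */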
void
tcache_stats_merge(tcache_t *tcache, arena_t *arena)
{
	unsigned i;

	/* Merge and reset tcache stats. */
	for (i = 0; i < NBINS; i++) {
		arena_bin_t *bin = &arena->bins[i];
		tcache_bin_t *tbin = &tcache->tbins[i];
		malloc_mutex_lock(&bin->lock);
		bin->stats.nrequests += tbin->tstats.nrequests;
		malloc_mutex_unlock(&bin->lock);
		tbin->tstats.nrequests = 0;
	}

	for (; i < nhbins; i++) {
		malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS];
		tcache_bin_t *tbin = &tcache->tbins[i];
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		lstats->nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
	}
}

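/*
 * One-time tcache bootstrap, called during malloc initialization.  Clamps
 * opt_lg_tcache_max to [SMALL_MAXCLASS, arena_maxclass] to compute
 * tcache_maxclass, derives nhbins, sizes tcache_bin_info (twice each small
 * bin's nregs, capped at TCACHE_NSLOTS_SMALL_MAX, and TCACHE_NSLOTS_LARGE for
 * large bins), and registers the TSD destructor (aborting if
 * pthread_key_create() fails).  Returns true on allocation failure.
 */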
bool
tcache_boot(void)
{

	if (opt_tcache) {
		unsigned i;

		/*
		 * If necessary, clamp opt_lg_tcache_max, now that
		 * SMALL_MAXCLASS and arena_maxclass are known.
		 * XXX Can this be done earlier?
		 */
		if (opt_lg_tcache_max < 0 || (1U <<
		    opt_lg_tcache_max) < SMALL_MAXCLASS)
			tcache_maxclass = SMALL_MAXCLASS;
		else if ((1U << opt_lg_tcache_max) > arena_maxclass)
			tcache_maxclass = arena_maxclass;
		else
			tcache_maxclass = (1U << opt_lg_tcache_max);

		nhbins = NBINS + (tcache_maxclass >> PAGE_SHIFT);

		/* Initialize tcache_bin_info. */
		tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
		    sizeof(tcache_bin_info_t));
		if (tcache_bin_info == NULL)
			return (true);
		stack_nelms = 0;
		for (i = 0; i < NBINS; i++) {
			if ((arena_bin_info[i].nregs << 1) <=
			    TCACHE_NSLOTS_SMALL_MAX) {
				tcache_bin_info[i].ncached_max =
				    (arena_bin_info[i].nregs << 1);
			} else {
				tcache_bin_info[i].ncached_max =
				    TCACHE_NSLOTS_SMALL_MAX;
			}
			stack_nelms += tcache_bin_info[i].ncached_max;
		}
		for (; i < nhbins; i++) {
			tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
			stack_nelms += tcache_bin_info[i].ncached_max;
		}

		if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
		    0) {
			malloc_write(
			    "<jemalloc>: Error in pthread_key_create()\n");
			abort();
		}
	}

	return (false);
}