1/*
2 *******************************************************************************
3 * Implementation of (2^1+,2) cuckoo hashing, where 2^1+ indicates that each
4 * hash bucket contains 2^n cells, for n >= 1, and 2 indicates that two hash
5 * functions are employed.  The original cuckoo hashing algorithm was described
6 * in:
7 *
8 *   Pagh, R., F.F. Rodler (2004) Cuckoo Hashing.  Journal of Algorithms
9 *     51(2):122-144.
10 *
11 * Generalization of cuckoo hashing was discussed in:
12 *
13 *   Erlingsson, U., M. Manasse, F. McSherry (2006) A cool and practical
14 *     alternative to traditional hash tables.  In Proceedings of the 7th
15 *     Workshop on Distributed Data and Structures (WDAS'06), Santa Clara, CA,
16 *     January 2006.
17 *
18 * This implementation uses precisely two hash functions because that is the
19 * fewest that can work, and supporting multiple hashes is an implementation
20 * burden.  Here is a reproduction of Figure 1 from Erlingsson et al. (2006)
21 * that shows approximate expected maximum load factors for various
22 * configurations:
23 *
24 *           |         #cells/bucket         |
25 *   #hashes |   1   |   2   |   4   |   8   |
26 *   --------+-------+-------+-------+-------+
27 *         1 | 0.006 | 0.006 | 0.03  | 0.12  |
28 *         2 | 0.49  | 0.86  |>0.93< |>0.96< |
29 *         3 | 0.91  | 0.97  | 0.98  | 0.999 |
30 *         4 | 0.97  | 0.99  | 0.999 |       |
31 *
32 * The number of cells per bucket is chosen such that a bucket fits in one cache
33 * line.  So, on 32- and 64-bit systems, we use (8,2) and (4,2) cuckoo hashing,
34 * respectively.
35 *
36 ******************************************************************************/
37#define	JEMALLOC_CKH_C_
38#include "jemalloc/internal/jemalloc_internal.h"
39
40/******************************************************************************/
41/* Function prototypes for non-inline static functions. */
42
43static bool	ckh_grow(tsd_t *tsd, ckh_t *ckh);
44static void	ckh_shrink(tsd_t *tsd, ckh_t *ckh);
45
46/******************************************************************************/
47
48/*
49 * Search bucket for key and return the cell number if found; SIZE_T_MAX
50 * otherwise.
51 */
52JEMALLOC_INLINE_C size_t
53ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key)
54{
55	ckhc_t *cell;
56	unsigned i;
57
58	for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
59		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
60		if (cell->key != NULL && ckh->keycomp(key, cell->key))
61			return ((bucket << LG_CKH_BUCKET_CELLS) + i);
62	}
63
64	return (SIZE_T_MAX);
65}
66
67/*
68 * Search table for key and return cell number if found; SIZE_T_MAX otherwise.
69 */
70JEMALLOC_INLINE_C size_t
71ckh_isearch(ckh_t *ckh, const void *key)
72{
73	size_t hashes[2], bucket, cell;
74
75	assert(ckh != NULL);
76
77	ckh->hash(key, hashes);
78
79	/* Search primary bucket. */
80	bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
81	cell = ckh_bucket_search(ckh, bucket, key);
82	if (cell != SIZE_T_MAX)
83		return (cell);
84
85	/* Search secondary bucket. */
86	bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
87	cell = ckh_bucket_search(ckh, bucket, key);
88	return (cell);
89}
90
91JEMALLOC_INLINE_C bool
92ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
93    const void *data)
94{
95	ckhc_t *cell;
96	unsigned offset, i;
97
98	/*
99	 * Cycle through the cells in the bucket, starting at a random position.
100	 * The randomness avoids worst-case search overhead as buckets fill up.
101	 */
102	offset = (unsigned)prng_lg_range_u64(&ckh->prng_state,
103	    LG_CKH_BUCKET_CELLS);
104	for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
105		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
106		    ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
107		if (cell->key == NULL) {
108			cell->key = key;
109			cell->data = data;
110			ckh->count++;
111			return (false);
112		}
113	}
114
115	return (true);
116}
117
118/*
119 * No space is available in bucket.  Randomly evict an item, then try to find an
120 * alternate location for that item.  Iteratively repeat this
121 * eviction/relocation procedure until either success or detection of an
122 * eviction/relocation bucket cycle.
123 */
124JEMALLOC_INLINE_C bool
125ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
126    void const **argdata)
127{
128	const void *key, *data, *tkey, *tdata;
129	ckhc_t *cell;
130	size_t hashes[2], bucket, tbucket;
131	unsigned i;
132
133	bucket = argbucket;
134	key = *argkey;
135	data = *argdata;
136	while (true) {
137		/*
138		 * Choose a random item within the bucket to evict.  This is
139		 * critical to correct function, because without (eventually)
140		 * evicting all items within a bucket during iteration, it
141		 * would be possible to get stuck in an infinite loop if there
142		 * were an item for which both hashes indicated the same
143		 * bucket.
144		 */
145		i = (unsigned)prng_lg_range_u64(&ckh->prng_state,
146		    LG_CKH_BUCKET_CELLS);
147		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
148		assert(cell->key != NULL);
149
150		/* Swap cell->{key,data} and {key,data} (evict). */
151		tkey = cell->key; tdata = cell->data;
152		cell->key = key; cell->data = data;
153		key = tkey; data = tdata;
154
155#ifdef CKH_COUNT
156		ckh->nrelocs++;
157#endif
158
159		/* Find the alternate bucket for the evicted item. */
160		ckh->hash(key, hashes);
161		tbucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
162		if (tbucket == bucket) {
163			tbucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets)
164			    - 1);
165			/*
166			 * It may be that (tbucket == bucket) still, if the
167			 * item's hashes both indicate this bucket.  However,
168			 * we are guaranteed to eventually escape this bucket
169			 * during iteration, assuming pseudo-random item
170			 * selection (true randomness would make infinite
171			 * looping a remote possibility).  The reason we can
172			 * never get trapped forever is that there are two
173			 * cases:
174			 *
175			 * 1) This bucket == argbucket, so we will quickly
176			 *    detect an eviction cycle and terminate.
177			 * 2) An item was evicted to this bucket from another,
178			 *    which means that at least one item in this bucket
179			 *    has hashes that indicate distinct buckets.
180			 */
181		}
182		/* Check for a cycle. */
183		if (tbucket == argbucket) {
184			*argkey = key;
185			*argdata = data;
186			return (true);
187		}
188
189		bucket = tbucket;
190		if (!ckh_try_bucket_insert(ckh, bucket, key, data))
191			return (false);
192	}
193}
194
195JEMALLOC_INLINE_C bool
196ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
197{
198	size_t hashes[2], bucket;
199	const void *key = *argkey;
200	const void *data = *argdata;
201
202	ckh->hash(key, hashes);
203
204	/* Try to insert in primary bucket. */
205	bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
206	if (!ckh_try_bucket_insert(ckh, bucket, key, data))
207		return (false);
208
209	/* Try to insert in secondary bucket. */
210	bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
211	if (!ckh_try_bucket_insert(ckh, bucket, key, data))
212		return (false);
213
214	/*
215	 * Try to find a place for this item via iterative eviction/relocation.
216	 */
217	return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata));
218}
219
220/*
221 * Try to rebuild the hash table from scratch by inserting all items from the
222 * old table into the new.
223 */
224JEMALLOC_INLINE_C bool
225ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
226{
227	size_t count, i, nins;
228	const void *key, *data;
229
230	count = ckh->count;
231	ckh->count = 0;
232	for (i = nins = 0; nins < count; i++) {
233		if (aTab[i].key != NULL) {
234			key = aTab[i].key;
235			data = aTab[i].data;
236			if (ckh_try_insert(ckh, &key, &data)) {
237				ckh->count = count;
238				return (true);
239			}
240			nins++;
241		}
242	}
243
244	return (false);
245}
246
247static bool
248ckh_grow(tsd_t *tsd, ckh_t *ckh)
249{
250	bool ret;
251	ckhc_t *tab, *ttab;
252	unsigned lg_prevbuckets, lg_curcells;
253
254#ifdef CKH_COUNT
255	ckh->ngrows++;
256#endif
257
258	/*
259	 * It is possible (though unlikely, given well behaved hashes) that the
260	 * table will have to be doubled more than once in order to create a
261	 * usable table.
262	 */
263	lg_prevbuckets = ckh->lg_curbuckets;
264	lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS;
265	while (true) {
266		size_t usize;
267
268		lg_curcells++;
269		usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
270		if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) {
271			ret = true;
272			goto label_return;
273		}
274		tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE,
275		    true, NULL, true, arena_ichoose(tsd, NULL));
276		if (tab == NULL) {
277			ret = true;
278			goto label_return;
279		}
280		/* Swap in new table. */
281		ttab = ckh->tab;
282		ckh->tab = tab;
283		tab = ttab;
284		ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
285
286		if (!ckh_rebuild(ckh, tab)) {
287			idalloctm(tsd_tsdn(tsd), tab, NULL, true, true);
288			break;
289		}
290
291		/* Rebuilding failed, so back out partially rebuilt table. */
292		idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true);
293		ckh->tab = tab;
294		ckh->lg_curbuckets = lg_prevbuckets;
295	}
296
297	ret = false;
298label_return:
299	return (ret);
300}
301
302static void
303ckh_shrink(tsd_t *tsd, ckh_t *ckh)
304{
305	ckhc_t *tab, *ttab;
306	size_t usize;
307	unsigned lg_prevbuckets, lg_curcells;
308
309	/*
310	 * It is possible (though unlikely, given well behaved hashes) that the
311	 * table rebuild will fail.
312	 */
313	lg_prevbuckets = ckh->lg_curbuckets;
314	lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
315	usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
316	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
317		return;
318	tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL,
319	    true, arena_ichoose(tsd, NULL));
320	if (tab == NULL) {
321		/*
322		 * An OOM error isn't worth propagating, since it doesn't
323		 * prevent this or future operations from proceeding.
324		 */
325		return;
326	}
327	/* Swap in new table. */
328	ttab = ckh->tab;
329	ckh->tab = tab;
330	tab = ttab;
331	ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
332
333	if (!ckh_rebuild(ckh, tab)) {
334		idalloctm(tsd_tsdn(tsd), tab, NULL, true, true);
335#ifdef CKH_COUNT
336		ckh->nshrinks++;
337#endif
338		return;
339	}
340
341	/* Rebuilding failed, so back out partially rebuilt table. */
342	idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true);
343	ckh->tab = tab;
344	ckh->lg_curbuckets = lg_prevbuckets;
345#ifdef CKH_COUNT
346	ckh->nshrinkfails++;
347#endif
348}
349
350bool
351ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
352    ckh_keycomp_t *keycomp)
353{
354	bool ret;
355	size_t mincells, usize;
356	unsigned lg_mincells;
357
358	assert(minitems > 0);
359	assert(hash != NULL);
360	assert(keycomp != NULL);
361
362#ifdef CKH_COUNT
363	ckh->ngrows = 0;
364	ckh->nshrinks = 0;
365	ckh->nshrinkfails = 0;
366	ckh->ninserts = 0;
367	ckh->nrelocs = 0;
368#endif
369	ckh->prng_state = 42; /* Value doesn't really matter. */
370	ckh->count = 0;
371
372	/*
373	 * Find the minimum power of 2 that is large enough to fit minitems
374	 * entries.  We are using (2+,2) cuckoo hashing, which has an expected
375	 * maximum load factor of at least ~0.86, so 0.75 is a conservative load
376	 * factor that will typically allow mincells items to fit without ever
377	 * growing the table.
378	 */
379	assert(LG_CKH_BUCKET_CELLS > 0);
380	mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2;
381	for (lg_mincells = LG_CKH_BUCKET_CELLS;
382	    (ZU(1) << lg_mincells) < mincells;
383	    lg_mincells++)
384		; /* Do nothing. */
385	ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
386	ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
387	ckh->hash = hash;
388	ckh->keycomp = keycomp;
389
390	usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE);
391	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) {
392		ret = true;
393		goto label_return;
394	}
395	ckh->tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true,
396	    NULL, true, arena_ichoose(tsd, NULL));
397	if (ckh->tab == NULL) {
398		ret = true;
399		goto label_return;
400	}
401
402	ret = false;
403label_return:
404	return (ret);
405}
406
407void
408ckh_delete(tsd_t *tsd, ckh_t *ckh)
409{
410
411	assert(ckh != NULL);
412
413#ifdef CKH_VERBOSE
414	malloc_printf(
415	    "%s(%p): ngrows: %"FMTu64", nshrinks: %"FMTu64","
416	    " nshrinkfails: %"FMTu64", ninserts: %"FMTu64","
417	    " nrelocs: %"FMTu64"\n", __func__, ckh,
418	    (unsigned long long)ckh->ngrows,
419	    (unsigned long long)ckh->nshrinks,
420	    (unsigned long long)ckh->nshrinkfails,
421	    (unsigned long long)ckh->ninserts,
422	    (unsigned long long)ckh->nrelocs);
423#endif
424
425	idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true);
426	if (config_debug)
427		memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t));
428}
429
430size_t
431ckh_count(ckh_t *ckh)
432{
433
434	assert(ckh != NULL);
435
436	return (ckh->count);
437}
438
439bool
440ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data)
441{
442	size_t i, ncells;
443
444	for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets +
445	    LG_CKH_BUCKET_CELLS)); i < ncells; i++) {
446		if (ckh->tab[i].key != NULL) {
447			if (key != NULL)
448				*key = (void *)ckh->tab[i].key;
449			if (data != NULL)
450				*data = (void *)ckh->tab[i].data;
451			*tabind = i + 1;
452			return (false);
453		}
454	}
455
456	return (true);
457}
458
459bool
460ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data)
461{
462	bool ret;
463
464	assert(ckh != NULL);
465	assert(ckh_search(ckh, key, NULL, NULL));
466
467#ifdef CKH_COUNT
468	ckh->ninserts++;
469#endif
470
471	while (ckh_try_insert(ckh, &key, &data)) {
472		if (ckh_grow(tsd, ckh)) {
473			ret = true;
474			goto label_return;
475		}
476	}
477
478	ret = false;
479label_return:
480	return (ret);
481}
482
483bool
484ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
485    void **data)
486{
487	size_t cell;
488
489	assert(ckh != NULL);
490
491	cell = ckh_isearch(ckh, searchkey);
492	if (cell != SIZE_T_MAX) {
493		if (key != NULL)
494			*key = (void *)ckh->tab[cell].key;
495		if (data != NULL)
496			*data = (void *)ckh->tab[cell].data;
497		ckh->tab[cell].key = NULL;
498		ckh->tab[cell].data = NULL; /* Not necessary. */
499
500		ckh->count--;
501		/* Try to halve the table if it is less than 1/4 full. */
502		if (ckh->count < (ZU(1) << (ckh->lg_curbuckets
503		    + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets
504		    > ckh->lg_minbuckets) {
505			/* Ignore error due to OOM. */
506			ckh_shrink(tsd, ckh);
507		}
508
509		return (false);
510	}
511
512	return (true);
513}
514
515bool
516ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
517{
518	size_t cell;
519
520	assert(ckh != NULL);
521
522	cell = ckh_isearch(ckh, searchkey);
523	if (cell != SIZE_T_MAX) {
524		if (key != NULL)
525			*key = (void *)ckh->tab[cell].key;
526		if (data != NULL)
527			*data = (void *)ckh->tab[cell].data;
528		return (false);
529	}
530
531	return (true);
532}
533
534void
535ckh_string_hash(const void *key, size_t r_hash[2])
536{
537
538	hash(key, strlen((const char *)key), 0x94122f33U, r_hash);
539}
540
541bool
542ckh_string_keycomp(const void *k1, const void *k2)
543{
544
545    assert(k1 != NULL);
546    assert(k2 != NULL);
547
548    return (strcmp((char *)k1, (char *)k2) ? false : true);
549}
550
551void
552ckh_pointer_hash(const void *key, size_t r_hash[2])
553{
554	union {
555		const void	*v;
556		size_t		i;
557	} u;
558
559	assert(sizeof(u.v) == sizeof(u.i));
560	u.v = key;
561	hash(&u.i, sizeof(u.i), 0xd983396eU, r_hash);
562}
563
564bool
565ckh_pointer_keycomp(const void *k1, const void *k2)
566{
567
568	return ((k1 == k2) ? true : false);
569}
570