dict.c revision 98ff309cdc98857eb30992f108439cb7d7673598
1/*
2 * This file is part of ltrace.
3 * Copyright (C) 2012, 2013 Petr Machata, Red Hat Inc.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18 * 02110-1301 USA
19 */
20
21#include <string.h>
22#include <stdlib.h>
23#include <stdio.h>
24#include "dict.h"
25
26struct status_bits {
27	unsigned char taken : 1;
28	unsigned char erased : 1;
29};
30
31static struct status_bits *
32bitp(struct dict *dict, size_t n)
33{
34	return VECT_ELEMENT(&dict->status, struct status_bits, n);
35}
36
37void
38dict_init(struct dict *dict,
39	  size_t key_size, size_t value_size,
40	  size_t (*hash1)(const void *),
41	  int (*eq)(const void *, const void *),
42	  size_t (*hash2)(size_t))
43{
44	assert(hash1 != NULL);
45	assert(eq != NULL);
46
47	vect_init(&dict->keys, key_size);
48	vect_init(&dict->values, value_size);
49	VECT_INIT(&dict->status, struct status_bits);
50	dict->size = 0;
51
52	dict->hash1 = hash1;
53	dict->hash2 = hash2;
54	dict->eq = eq;
55}
56
57struct clone_data {
58	struct dict *target;
59	int (*clone_key)(void *tgt, const void *src, void *data);
60	int (*clone_value)(void *tgt, const void *src, void *data);
61	void (*dtor_key)(void *tgt, void *data);
62	void (*dtor_value)(void *tgt, void *data);
63	void *data;
64};
65
66static enum callback_status
67clone_cb(void *key, void *value, void *data)
68{
69	struct clone_data *clone_data = data;
70
71	char nkey[clone_data->target->keys.elt_size];
72	if (clone_data->clone_key == NULL)
73		memmove(nkey, key, sizeof(nkey));
74	else if (clone_data->clone_key(&nkey, key, clone_data->data) < 0)
75		return CBS_STOP;
76
77	char nvalue[clone_data->target->values.elt_size];
78	if (clone_data->clone_value == NULL) {
79		memmove(nvalue, value, sizeof(nvalue));
80	} else if (clone_data->clone_value(&nvalue, value,
81					 clone_data->data) < 0) {
82	fail:
83		if (clone_data->clone_key != NULL)
84			clone_data->dtor_key(&nkey, clone_data->data);
85		return CBS_STOP;
86	}
87
88	if (dict_insert(clone_data->target, nkey, nvalue) < 0) {
89		if (clone_data->clone_value != NULL)
90			clone_data->dtor_value(&nvalue, clone_data->data);
91		goto fail;
92	}
93
94	return CBS_CONT;
95}
96
97int
98dict_clone(struct dict *target, const struct dict *source,
99	   int (*clone_key)(void *tgt, const void *src, void *data),
100	   void (*dtor_key)(void *tgt, void *data),
101	   int (*clone_value)(void *tgt, const void *src, void *data),
102	   void (*dtor_value)(void *tgt, void *data),
103	   void *data)
104{
105	assert((clone_key != NULL) == (dtor_key != NULL));
106	assert((clone_value != NULL) == (dtor_value != NULL));
107
108	dict_init(target, source->keys.elt_size, source->values.elt_size,
109		  source->hash1, source->eq, source->hash2);
110	struct clone_data clone_data = {
111		target, clone_key, clone_value, dtor_key, dtor_value, data
112	};
113	if (dict_each((struct dict *)source, NULL,
114		      clone_cb, &clone_data) != NULL) {
115		dict_destroy(target, dtor_key, dtor_value, data);
116		return -1;
117	}
118	return 0;
119}
120
121size_t
122dict_size(const struct dict *dict)
123{
124	return dict->size;
125}
126
127int
128dict_empty(const struct dict *dict)
129{
130	return dict->size == 0;
131}
132
133struct destroy_data {
134	void (*dtor_key)(void *tgt, void *data);
135	void (*dtor_value)(void *tgt, void *data);
136	void *data;
137};
138
139static enum callback_status
140destroy_cb(void *key, void *value, void *data)
141{
142	struct destroy_data *destroy_data = data;
143	if (destroy_data->dtor_key)
144		destroy_data->dtor_key(key, destroy_data->data);
145	if (destroy_data->dtor_value)
146		destroy_data->dtor_value(value, destroy_data->data);
147	return CBS_CONT;
148}
149
150void
151dict_destroy(struct dict *dict,
152	     void (*dtor_key)(void *tgt, void *data),
153	     void (*dtor_value)(void *tgt, void *data),
154	     void *data)
155{
156	/* Some slots are unused (the corresponding keys and values
157	 * are uninitialized), so we can't call dtors for them.
158	 * Iterate DICT instead.  */
159	if (dtor_key != NULL || dtor_value != NULL) {
160		struct destroy_data destroy_data = {
161			dtor_key, dtor_value, data
162		};
163		dict_each(dict, NULL, destroy_cb, &destroy_data);
164	}
165
166	vect_destroy(&dict->keys, NULL, NULL);
167	vect_destroy(&dict->values, NULL, NULL);
168	vect_destroy(&dict->status, NULL, NULL);
169}
170
171static size_t
172default_secondary_hash(size_t pos)
173{
174	return pos % 97 + 1;
175}
176
177static size_t
178small_secondary_hash(size_t pos)
179{
180	return 1;
181}
182
183static inline size_t
184n(struct dict *dict)
185{
186	return vect_size(&dict->keys);
187}
188
189static inline size_t (*
190hash2(struct dict *dict))(size_t)
191{
192	if (dict->hash2 != NULL)
193		return dict->hash2;
194	else if (n(dict) < 100)
195		return small_secondary_hash;
196	else
197		return default_secondary_hash;
198}
199
200static void *
201getkey(struct dict *dict, size_t pos)
202{
203	return ((unsigned char *)dict->keys.data)
204		+ dict->keys.elt_size * pos;
205}
206
207static void *
208getvalue(struct dict *dict, size_t pos)
209{
210	return ((unsigned char *)dict->values.data)
211		+ dict->values.elt_size * pos;
212}
213
214static size_t
215find_slot(struct dict *dict, const void *key,
216	  int *foundp, int *should_rehash, size_t *pi)
217{
218	assert(n(dict) > 0);
219	size_t pos = dict->hash1(key) % n(dict);
220	size_t pos0 = -1;
221	size_t d = hash2(dict)(pos);
222	size_t i = 0;
223	*foundp = 0;
224
225	/* We skip over any taken or erased slots.  But we remember
226	 * the first erased that we find, and if we don't find the key
227	 * later, we return that position.  */
228	for (; bitp(dict, pos)->taken || bitp(dict, pos)->erased;
229	     pos = (pos + d) % n(dict)) {
230		if (pos0 == (size_t)-1 && bitp(dict, pos)->erased)
231			pos0 = pos;
232
233		/* If there is a loop, but we've seen an erased
234		 * element, take that one.  Otherwise give up.  */
235		if (++i > dict->size) {
236			if (pos0 != (size_t)-1)
237				break;
238			return (size_t)-1;
239		}
240
241		if (bitp(dict, pos)->taken
242		    && dict->eq(getkey(dict, pos), key)) {
243			*foundp = 1;
244			break;
245		}
246	}
247
248	if (!*foundp && pos0 != (size_t)-1)
249		pos = pos0;
250
251	/* If the hash table degraded into a linked list, request a
252	 * rehash.  */
253	if (should_rehash != NULL)
254		*should_rehash = i > 10 && i > n(dict) / 10;
255
256	if (pi != NULL)
257		*pi = i;
258	return pos;
259}
260
261static enum callback_status
262rehash_move(void *key, void *value, void *data)
263{
264	if (dict_insert(data, key, value) < 0)
265		return CBS_STOP;
266	else
267		return CBS_CONT;
268}
269
270static int
271rehash(struct dict *dict, size_t nn)
272{
273	assert(nn != n(dict));
274	int ret = -1;
275
276	struct dict tmp;
277	dict_init(&tmp, dict->keys.elt_size, dict->values.elt_size,
278		  dict->hash1, dict->eq, dict->hash2);
279
280	/* To honor all invariants (so that we can safely call
281	 * dict_destroy), we first make a request to _reserve_ enough
282	 * room in all vectors.  This has no observable effect on
283	 * contents of vectors.  */
284	if (vect_reserve(&tmp.keys, nn) < 0
285	    || vect_reserve(&tmp.values, nn) < 0
286	    || vect_reserve(&tmp.status, nn) < 0)
287		goto done;
288
289	/* Now that we know that there is enough size in vectors, we
290	 * simply bump the size.  */
291	tmp.keys.size = nn;
292	tmp.values.size = nn;
293	size_t old_size = tmp.status.size;
294	tmp.status.size = nn;
295	memset(VECT_ELEMENT(&tmp.status, struct status_bits, old_size),
296	       0, (tmp.status.size - old_size) * tmp.status.elt_size);
297
298	/* At this point, TMP is once more an empty dictionary with NN
299	 * slots.  Now move stuff from DICT to TMP.  */
300	if (dict_each(dict, NULL, rehash_move, &tmp) != NULL)
301		goto done;
302
303	/* And now swap contents of DICT and TMP, and we are done.  */
304	{
305		struct dict tmp2 = *dict;
306		*dict = tmp;
307		tmp = tmp2;
308	}
309
310	ret = 0;
311
312done:
313	/* We only want to release the containers, not the actual data
314	 * that they hold, so it's fine if we don't pass any dtor.  */
315	dict_destroy(&tmp, NULL, NULL, NULL);
316	return ret;
317
318}
319
320static const size_t primes[] = {
321	13, 31, 61, 127, 251, 509, 1021, 2039, 4093,
322	8191, 16381, 32749, 65521, 130981, 0
323};
324
325static size_t
326larger_size(size_t current)
327{
328	if (current == 0)
329		return primes[0];
330
331	if (current < primes[sizeof(primes)/sizeof(*primes) - 2]) {
332		size_t i;
333		for (i = 0; primes[i] != 0; ++i)
334			if (primes[i] > current)
335				return primes[i];
336		abort();
337	}
338
339	/* We ran out of primes, so invent a new one.  The following
340	 * gives primes until about 17M elements (and then some more
341	 * later).  */
342	return 2 * current + 6585;
343}
344
345static size_t
346smaller_size(size_t current)
347{
348	if (current <= primes[0])
349		return primes[0];
350
351	if (current <= primes[sizeof(primes)/sizeof(*primes) - 2]) {
352		size_t i;
353		size_t prev = 0;
354		for (i = 0; primes[i] != 0; ++i) {
355			if (primes[i] >= current)
356				return prev;
357			prev = primes[i];
358		}
359		abort();
360	}
361
362	return (current - 6585) / 2;
363}
364
365int
366dict_insert(struct dict *dict, void *key, void *value)
367{
368	if (n(dict) == 0 || dict->size > 0.7 * n(dict))
369	rehash:
370		if (rehash(dict, larger_size(n(dict))) < 0)
371			return -1;
372
373	int found;
374	int should_rehash;
375	size_t slot_n = find_slot(dict, key, &found, &should_rehash, NULL);
376	if (slot_n == (size_t)-1)
377		return -1;
378	if (found)
379		return 1;
380	assert(!bitp(dict, slot_n)->taken);
381
382	/* If rehash was requested, do that, and retry.  But just live
383	 * with it for apparently sparse tables.  No resizing can fix
384	 * a rubbish hash.  */
385	if (should_rehash && dict->size > 0.3 * n(dict))
386		goto rehash;
387
388	memmove(getkey(dict, slot_n), key, dict->keys.elt_size);
389	memmove(getvalue(dict, slot_n), value, dict->values.elt_size);
390
391	bitp(dict, slot_n)->taken = 1;
392	bitp(dict, slot_n)->erased = 0;
393	++dict->size;
394
395	return 0;
396}
397
398void *
399dict_find(struct dict *dict, const void *key)
400{
401	if (dict->size == 0)
402		return NULL;
403	assert(n(dict) > 0);
404
405	int found;
406	size_t slot_n = find_slot(dict, key, &found, NULL, NULL);
407	if (found)
408		return getvalue(dict, slot_n);
409	else
410		return NULL;
411}
412
413int
414dict_erase(struct dict *dict, const void *key,
415	   void (*dtor_key)(void *tgt, void *data),
416	   void (*dtor_value)(void *tgt, void *data),
417	   void *data)
418{
419	int found;
420	size_t i;
421	size_t slot_n = find_slot(dict, key, &found, NULL, &i);
422	if (!found)
423		return -1;
424
425	if (dtor_key != NULL)
426		dtor_key(getkey(dict, slot_n), data);
427	if (dtor_value != NULL)
428		dtor_value(getvalue(dict, slot_n), data);
429
430	bitp(dict, slot_n)->taken = 0;
431	bitp(dict, slot_n)->erased = 1;
432	--dict->size;
433
434	if (dict->size < 0.3 * n(dict)) {
435		size_t smaller = smaller_size(n(dict));
436		if (smaller != n(dict))
437			/* Don't mind if it fails when shrinking.  */
438			rehash(dict, smaller);
439	}
440
441	return 0;
442}
443
444void *
445dict_each(struct dict *dict, void *start_after,
446	  enum callback_status (*cb)(void *, void *, void *), void *data)
447{
448	size_t i;
449	if (start_after != NULL)
450		i = ((start_after - dict->keys.data) / dict->keys.elt_size) + 1;
451	else
452		i = 0;
453
454	for (; i < dict->keys.size; ++i)
455		if (bitp(dict, i)->taken && !bitp(dict, i)->erased) {
456			void *key = getkey(dict, i);
457			if (cb(key, getvalue(dict, i), data) != CBS_CONT)
458				return key;
459		}
460
461	return NULL;
462}
463
464size_t
465dict_hash_int(const int *key)
466{
467	return (size_t)(*key * 2654435761);
468}
469
470int
471dict_eq_int(const int *key1, const int *key2)
472{
473	return *key1 == *key2;
474}
475
476size_t
477dict_hash_string(const char **key)
478{
479	size_t h = 5381;
480	const char *str = *key;
481	while (*str != 0)
482		h = h * 33 ^ *str++;
483	return h;
484}
485
486int
487dict_eq_string(const char **key1, const char **key2)
488{
489	return strcmp(*key1, *key2) == 0;
490}
491
492void
493dict_dtor_string(const char **key, void *data)
494{
495	free((char *)*key);
496}
497
498int
499dict_clone_string(const char **tgt, const char **src, void *data)
500{
501	*tgt = strdup(*src);
502	return *tgt != NULL ? 0 : -1;
503}
504
505#ifdef TEST
506static enum callback_status
507dump(int *key, int *value, void *data)
508{
509	char *seen = data;
510	assert(seen[*key] == 0);
511	seen[*key] = 1;
512	assert(*value == *key * 2 + 1);
513	return CBS_STOP;
514}
515
516static size_t
517dict_hash_int_silly(const int *key)
518{
519	return *key % 10;
520}
521
522static void
523verify(struct dict *di, size_t len, char *seen)
524{
525	size_t ct = 0;
526	int *it;
527	for (it = NULL; (it = DICT_EACH(di, int, int, it, dump, seen)) != NULL;)
528		ct++;
529	assert(ct == len);
530	memset(seen, 0, len);
531}
532
533static enum callback_status
534fill_keys(int *key, int *value, void *data)
535{
536	int *array = data;
537	array[++array[0]] = *key;
538	return CBS_CONT;
539}
540
541static void
542test1(void)
543{
544	struct dict di;
545	DICT_INIT(&di, int, int, dict_hash_int, dict_eq_int, NULL);
546
547	char seen[100000] = {};
548	size_t i;
549	for (i = 0; i < sizeof(seen); ++i) {
550		int key = i;
551		int value = 2 * i + 1;
552		DICT_INSERT(&di, &key, &value);
553		int *valp = DICT_FIND_REF(&di, &key, int);
554		assert(valp != NULL);
555		assert(*valp == value);
556		assert(dict_size(&di) == i + 1);
557	}
558
559	verify(&di, sizeof(seen), seen);
560
561	struct dict d2;
562	DICT_CLONE(&d2, &di, int, int, NULL, NULL, NULL, NULL, NULL);
563	DICT_DESTROY(&di, int, int, NULL, NULL, NULL);
564	verify(&d2, sizeof(seen), seen);
565
566	/* Now we try to gradually erase all elements.  We can't erase
567	 * inside a DICT_EACH call, so copy first keys to a separate
568	 * memory area first.  */
569	int keys[d2.size + 1];
570	size_t ct = 0;
571	keys[0] = 0;
572	DICT_EACH(&d2, int, int, NULL, fill_keys, keys);
573	for (i = 0; i < (size_t)keys[0]; ++i) {
574		assert(DICT_ERASE(&d2, &keys[i + 1], int,
575				  NULL, NULL, NULL) == 0);
576		++ct;
577	}
578	assert(ct == sizeof(seen));
579	DICT_DESTROY(&d2, int, int, NULL, NULL, NULL);
580}
581
582static void
583test_erase(void)
584{
585	int i;
586
587	/* To test erase, we need a relatively bad hash function, so
588	 * that there are some overlapping chains in the table.  */
589	struct dict d2;
590	DICT_INIT(&d2, int, int, dict_hash_int_silly, dict_eq_int, NULL);
591	const int limit = 500;
592	for (i = 0; i < limit; ++i) {
593		int key = 2 * i + 1;
594		int value = 2 * key + 1;
595		DICT_INSERT(&d2, &key, &value);
596	}
597
598	/* Now we try to delete each of the keys, and verify that none
599	 * of the chains was broken.  */
600	for (i = 0; i < limit; ++i) {
601		struct dict copy;
602		DICT_CLONE(&copy, &d2, int, int, NULL, NULL, NULL, NULL, NULL);
603		int key = 2 * i + 1;
604		DICT_ERASE(&copy, &key, int, NULL, NULL, NULL);
605		assert(dict_size(&copy) == dict_size(&d2) - 1);
606
607		int j;
608		for (j = 0; j < limit; ++j) {
609			key = 2 * j + 1;
610			int *valp = DICT_FIND_REF(&copy, &key, int);
611			if (i != j) {
612				assert(valp != NULL);
613				assert(*valp == 2 * key + 1);
614			} else {
615				assert(valp == NULL);
616			}
617		}
618
619		DICT_DESTROY(&copy, int, int, NULL, NULL, NULL);
620	}
621	DICT_DESTROY(&d2, int, int, NULL, NULL, NULL);
622}
623
624int main(int argc, char *argv[])
625{
626	test1();
627	test_erase();
628	return 0;
629}
630
631#endif
632