dict.c revision d7e4ca82e1cf20bb2605befb1da74dd1688c706e
1/* 2 * This file is part of ltrace. 3 * Copyright (C) 2012 Petr Machata, Red Hat Inc. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation; either version 2 of the 8 * License, or (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 18 * 02110-1301 USA 19 */ 20 21#include <string.h> 22#include <stdlib.h> 23#include <stdio.h> 24#include "dict.h" 25 26struct status_bits { 27 unsigned char taken : 1; 28 unsigned char erased : 1; 29}; 30 31static struct status_bits * 32bitp(struct dict *dict, size_t n) 33{ 34 return VECT_ELEMENT(&dict->status, struct status_bits, n); 35} 36 37void 38dict_init(struct dict *dict, 39 size_t key_size, size_t value_size, 40 size_t (*hash1)(const void *), 41 int (*eq)(const void *, const void *), 42 size_t (*hash2)(size_t)) 43{ 44 assert(hash1 != NULL); 45 assert(eq != NULL); 46 47 vect_init(&dict->keys, key_size); 48 vect_init(&dict->values, value_size); 49 VECT_INIT(&dict->status, struct status_bits); 50 dict->size = 0; 51 52 dict->hash1 = hash1; 53 dict->hash2 = hash2; 54 dict->eq = eq; 55} 56 57struct clone_data { 58 struct dict *target; 59 int (*clone_key)(void *tgt, const void *src, void *data); 60 int (*clone_value)(void *tgt, const void *src, void *data); 61 void (*dtor_key)(void *tgt, void *data); 62 void (*dtor_value)(void *tgt, void *data); 63 void *data; 64}; 65 66enum callback_status 67clone_cb(void *key, void *value, void *data) 68{ 69 struct clone_data *clone_data = data; 70 71 char nkey[clone_data->target->keys.elt_size]; 72 if (clone_data->clone_key == NULL) 73 memmove(nkey, key, sizeof(nkey)); 74 else if (clone_data->clone_key(&nkey, key, clone_data->data) < 0) 75 return CBS_STOP; 76 77 char nvalue[clone_data->target->values.elt_size]; 78 if (clone_data->clone_value == NULL) { 79 memmove(nvalue, value, sizeof(nvalue)); 80 } else if (clone_data->clone_value(&nvalue, value, 81 clone_data->data) < 0) { 82 fail: 83 if (clone_data->clone_key != NULL) 84 clone_data->dtor_key(&nkey, clone_data->data); 85 return CBS_STOP; 86 } 87 88 if (dict_insert(clone_data->target, nkey, nvalue) < 0) { 89 if (clone_data->clone_value != NULL) 90 clone_data->dtor_value(&nvalue, clone_data->data); 91 goto fail; 92 } 93 94 return CBS_CONT; 95} 96 97int 98dict_clone(struct dict *target, const struct dict *source, 99 int (*clone_key)(void *tgt, const void *src, void *data), 100 void (*dtor_key)(void *tgt, void *data), 101 int (*clone_value)(void *tgt, const void *src, void *data), 102 void (*dtor_value)(void *tgt, void *data), 103 void *data) 104{ 105 assert((clone_key != NULL) == (dtor_key != NULL)); 106 assert((clone_value != NULL) == (dtor_value != NULL)); 107 108 dict_init(target, source->keys.elt_size, source->values.elt_size, 109 source->hash1, source->eq, source->hash2); 110 struct clone_data clone_data = { 111 target, clone_key, clone_value, dtor_key, dtor_value, data 112 }; 113 if (dict_each((struct dict *)source, NULL, 114 clone_cb, &clone_data) != NULL) { 115 dict_destroy(target, dtor_key, dtor_value, data); 116 return -1; 117 } 118 return 0; 119} 120 121size_t 122dict_size(const struct dict *dict) 123{ 124 return dict->size; 125} 126 127int 128dict_empty(const struct dict *dict) 129{ 130 return dict->size == 0; 131} 132 133struct destroy_data { 134 void (*dtor_key)(void *tgt, void *data); 135 void (*dtor_value)(void *tgt, void *data); 136 void *data; 137}; 138 139enum callback_status 140destroy_cb(void *key, void *value, void *data) 141{ 142 struct destroy_data *destroy_data = data; 143 if (destroy_data->dtor_key) 144 destroy_data->dtor_key(key, destroy_data->data); 145 if (destroy_data->dtor_value) 146 destroy_data->dtor_value(value, destroy_data->data); 147 return CBS_CONT; 148} 149 150void 151dict_destroy(struct dict *dict, 152 void (*dtor_key)(void *tgt, void *data), 153 void (*dtor_value)(void *tgt, void *data), 154 void *data) 155{ 156 /* Some keys and values are not initialized, so we can't call 157 * dtors for them. Iterate DICT instead. */ 158 if (dtor_key != NULL || dtor_value != NULL) { 159 struct destroy_data destroy_data = { 160 dtor_key, dtor_value, data 161 }; 162 dict_each(dict, NULL, destroy_cb, &destroy_data); 163 } 164 165 vect_destroy(&dict->keys, NULL, NULL); 166 vect_destroy(&dict->values, NULL, NULL); 167 vect_destroy(&dict->status, NULL, NULL); 168} 169 170static size_t 171default_secondary_hash(size_t pos) 172{ 173 return pos % 97 + 1; 174} 175 176static inline size_t 177n(struct dict *dict) 178{ 179 return vect_size(&dict->keys); 180} 181 182static inline size_t (* 183hash2(struct dict *dict))(size_t) 184{ 185 if (dict->hash2 != NULL) 186 return dict->hash2; 187 else 188 return default_secondary_hash; 189} 190 191static void * 192getkey(struct dict *dict, size_t pos) 193{ 194 return ((unsigned char *)dict->keys.data) 195 + dict->keys.elt_size * pos; 196} 197 198static void * 199getvalue(struct dict *dict, size_t pos) 200{ 201 return ((unsigned char *)dict->values.data) 202 + dict->values.elt_size * pos; 203} 204 205static size_t 206find_slot(struct dict *dict, const void *key, 207 int *foundp, int *should_rehash, size_t *pi) 208{ 209 size_t pos = dict->hash1(key) % n(dict); 210 size_t pos0 = -1; 211 size_t d = hash2(dict)(pos); 212 size_t i = 0; 213 *foundp = 0; 214 215 /* We skip over any taken or erased slots. But we remember 216 * the first erased that we find, and if we don't find the key 217 * later, we return that position. */ 218 for (; bitp(dict, pos)->taken || bitp(dict, pos)->erased; 219 pos = (pos + d) % n(dict)) { 220 221 if (pos0 == (size_t)-1 && bitp(dict, pos)->erased) 222 pos0 = pos; 223 224 if (++i > dict->size) 225 break; 226 227 if (bitp(dict, pos)->taken 228 && dict->eq(getkey(dict, pos), key)) { 229 *foundp = 1; 230 break; 231 } 232 } 233 234 if (!*foundp && pos0 != (size_t)-1) 235 pos = pos0; 236 237 /* If the hash table degraded into a linked list, request a 238 * rehash. */ 239 if (should_rehash != NULL) 240 *should_rehash = i > 10 && i > n(dict) / 10; 241 242 if (pi != NULL) 243 *pi = i; 244 return pos; 245} 246 247enum callback_status 248rehash_move(void *key, void *value, void *data) 249{ 250 if (dict_insert(data, key, value) < 0) 251 return CBS_STOP; 252 else 253 return CBS_CONT; 254} 255 256int 257rehash(struct dict *dict, size_t nn) 258{ 259 int ret = -1; 260 261 struct dict tmp; 262 dict_init(&tmp, dict->keys.elt_size, dict->values.elt_size, 263 dict->hash1, dict->eq, dict->hash2); 264 265 /* To honor all invariants (so that we can safely call 266 * dict_destroy), we first make a request to _reserve_ enough 267 * room in all vectors. This has no observable effect on 268 * contents of vectors. */ 269 if (vect_reserve(&tmp.keys, nn) < 0 270 || vect_reserve(&tmp.values, nn) < 0 271 || vect_reserve(&tmp.status, nn) < 0) 272 goto done; 273 274 /* Now that we know that there is enough size in vectors, we 275 * simply bump the size. */ 276 tmp.keys.size = nn; 277 tmp.values.size = nn; 278 size_t old_size = tmp.status.size; 279 tmp.status.size = nn; 280 memset(VECT_ELEMENT(&tmp.status, struct status_bits, old_size), 281 0, (tmp.status.size - old_size) * tmp.status.elt_size); 282 283 /* At this point, TMP is once more an empty dictionary with NN 284 * slots. Now move stuff from DICT to TMP. */ 285 if (dict_each(dict, NULL, rehash_move, &tmp) != NULL) 286 goto done; 287 288 /* And now swap contents of DICT and TMP, and we are done. */ 289 { 290 struct dict tmp2 = *dict; 291 *dict = tmp; 292 tmp = tmp2; 293 } 294 295 ret = 0; 296 297done: 298 /* We only want to release the containers, not the actual data 299 * that they hold, so it's fine if we don't pass any dtor. */ 300 dict_destroy(&tmp, NULL, NULL, NULL); 301 return ret; 302 303} 304 305static const size_t primes[] = { 306 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 307 8191, 16381, 32749, 65521, 130981, 0 308}; 309 310static size_t 311larger_size(size_t current) 312{ 313 if (current == 0) 314 return primes[0]; 315 316 if (current < primes[sizeof(primes)/sizeof(*primes) - 2]) { 317 size_t i; 318 for (i = 0; primes[i] != 0; ++i) 319 if (primes[i] > current) 320 return primes[i]; 321 abort(); 322 } 323 324 /* We ran out of primes, so invent a new one. The following 325 * gives primes until about 17M elements (and then some more 326 * later). */ 327 return 2 * current + 6585; 328} 329 330static size_t 331smaller_size(size_t current) 332{ 333 if (current <= primes[0]) 334 return primes[0]; 335 336 if (current <= primes[sizeof(primes)/sizeof(*primes) - 2]) { 337 size_t i; 338 size_t prev = 0; 339 for (i = 0; primes[i] != 0; ++i) { 340 if (primes[i] >= current) 341 return prev; 342 prev = primes[i]; 343 } 344 abort(); 345 } 346 347 return (current - 6585) / 2; 348} 349 350int 351dict_insert(struct dict *dict, void *key, void *value) 352{ 353 if (n(dict) == 0 || dict->size > 0.7 * n(dict)) 354 rehash: 355 if (rehash(dict, larger_size(n(dict))) < 0) 356 return -1; 357 358 int found; 359 int should_rehash; 360 size_t slot_n = find_slot(dict, key, &found, &should_rehash, NULL); 361 362 if (found) 363 return 1; 364 365 /* If rehash was requested, do that, and retry. But just live 366 * with it for apparently sparse tables. No resizing can fix 367 * a rubbish hash. */ 368 if (should_rehash && dict->size > 0.3 * n(dict)) 369 goto rehash; 370 371 memmove(getkey(dict, slot_n), key, dict->keys.elt_size); 372 memmove(getvalue(dict, slot_n), value, dict->values.elt_size); 373 374 bitp(dict, slot_n)->taken = 1; 375 bitp(dict, slot_n)->erased = 0; 376 ++dict->size; 377 378 return 0; 379} 380 381void * 382dict_find(struct dict *dict, const void *key) 383{ 384 if (dict->size == 0) 385 return NULL; 386 387 int found; 388 size_t slot_n = find_slot(dict, key, &found, NULL, NULL); 389 if (found) 390 return getvalue(dict, slot_n); 391 else 392 return NULL; 393} 394 395int 396dict_erase(struct dict *dict, const void *key, 397 void (*dtor_key)(void *tgt, void *data), 398 void (*dtor_value)(void *tgt, void *data), 399 void *data) 400{ 401 int found; 402 size_t i; 403 size_t slot_n = find_slot(dict, key, &found, NULL, &i); 404 if (!found) 405 return -1; 406 407 if (dtor_key != NULL) 408 dtor_key(getkey(dict, slot_n), data); 409 if (dtor_value != NULL) 410 dtor_value(getvalue(dict, slot_n), data); 411 412 bitp(dict, slot_n)->taken = 0; 413 bitp(dict, slot_n)->erased = 1; 414 --dict->size; 415 416 if (dict->size < 0.3 * n(dict)) { 417 /* Don't mind if it fails when shrinking. */ 418 rehash(dict, smaller_size(n(dict))); 419 } 420 421 return 0; 422} 423 424void * 425dict_each(struct dict *dict, void *start_after, 426 enum callback_status (*cb)(void *, void *, void *), void *data) 427{ 428 size_t i; 429 if (start_after != NULL) 430 i = ((start_after - dict->keys.data) / dict->keys.elt_size) + 1; 431 else 432 i = 0; 433 434 for (; i < dict->keys.size; ++i) 435 if (bitp(dict, i)->taken && !bitp(dict, i)->erased) { 436 void *key = getkey(dict, i); 437 if (cb(key, getvalue(dict, i), data) != CBS_CONT) 438 return key; 439 } 440 441 return NULL; 442} 443 444size_t 445dict_hash_int(const int *key) 446{ 447 return (size_t)(*key * 2654435761); 448} 449 450int 451dict_eq_int(const int *key1, const int *key2) 452{ 453 return *key1 == *key2; 454} 455 456size_t 457dict_hash_string(const char **key) 458{ 459 size_t h = 5381; 460 const char *str = *key; 461 while (*str != 0) 462 h = h * 33 ^ *str++; 463 return h; 464} 465 466int 467dict_eq_string(const char **key1, const char **key2) 468{ 469 return strcmp(*key1, *key2) == 0; 470} 471 472#ifdef TEST 473static enum callback_status 474dump(int *key, int *value, void *data) 475{ 476 char *seen = data; 477 assert(seen[*key] == 0); 478 seen[*key] = 1; 479 assert(*value == *key * 2 + 1); 480 return CBS_STOP; 481} 482 483static size_t 484dict_hash_int_silly(const int *key) 485{ 486 return *key % 10; 487} 488 489static void 490verify(struct dict *di, size_t len, char *seen) 491{ 492 size_t ct = 0; 493 int *it; 494 for (it = NULL; (it = DICT_EACH(di, int, int, it, dump, seen)) != NULL;) 495 ct++; 496 assert(ct == len); 497 memset(seen, 0, len); 498} 499 500static enum callback_status 501fill_keys(int *key, int *value, void *data) 502{ 503 int *array = data; 504 array[++array[0]] = *key; 505 return CBS_CONT; 506} 507 508static void 509test1(void) 510{ 511 struct dict di; 512 DICT_INIT(&di, int, int, dict_hash_int, dict_eq_int, NULL); 513 514 char seen[100000] = {}; 515 size_t i; 516 for (i = 0; i < sizeof(seen); ++i) { 517 int key = i; 518 int value = 2 * i + 1; 519 DICT_INSERT(&di, &key, &value); 520 int *valp = DICT_FIND(&di, &key, int); 521 assert(valp != NULL); 522 assert(*valp == value); 523 assert(dict_size(&di) == i + 1); 524 } 525 526 verify(&di, sizeof(seen), seen); 527 528 struct dict d2; 529 DICT_CLONE(&d2, &di, int, int, NULL, NULL, NULL, NULL, NULL); 530 DICT_DESTROY(&di, int, int, NULL, NULL, NULL); 531 verify(&d2, sizeof(seen), seen); 532 533 /* Now we try to gradually erase all elements. We can't erase 534 * inside a DICT_EACH call, so copy first keys to a separate 535 * memory area first. */ 536 int keys[d2.size + 1]; 537 size_t ct = 0; 538 keys[0] = 0; 539 DICT_EACH(&d2, int, int, NULL, fill_keys, keys); 540 for (i = 0; i < (size_t)keys[0]; ++i) { 541 assert(DICT_ERASE(&d2, &keys[i + 1], int, 542 NULL, NULL, NULL) == 0); 543 ++ct; 544 } 545 assert(ct == sizeof(seen)); 546 DICT_DESTROY(&d2, int, int, NULL, NULL, NULL); 547} 548 549static void 550test_erase(void) 551{ 552 int i; 553 554 /* To test erase, we need a relatively bad hash function, so 555 * that there are some overlapping chains in the table. */ 556 struct dict d2; 557 DICT_INIT(&d2, int, int, dict_hash_int_silly, dict_eq_int, NULL); 558 const int limit = 500; 559 for (i = 0; i < limit; ++i) { 560 int key = 2 * i + 1; 561 int value = 2 * key + 1; 562 DICT_INSERT(&d2, &key, &value); 563 } 564 565 /* Now we try to delete each of the keys, and verify that none 566 * of the chains was broken. */ 567 for (i = 0; i < limit; ++i) { 568 struct dict copy; 569 DICT_CLONE(©, &d2, int, int, NULL, NULL, NULL, NULL, NULL); 570 int key = 2 * i + 1; 571 DICT_ERASE(©, &key, int, NULL, NULL, NULL); 572 assert(dict_size(©) == dict_size(&d2) - 1); 573 574 int j; 575 for (j = 0; j < limit; ++j) { 576 key = 2 * j + 1; 577 int *valp = DICT_FIND(©, &key, int); 578 if (i != j) { 579 assert(valp != NULL); 580 assert(*valp == 2 * key + 1); 581 } else { 582 assert(valp == NULL); 583 } 584 } 585 586 DICT_DESTROY(©, int, int, NULL, NULL, NULL); 587 } 588 DICT_DESTROY(&d2, int, int, NULL, NULL, NULL); 589} 590 591int main(int argc, char *argv[]) 592{ 593 test1(); 594 test_erase(); 595 return 0; 596} 597 598#endif 599