1/* 2 * $Id: linkhash.c,v 1.4 2006/01/26 02:16:28 mclark Exp $ 3 * 4 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd. 5 * Michael Clark <michael@metaparadigm.com> 6 * Copyright (c) 2009 Hewlett-Packard Development Company, L.P. 7 * 8 * This library is free software; you can redistribute it and/or modify 9 * it under the terms of the MIT license. See COPYING for details. 10 * 11 */ 12 13#include <stdio.h> 14#include <string.h> 15#include <stdlib.h> 16#include <stdarg.h> 17#include <stddef.h> 18#include <limits.h> 19 20#ifdef HAVE_ENDIAN_H 21# include <endian.h> /* attempt to define endianness */ 22#endif 23 24#include "random_seed.h" 25#include "linkhash.h" 26 27void lh_abort(const char *msg, ...) 28{ 29 va_list ap; 30 va_start(ap, msg); 31 vprintf(msg, ap); 32 va_end(ap); 33 exit(1); 34} 35 36unsigned long lh_ptr_hash(const void *k) 37{ 38 /* CAW: refactored to be 64bit nice */ 39 return (unsigned long)((((ptrdiff_t)k * LH_PRIME) >> 4) & ULONG_MAX); 40} 41 42int lh_ptr_equal(const void *k1, const void *k2) 43{ 44 return (k1 == k2); 45} 46 47/* 48 * hashlittle from lookup3.c, by Bob Jenkins, May 2006, Public Domain. 49 * http://burtleburtle.net/bob/c/lookup3.c 50 * minor modifications to make functions static so no symbols are exported 51 * minor mofifications to compile with -Werror 52 */ 53 54/* 55------------------------------------------------------------------------------- 56lookup3.c, by Bob Jenkins, May 2006, Public Domain. 57 58These are functions for producing 32-bit hashes for hash table lookup. 59hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() 60are externally useful functions. Routines to test the hash are included 61if SELF_TEST is defined. You can use this free for any purpose. It's in 62the public domain. It has no warranty. 63 64You probably want to use hashlittle(). hashlittle() and hashbig() 65hash byte arrays. hashlittle() is is faster than hashbig() on 66little-endian machines. Intel and AMD are little-endian machines. 67On second thought, you probably want hashlittle2(), which is identical to 68hashlittle() except it returns two 32-bit hashes for the price of one. 69You could implement hashbig2() if you wanted but I haven't bothered here. 70 71If you want to find a hash of, say, exactly 7 integers, do 72 a = i1; b = i2; c = i3; 73 mix(a,b,c); 74 a += i4; b += i5; c += i6; 75 mix(a,b,c); 76 a += i7; 77 final(a,b,c); 78then use c as the hash value. If you have a variable length array of 794-byte integers to hash, use hashword(). If you have a byte array (like 80a character string), use hashlittle(). If you have several byte arrays, or 81a mix of things, see the comments above hashlittle(). 82 83Why is this so big? I read 12 bytes at a time into 3 4-byte integers, 84then mix those integers. This is fast (you can do a lot more thorough 85mixing with 12*3 instructions on 3 integers than you can with 3 instructions 86on 1 byte), but shoehorning those bytes into integers efficiently is messy. 87------------------------------------------------------------------------------- 88*/ 89 90/* 91 * My best guess at if you are big-endian or little-endian. This may 92 * need adjustment. 93 */ 94#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ 95 __BYTE_ORDER == __LITTLE_ENDIAN) || \ 96 (defined(i386) || defined(__i386__) || defined(__i486__) || \ 97 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL)) 98# define HASH_LITTLE_ENDIAN 1 99# define HASH_BIG_ENDIAN 0 100#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ 101 __BYTE_ORDER == __BIG_ENDIAN) || \ 102 (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel)) 103# define HASH_LITTLE_ENDIAN 0 104# define HASH_BIG_ENDIAN 1 105#else 106# define HASH_LITTLE_ENDIAN 0 107# define HASH_BIG_ENDIAN 0 108#endif 109 110#define hashsize(n) ((uint32_t)1<<(n)) 111#define hashmask(n) (hashsize(n)-1) 112#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) 113 114/* 115------------------------------------------------------------------------------- 116mix -- mix 3 32-bit values reversibly. 117 118This is reversible, so any information in (a,b,c) before mix() is 119still in (a,b,c) after mix(). 120 121If four pairs of (a,b,c) inputs are run through mix(), or through 122mix() in reverse, there are at least 32 bits of the output that 123are sometimes the same for one pair and different for another pair. 124This was tested for: 125* pairs that differed by one bit, by two bits, in any combination 126 of top bits of (a,b,c), or in any combination of bottom bits of 127 (a,b,c). 128* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 129 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 130 is commonly produced by subtraction) look like a single 1-bit 131 difference. 132* the base values were pseudorandom, all zero but one bit set, or 133 all zero plus a counter that starts at zero. 134 135Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that 136satisfy this are 137 4 6 8 16 19 4 138 9 15 3 18 27 15 139 14 9 3 7 17 3 140Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing 141for "differ" defined as + with a one-bit base and a two-bit delta. I 142used http://burtleburtle.net/bob/hash/avalanche.html to choose 143the operations, constants, and arrangements of the variables. 144 145This does not achieve avalanche. There are input bits of (a,b,c) 146that fail to affect some output bits of (a,b,c), especially of a. The 147most thoroughly mixed value is c, but it doesn't really even achieve 148avalanche in c. 149 150This allows some parallelism. Read-after-writes are good at doubling 151the number of bits affected, so the goal of mixing pulls in the opposite 152direction as the goal of parallelism. I did what I could. Rotates 153seem to cost as much as shifts on every machine I could lay my hands 154on, and rotates are much kinder to the top and bottom bits, so I used 155rotates. 156------------------------------------------------------------------------------- 157*/ 158#define mix(a,b,c) \ 159{ \ 160 a -= c; a ^= rot(c, 4); c += b; \ 161 b -= a; b ^= rot(a, 6); a += c; \ 162 c -= b; c ^= rot(b, 8); b += a; \ 163 a -= c; a ^= rot(c,16); c += b; \ 164 b -= a; b ^= rot(a,19); a += c; \ 165 c -= b; c ^= rot(b, 4); b += a; \ 166} 167 168/* 169------------------------------------------------------------------------------- 170final -- final mixing of 3 32-bit values (a,b,c) into c 171 172Pairs of (a,b,c) values differing in only a few bits will usually 173produce values of c that look totally different. This was tested for 174* pairs that differed by one bit, by two bits, in any combination 175 of top bits of (a,b,c), or in any combination of bottom bits of 176 (a,b,c). 177* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 178 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 179 is commonly produced by subtraction) look like a single 1-bit 180 difference. 181* the base values were pseudorandom, all zero but one bit set, or 182 all zero plus a counter that starts at zero. 183 184These constants passed: 185 14 11 25 16 4 14 24 186 12 14 25 16 4 14 24 187and these came close: 188 4 8 15 26 3 22 24 189 10 8 15 26 3 22 24 190 11 8 15 26 3 22 24 191------------------------------------------------------------------------------- 192*/ 193#define final(a,b,c) \ 194{ \ 195 c ^= b; c -= rot(b,14); \ 196 a ^= c; a -= rot(c,11); \ 197 b ^= a; b -= rot(a,25); \ 198 c ^= b; c -= rot(b,16); \ 199 a ^= c; a -= rot(c,4); \ 200 b ^= a; b -= rot(a,14); \ 201 c ^= b; c -= rot(b,24); \ 202} 203 204 205/* 206------------------------------------------------------------------------------- 207hashlittle() -- hash a variable-length key into a 32-bit value 208 k : the key (the unaligned variable-length array of bytes) 209 length : the length of the key, counting by bytes 210 initval : can be any 4-byte value 211Returns a 32-bit value. Every bit of the key affects every bit of 212the return value. Two keys differing by one or two bits will have 213totally different hash values. 214 215The best hash table sizes are powers of 2. There is no need to do 216mod a prime (mod is sooo slow!). If you need less than 32 bits, 217use a bitmask. For example, if you need only 10 bits, do 218 h = (h & hashmask(10)); 219In which case, the hash table should have hashsize(10) elements. 220 221If you are hashing n strings (uint8_t **)k, do it like this: 222 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h); 223 224By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this 225code any way you wish, private, educational, or commercial. It's free. 226 227Use for hash table lookup, or anything where one collision in 2^^32 is 228acceptable. Do NOT use for cryptographic purposes. 229------------------------------------------------------------------------------- 230*/ 231 232static uint32_t hashlittle( const void *key, size_t length, uint32_t initval) 233{ 234 uint32_t a,b,c; /* internal state */ 235 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ 236 237 /* Set up the internal state */ 238 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; 239 240 u.ptr = key; 241 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { 242 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ 243 244 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ 245 while (length > 12) 246 { 247 a += k[0]; 248 b += k[1]; 249 c += k[2]; 250 mix(a,b,c); 251 length -= 12; 252 k += 3; 253 } 254 255 /*----------------------------- handle the last (probably partial) block */ 256 /* 257 * "k[2]&0xffffff" actually reads beyond the end of the string, but 258 * then masks off the part it's not allowed to read. Because the 259 * string is aligned, the masked-off tail is in the same word as the 260 * rest of the string. Every machine with memory protection I've seen 261 * does it on word boundaries, so is OK with this. But VALGRIND will 262 * still catch it and complain. The masking trick does make the hash 263 * noticably faster for short strings (like English words). 264 */ 265#ifndef VALGRIND 266 267 switch(length) 268 { 269 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 270 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; 271 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; 272 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; 273 case 8 : b+=k[1]; a+=k[0]; break; 274 case 7 : b+=k[1]&0xffffff; a+=k[0]; break; 275 case 6 : b+=k[1]&0xffff; a+=k[0]; break; 276 case 5 : b+=k[1]&0xff; a+=k[0]; break; 277 case 4 : a+=k[0]; break; 278 case 3 : a+=k[0]&0xffffff; break; 279 case 2 : a+=k[0]&0xffff; break; 280 case 1 : a+=k[0]&0xff; break; 281 case 0 : return c; /* zero length strings require no mixing */ 282 } 283 284#else /* make valgrind happy */ 285 286 const uint8_t *k8 = (const uint8_t *)k; 287 switch(length) 288 { 289 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 290 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ 291 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ 292 case 9 : c+=k8[8]; /* fall through */ 293 case 8 : b+=k[1]; a+=k[0]; break; 294 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ 295 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ 296 case 5 : b+=k8[4]; /* fall through */ 297 case 4 : a+=k[0]; break; 298 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ 299 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ 300 case 1 : a+=k8[0]; break; 301 case 0 : return c; 302 } 303 304#endif /* !valgrind */ 305 306 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { 307 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ 308 const uint8_t *k8; 309 310 /*--------------- all but last block: aligned reads and different mixing */ 311 while (length > 12) 312 { 313 a += k[0] + (((uint32_t)k[1])<<16); 314 b += k[2] + (((uint32_t)k[3])<<16); 315 c += k[4] + (((uint32_t)k[5])<<16); 316 mix(a,b,c); 317 length -= 12; 318 k += 6; 319 } 320 321 /*----------------------------- handle the last (probably partial) block */ 322 k8 = (const uint8_t *)k; 323 switch(length) 324 { 325 case 12: c+=k[4]+(((uint32_t)k[5])<<16); 326 b+=k[2]+(((uint32_t)k[3])<<16); 327 a+=k[0]+(((uint32_t)k[1])<<16); 328 break; 329 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ 330 case 10: c+=k[4]; 331 b+=k[2]+(((uint32_t)k[3])<<16); 332 a+=k[0]+(((uint32_t)k[1])<<16); 333 break; 334 case 9 : c+=k8[8]; /* fall through */ 335 case 8 : b+=k[2]+(((uint32_t)k[3])<<16); 336 a+=k[0]+(((uint32_t)k[1])<<16); 337 break; 338 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ 339 case 6 : b+=k[2]; 340 a+=k[0]+(((uint32_t)k[1])<<16); 341 break; 342 case 5 : b+=k8[4]; /* fall through */ 343 case 4 : a+=k[0]+(((uint32_t)k[1])<<16); 344 break; 345 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ 346 case 2 : a+=k[0]; 347 break; 348 case 1 : a+=k8[0]; 349 break; 350 case 0 : return c; /* zero length requires no mixing */ 351 } 352 353 } else { /* need to read the key one byte at a time */ 354 const uint8_t *k = (const uint8_t *)key; 355 356 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ 357 while (length > 12) 358 { 359 a += k[0]; 360 a += ((uint32_t)k[1])<<8; 361 a += ((uint32_t)k[2])<<16; 362 a += ((uint32_t)k[3])<<24; 363 b += k[4]; 364 b += ((uint32_t)k[5])<<8; 365 b += ((uint32_t)k[6])<<16; 366 b += ((uint32_t)k[7])<<24; 367 c += k[8]; 368 c += ((uint32_t)k[9])<<8; 369 c += ((uint32_t)k[10])<<16; 370 c += ((uint32_t)k[11])<<24; 371 mix(a,b,c); 372 length -= 12; 373 k += 12; 374 } 375 376 /*-------------------------------- last block: affect all 32 bits of (c) */ 377 switch(length) /* all the case statements fall through */ 378 { 379 case 12: c+=((uint32_t)k[11])<<24; 380 case 11: c+=((uint32_t)k[10])<<16; 381 case 10: c+=((uint32_t)k[9])<<8; 382 case 9 : c+=k[8]; 383 case 8 : b+=((uint32_t)k[7])<<24; 384 case 7 : b+=((uint32_t)k[6])<<16; 385 case 6 : b+=((uint32_t)k[5])<<8; 386 case 5 : b+=k[4]; 387 case 4 : a+=((uint32_t)k[3])<<24; 388 case 3 : a+=((uint32_t)k[2])<<16; 389 case 2 : a+=((uint32_t)k[1])<<8; 390 case 1 : a+=k[0]; 391 break; 392 case 0 : return c; 393 } 394 } 395 396 final(a,b,c); 397 return c; 398} 399 400unsigned long lh_char_hash(const void *k) 401{ 402 static volatile int random_seed = -1; 403 404 if (random_seed == -1) { 405 int seed; 406 /* we can't use -1 as it is the unitialized sentinel */ 407 while ((seed = json_c_get_random_seed()) == -1); 408#if defined __GNUC__ 409 __sync_val_compare_and_swap(&random_seed, -1, seed); 410#elif defined _MSC_VER 411 InterlockedCompareExchange(&random_seed, seed, -1); 412#else 413#warning "racy random seed initializtion if used by multiple threads" 414 random_seed = seed; /* potentially racy */ 415#endif 416 } 417 418 return hashlittle((const char*)k, strlen((const char*)k), random_seed); 419} 420 421int lh_char_equal(const void *k1, const void *k2) 422{ 423 return (strcmp((const char*)k1, (const char*)k2) == 0); 424} 425 426struct lh_table* lh_table_new(int size, const char *name, 427 lh_entry_free_fn *free_fn, 428 lh_hash_fn *hash_fn, 429 lh_equal_fn *equal_fn) 430{ 431 int i; 432 struct lh_table *t; 433 434 t = (struct lh_table*)calloc(1, sizeof(struct lh_table)); 435 if(!t) lh_abort("lh_table_new: calloc failed\n"); 436 t->count = 0; 437 t->size = size; 438 t->name = name; 439 t->table = (struct lh_entry*)calloc(size, sizeof(struct lh_entry)); 440 if(!t->table) lh_abort("lh_table_new: calloc failed\n"); 441 t->free_fn = free_fn; 442 t->hash_fn = hash_fn; 443 t->equal_fn = equal_fn; 444 for(i = 0; i < size; i++) t->table[i].k = LH_EMPTY; 445 return t; 446} 447 448struct lh_table* lh_kchar_table_new(int size, const char *name, 449 lh_entry_free_fn *free_fn) 450{ 451 return lh_table_new(size, name, free_fn, lh_char_hash, lh_char_equal); 452} 453 454struct lh_table* lh_kptr_table_new(int size, const char *name, 455 lh_entry_free_fn *free_fn) 456{ 457 return lh_table_new(size, name, free_fn, lh_ptr_hash, lh_ptr_equal); 458} 459 460void lh_table_resize(struct lh_table *t, int new_size) 461{ 462 struct lh_table *new_t; 463 struct lh_entry *ent; 464 465 new_t = lh_table_new(new_size, t->name, NULL, t->hash_fn, t->equal_fn); 466 ent = t->head; 467 while(ent) { 468 lh_table_insert(new_t, ent->k, ent->v); 469 ent = ent->next; 470 } 471 free(t->table); 472 t->table = new_t->table; 473 t->size = new_size; 474 t->head = new_t->head; 475 t->tail = new_t->tail; 476 t->resizes++; 477 free(new_t); 478} 479 480void lh_table_free(struct lh_table *t) 481{ 482 struct lh_entry *c; 483 for(c = t->head; c != NULL; c = c->next) { 484 if(t->free_fn) { 485 t->free_fn(c); 486 } 487 } 488 free(t->table); 489 free(t); 490} 491 492 493int lh_table_insert(struct lh_table *t, void *k, const void *v) 494{ 495 unsigned long h, n; 496 497 t->inserts++; 498 if(t->count >= t->size * LH_LOAD_FACTOR) lh_table_resize(t, t->size * 2); 499 500 h = t->hash_fn(k); 501 n = h % t->size; 502 503 while( 1 ) { 504 if(t->table[n].k == LH_EMPTY || t->table[n].k == LH_FREED) break; 505 t->collisions++; 506 if ((int)++n == t->size) n = 0; 507 } 508 509 t->table[n].k = k; 510 t->table[n].v = v; 511 t->count++; 512 513 if(t->head == NULL) { 514 t->head = t->tail = &t->table[n]; 515 t->table[n].next = t->table[n].prev = NULL; 516 } else { 517 t->tail->next = &t->table[n]; 518 t->table[n].prev = t->tail; 519 t->table[n].next = NULL; 520 t->tail = &t->table[n]; 521 } 522 523 return 0; 524} 525 526 527struct lh_entry* lh_table_lookup_entry(struct lh_table *t, const void *k) 528{ 529 unsigned long h = t->hash_fn(k); 530 unsigned long n = h % t->size; 531 int count = 0; 532 533 t->lookups++; 534 while( count < t->size ) { 535 if(t->table[n].k == LH_EMPTY) return NULL; 536 if(t->table[n].k != LH_FREED && 537 t->equal_fn(t->table[n].k, k)) return &t->table[n]; 538 if ((int)++n == t->size) n = 0; 539 count++; 540 } 541 return NULL; 542} 543 544 545const void* lh_table_lookup(struct lh_table *t, const void *k) 546{ 547 void *result; 548 lh_table_lookup_ex(t, k, &result); 549 return result; 550} 551 552json_bool lh_table_lookup_ex(struct lh_table* t, const void* k, void **v) 553{ 554 struct lh_entry *e = lh_table_lookup_entry(t, k); 555 if (e != NULL) { 556 if (v != NULL) *v = (void *)e->v; 557 return TRUE; /* key found */ 558 } 559 if (v != NULL) *v = NULL; 560 return FALSE; /* key not found */ 561} 562 563int lh_table_delete_entry(struct lh_table *t, struct lh_entry *e) 564{ 565 ptrdiff_t n = (ptrdiff_t)(e - t->table); /* CAW: fixed to be 64bit nice, still need the crazy negative case... */ 566 567 /* CAW: this is bad, really bad, maybe stack goes other direction on this machine... */ 568 if(n < 0) { return -2; } 569 570 if(t->table[n].k == LH_EMPTY || t->table[n].k == LH_FREED) return -1; 571 t->count--; 572 if(t->free_fn) t->free_fn(e); 573 t->table[n].v = NULL; 574 t->table[n].k = LH_FREED; 575 if(t->tail == &t->table[n] && t->head == &t->table[n]) { 576 t->head = t->tail = NULL; 577 } else if (t->head == &t->table[n]) { 578 t->head->next->prev = NULL; 579 t->head = t->head->next; 580 } else if (t->tail == &t->table[n]) { 581 t->tail->prev->next = NULL; 582 t->tail = t->tail->prev; 583 } else { 584 t->table[n].prev->next = t->table[n].next; 585 t->table[n].next->prev = t->table[n].prev; 586 } 587 t->table[n].next = t->table[n].prev = NULL; 588 return 0; 589} 590 591 592int lh_table_delete(struct lh_table *t, const void *k) 593{ 594 struct lh_entry *e = lh_table_lookup_entry(t, k); 595 if(!e) return -1; 596 return lh_table_delete_entry(t, e); 597} 598 599int lh_table_length(struct lh_table *t) 600{ 601 return t->count; 602} 603