m_hashtable.c revision 47124e91145f71f6db7d0a60031fe49a6b6ea141
1 2/*--------------------------------------------------------------------*/ 3/*--- A separately-chained hash table. m_hashtable.c ---*/ 4/*--------------------------------------------------------------------*/ 5 6/* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2005-2013 Nicholas Nethercote 11 njn@valgrind.org 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29*/ 30 31#include "pub_core_basics.h" 32#include "pub_core_debuglog.h" 33#include "pub_core_hashtable.h" 34#include "pub_core_libcassert.h" 35#include "pub_core_libcbase.h" 36#include "pub_core_libcprint.h" 37#include "pub_core_mallocfree.h" 38 39/*--------------------------------------------------------------------*/ 40/*--- Declarations ---*/ 41/*--------------------------------------------------------------------*/ 42 43#define CHAIN_NO(key,tbl) (((UWord)(key)) % tbl->n_chains) 44 45struct _VgHashTable { 46 UInt n_chains; // should be prime 47 UInt n_elements; 48 VgHashNode* iterNode; // current iterator node 49 UInt iterChain; // next chain to be traversed by the iterator 50 VgHashNode** chains; // expanding array of hash chains 51 Bool iterOK; // table safe to iterate over? 52 const HChar* name; // name of table (for debugging only) 53}; 54 55#define N_HASH_PRIMES 20 56 57static const SizeT primes[N_HASH_PRIMES] = { 58 769UL, 1543UL, 3079UL, 6151UL, 59 12289UL, 24593UL, 49157UL, 98317UL, 60 196613UL, 393241UL, 786433UL, 1572869UL, 61 3145739UL, 6291469UL, 12582917UL, 25165843UL, 62 50331653UL, 100663319UL, 201326611UL, 402653189UL 63}; 64 65/*--------------------------------------------------------------------*/ 66/*--- Functions ---*/ 67/*--------------------------------------------------------------------*/ 68 69VgHashTable *VG_(HT_construct) ( const HChar* name ) 70{ 71 /* Initialises to zero, ie. all entries NULL */ 72 SizeT n_chains = primes[0]; 73 SizeT sz = n_chains * sizeof(VgHashNode*); 74 VgHashTable *table = VG_(calloc)("hashtable.Hc.1", 75 1, sizeof(struct _VgHashTable)); 76 table->chains = VG_(calloc)("hashtable.Hc.2", 1, sz); 77 table->n_chains = n_chains; 78 table->n_elements = 0; 79 table->iterOK = True; 80 table->name = name; 81 vg_assert(name); 82 return table; 83} 84 85Int VG_(HT_count_nodes) ( const VgHashTable *table ) 86{ 87 return table->n_elements; 88} 89 90static void resize ( VgHashTable *table ) 91{ 92 Int i; 93 SizeT sz; 94 SizeT old_chains = table->n_chains; 95 SizeT new_chains = old_chains + 1; 96 VgHashNode** chains; 97 VgHashNode * node; 98 99 /* If we've run out of primes, do nothing. */ 100 if (old_chains == primes[N_HASH_PRIMES-1]) 101 return; 102 103 vg_assert(old_chains >= primes[0] 104 && old_chains < primes[N_HASH_PRIMES-1]); 105 106 for (i = 0; i < N_HASH_PRIMES; i++) { 107 if (primes[i] > new_chains) { 108 new_chains = primes[i]; 109 break; 110 } 111 } 112 113 vg_assert(new_chains > old_chains); 114 vg_assert(new_chains > primes[0] 115 && new_chains <= primes[N_HASH_PRIMES-1]); 116 117 VG_(debugLog)( 118 1, "hashtable", 119 "resizing table `%s' from %lu to %lu (total elems %lu)\n", 120 table->name, (UWord)old_chains, (UWord)new_chains, 121 (UWord)table->n_elements ); 122 123 table->n_chains = new_chains; 124 sz = new_chains * sizeof(VgHashNode*); 125 chains = VG_(calloc)("hashtable.resize.1", 1, sz); 126 127 for (i = 0; i < old_chains; i++) { 128 node = table->chains[i]; 129 while (node != NULL) { 130 VgHashNode* next = node->next; 131 UWord chain = CHAIN_NO(node->key, table); 132 node->next = chains[chain]; 133 chains[chain] = node; 134 node = next; 135 } 136 } 137 138 VG_(free)(table->chains); 139 table->chains = chains; 140} 141 142/* Puts a new, heap allocated VgHashNode, into the VgHashTable. Prepends 143 the node to the appropriate chain. No duplicate key detection is done. */ 144void VG_(HT_add_node) ( VgHashTable *table, void* vnode ) 145{ 146 VgHashNode* node = (VgHashNode*)vnode; 147 UWord chain = CHAIN_NO(node->key, table); 148 node->next = table->chains[chain]; 149 table->chains[chain] = node; 150 table->n_elements++; 151 if ( (1 * (ULong)table->n_elements) > (1 * (ULong)table->n_chains) ) { 152 resize(table); 153 } 154 155 /* Table has been modified; hence HT_Next should assert. */ 156 table->iterOK = False; 157} 158 159/* Looks up a VgHashNode by key in the table. Returns NULL if not found. */ 160void* VG_(HT_lookup) ( const VgHashTable *table, UWord key ) 161{ 162 VgHashNode* curr = table->chains[ CHAIN_NO(key, table) ]; 163 164 while (curr) { 165 if (key == curr->key) { 166 return curr; 167 } 168 curr = curr->next; 169 } 170 return NULL; 171} 172 173/* Looks up a VgHashNode by node in the table. Returns NULL if not found. 174 GEN!!! marks the lines that differs from VG_(HT_lookup). */ 175void* VG_(HT_gen_lookup) ( const VgHashTable *table, const void* node, 176 HT_Cmp_t cmp ) 177{ 178 const VgHashNode* hnode = node; // GEN!!! 179 VgHashNode* curr = table->chains[ CHAIN_NO(hnode->key, table) ]; // GEN!!! 180 181 while (curr) { 182 if (cmp (hnode, curr) == 0) { // GEN!!! 183 return curr; 184 } 185 curr = curr->next; 186 } 187 return NULL; 188} 189 190/* Removes a VgHashNode from the table. Returns NULL if not found. */ 191void* VG_(HT_remove) ( VgHashTable *table, UWord key ) 192{ 193 UWord chain = CHAIN_NO(key, table); 194 VgHashNode* curr = table->chains[chain]; 195 VgHashNode** prev_next_ptr = &(table->chains[chain]); 196 197 /* Table has been modified; hence HT_Next should assert. */ 198 table->iterOK = False; 199 200 while (curr) { 201 if (key == curr->key) { 202 *prev_next_ptr = curr->next; 203 table->n_elements--; 204 return curr; 205 } 206 prev_next_ptr = &(curr->next); 207 curr = curr->next; 208 } 209 return NULL; 210} 211 212/* Removes a VgHashNode by node from the table. Returns NULL if not found. 213 GEN!!! marks the lines that differs from VG_(HT_remove). */ 214void* VG_(HT_gen_remove) ( VgHashTable *table, const void* node, HT_Cmp_t cmp ) 215{ 216 const VgHashNode* hnode = node; // GEN!!! 217 UWord chain = CHAIN_NO(hnode->key, table); // GEN!!! 218 VgHashNode* curr = table->chains[chain]; 219 VgHashNode** prev_next_ptr = &(table->chains[chain]); 220 221 /* Table has been modified; hence HT_Next should assert. */ 222 table->iterOK = False; 223 224 while (curr) { 225 if (cmp(hnode, curr) == 0) { // GEN!!! 226 *prev_next_ptr = curr->next; 227 table->n_elements--; 228 return curr; 229 } 230 prev_next_ptr = &(curr->next); 231 curr = curr->next; 232 } 233 return NULL; 234} 235 236void VG_(HT_print_stats) ( const VgHashTable *table, HT_Cmp_t cmp ) 237{ 238 #define MAXOCCUR 20 239 UInt elt_occurences[MAXOCCUR+1]; 240 UInt key_occurences[MAXOCCUR+1]; 241 UInt cno_occurences[MAXOCCUR+1]; 242 /* Key occurence : how many ht elements have the same key. 243 elt_occurences : how many elements are inserted multiple time. 244 cno_occurences : how many chains have that length. 245 The last entry in these arrays collects all occurences >= MAXOCCUR. */ 246 #define INCOCCUR(occur,n) (n >= MAXOCCUR ? occur[MAXOCCUR]++ : occur[n]++) 247 UInt i; 248 UInt nkey, nelt, ncno; 249 VgHashNode *cnode, *node; 250 251 VG_(memset)(key_occurences, 0, sizeof(key_occurences)); 252 VG_(memset)(elt_occurences, 0, sizeof(elt_occurences)); 253 VG_(memset)(cno_occurences, 0, sizeof(cno_occurences)); 254 255 // Note that the below algorithm is quadractic in nr of elements in a chain 256 // but if that happens, the hash table/function is really bad and that 257 // should be fixed. 258 for (i = 0; i < table->n_chains; i++) { 259 ncno = 0; 260 for (cnode = table->chains[i]; cnode != NULL; cnode = cnode->next) { 261 ncno++; 262 263 nkey = 0; 264 // Is the same cnode->key existing before cnode ? 265 for (node = table->chains[i]; node != cnode; node = node->next) { 266 if (node->key == cnode->key) 267 nkey++; 268 } 269 // If cnode->key not in a previous node, count occurences of key. 270 if (nkey == 0) { 271 for (node = cnode; node != NULL; node = node->next) { 272 if (node->key == cnode->key) 273 nkey++; 274 } 275 INCOCCUR(key_occurences, nkey); 276 } 277 278 nelt = 0; 279 // Is the same cnode element existing before cnode ? 280 for (node = table->chains[i]; node != cnode; node = node->next) { 281 if (cmp) { 282 if ((*cmp)(node, cnode) == 0) 283 nelt++; 284 } else 285 if (node->key == cnode->key) 286 nelt++; 287 } 288 // If cnode element not in a previous node, count occurences of elt. 289 if (nelt == 0) { 290 for (node = cnode; node != NULL; node = node->next) { 291 if (cmp) { 292 if ((*cmp)(node, cnode) == 0) 293 nelt++; 294 } else 295 if (node->key == cnode->key) 296 nelt++; 297 } 298 INCOCCUR(elt_occurences, nelt); 299 } 300 } 301 INCOCCUR(cno_occurences, ncno); 302 } 303 304 VG_(message)(Vg_DebugMsg, 305 "nr occurences of" 306 " chains of len N," 307 " N-plicated keys," 308 " N-plicated elts\n"); 309 nkey = nelt = ncno = 0; 310 for (i = 0; i <= MAXOCCUR; i++) { 311 if (elt_occurences[i] > 0 312 || key_occurences[i] > 0 313 || cno_occurences[i] > 0) 314 VG_(message)(Vg_DebugMsg, 315 "%s=%2d : nr chain %6d, nr keys %6d, nr elts %6d\n", 316 i == MAXOCCUR ? ">" : "N", i, 317 cno_occurences[i], key_occurences[i], elt_occurences[i]); 318 nkey += key_occurences[i]; 319 nelt += elt_occurences[i]; 320 ncno += cno_occurences[i]; 321 } 322 VG_(message)(Vg_DebugMsg, 323 "total nr of unique slots: %6d, keys %6d, elts %6d." 324 " Avg chain len %3.1f\n", 325 ncno, nkey, nelt, 326 (Double)nelt/(Double)(ncno == cno_occurences[0] ? 327 1 : ncno - cno_occurences[0])); 328} 329 330 331/* Allocates a suitably-sized array, copies pointers to all the hashtable 332 elements into it, then returns both the array and the size of it. The 333 array must be freed with VG_(free). 334*/ 335VgHashNode** VG_(HT_to_array) (const VgHashTable *table, /*OUT*/ UInt* n_elems) 336{ 337 UInt i, j; 338 VgHashNode** arr; 339 VgHashNode* node; 340 341 *n_elems = table->n_elements; 342 if (*n_elems == 0) 343 return NULL; 344 345 arr = VG_(malloc)( "hashtable.Hta.1", *n_elems * sizeof(VgHashNode*) ); 346 347 j = 0; 348 for (i = 0; i < table->n_chains; i++) { 349 for (node = table->chains[i]; node != NULL; node = node->next) { 350 arr[j++] = node; 351 } 352 } 353 vg_assert(j == *n_elems); 354 355 return arr; 356} 357 358void VG_(HT_ResetIter)(VgHashTable *table) 359{ 360 vg_assert(table); 361 table->iterNode = NULL; 362 table->iterChain = 0; 363 table->iterOK = True; 364} 365 366void* VG_(HT_Next)(VgHashTable *table) 367{ 368 Int i; 369 vg_assert(table); 370 /* See long comment on HT_Next prototype in pub_tool_hashtable.h. 371 In short if this fails, it means the caller tried to modify the 372 table whilst iterating over it, which is a bug. */ 373 vg_assert(table->iterOK); 374 375 if (table->iterNode && table->iterNode->next) { 376 table->iterNode = table->iterNode->next; 377 return table->iterNode; 378 } 379 380 for (i = table->iterChain; i < table->n_chains; i++) { 381 if (table->chains[i]) { 382 table->iterNode = table->chains[i]; 383 table->iterChain = i + 1; // Next chain to be traversed 384 return table->iterNode; 385 } 386 } 387 return NULL; 388} 389 390void VG_(HT_destruct)(VgHashTable *table, void(*freenode_fn)(void*)) 391{ 392 UInt i; 393 VgHashNode *node, *node_next; 394 395 for (i = 0; i < table->n_chains; i++) { 396 for (node = table->chains[i]; node != NULL; node = node_next) { 397 node_next = node->next; 398 freenode_fn(node); 399 } 400 } 401 VG_(free)(table->chains); 402 VG_(free)(table); 403} 404 405/*--------------------------------------------------------------------*/ 406/*--- end ---*/ 407/*--------------------------------------------------------------------*/ 408