m_hashtable.c revision 6debd8596cb5558b4f33f76434e35154da7fb4ba
1
2/*--------------------------------------------------------------------*/
3/*--- A separately-chained hash table.               m_hashtable.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2005-2013 Nicholas Nethercote
11      njn@valgrind.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31#include "pub_core_basics.h"
32#include "pub_core_debuglog.h"
33#include "pub_core_hashtable.h"
34#include "pub_core_libcassert.h"
35#include "pub_core_libcbase.h"
36#include "pub_core_libcprint.h"
37#include "pub_core_mallocfree.h"
38
39/*--------------------------------------------------------------------*/
40/*--- Declarations                                                 ---*/
41/*--------------------------------------------------------------------*/
42
43#define CHAIN_NO(key,tbl) (((UWord)(key)) % tbl->n_chains)
44
45struct _VgHashTable {
46   UInt         n_chains;   // should be prime
47   UInt         n_elements;
48   VgHashNode*  iterNode;   // current iterator node
49   UInt         iterChain;  // next chain to be traversed by the iterator
50   VgHashNode** chains;     // expanding array of hash chains
51   Bool         iterOK;     // table safe to iterate over?
52   const HChar* name;       // name of table (for debugging only)
53};
54
55#define N_HASH_PRIMES 20
56
57static const SizeT primes[N_HASH_PRIMES] = {
58         769UL,         1543UL,         3079UL,          6151UL,
59       12289UL,        24593UL,        49157UL,         98317UL,
60      196613UL,       393241UL,       786433UL,       1572869UL,
61     3145739UL,      6291469UL,     12582917UL,      25165843UL,
62    50331653UL,    100663319UL,    201326611UL,     402653189UL
63};
64
65/*--------------------------------------------------------------------*/
66/*--- Functions                                                    ---*/
67/*--------------------------------------------------------------------*/
68
69VgHashTable *VG_(HT_construct) ( const HChar* name )
70{
71   /* Initialises to zero, ie. all entries NULL */
72   SizeT       n_chains = primes[0];
73   SizeT       sz       = n_chains * sizeof(VgHashNode*);
74   VgHashTable *table   = VG_(calloc)("hashtable.Hc.1",
75                                      1, sizeof(struct _VgHashTable));
76   table->chains        = VG_(calloc)("hashtable.Hc.2", 1, sz);
77   table->n_chains      = n_chains;
78   table->n_elements    = 0;
79   table->iterOK        = True;
80   table->name          = name;
81   vg_assert(name);
82   return table;
83}
84
85Int VG_(HT_count_nodes) ( const VgHashTable *table )
86{
87   return table->n_elements;
88}
89
90static void resize ( VgHashTable *table )
91{
92   Int          i;
93   SizeT        sz;
94   SizeT        old_chains = table->n_chains;
95   SizeT        new_chains = old_chains + 1;
96   VgHashNode** chains;
97   VgHashNode * node;
98
99   /* If we've run out of primes, do nothing. */
100   if (old_chains == primes[N_HASH_PRIMES-1])
101      return;
102
103   vg_assert(old_chains >= primes[0]
104             && old_chains < primes[N_HASH_PRIMES-1]);
105
106   for (i = 0; i < N_HASH_PRIMES; i++) {
107      if (primes[i] > new_chains) {
108         new_chains = primes[i];
109         break;
110      }
111   }
112
113   vg_assert(new_chains > old_chains);
114   vg_assert(new_chains > primes[0]
115             && new_chains <= primes[N_HASH_PRIMES-1]);
116
117   VG_(debugLog)(
118      1, "hashtable",
119         "resizing table `%s' from %lu to %lu (total elems %lu)\n",
120         table->name, (UWord)old_chains, (UWord)new_chains,
121         (UWord)table->n_elements );
122
123   table->n_chains = new_chains;
124   sz = new_chains * sizeof(VgHashNode*);
125   chains = VG_(calloc)("hashtable.resize.1", 1, sz);
126
127   for (i = 0; i < old_chains; i++) {
128      node = table->chains[i];
129      while (node != NULL) {
130         VgHashNode* next = node->next;
131         UWord chain = CHAIN_NO(node->key, table);
132         node->next = chains[chain];
133         chains[chain] = node;
134         node = next;
135      }
136   }
137
138   VG_(free)(table->chains);
139   table->chains = chains;
140}
141
142/* Puts a new, heap allocated VgHashNode, into the VgHashTable.  Prepends
143   the node to the appropriate chain.  No duplicate key detection is done. */
144void VG_(HT_add_node) ( VgHashTable *table, void* vnode )
145{
146   VgHashNode* node     = (VgHashNode*)vnode;
147   UWord chain          = CHAIN_NO(node->key, table);
148   node->next           = table->chains[chain];
149   table->chains[chain] = node;
150   table->n_elements++;
151   if ( (1 * (ULong)table->n_elements) > (1 * (ULong)table->n_chains) ) {
152      resize(table);
153   }
154
155   /* Table has been modified; hence HT_Next should assert. */
156   table->iterOK = False;
157}
158
159/* Looks up a VgHashNode by key in the table.  Returns NULL if not found. */
160void* VG_(HT_lookup) ( const VgHashTable *table, UWord key )
161{
162   VgHashNode* curr = table->chains[ CHAIN_NO(key, table) ];
163
164   while (curr) {
165      if (key == curr->key) {
166         return curr;
167      }
168      curr = curr->next;
169   }
170   return NULL;
171}
172
173/* Looks up a VgHashNode by node in the table.  Returns NULL if not found.
174   GEN!!! marks the lines that differs from VG_(HT_lookup). */
175void* VG_(HT_gen_lookup) ( const VgHashTable *table, const void* node,
176                           HT_Cmp_t cmp )
177{
178   const VgHashNode* hnode = node; // GEN!!!
179   VgHashNode* curr = table->chains[ CHAIN_NO(hnode->key, table) ]; // GEN!!!
180
181   while (curr) {
182      if (hnode->key == curr->key && cmp (hnode, curr) == 0) { // GEN!!!
183         return curr;
184      }
185      curr = curr->next;
186   }
187   return NULL;
188}
189
190/* Removes a VgHashNode from the table.  Returns NULL if not found. */
191void* VG_(HT_remove) ( VgHashTable *table, UWord key )
192{
193   UWord        chain         = CHAIN_NO(key, table);
194   VgHashNode*  curr          =   table->chains[chain];
195   VgHashNode** prev_next_ptr = &(table->chains[chain]);
196
197   /* Table has been modified; hence HT_Next should assert. */
198   table->iterOK = False;
199
200   while (curr) {
201      if (key == curr->key) {
202         *prev_next_ptr = curr->next;
203         table->n_elements--;
204         return curr;
205      }
206      prev_next_ptr = &(curr->next);
207      curr = curr->next;
208   }
209   return NULL;
210}
211
212/* Removes a VgHashNode by node from the table.  Returns NULL if not found.
213   GEN!!! marks the lines that differs from VG_(HT_remove). */
214void* VG_(HT_gen_remove) ( VgHashTable *table, const void* node, HT_Cmp_t cmp  )
215{
216   const VgHashNode* hnode    = node; // GEN!!!
217   UWord        chain         = CHAIN_NO(hnode->key, table); // GEN!!!
218   VgHashNode*  curr          =   table->chains[chain];
219   VgHashNode** prev_next_ptr = &(table->chains[chain]);
220
221   /* Table has been modified; hence HT_Next should assert. */
222   table->iterOK = False;
223
224   while (curr) {
225      if (hnode->key == curr->key && cmp(hnode, curr) == 0) { // GEN!!!
226         *prev_next_ptr = curr->next;
227         table->n_elements--;
228         return curr;
229      }
230      prev_next_ptr = &(curr->next);
231      curr = curr->next;
232   }
233   return NULL;
234}
235
236void VG_(HT_print_stats) ( const VgHashTable *table, HT_Cmp_t cmp )
237{
238   #define MAXOCCUR 20
239   UInt elt_occurences[MAXOCCUR+1];
240   UInt key_occurences[MAXOCCUR+1];
241   UInt cno_occurences[MAXOCCUR+1];
242   /* Key occurence  : how many ht elements have the same key.
243      elt_occurences : how many elements are inserted multiple time.
244      cno_occurences : how many chains have that length.
245      The last entry in these arrays collects all occurences >= MAXOCCUR. */
246   #define INCOCCUR(occur,n) (n >= MAXOCCUR ? occur[MAXOCCUR]++ : occur[n]++)
247   UInt i;
248   UInt nkey, nelt, ncno;
249   VgHashNode *cnode, *node;
250
251   VG_(memset)(key_occurences, 0, sizeof(key_occurences));
252   VG_(memset)(elt_occurences, 0, sizeof(elt_occurences));
253   VG_(memset)(cno_occurences, 0, sizeof(cno_occurences));
254
255   // Note that the below algorithm is quadractic in nr of elements in a chain
256   // but if that happens, the hash table/function is really bad and that
257   // should be fixed.
258   for (i = 0; i < table->n_chains; i++) {
259      ncno = 0;
260      for (cnode = table->chains[i]; cnode != NULL; cnode = cnode->next) {
261         ncno++;
262
263         nkey = 0;
264         // Is the same cnode->key existing before cnode ?
265         for (node = table->chains[i]; node != cnode; node = node->next) {
266            if (node->key == cnode->key)
267               nkey++;
268         }
269         // If cnode->key not in a previous node, count occurences of key.
270         if (nkey == 0) {
271            for (node = cnode; node != NULL; node = node->next) {
272               if (node->key == cnode->key)
273                  nkey++;
274            }
275            INCOCCUR(key_occurences, nkey);
276         }
277
278         nelt = 0;
279         // Is the same cnode element existing before cnode ?
280         for (node = table->chains[i]; node != cnode; node = node->next) {
281            if (node->key == cnode->key
282                && (cmp == NULL || cmp (node, cnode) == 0)) {
283               nelt++;
284            }
285         }
286         // If cnode element not in a previous node, count occurences of elt.
287         if (nelt == 0) {
288            for (node = cnode; node != NULL; node = node->next) {
289               if (node->key == cnode->key
290                   && (cmp == NULL || cmp (node, cnode) == 0)) {
291                  nelt++;
292               }
293            }
294            INCOCCUR(elt_occurences, nelt);
295         }
296      }
297      INCOCCUR(cno_occurences, ncno);
298   }
299
300   VG_(message)(Vg_DebugMsg,
301                "nr occurences of"
302                " chains of len N,"
303                " N-plicated keys,"
304                " N-plicated elts\n");
305   nkey = nelt = ncno = 0;
306   for (i = 0; i <= MAXOCCUR; i++) {
307      if (elt_occurences[i] > 0
308          || key_occurences[i] > 0
309          || cno_occurences[i] > 0)
310         VG_(message)(Vg_DebugMsg,
311                      "%s=%2d : nr chain %6d, nr keys %6d, nr elts %6d\n",
312                      i == MAXOCCUR ? ">" : "N", i,
313                      cno_occurences[i], key_occurences[i], elt_occurences[i]);
314      nkey += key_occurences[i];
315      nelt += elt_occurences[i];
316      ncno += cno_occurences[i];
317   }
318   VG_(message)(Vg_DebugMsg,
319                "total nr of unique   slots: %6d, keys %6d, elts %6d."
320                " Avg chain len %3.1f\n",
321                ncno, nkey, nelt,
322                (Double)nelt/(Double)(ncno == cno_occurences[0] ?
323                                      1 : ncno - cno_occurences[0]));
324}
325
326
327/* Allocates a suitably-sized array, copies pointers to all the hashtable
328   elements into it, then returns both the array and the size of it.  The
329   array must be freed with VG_(free).
330*/
331VgHashNode** VG_(HT_to_array) (const VgHashTable *table, /*OUT*/ UInt* n_elems)
332{
333   UInt       i, j;
334   VgHashNode** arr;
335   VgHashNode*  node;
336
337   *n_elems = table->n_elements;
338   if (*n_elems == 0)
339      return NULL;
340
341   arr = VG_(malloc)( "hashtable.Hta.1", *n_elems * sizeof(VgHashNode*) );
342
343   j = 0;
344   for (i = 0; i < table->n_chains; i++) {
345      for (node = table->chains[i]; node != NULL; node = node->next) {
346         arr[j++] = node;
347      }
348   }
349   vg_assert(j == *n_elems);
350
351   return arr;
352}
353
354void VG_(HT_ResetIter)(VgHashTable *table)
355{
356   vg_assert(table);
357   table->iterNode  = NULL;
358   table->iterChain = 0;
359   table->iterOK    = True;
360}
361
362void* VG_(HT_Next)(VgHashTable *table)
363{
364   Int i;
365   vg_assert(table);
366   /* See long comment on HT_Next prototype in pub_tool_hashtable.h.
367      In short if this fails, it means the caller tried to modify the
368      table whilst iterating over it, which is a bug. */
369   vg_assert(table->iterOK);
370
371   if (table->iterNode && table->iterNode->next) {
372      table->iterNode = table->iterNode->next;
373      return table->iterNode;
374   }
375
376   for (i = table->iterChain; i < table->n_chains; i++) {
377      if (table->chains[i]) {
378         table->iterNode  = table->chains[i];
379         table->iterChain = i + 1;  // Next chain to be traversed
380         return table->iterNode;
381      }
382   }
383   return NULL;
384}
385
386void VG_(HT_destruct)(VgHashTable *table, void(*freenode_fn)(void*))
387{
388   UInt       i;
389   VgHashNode *node, *node_next;
390
391   for (i = 0; i < table->n_chains; i++) {
392      for (node = table->chains[i]; node != NULL; node = node_next) {
393         node_next = node->next;
394         freenode_fn(node);
395      }
396   }
397   VG_(free)(table->chains);
398   VG_(free)(table);
399}
400
401/*--------------------------------------------------------------------*/
402/*--- end                                                          ---*/
403/*--------------------------------------------------------------------*/
404