StringMap.cpp revision c894b02ae037b5fdb38e42535539d5a0e49d6c41
1//===--- StringMap.cpp - String Hash table map implementation -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the StringMap class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/StringMap.h"
15#include "llvm/ADT/StringExtras.h"
16#include <cassert>
17using namespace llvm;
18
19StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
20  ItemSize = itemSize;
21
22  // If a size is specified, initialize the table with that many buckets.
23  if (InitSize) {
24    init(InitSize);
25    return;
26  }
27
28  // Otherwise, initialize it with zero buckets to avoid the allocation.
29  TheTable = 0;
30  NumBuckets = 0;
31  NumItems = 0;
32  NumTombstones = 0;
33}
34
35void StringMapImpl::init(unsigned InitSize) {
36  assert((InitSize & (InitSize-1)) == 0 &&
37         "Init Size must be a power of 2 or zero!");
38  NumBuckets = InitSize ? InitSize : 16;
39  NumItems = 0;
40  NumTombstones = 0;
41
42  TheTable = (StringMapEntryBase **)calloc(NumBuckets+1,
43                                           sizeof(StringMapEntryBase **) +
44                                           sizeof(unsigned));
45
46  // Allocate one extra bucket, set it to look filled so the iterators stop at
47  // end.
48  TheTable[NumBuckets] = (StringMapEntryBase*)2;
49}
50
51
52/// LookupBucketFor - Look up the bucket that the specified string should end
53/// up in.  If it already exists as a key in the map, the Item pointer for the
54/// specified bucket will be non-null.  Otherwise, it will be null.  In either
55/// case, the FullHashValue field of the bucket will be set to the hash value
56/// of the string.
57unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
58  unsigned HTSize = NumBuckets;
59  if (HTSize == 0) {  // Hash table unallocated so far?
60    init(16);
61    HTSize = NumBuckets;
62  }
63  unsigned FullHashValue = HashString(Name);
64  unsigned BucketNo = FullHashValue & (HTSize-1);
65  unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
66
67  unsigned ProbeAmt = 1;
68  int FirstTombstone = -1;
69  while (1) {
70    StringMapEntryBase *BucketItem = TheTable[BucketNo];
71    // If we found an empty bucket, this key isn't in the table yet, return it.
72    if (BucketItem == 0) {
73      // If we found a tombstone, we want to reuse the tombstone instead of an
74      // empty bucket.  This reduces probing.
75      if (FirstTombstone != -1) {
76        HashTable[FirstTombstone] = FullHashValue;
77        return FirstTombstone;
78      }
79
80      HashTable[BucketNo] = FullHashValue;
81      return BucketNo;
82    }
83
84    if (BucketItem == getTombstoneVal()) {
85      // Skip over tombstones.  However, remember the first one we see.
86      if (FirstTombstone == -1) FirstTombstone = BucketNo;
87    } else if (HashTable[BucketNo] == FullHashValue) {
88      // If the full hash value matches, check deeply for a match.  The common
89      // case here is that we are only looking at the buckets (for item info
90      // being non-null and for the full hash value) not at the items.  This
91      // is important for cache locality.
92
93      // Do the comparison like this because Name isn't necessarily
94      // null-terminated!
95      char *ItemStr = (char*)BucketItem+ItemSize;
96      if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) {
97        // We found a match!
98        return BucketNo;
99      }
100    }
101
102    // Okay, we didn't find the item.  Probe to the next bucket.
103    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
104
105    // Use quadratic probing, it has fewer clumping artifacts than linear
106    // probing and has good cache behavior in the common case.
107    ++ProbeAmt;
108  }
109}
110
111
112/// FindKey - Look up the bucket that contains the specified key. If it exists
113/// in the map, return the bucket number of the key.  Otherwise return -1.
114/// This does not modify the map.
115int StringMapImpl::FindKey(StringRef Key) const {
116  unsigned HTSize = NumBuckets;
117  if (HTSize == 0) return -1;  // Really empty table?
118  unsigned FullHashValue = HashString(Key);
119  unsigned BucketNo = FullHashValue & (HTSize-1);
120  unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
121
122  unsigned ProbeAmt = 1;
123  while (1) {
124    StringMapEntryBase *BucketItem = TheTable[BucketNo];
125    // If we found an empty bucket, this key isn't in the table yet, return.
126    if (BucketItem == 0)
127      return -1;
128
129    if (BucketItem == getTombstoneVal()) {
130      // Ignore tombstones.
131    } else if (HashTable[BucketNo] == FullHashValue) {
132      // If the full hash value matches, check deeply for a match.  The common
133      // case here is that we are only looking at the buckets (for item info
134      // being non-null and for the full hash value) not at the items.  This
135      // is important for cache locality.
136
137      // Do the comparison like this because NameStart isn't necessarily
138      // null-terminated!
139      char *ItemStr = (char*)BucketItem+ItemSize;
140      if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) {
141        // We found a match!
142        return BucketNo;
143      }
144    }
145
146    // Okay, we didn't find the item.  Probe to the next bucket.
147    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
148
149    // Use quadratic probing, it has fewer clumping artifacts than linear
150    // probing and has good cache behavior in the common case.
151    ++ProbeAmt;
152  }
153}
154
155/// RemoveKey - Remove the specified StringMapEntry from the table, but do not
156/// delete it.  This aborts if the value isn't in the table.
157void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
158  const char *VStr = (char*)V + ItemSize;
159  StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength()));
160  (void)V2;
161  assert(V == V2 && "Didn't find key?");
162}
163
164/// RemoveKey - Remove the StringMapEntry for the specified key from the
165/// table, returning it.  If the key is not in the table, this returns null.
166StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
167  int Bucket = FindKey(Key);
168  if (Bucket == -1) return 0;
169
170  StringMapEntryBase *Result = TheTable[Bucket];
171  TheTable[Bucket] = getTombstoneVal();
172  --NumItems;
173  ++NumTombstones;
174  assert(NumItems + NumTombstones <= NumBuckets);
175
176  return Result;
177}
178
179
180
181/// RehashTable - Grow the table, redistributing values into the buckets with
182/// the appropriate mod-of-hashtable-size.
183void StringMapImpl::RehashTable() {
184  unsigned NewSize;
185  unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
186
187  // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
188  // the buckets are empty (meaning that many are filled with tombstones),
189  // grow/rehash the table.
190  if (NumItems*4 > NumBuckets*3) {
191    NewSize = NumBuckets*2;
192  } else if (NumBuckets-(NumItems+NumTombstones) < NumBuckets/8) {
193    NewSize = NumBuckets;
194  } else {
195    return;
196  }
197
198  // Allocate one extra bucket which will always be non-empty.  This allows the
199  // iterators to stop at end.
200  StringMapEntryBase **NewTableArray =
201    (StringMapEntryBase **)calloc(NewSize+1, sizeof(StringMapEntryBase *) +
202                                             sizeof(unsigned));
203  unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
204  NewTableArray[NewSize] = (StringMapEntryBase*)2;
205
206  // Rehash all the items into their new buckets.  Luckily :) we already have
207  // the hash values available, so we don't have to rehash any strings.
208  for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
209    StringMapEntryBase *Bucket = TheTable[I];
210    if (Bucket && Bucket != getTombstoneVal()) {
211      // Fast case, bucket available.
212      unsigned FullHash = HashTable[I];
213      unsigned NewBucket = FullHash & (NewSize-1);
214      if (NewTableArray[NewBucket] == 0) {
215        NewTableArray[FullHash & (NewSize-1)] = Bucket;
216        NewHashArray[FullHash & (NewSize-1)] = FullHash;
217        continue;
218      }
219
220      // Otherwise probe for a spot.
221      unsigned ProbeSize = 1;
222      do {
223        NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
224      } while (NewTableArray[NewBucket]);
225
226      // Finally found a slot.  Fill it in.
227      NewTableArray[NewBucket] = Bucket;
228      NewHashArray[NewBucket] = FullHash;
229    }
230  }
231
232  free(TheTable);
233
234  TheTable = NewTableArray;
235  NumBuckets = NewSize;
236  NumTombstones = 0;
237}
238