StringMap.cpp revision 4ee451de366474b9c228b4e5fa573795a715216d
1//===--- StringMap.cpp - String Hash table map implementation -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the StringMap class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/StringMap.h"
15#include <cassert>
16using namespace llvm;
17
18StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
19  ItemSize = itemSize;
20
21  // If a size is specified, initialize the table with that many buckets.
22  if (InitSize) {
23    init(InitSize);
24    return;
25  }
26
27  // Otherwise, initialize it with zero buckets to avoid the allocation.
28  TheTable = 0;
29  NumBuckets = 0;
30  NumItems = 0;
31  NumTombstones = 0;
32}
33
34void StringMapImpl::init(unsigned InitSize) {
35  assert((InitSize & (InitSize-1)) == 0 &&
36         "Init Size must be a power of 2 or zero!");
37  NumBuckets = InitSize ? InitSize : 16;
38  NumItems = 0;
39  NumTombstones = 0;
40
41  TheTable = (ItemBucket*)calloc(NumBuckets+1, sizeof(ItemBucket));
42
43  // Allocate one extra bucket, set it to look filled so the iterators stop at
44  // end.
45  TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
46}
47
48
49/// HashString - Compute a hash code for the specified string.
50///
51static unsigned HashString(const char *Start, const char *End) {
52  // Bernstein hash function.
53  unsigned int Result = 0;
54  // TODO: investigate whether a modified bernstein hash function performs
55  // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
56  //   X*33+c -> X*33^c
57  while (Start != End)
58    Result = Result * 33 + *Start++;
59  Result = Result + (Result >> 5);
60  return Result;
61}
62
63/// LookupBucketFor - Look up the bucket that the specified string should end
64/// up in.  If it already exists as a key in the map, the Item pointer for the
65/// specified bucket will be non-null.  Otherwise, it will be null.  In either
66/// case, the FullHashValue field of the bucket will be set to the hash value
67/// of the string.
68unsigned StringMapImpl::LookupBucketFor(const char *NameStart,
69                                        const char *NameEnd) {
70  unsigned HTSize = NumBuckets;
71  if (HTSize == 0) {  // Hash table unallocated so far?
72    init(16);
73    HTSize = NumBuckets;
74  }
75  unsigned FullHashValue = HashString(NameStart, NameEnd);
76  unsigned BucketNo = FullHashValue & (HTSize-1);
77
78  unsigned ProbeAmt = 1;
79  int FirstTombstone = -1;
80  while (1) {
81    ItemBucket &Bucket = TheTable[BucketNo];
82    StringMapEntryBase *BucketItem = Bucket.Item;
83    // If we found an empty bucket, this key isn't in the table yet, return it.
84    if (BucketItem == 0) {
85      // If we found a tombstone, we want to reuse the tombstone instead of an
86      // empty bucket.  This reduces probing.
87      if (FirstTombstone != -1) {
88        TheTable[FirstTombstone].FullHashValue = FullHashValue;
89        return FirstTombstone;
90      }
91
92      Bucket.FullHashValue = FullHashValue;
93      return BucketNo;
94    }
95
96    if (BucketItem == getTombstoneVal()) {
97      // Skip over tombstones.  However, remember the first one we see.
98      if (FirstTombstone == -1) FirstTombstone = BucketNo;
99    } else if (Bucket.FullHashValue == FullHashValue) {
100      // If the full hash value matches, check deeply for a match.  The common
101      // case here is that we are only looking at the buckets (for item info
102      // being non-null and for the full hash value) not at the items.  This
103      // is important for cache locality.
104
105      // Do the comparison like this because NameStart isn't necessarily
106      // null-terminated!
107      char *ItemStr = (char*)BucketItem+ItemSize;
108      unsigned ItemStrLen = BucketItem->getKeyLength();
109      if (unsigned(NameEnd-NameStart) == ItemStrLen &&
110          memcmp(ItemStr, NameStart, ItemStrLen) == 0) {
111        // We found a match!
112        return BucketNo;
113      }
114    }
115
116    // Okay, we didn't find the item.  Probe to the next bucket.
117    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
118
119    // Use quadratic probing, it has fewer clumping artifacts than linear
120    // probing and has good cache behavior in the common case.
121    ++ProbeAmt;
122  }
123}
124
125
126/// FindKey - Look up the bucket that contains the specified key. If it exists
127/// in the map, return the bucket number of the key.  Otherwise return -1.
128/// This does not modify the map.
129int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const {
130  unsigned HTSize = NumBuckets;
131  if (HTSize == 0) return -1;  // Really empty table?
132  unsigned FullHashValue = HashString(KeyStart, KeyEnd);
133  unsigned BucketNo = FullHashValue & (HTSize-1);
134
135  unsigned ProbeAmt = 1;
136  while (1) {
137    ItemBucket &Bucket = TheTable[BucketNo];
138    StringMapEntryBase *BucketItem = Bucket.Item;
139    // If we found an empty bucket, this key isn't in the table yet, return.
140    if (BucketItem == 0)
141      return -1;
142
143    if (BucketItem == getTombstoneVal()) {
144      // Ignore tombstones.
145    } else if (Bucket.FullHashValue == FullHashValue) {
146      // If the full hash value matches, check deeply for a match.  The common
147      // case here is that we are only looking at the buckets (for item info
148      // being non-null and for the full hash value) not at the items.  This
149      // is important for cache locality.
150
151      // Do the comparison like this because NameStart isn't necessarily
152      // null-terminated!
153      char *ItemStr = (char*)BucketItem+ItemSize;
154      unsigned ItemStrLen = BucketItem->getKeyLength();
155      if (unsigned(KeyEnd-KeyStart) == ItemStrLen &&
156          memcmp(ItemStr, KeyStart, ItemStrLen) == 0) {
157        // We found a match!
158        return BucketNo;
159      }
160    }
161
162    // Okay, we didn't find the item.  Probe to the next bucket.
163    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
164
165    // Use quadratic probing, it has fewer clumping artifacts than linear
166    // probing and has good cache behavior in the common case.
167    ++ProbeAmt;
168  }
169}
170
171/// RemoveKey - Remove the specified StringMapEntry from the table, but do not
172/// delete it.  This aborts if the value isn't in the table.
173void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
174  const char *VStr = (char*)V + ItemSize;
175  StringMapEntryBase *V2 = RemoveKey(VStr, VStr+V->getKeyLength());
176  V2 = V2;
177  assert(V == V2 && "Didn't find key?");
178}
179
180/// RemoveKey - Remove the StringMapEntry for the specified key from the
181/// table, returning it.  If the key is not in the table, this returns null.
182StringMapEntryBase *StringMapImpl::RemoveKey(const char *KeyStart,
183                                             const char *KeyEnd) {
184  int Bucket = FindKey(KeyStart, KeyEnd);
185  if (Bucket == -1) return 0;
186
187  StringMapEntryBase *Result = TheTable[Bucket].Item;
188  TheTable[Bucket].Item = getTombstoneVal();
189  --NumItems;
190  ++NumTombstones;
191  return Result;
192}
193
194
195
196/// RehashTable - Grow the table, redistributing values into the buckets with
197/// the appropriate mod-of-hashtable-size.
198void StringMapImpl::RehashTable() {
199  unsigned NewSize = NumBuckets*2;
200  // Allocate one extra bucket which will always be non-empty.  This allows the
201  // iterators to stop at end.
202  ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket));
203  NewTableArray[NewSize].Item = (StringMapEntryBase*)2;
204
205  // Rehash all the items into their new buckets.  Luckily :) we already have
206  // the hash values available, so we don't have to rehash any strings.
207  for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
208    if (IB->Item && IB->Item != getTombstoneVal()) {
209      // Fast case, bucket available.
210      unsigned FullHash = IB->FullHashValue;
211      unsigned NewBucket = FullHash & (NewSize-1);
212      if (NewTableArray[NewBucket].Item == 0) {
213        NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
214        NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
215        continue;
216      }
217
218      // Otherwise probe for a spot.
219      unsigned ProbeSize = 1;
220      do {
221        NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
222      } while (NewTableArray[NewBucket].Item);
223
224      // Finally found a slot.  Fill it in.
225      NewTableArray[NewBucket].Item = IB->Item;
226      NewTableArray[NewBucket].FullHashValue = FullHash;
227    }
228  }
229
230  free(TheTable);
231
232  TheTable = NewTableArray;
233  NumBuckets = NewSize;
234}
235