StringMap.cpp revision 44dcd01cb3424420d79d5811fa8c1c052411f975
1//===--- StringMap.cpp - String Hash table map implementation -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the StringMap class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/StringMap.h"
15#include <cassert>
16using namespace llvm;
17
18StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
19  assert((InitSize & (InitSize-1)) == 0 &&
20         "Init Size must be a power of 2 or zero!");
21  NumBuckets = InitSize ? InitSize : 512;
22  ItemSize = itemSize;
23  NumItems = 0;
24  NumTombstones = 0;
25
26  TheTable = new ItemBucket[NumBuckets+1]();
27  memset(TheTable, 0, NumBuckets*sizeof(ItemBucket));
28
29  // Allocate one extra bucket, set it to look filled so the iterators stop at
30  // end.
31  TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
32}
33
34
35/// HashString - Compute a hash code for the specified string.
36///
37static unsigned HashString(const char *Start, const char *End) {
38  // Bernstein hash function.
39  unsigned int Result = 0;
40  // TODO: investigate whether a modified bernstein hash function performs
41  // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
42  //   X*33+c -> X*33^c
43  while (Start != End)
44    Result = Result * 33 + *Start++;
45  Result = Result + (Result >> 5);
46  return Result;
47}
48
49/// LookupBucketFor - Look up the bucket that the specified string should end
50/// up in.  If it already exists as a key in the map, the Item pointer for the
51/// specified bucket will be non-null.  Otherwise, it will be null.  In either
52/// case, the FullHashValue field of the bucket will be set to the hash value
53/// of the string.
54unsigned StringMapImpl::LookupBucketFor(const char *NameStart,
55                                         const char *NameEnd) {
56  unsigned HTSize = NumBuckets;
57  unsigned FullHashValue = HashString(NameStart, NameEnd);
58  unsigned BucketNo = FullHashValue & (HTSize-1);
59
60  unsigned ProbeAmt = 1;
61  int FirstTombstone = -1;
62  while (1) {
63    ItemBucket &Bucket = TheTable[BucketNo];
64    StringMapEntryBase *BucketItem = Bucket.Item;
65    // If we found an empty bucket, this key isn't in the table yet, return it.
66    if (BucketItem == 0) {
67      // If we found a tombstone, we want to reuse the tombstone instead of an
68      // empty bucket.  This reduces probing.
69      if (FirstTombstone != -1) {
70        TheTable[FirstTombstone].FullHashValue = FullHashValue;
71        return FirstTombstone;
72      }
73
74      Bucket.FullHashValue = FullHashValue;
75      return BucketNo;
76    }
77
78    if (BucketItem == getTombstoneVal()) {
79      // Skip over tombstones.  However, remember the first one we see.
80      if (FirstTombstone == -1) FirstTombstone = BucketNo;
81    } else if (Bucket.FullHashValue == FullHashValue) {
82      // If the full hash value matches, check deeply for a match.  The common
83      // case here is that we are only looking at the buckets (for item info
84      // being non-null and for the full hash value) not at the items.  This
85      // is important for cache locality.
86
87      // Do the comparison like this because NameStart isn't necessarily
88      // null-terminated!
89      char *ItemStr = (char*)BucketItem+ItemSize;
90      unsigned ItemStrLen = BucketItem->getKeyLength();
91      if (unsigned(NameEnd-NameStart) == ItemStrLen &&
92          memcmp(ItemStr, NameStart, ItemStrLen) == 0) {
93        // We found a match!
94        return BucketNo;
95      }
96    }
97
98    // Okay, we didn't find the item.  Probe to the next bucket.
99    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
100
101    // Use quadratic probing, it has fewer clumping artifacts than linear
102    // probing and has good cache behavior in the common case.
103    ++ProbeAmt;
104  }
105}
106
107
108/// FindKey - Look up the bucket that contains the specified key. If it exists
109/// in the map, return the bucket number of the key.  Otherwise return -1.
110/// This does not modify the map.
111int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const {
112  unsigned HTSize = NumBuckets;
113  unsigned FullHashValue = HashString(KeyStart, KeyEnd);
114  unsigned BucketNo = FullHashValue & (HTSize-1);
115
116  unsigned ProbeAmt = 1;
117  while (1) {
118    ItemBucket &Bucket = TheTable[BucketNo];
119    StringMapEntryBase *BucketItem = Bucket.Item;
120    // If we found an empty bucket, this key isn't in the table yet, return.
121    if (BucketItem == 0)
122      return -1;
123
124    if (BucketItem == getTombstoneVal()) {
125      // Ignore tombstones.
126    } else if (Bucket.FullHashValue == FullHashValue) {
127      // If the full hash value matches, check deeply for a match.  The common
128      // case here is that we are only looking at the buckets (for item info
129      // being non-null and for the full hash value) not at the items.  This
130      // is important for cache locality.
131
132      // Do the comparison like this because NameStart isn't necessarily
133      // null-terminated!
134      char *ItemStr = (char*)BucketItem+ItemSize;
135      unsigned ItemStrLen = BucketItem->getKeyLength();
136      if (unsigned(KeyEnd-KeyStart) == ItemStrLen &&
137          memcmp(ItemStr, KeyStart, ItemStrLen) == 0) {
138        // We found a match!
139        return BucketNo;
140      }
141    }
142
143    // Okay, we didn't find the item.  Probe to the next bucket.
144    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
145
146    // Use quadratic probing, it has fewer clumping artifacts than linear
147    // probing and has good cache behavior in the common case.
148    ++ProbeAmt;
149  }
150}
151
152/// RemoveKey - Remove the specified StringMapEntry from the table, but do not
153/// delete it.  This aborts if the value isn't in the table.
154void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
155  const char *VStr = (char*)V + ItemSize;
156  StringMapEntryBase *V2 = RemoveKey(VStr, VStr+V->getKeyLength());
157  V2 = V2;
158  assert(V == V2 && "Didn't find key?");
159}
160
161/// RemoveKey - Remove the StringMapEntry for the specified key from the
162/// table, returning it.  If the key is not in the table, this returns null.
163StringMapEntryBase *StringMapImpl::RemoveKey(const char *KeyStart,
164                                             const char *KeyEnd) {
165  int Bucket = FindKey(KeyStart, KeyEnd);
166  if (Bucket == -1) return 0;
167
168  StringMapEntryBase *Result = TheTable[Bucket].Item;
169  TheTable[Bucket].Item = getTombstoneVal();
170  --NumItems;
171  ++NumTombstones;
172  return Result;
173}
174
175
176
177/// RehashTable - Grow the table, redistributing values into the buckets with
178/// the appropriate mod-of-hashtable-size.
179void StringMapImpl::RehashTable() {
180  unsigned NewSize = NumBuckets*2;
181  // Allocate one extra bucket which will always be non-empty.  This allows the
182  // iterators to stop at end.
183  ItemBucket *NewTableArray = new ItemBucket[NewSize+1]();
184  memset(NewTableArray, 0, NewSize*sizeof(ItemBucket));
185  NewTableArray[NewSize].Item = (StringMapEntryBase*)2;
186
187  // Rehash all the items into their new buckets.  Luckily :) we already have
188  // the hash values available, so we don't have to rehash any strings.
189  for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
190    if (IB->Item && IB->Item != getTombstoneVal()) {
191      // Fast case, bucket available.
192      unsigned FullHash = IB->FullHashValue;
193      unsigned NewBucket = FullHash & (NewSize-1);
194      if (NewTableArray[NewBucket].Item == 0) {
195        NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
196        NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
197        continue;
198      }
199
200      // Otherwise probe for a spot.
201      unsigned ProbeSize = 1;
202      do {
203        NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
204      } while (NewTableArray[NewBucket].Item);
205
206      // Finally found a slot.  Fill it in.
207      NewTableArray[NewBucket].Item = IB->Item;
208      NewTableArray[NewBucket].FullHashValue = FullHash;
209    }
210  }
211
212  delete[] TheTable;
213
214  TheTable = NewTableArray;
215  NumBuckets = NewSize;
216}
217