StringMap.cpp revision b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6a
1//===--- StringMap.cpp - String Hash table map implementation -------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the StringMap class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm/ADT/StringMap.h" 15#include <cassert> 16using namespace llvm; 17 18StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) { 19 assert((InitSize & (InitSize-1)) == 0 && 20 "Init Size must be a power of 2 or zero!"); 21 NumBuckets = InitSize ? InitSize : 512; 22 ItemSize = itemSize; 23 NumItems = 0; 24 25 TheTable = new ItemBucket[NumBuckets+1](); 26 memset(TheTable, 0, NumBuckets*sizeof(ItemBucket)); 27 28 // Allocate one extra bucket, set it to look filled so the iterators stop at 29 // end. 30 TheTable[NumBuckets].Item = (StringMapEntryBase*)2; 31} 32 33 34/// HashString - Compute a hash code for the specified string. 35/// 36static unsigned HashString(const char *Start, const char *End) { 37 // Bernstein hash function. 38 unsigned int Result = 0; 39 // TODO: investigate whether a modified bernstein hash function performs 40 // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx 41 // X*33+c -> X*33^c 42 while (Start != End) 43 Result = Result * 33 + *Start++; 44 Result = Result + (Result >> 5); 45 return Result; 46} 47 48/// LookupBucketFor - Look up the bucket that the specified string should end 49/// up in. If it already exists as a key in the map, the Item pointer for the 50/// specified bucket will be non-null. Otherwise, it will be null. In either 51/// case, the FullHashValue field of the bucket will be set to the hash value 52/// of the string. 53unsigned StringMapImpl::LookupBucketFor(const char *NameStart, 54 const char *NameEnd) { 55 unsigned HTSize = NumBuckets; 56 unsigned FullHashValue = HashString(NameStart, NameEnd); 57 unsigned BucketNo = FullHashValue & (HTSize-1); 58 59 unsigned ProbeAmt = 1; 60 while (1) { 61 ItemBucket &Bucket = TheTable[BucketNo]; 62 StringMapEntryBase *BucketItem = Bucket.Item; 63 // If we found an empty bucket, this key isn't in the table yet, return it. 64 if (BucketItem == 0) { 65 Bucket.FullHashValue = FullHashValue; 66 return BucketNo; 67 } 68 69 // If the full hash value matches, check deeply for a match. The common 70 // case here is that we are only looking at the buckets (for item info 71 // being non-null and for the full hash value) not at the items. This 72 // is important for cache locality. 73 if (Bucket.FullHashValue == FullHashValue) { 74 // Do the comparison like this because NameStart isn't necessarily 75 // null-terminated! 76 char *ItemStr = (char*)BucketItem+ItemSize; 77 unsigned ItemStrLen = BucketItem->getKeyLength(); 78 if (unsigned(NameEnd-NameStart) == ItemStrLen && 79 memcmp(ItemStr, NameStart, ItemStrLen) == 0) { 80 // We found a match! 81 return BucketNo; 82 } 83 } 84 85 // Okay, we didn't find the item. Probe to the next bucket. 86 BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); 87 88 // Use quadratic probing, it has fewer clumping artifacts than linear 89 // probing and has good cache behavior in the common case. 90 ++ProbeAmt; 91 } 92} 93 94 95/// FindKey - Look up the bucket that contains the specified key. If it exists 96/// in the map, return the bucket number of the key. Otherwise return -1. 97/// This does not modify the map. 98int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const { 99 unsigned HTSize = NumBuckets; 100 unsigned FullHashValue = HashString(KeyStart, KeyEnd); 101 unsigned BucketNo = FullHashValue & (HTSize-1); 102 103 unsigned ProbeAmt = 1; 104 while (1) { 105 ItemBucket &Bucket = TheTable[BucketNo]; 106 StringMapEntryBase *BucketItem = Bucket.Item; 107 // If we found an empty bucket, this key isn't in the table yet, return. 108 if (BucketItem == 0) 109 return -1; 110 111 // If the full hash value matches, check deeply for a match. The common 112 // case here is that we are only looking at the buckets (for item info 113 // being non-null and for the full hash value) not at the items. This 114 // is important for cache locality. 115 if (Bucket.FullHashValue == FullHashValue) { 116 // Do the comparison like this because NameStart isn't necessarily 117 // null-terminated! 118 char *ItemStr = (char*)BucketItem+ItemSize; 119 unsigned ItemStrLen = BucketItem->getKeyLength(); 120 if (unsigned(KeyEnd-KeyStart) == ItemStrLen && 121 memcmp(ItemStr, KeyStart, ItemStrLen) == 0) { 122 // We found a match! 123 return BucketNo; 124 } 125 } 126 127 // Okay, we didn't find the item. Probe to the next bucket. 128 BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); 129 130 // Use quadratic probing, it has fewer clumping artifacts than linear 131 // probing and has good cache behavior in the common case. 132 ++ProbeAmt; 133 } 134} 135 136 137/// RehashTable - Grow the table, redistributing values into the buckets with 138/// the appropriate mod-of-hashtable-size. 139void StringMapImpl::RehashTable() { 140 unsigned NewSize = NumBuckets*2; 141 // Allocate one extra bucket which will always be non-empty. This allows the 142 // iterators to stop at end. 143 ItemBucket *NewTableArray = new ItemBucket[NewSize+1](); 144 memset(NewTableArray, 0, NewSize*sizeof(ItemBucket)); 145 NewTableArray[NewSize].Item = (StringMapEntryBase*)2; 146 147 // Rehash all the items into their new buckets. Luckily :) we already have 148 // the hash values available, so we don't have to rehash any strings. 149 for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) { 150 if (IB->Item) { 151 // Fast case, bucket available. 152 unsigned FullHash = IB->FullHashValue; 153 unsigned NewBucket = FullHash & (NewSize-1); 154 if (NewTableArray[NewBucket].Item == 0) { 155 NewTableArray[FullHash & (NewSize-1)].Item = IB->Item; 156 NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash; 157 continue; 158 } 159 160 unsigned ProbeSize = 1; 161 do { 162 NewBucket = (NewBucket + ProbeSize++) & (NewSize-1); 163 } while (NewTableArray[NewBucket].Item); 164 165 // Finally found a slot. Fill it in. 166 NewTableArray[NewBucket].Item = IB->Item; 167 NewTableArray[NewBucket].FullHashValue = FullHash; 168 } 169 } 170 171 delete[] TheTable; 172 173 TheTable = NewTableArray; 174 NumBuckets = NewSize; 175} 176