StringMap.cpp revision b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6a
1bb28a81ba3c112853f0eb3d8df0190accc0379c9Chris Lattner//===--- StringMap.cpp - String Hash table map implementation -------------===// 223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// 323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// The LLVM Compiler Infrastructure 423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// 523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// This file was developed by Chris Lattner and is distributed under 623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// the University of Illinois Open Source License. See LICENSE.TXT for details. 723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// 823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner//===----------------------------------------------------------------------===// 923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// 10bb28a81ba3c112853f0eb3d8df0190accc0379c9Chris Lattner// This file implements the StringMap class. 1123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// 1223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner//===----------------------------------------------------------------------===// 1323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 14bb28a81ba3c112853f0eb3d8df0190accc0379c9Chris Lattner#include "llvm/ADT/StringMap.h" 1523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner#include <cassert> 1623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattnerusing namespace llvm; 1723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 18bb28a81ba3c112853f0eb3d8df0190accc0379c9Chris LattnerStringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) { 1923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner assert((InitSize & (InitSize-1)) == 0 && 2023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner "Init Size must be a power of 2 or zero!"); 2123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NumBuckets = InitSize ? InitSize : 512; 2223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner ItemSize = itemSize; 2323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NumItems = 0; 2423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 25a86559ec426c643a151aeba1e051e4f878050f95Chris Lattner TheTable = new ItemBucket[NumBuckets+1](); 2623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner memset(TheTable, 0, NumBuckets*sizeof(ItemBucket)); 27a86559ec426c643a151aeba1e051e4f878050f95Chris Lattner 28a86559ec426c643a151aeba1e051e4f878050f95Chris Lattner // Allocate one extra bucket, set it to look filled so the iterators stop at 29a86559ec426c643a151aeba1e051e4f878050f95Chris Lattner // end. 30a86559ec426c643a151aeba1e051e4f878050f95Chris Lattner TheTable[NumBuckets].Item = (StringMapEntryBase*)2; 3123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner} 3223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 3323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 3423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// HashString - Compute a hash code for the specified string. 3523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// 3623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattnerstatic unsigned HashString(const char *Start, const char *End) { 3738742b9603858db660cbb6b67853ca9266eaa196Chris Lattner // Bernstein hash function. 3823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned int Result = 0; 3938742b9603858db660cbb6b67853ca9266eaa196Chris Lattner // TODO: investigate whether a modified bernstein hash function performs 4041a4429f6586ce648ba29fbd33bfed433005c3daChris Lattner // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx 4138742b9603858db660cbb6b67853ca9266eaa196Chris Lattner // X*33+c -> X*33^c 4223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner while (Start != End) 4323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner Result = Result * 33 + *Start++; 4423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner Result = Result + (Result >> 5); 4523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner return Result; 4623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner} 4723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 4823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// LookupBucketFor - Look up the bucket that the specified string should end 4923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// up in. If it already exists as a key in the map, the Item pointer for the 5023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// specified bucket will be non-null. Otherwise, it will be null. In either 5123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// case, the FullHashValue field of the bucket will be set to the hash value 5223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// of the string. 53bb28a81ba3c112853f0eb3d8df0190accc0379c9Chris Lattnerunsigned StringMapImpl::LookupBucketFor(const char *NameStart, 5423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner const char *NameEnd) { 5523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned HTSize = NumBuckets; 5623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned FullHashValue = HashString(NameStart, NameEnd); 5723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned BucketNo = FullHashValue & (HTSize-1); 5823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 5923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned ProbeAmt = 1; 6023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner while (1) { 6123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner ItemBucket &Bucket = TheTable[BucketNo]; 62ee182422ff0b638b17c5ee802c19b8680107c2bbChris Lattner StringMapEntryBase *BucketItem = Bucket.Item; 6323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // If we found an empty bucket, this key isn't in the table yet, return it. 6423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner if (BucketItem == 0) { 6523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner Bucket.FullHashValue = FullHashValue; 6623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner return BucketNo; 6723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 6823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 6923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // If the full hash value matches, check deeply for a match. The common 7023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // case here is that we are only looking at the buckets (for item info 7123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // being non-null and for the full hash value) not at the items. This 7223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // is important for cache locality. 7323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner if (Bucket.FullHashValue == FullHashValue) { 7423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // Do the comparison like this because NameStart isn't necessarily 7523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // null-terminated! 7623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner char *ItemStr = (char*)BucketItem+ItemSize; 77ee182422ff0b638b17c5ee802c19b8680107c2bbChris Lattner unsigned ItemStrLen = BucketItem->getKeyLength(); 78ee182422ff0b638b17c5ee802c19b8680107c2bbChris Lattner if (unsigned(NameEnd-NameStart) == ItemStrLen && 79ee182422ff0b638b17c5ee802c19b8680107c2bbChris Lattner memcmp(ItemStr, NameStart, ItemStrLen) == 0) { 8023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // We found a match! 8123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner return BucketNo; 8223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 8323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 8423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 8523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // Okay, we didn't find the item. Probe to the next bucket. 8623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); 8723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 8823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // Use quadratic probing, it has fewer clumping artifacts than linear 8923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // probing and has good cache behavior in the common case. 9023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner ++ProbeAmt; 9123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 9223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner} 9323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 94b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner 95b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner/// FindKey - Look up the bucket that contains the specified key. If it exists 96b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner/// in the map, return the bucket number of the key. Otherwise return -1. 97b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner/// This does not modify the map. 98b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattnerint StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const { 99b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner unsigned HTSize = NumBuckets; 100b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner unsigned FullHashValue = HashString(KeyStart, KeyEnd); 101b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner unsigned BucketNo = FullHashValue & (HTSize-1); 102b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner 103b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner unsigned ProbeAmt = 1; 104b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner while (1) { 105b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner ItemBucket &Bucket = TheTable[BucketNo]; 106b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner StringMapEntryBase *BucketItem = Bucket.Item; 107b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner // If we found an empty bucket, this key isn't in the table yet, return. 108b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner if (BucketItem == 0) 109b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner return -1; 110b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner 111b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner // If the full hash value matches, check deeply for a match. The common 112b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner // case here is that we are only looking at the buckets (for item info 113b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner // being non-null and for the full hash value) not at the items. This 114b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner // is important for cache locality. 115b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner if (Bucket.FullHashValue == FullHashValue) { 116b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner // Do the comparison like this because NameStart isn't necessarily 117b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner // null-terminated! 118b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner char *ItemStr = (char*)BucketItem+ItemSize; 119b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner unsigned ItemStrLen = BucketItem->getKeyLength(); 120b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner if (unsigned(KeyEnd-KeyStart) == ItemStrLen && 121b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner memcmp(ItemStr, KeyStart, ItemStrLen) == 0) { 122b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner // We found a match! 123b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner return BucketNo; 124b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner } 125b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner } 126b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner 127b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner // Okay, we didn't find the item. Probe to the next bucket. 128b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); 129b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner 130b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner // Use quadratic probing, it has fewer clumping artifacts than linear 131b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner // probing and has good cache behavior in the common case. 132b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner ++ProbeAmt; 133b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner } 134b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner} 135b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner 136b5bb9f5b5cfe89f4b7626671f4923d50f8d4cd6aChris Lattner 13723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// RehashTable - Grow the table, redistributing values into the buckets with 13823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// the appropriate mod-of-hashtable-size. 139bb28a81ba3c112853f0eb3d8df0190accc0379c9Chris Lattnervoid StringMapImpl::RehashTable() { 14023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned NewSize = NumBuckets*2; 141a86559ec426c643a151aeba1e051e4f878050f95Chris Lattner // Allocate one extra bucket which will always be non-empty. This allows the 142a86559ec426c643a151aeba1e051e4f878050f95Chris Lattner // iterators to stop at end. 143a86559ec426c643a151aeba1e051e4f878050f95Chris Lattner ItemBucket *NewTableArray = new ItemBucket[NewSize+1](); 14423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner memset(NewTableArray, 0, NewSize*sizeof(ItemBucket)); 145a86559ec426c643a151aeba1e051e4f878050f95Chris Lattner NewTableArray[NewSize].Item = (StringMapEntryBase*)2; 14623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 14723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // Rehash all the items into their new buckets. Luckily :) we already have 14823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // the hash values available, so we don't have to rehash any strings. 14923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) { 15023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner if (IB->Item) { 15123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // Fast case, bucket available. 15223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned FullHash = IB->FullHashValue; 15323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned NewBucket = FullHash & (NewSize-1); 15423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner if (NewTableArray[NewBucket].Item == 0) { 15523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NewTableArray[FullHash & (NewSize-1)].Item = IB->Item; 15623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash; 15723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner continue; 15823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 15923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 16023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned ProbeSize = 1; 16123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner do { 16223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NewBucket = (NewBucket + ProbeSize++) & (NewSize-1); 16323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } while (NewTableArray[NewBucket].Item); 16423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 16523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // Finally found a slot. Fill it in. 16623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NewTableArray[NewBucket].Item = IB->Item; 16723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NewTableArray[NewBucket].FullHashValue = FullHash; 16823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 16923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 17023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 17123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner delete[] TheTable; 17223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 17323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner TheTable = NewTableArray; 17423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NumBuckets = NewSize; 17523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner} 176