StringMap.cpp revision ee182422ff0b638b17c5ee802c19b8680107c2bb
123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner//===--- CStringMap.cpp - CString Hash table map implementation -----------===// 223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// 323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// The LLVM Compiler Infrastructure 423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// 523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// This file was developed by Chris Lattner and is distributed under 623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// the University of Illinois Open Source License. See LICENSE.TXT for details. 723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// 823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner//===----------------------------------------------------------------------===// 923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// 1023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// This file implements the CStringMap class. 1123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner// 1223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner//===----------------------------------------------------------------------===// 1323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 1423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner#include "llvm/ADT/CStringMap.h" 1523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner#include <cassert> 1623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattnerusing namespace llvm; 1723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 1823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris LattnerCStringMapVisitor::~CStringMapVisitor() { 1923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner} 2023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 2123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris LattnerCStringMapImpl::CStringMapImpl(unsigned InitSize, unsigned itemSize) { 2223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner assert((InitSize & (InitSize-1)) == 0 && 2323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner "Init Size must be a power of 2 or zero!"); 2423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NumBuckets = InitSize ? InitSize : 512; 2523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner ItemSize = itemSize; 2623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NumItems = 0; 2723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 2823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner TheTable = new ItemBucket[NumBuckets](); 2923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner memset(TheTable, 0, NumBuckets*sizeof(ItemBucket)); 3023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner} 3123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 3223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 3323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// HashString - Compute a hash code for the specified string. 3423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// 3523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattnerstatic unsigned HashString(const char *Start, const char *End) { 3638742b9603858db660cbb6b67853ca9266eaa196Chris Lattner // Bernstein hash function. 3723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned int Result = 0; 3838742b9603858db660cbb6b67853ca9266eaa196Chris Lattner // TODO: investigate whether a modified bernstein hash function performs 3941a4429f6586ce648ba29fbd33bfed433005c3daChris Lattner // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx 4038742b9603858db660cbb6b67853ca9266eaa196Chris Lattner // X*33+c -> X*33^c 4123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner while (Start != End) 4223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner Result = Result * 33 + *Start++; 4323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner Result = Result + (Result >> 5); 4423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner return Result; 4523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner} 4623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 4723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// LookupBucketFor - Look up the bucket that the specified string should end 4823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// up in. If it already exists as a key in the map, the Item pointer for the 4923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// specified bucket will be non-null. Otherwise, it will be null. In either 5023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// case, the FullHashValue field of the bucket will be set to the hash value 5123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// of the string. 5223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattnerunsigned CStringMapImpl::LookupBucketFor(const char *NameStart, 5323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner const char *NameEnd) { 5423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned HTSize = NumBuckets; 5523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned FullHashValue = HashString(NameStart, NameEnd); 5623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned BucketNo = FullHashValue & (HTSize-1); 5723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 5823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned ProbeAmt = 1; 5923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner while (1) { 6023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner ItemBucket &Bucket = TheTable[BucketNo]; 61ee182422ff0b638b17c5ee802c19b8680107c2bbChris Lattner StringMapEntryBase *BucketItem = Bucket.Item; 6223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // If we found an empty bucket, this key isn't in the table yet, return it. 6323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner if (BucketItem == 0) { 6423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner Bucket.FullHashValue = FullHashValue; 6523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner return BucketNo; 6623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 6723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 6823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // If the full hash value matches, check deeply for a match. The common 6923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // case here is that we are only looking at the buckets (for item info 7023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // being non-null and for the full hash value) not at the items. This 7123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // is important for cache locality. 7223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner if (Bucket.FullHashValue == FullHashValue) { 7323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // Do the comparison like this because NameStart isn't necessarily 7423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // null-terminated! 7523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner char *ItemStr = (char*)BucketItem+ItemSize; 76ee182422ff0b638b17c5ee802c19b8680107c2bbChris Lattner unsigned ItemStrLen = BucketItem->getKeyLength(); 77ee182422ff0b638b17c5ee802c19b8680107c2bbChris Lattner if (unsigned(NameEnd-NameStart) == ItemStrLen && 78ee182422ff0b638b17c5ee802c19b8680107c2bbChris Lattner memcmp(ItemStr, NameStart, ItemStrLen) == 0) { 7923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // We found a match! 8023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner return BucketNo; 8123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 8223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 8323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 8423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // Okay, we didn't find the item. Probe to the next bucket. 8523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); 8623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 8723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // Use quadratic probing, it has fewer clumping artifacts than linear 8823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // probing and has good cache behavior in the common case. 8923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner ++ProbeAmt; 9023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 9123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner} 9223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 9323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// RehashTable - Grow the table, redistributing values into the buckets with 9423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// the appropriate mod-of-hashtable-size. 9523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattnervoid CStringMapImpl::RehashTable() { 9623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned NewSize = NumBuckets*2; 9723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner ItemBucket *NewTableArray = new ItemBucket[NewSize](); 9823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner memset(NewTableArray, 0, NewSize*sizeof(ItemBucket)); 9923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 10023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // Rehash all the items into their new buckets. Luckily :) we already have 10123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // the hash values available, so we don't have to rehash any strings. 10223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) { 10323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner if (IB->Item) { 10423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // Fast case, bucket available. 10523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned FullHash = IB->FullHashValue; 10623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned NewBucket = FullHash & (NewSize-1); 10723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner if (NewTableArray[NewBucket].Item == 0) { 10823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NewTableArray[FullHash & (NewSize-1)].Item = IB->Item; 10923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash; 11023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner continue; 11123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 11223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 11323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner unsigned ProbeSize = 1; 11423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner do { 11523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NewBucket = (NewBucket + ProbeSize++) & (NewSize-1); 11623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } while (NewTableArray[NewBucket].Item); 11723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 11823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner // Finally found a slot. Fill it in. 11923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NewTableArray[NewBucket].Item = IB->Item; 12023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NewTableArray[NewBucket].FullHashValue = FullHash; 12123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 12223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 12323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 12423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner delete[] TheTable; 12523d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 12623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner TheTable = NewTableArray; 12723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner NumBuckets = NewSize; 12823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner} 12923d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 13023d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner 13123d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// VisitEntries - This method walks through all of the items, 13223d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner/// invoking Visitor.Visit for each of them. 13323d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattnervoid CStringMapImpl::VisitEntries(const CStringMapVisitor &Visitor) const { 13423d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) { 135ee182422ff0b638b17c5ee802c19b8680107c2bbChris Lattner if (StringMapEntryBase *Id = IB->Item) 13623d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner Visitor.Visit((char*)Id + ItemSize, Id); 13723d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner } 13823d7b3611759c7fb3a853dfce3ee3d43ef5ca67dChris Lattner} 139