FoldingSet.cpp revision 541ed9fd02ea48d2739f4a9dd681ba2d5da26886
1//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements a hash set that can be used to remove duplication of 11// nodes in a graph. This code was originally created by Chris Lattner for use 12// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code 13// set. 14// 15//===----------------------------------------------------------------------===// 16 17#include "llvm/ADT/FoldingSet.h" 18#include "llvm/Support/MathExtras.h" 19#include <cassert> 20#include <cstring> 21using namespace llvm; 22 23//===----------------------------------------------------------------------===// 24// FoldingSetNodeID Implementation 25 26/// Add* - Add various data types to Bit data. 27/// 28void FoldingSetNodeID::AddPointer(const void *Ptr) { 29 // Note: this adds pointers to the hash using sizes and endianness that 30 // depend on the host. It doesn't matter however, because hashing on 31 // pointer values in inherently unstable. Nothing should depend on the 32 // ordering of nodes in the folding set. 33 intptr_t PtrI = (intptr_t)Ptr; 34 Bits.push_back(unsigned(PtrI)); 35 if (sizeof(intptr_t) > sizeof(unsigned)) 36 Bits.push_back(unsigned(uint64_t(PtrI) >> 32)); 37} 38void FoldingSetNodeID::AddInteger(signed I) { 39 Bits.push_back(I); 40} 41void FoldingSetNodeID::AddInteger(unsigned I) { 42 Bits.push_back(I); 43} 44void FoldingSetNodeID::AddInteger(long I) { 45 AddInteger((unsigned long)I); 46} 47void FoldingSetNodeID::AddInteger(unsigned long I) { 48 if (sizeof(long) == sizeof(int)) 49 AddInteger(unsigned(I)); 50 else if (sizeof(long) == sizeof(long long)) { 51 AddInteger((unsigned long long)I); 52 } else { 53 assert(0 && "unexpected sizeof(long)"); 54 } 55} 56void FoldingSetNodeID::AddInteger(long long I) { 57 AddInteger((unsigned long long)I); 58} 59void FoldingSetNodeID::AddInteger(unsigned long long I) { 60 AddInteger(unsigned(I)); 61 if ((uint64_t)(int)I != I) 62 Bits.push_back(unsigned(I >> 32)); 63} 64 65void FoldingSetNodeID::AddString(const char *String) { 66 unsigned Size = static_cast<unsigned>(strlen(String)); 67 Bits.push_back(Size); 68 if (!Size) return; 69 70 unsigned Units = Size / 4; 71 unsigned Pos = 0; 72 const unsigned *Base = (const unsigned *)String; 73 74 // If the string is aligned do a bulk transfer. 75 if (!((intptr_t)Base & 3)) { 76 Bits.append(Base, Base + Units); 77 Pos = (Units + 1) * 4; 78 } else { 79 // Otherwise do it the hard way. 80 for ( Pos += 4; Pos <= Size; Pos += 4) { 81 unsigned V = ((unsigned char)String[Pos - 4] << 24) | 82 ((unsigned char)String[Pos - 3] << 16) | 83 ((unsigned char)String[Pos - 2] << 8) | 84 (unsigned char)String[Pos - 1]; 85 Bits.push_back(V); 86 } 87 } 88 89 // With the leftover bits. 90 unsigned V = 0; 91 // Pos will have overshot size by 4 - #bytes left over. 92 switch (Pos - Size) { 93 case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru. 94 case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru. 95 case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break; 96 default: return; // Nothing left. 97 } 98 99 Bits.push_back(V); 100} 101 102void FoldingSetNodeID::AddString(const std::string &String) { 103 unsigned Size = static_cast<unsigned>(String.size()); 104 Bits.push_back(Size); 105 if (!Size) return; 106 107 unsigned Units = Size / 4; 108 unsigned Pos = 0; 109 const unsigned *Base = (const unsigned *)String.data(); 110 111 // If the string is aligned do a bulk transfer. 112 if (!((intptr_t)Base & 3)) { 113 Bits.append(Base, Base + Units); 114 Pos = (Units + 1) * 4; 115 } else { 116 // Otherwise do it the hard way. 117 for ( Pos += 4; Pos <= Size; Pos += 4) { 118 unsigned V = ((unsigned char)String[Pos - 4] << 24) | 119 ((unsigned char)String[Pos - 3] << 16) | 120 ((unsigned char)String[Pos - 2] << 8) | 121 (unsigned char)String[Pos - 1]; 122 Bits.push_back(V); 123 } 124 } 125 126 // With the leftover bits. 127 unsigned V = 0; 128 // Pos will have overshot size by 4 - #bytes left over. 129 switch (Pos - Size) { 130 case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru. 131 case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru. 132 case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break; 133 default: return; // Nothing left. 134 } 135 136 Bits.push_back(V); 137} 138 139/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to 140/// lookup the node in the FoldingSetImpl. 141unsigned FoldingSetNodeID::ComputeHash() const { 142 // This is adapted from SuperFastHash by Paul Hsieh. 143 unsigned Hash = static_cast<unsigned>(Bits.size()); 144 for (const unsigned *BP = &Bits[0], *E = BP+Bits.size(); BP != E; ++BP) { 145 unsigned Data = *BP; 146 Hash += Data & 0xFFFF; 147 unsigned Tmp = ((Data >> 16) << 11) ^ Hash; 148 Hash = (Hash << 16) ^ Tmp; 149 Hash += Hash >> 11; 150 } 151 152 // Force "avalanching" of final 127 bits. 153 Hash ^= Hash << 3; 154 Hash += Hash >> 5; 155 Hash ^= Hash << 4; 156 Hash += Hash >> 17; 157 Hash ^= Hash << 25; 158 Hash += Hash >> 6; 159 return Hash; 160} 161 162/// operator== - Used to compare two nodes to each other. 163/// 164bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{ 165 if (Bits.size() != RHS.Bits.size()) return false; 166 return memcmp(&Bits[0], &RHS.Bits[0], Bits.size()*sizeof(Bits[0])) == 0; 167} 168 169 170//===----------------------------------------------------------------------===// 171/// Helper functions for FoldingSetImpl. 172 173/// GetNextPtr - In order to save space, each bucket is a 174/// singly-linked-list. In order to make deletion more efficient, we make 175/// the list circular, so we can delete a node without computing its hash. 176/// The problem with this is that the start of the hash buckets are not 177/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: 178/// use GetBucketPtr when this happens. 179static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) { 180 // The low bit is set if this is the pointer back to the bucket. 181 if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) 182 return 0; 183 184 return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr); 185} 186 187 188/// testing. 189static void **GetBucketPtr(void *NextInBucketPtr) { 190 intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr); 191 assert((Ptr & 1) && "Not a bucket pointer"); 192 return reinterpret_cast<void**>(Ptr & ~intptr_t(1)); 193} 194 195/// GetBucketFor - Hash the specified node ID and return the hash bucket for 196/// the specified ID. 197static void **GetBucketFor(const FoldingSetNodeID &ID, 198 void **Buckets, unsigned NumBuckets) { 199 // NumBuckets is always a power of 2. 200 unsigned BucketNum = ID.ComputeHash() & (NumBuckets-1); 201 return Buckets + BucketNum; 202} 203 204//===----------------------------------------------------------------------===// 205// FoldingSetImpl Implementation 206 207FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { 208 assert(5 < Log2InitSize && Log2InitSize < 32 && 209 "Initial hash table size out of range"); 210 NumBuckets = 1 << Log2InitSize; 211 Buckets = new void*[NumBuckets+1]; 212 clear(); 213} 214FoldingSetImpl::~FoldingSetImpl() { 215 delete [] Buckets; 216} 217void FoldingSetImpl::clear() { 218 // Set all but the last bucket to null pointers. 219 memset(Buckets, 0, NumBuckets*sizeof(void*)); 220 221 // Set the very last bucket to be a non-null "pointer". 222 Buckets[NumBuckets] = reinterpret_cast<void*>(-1); 223 224 // Reset the node count to zero. 225 NumNodes = 0; 226} 227 228/// GrowHashTable - Double the size of the hash table and rehash everything. 229/// 230void FoldingSetImpl::GrowHashTable() { 231 void **OldBuckets = Buckets; 232 unsigned OldNumBuckets = NumBuckets; 233 NumBuckets <<= 1; 234 235 // Clear out new buckets. 236 Buckets = new void*[NumBuckets+1]; 237 clear(); 238 239 // Walk the old buckets, rehashing nodes into their new place. 240 FoldingSetNodeID ID; 241 for (unsigned i = 0; i != OldNumBuckets; ++i) { 242 void *Probe = OldBuckets[i]; 243 if (!Probe) continue; 244 while (Node *NodeInBucket = GetNextPtr(Probe)) { 245 // Figure out the next link, remove NodeInBucket from the old link. 246 Probe = NodeInBucket->getNextInBucket(); 247 NodeInBucket->SetNextInBucket(0); 248 249 // Insert the node into the new bucket, after recomputing the hash. 250 GetNodeProfile(ID, NodeInBucket); 251 InsertNode(NodeInBucket, GetBucketFor(ID, Buckets, NumBuckets)); 252 ID.clear(); 253 } 254 } 255 256 delete[] OldBuckets; 257} 258 259/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, 260/// return it. If not, return the insertion token that will make insertion 261/// faster. 262FoldingSetImpl::Node 263*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, 264 void *&InsertPos) { 265 266 void **Bucket = GetBucketFor(ID, Buckets, NumBuckets); 267 void *Probe = *Bucket; 268 269 InsertPos = 0; 270 271 FoldingSetNodeID OtherID; 272 while (Node *NodeInBucket = GetNextPtr(Probe)) { 273 GetNodeProfile(OtherID, NodeInBucket); 274 if (OtherID == ID) 275 return NodeInBucket; 276 277 Probe = NodeInBucket->getNextInBucket(); 278 OtherID.clear(); 279 } 280 281 // Didn't find the node, return null with the bucket as the InsertPos. 282 InsertPos = Bucket; 283 return 0; 284} 285 286/// InsertNode - Insert the specified node into the folding set, knowing that it 287/// is not already in the map. InsertPos must be obtained from 288/// FindNodeOrInsertPos. 289void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { 290 assert(N->getNextInBucket() == 0); 291 // Do we need to grow the hashtable? 292 if (NumNodes+1 > NumBuckets*2) { 293 GrowHashTable(); 294 FoldingSetNodeID ID; 295 GetNodeProfile(ID, N); 296 InsertPos = GetBucketFor(ID, Buckets, NumBuckets); 297 } 298 299 ++NumNodes; 300 301 /// The insert position is actually a bucket pointer. 302 void **Bucket = static_cast<void**>(InsertPos); 303 304 void *Next = *Bucket; 305 306 // If this is the first insertion into this bucket, its next pointer will be 307 // null. Pretend as if it pointed to itself, setting the low bit to indicate 308 // that it is a pointer to the bucket. 309 if (Next == 0) 310 Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1); 311 312 // Set the node's next pointer, and make the bucket point to the node. 313 N->SetNextInBucket(Next); 314 *Bucket = N; 315} 316 317/// RemoveNode - Remove a node from the folding set, returning true if one was 318/// removed or false if the node was not in the folding set. 319bool FoldingSetImpl::RemoveNode(Node *N) { 320 // Because each bucket is a circular list, we don't need to compute N's hash 321 // to remove it. 322 void *Ptr = N->getNextInBucket(); 323 if (Ptr == 0) return false; // Not in folding set. 324 325 --NumNodes; 326 N->SetNextInBucket(0); 327 328 // Remember what N originally pointed to, either a bucket or another node. 329 void *NodeNextPtr = Ptr; 330 331 // Chase around the list until we find the node (or bucket) which points to N. 332 while (true) { 333 if (Node *NodeInBucket = GetNextPtr(Ptr)) { 334 // Advance pointer. 335 Ptr = NodeInBucket->getNextInBucket(); 336 337 // We found a node that points to N, change it to point to N's next node, 338 // removing N from the list. 339 if (Ptr == N) { 340 NodeInBucket->SetNextInBucket(NodeNextPtr); 341 return true; 342 } 343 } else { 344 void **Bucket = GetBucketPtr(Ptr); 345 Ptr = *Bucket; 346 347 // If we found that the bucket points to N, update the bucket to point to 348 // whatever is next. 349 if (Ptr == N) { 350 *Bucket = NodeNextPtr; 351 return true; 352 } 353 } 354 } 355} 356 357/// GetOrInsertNode - If there is an existing simple Node exactly 358/// equal to the specified node, return it. Otherwise, insert 'N' and it 359/// instead. 360FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { 361 FoldingSetNodeID ID; 362 GetNodeProfile(ID, N); 363 void *IP; 364 if (Node *E = FindNodeOrInsertPos(ID, IP)) 365 return E; 366 InsertNode(N, IP); 367 return N; 368} 369 370//===----------------------------------------------------------------------===// 371// FoldingSetIteratorImpl Implementation 372 373FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { 374 // Skip to the first non-null non-self-cycle bucket. 375 while (*Bucket != reinterpret_cast<void*>(-1) && 376 (*Bucket == 0 || GetNextPtr(*Bucket) == 0)) 377 ++Bucket; 378 379 NodePtr = static_cast<FoldingSetNode*>(*Bucket); 380} 381 382void FoldingSetIteratorImpl::advance() { 383 // If there is another link within this bucket, go to it. 384 void *Probe = NodePtr->getNextInBucket(); 385 386 if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe)) 387 NodePtr = NextNodeInBucket; 388 else { 389 // Otherwise, this is the last link in this bucket. 390 void **Bucket = GetBucketPtr(Probe); 391 392 // Skip to the next non-null non-self-cycle bucket. 393 do { 394 ++Bucket; 395 } while (*Bucket != reinterpret_cast<void*>(-1) && 396 (*Bucket == 0 || GetNextPtr(*Bucket) == 0)); 397 398 NodePtr = static_cast<FoldingSetNode*>(*Bucket); 399 } 400} 401 402//===----------------------------------------------------------------------===// 403// FoldingSetBucketIteratorImpl Implementation 404 405FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { 406 Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket; 407} 408