FoldingSet.cpp revision 8c0152f8a557742efddc57c3993c2c0a3a79ca4d
1//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a hash set that can be used to remove duplication of
11// nodes in a graph.  This code was originally created by Chris Lattner for use
12// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code
13// set.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/FoldingSet.h"
18#include "llvm/ADT/Hashing.h"
19#include "llvm/Support/Allocator.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/MathExtras.h"
22#include "llvm/Support/Host.h"
23#include <cassert>
24#include <cstring>
25using namespace llvm;
26
27//===----------------------------------------------------------------------===//
28// FoldingSetNodeIDRef Implementation
29
30/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
31/// used to lookup the node in the FoldingSetImpl.
32unsigned FoldingSetNodeIDRef::ComputeHash() const {
33  return static_cast<unsigned>(hash_combine_range(Data, Data+Size));
34}
35
36bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
37  if (Size != RHS.Size) return false;
38  return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
39}
40
41//===----------------------------------------------------------------------===//
42// FoldingSetNodeID Implementation
43
44/// Add* - Add various data types to Bit data.
45///
46void FoldingSetNodeID::AddPointer(const void *Ptr) {
47  // Note: this adds pointers to the hash using sizes and endianness that
48  // depend on the host.  It doesn't matter however, because hashing on
49  // pointer values in inherently unstable.  Nothing  should depend on the
50  // ordering of nodes in the folding set.
51  Bits.append(reinterpret_cast<unsigned *>(&Ptr),
52              reinterpret_cast<unsigned *>(&Ptr+1));
53}
54void FoldingSetNodeID::AddInteger(signed I) {
55  Bits.push_back(I);
56}
57void FoldingSetNodeID::AddInteger(unsigned I) {
58  Bits.push_back(I);
59}
60void FoldingSetNodeID::AddInteger(long I) {
61  AddInteger((unsigned long)I);
62}
63void FoldingSetNodeID::AddInteger(unsigned long I) {
64  if (sizeof(long) == sizeof(int))
65    AddInteger(unsigned(I));
66  else if (sizeof(long) == sizeof(long long)) {
67    AddInteger((unsigned long long)I);
68  } else {
69    llvm_unreachable("unexpected sizeof(long)");
70  }
71}
72void FoldingSetNodeID::AddInteger(long long I) {
73  AddInteger((unsigned long long)I);
74}
75void FoldingSetNodeID::AddInteger(unsigned long long I) {
76  AddInteger(unsigned(I));
77  if ((uint64_t)(unsigned)I != I)
78    Bits.push_back(unsigned(I >> 32));
79}
80
81void FoldingSetNodeID::AddString(StringRef String) {
82  unsigned Size =  String.size();
83  Bits.push_back(Size);
84  if (!Size) return;
85
86  unsigned Units = Size / 4;
87  unsigned Pos = 0;
88  const unsigned *Base = (const unsigned*) String.data();
89
90  // If the string is aligned do a bulk transfer.
91  if (!((intptr_t)Base & 3)) {
92    Bits.append(Base, Base + Units);
93    Pos = (Units + 1) * 4;
94  } else {
95    // Otherwise do it the hard way.
96    // To be compatible with above bulk transfer, we need to take endianness
97    // into account.
98    if (sys::isBigEndianHost()) {
99      for (Pos += 4; Pos <= Size; Pos += 4) {
100        unsigned V = ((unsigned char)String[Pos - 4] << 24) |
101                     ((unsigned char)String[Pos - 3] << 16) |
102                     ((unsigned char)String[Pos - 2] << 8) |
103                      (unsigned char)String[Pos - 1];
104        Bits.push_back(V);
105      }
106    } else {
107      assert(sys::isLittleEndianHost() && "Unexpected host endianness");
108      for (Pos += 4; Pos <= Size; Pos += 4) {
109        unsigned V = ((unsigned char)String[Pos - 1] << 24) |
110                     ((unsigned char)String[Pos - 2] << 16) |
111                     ((unsigned char)String[Pos - 3] << 8) |
112                      (unsigned char)String[Pos - 4];
113        Bits.push_back(V);
114      }
115    }
116  }
117
118  // With the leftover bits.
119  unsigned V = 0;
120  // Pos will have overshot size by 4 - #bytes left over.
121  // No need to take endianness into account here - this is always executed.
122  switch (Pos - Size) {
123  case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru.
124  case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru.
125  case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
126  default: return; // Nothing left.
127  }
128
129  Bits.push_back(V);
130}
131
132// AddNodeID - Adds the Bit data of another ID to *this.
133void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
134  Bits.append(ID.Bits.begin(), ID.Bits.end());
135}
136
137/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
138/// lookup the node in the FoldingSetImpl.
139unsigned FoldingSetNodeID::ComputeHash() const {
140  return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
141}
142
143/// operator== - Used to compare two nodes to each other.
144///
145bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{
146  return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
147}
148
149/// operator== - Used to compare two nodes to each other.
150///
151bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
152  return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
153}
154
155/// Intern - Copy this node's data to a memory region allocated from the
156/// given allocator and return a FoldingSetNodeIDRef describing the
157/// interned data.
158FoldingSetNodeIDRef
159FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
160  unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
161  std::uninitialized_copy(Bits.begin(), Bits.end(), New);
162  return FoldingSetNodeIDRef(New, Bits.size());
163}
164
165//===----------------------------------------------------------------------===//
166/// Helper functions for FoldingSetImpl.
167
168/// GetNextPtr - In order to save space, each bucket is a
169/// singly-linked-list. In order to make deletion more efficient, we make
170/// the list circular, so we can delete a node without computing its hash.
171/// The problem with this is that the start of the hash buckets are not
172/// Nodes.  If NextInBucketPtr is a bucket pointer, this method returns null:
173/// use GetBucketPtr when this happens.
174static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) {
175  // The low bit is set if this is the pointer back to the bucket.
176  if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
177    return 0;
178
179  return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr);
180}
181
182
183/// testing.
184static void **GetBucketPtr(void *NextInBucketPtr) {
185  intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
186  assert((Ptr & 1) && "Not a bucket pointer");
187  return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
188}
189
190/// GetBucketFor - Hash the specified node ID and return the hash bucket for
191/// the specified ID.
192static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
193  // NumBuckets is always a power of 2.
194  unsigned BucketNum = Hash & (NumBuckets-1);
195  return Buckets + BucketNum;
196}
197
198/// AllocateBuckets - Allocated initialized bucket memory.
199static void **AllocateBuckets(unsigned NumBuckets) {
200  void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*)));
201  // Set the very last bucket to be a non-null "pointer".
202  Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
203  return Buckets;
204}
205
206//===----------------------------------------------------------------------===//
207// FoldingSetImpl Implementation
208
209FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
210  assert(5 < Log2InitSize && Log2InitSize < 32 &&
211         "Initial hash table size out of range");
212  NumBuckets = 1 << Log2InitSize;
213  Buckets = AllocateBuckets(NumBuckets);
214  NumNodes = 0;
215}
216FoldingSetImpl::~FoldingSetImpl() {
217  free(Buckets);
218}
219void FoldingSetImpl::clear() {
220  // Set all but the last bucket to null pointers.
221  memset(Buckets, 0, NumBuckets*sizeof(void*));
222
223  // Set the very last bucket to be a non-null "pointer".
224  Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
225
226  // Reset the node count to zero.
227  NumNodes = 0;
228}
229
230/// GrowHashTable - Double the size of the hash table and rehash everything.
231///
232void FoldingSetImpl::GrowHashTable() {
233  void **OldBuckets = Buckets;
234  unsigned OldNumBuckets = NumBuckets;
235  NumBuckets <<= 1;
236
237  // Clear out new buckets.
238  Buckets = AllocateBuckets(NumBuckets);
239  NumNodes = 0;
240
241  // Walk the old buckets, rehashing nodes into their new place.
242  FoldingSetNodeID TempID;
243  for (unsigned i = 0; i != OldNumBuckets; ++i) {
244    void *Probe = OldBuckets[i];
245    if (!Probe) continue;
246    while (Node *NodeInBucket = GetNextPtr(Probe)) {
247      // Figure out the next link, remove NodeInBucket from the old link.
248      Probe = NodeInBucket->getNextInBucket();
249      NodeInBucket->SetNextInBucket(0);
250
251      // Insert the node into the new bucket, after recomputing the hash.
252      InsertNode(NodeInBucket,
253                 GetBucketFor(ComputeNodeHash(NodeInBucket, TempID),
254                              Buckets, NumBuckets));
255      TempID.clear();
256    }
257  }
258
259  free(OldBuckets);
260}
261
262/// FindNodeOrInsertPos - Look up the node specified by ID.  If it exists,
263/// return it.  If not, return the insertion token that will make insertion
264/// faster.
265FoldingSetImpl::Node
266*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
267                                     void *&InsertPos) {
268
269  void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets);
270  void *Probe = *Bucket;
271
272  InsertPos = 0;
273
274  FoldingSetNodeID TempID;
275  while (Node *NodeInBucket = GetNextPtr(Probe)) {
276    if (NodeEquals(NodeInBucket, ID, TempID))
277      return NodeInBucket;
278    TempID.clear();
279
280    Probe = NodeInBucket->getNextInBucket();
281  }
282
283  // Didn't find the node, return null with the bucket as the InsertPos.
284  InsertPos = Bucket;
285  return 0;
286}
287
288/// InsertNode - Insert the specified node into the folding set, knowing that it
289/// is not already in the map.  InsertPos must be obtained from
290/// FindNodeOrInsertPos.
291void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
292  assert(N->getNextInBucket() == 0);
293  // Do we need to grow the hashtable?
294  if (NumNodes+1 > NumBuckets*2) {
295    GrowHashTable();
296    FoldingSetNodeID TempID;
297    InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets);
298  }
299
300  ++NumNodes;
301
302  /// The insert position is actually a bucket pointer.
303  void **Bucket = static_cast<void**>(InsertPos);
304
305  void *Next = *Bucket;
306
307  // If this is the first insertion into this bucket, its next pointer will be
308  // null.  Pretend as if it pointed to itself, setting the low bit to indicate
309  // that it is a pointer to the bucket.
310  if (Next == 0)
311    Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
312
313  // Set the node's next pointer, and make the bucket point to the node.
314  N->SetNextInBucket(Next);
315  *Bucket = N;
316}
317
318/// RemoveNode - Remove a node from the folding set, returning true if one was
319/// removed or false if the node was not in the folding set.
320bool FoldingSetImpl::RemoveNode(Node *N) {
321  // Because each bucket is a circular list, we don't need to compute N's hash
322  // to remove it.
323  void *Ptr = N->getNextInBucket();
324  if (Ptr == 0) return false;  // Not in folding set.
325
326  --NumNodes;
327  N->SetNextInBucket(0);
328
329  // Remember what N originally pointed to, either a bucket or another node.
330  void *NodeNextPtr = Ptr;
331
332  // Chase around the list until we find the node (or bucket) which points to N.
333  while (true) {
334    if (Node *NodeInBucket = GetNextPtr(Ptr)) {
335      // Advance pointer.
336      Ptr = NodeInBucket->getNextInBucket();
337
338      // We found a node that points to N, change it to point to N's next node,
339      // removing N from the list.
340      if (Ptr == N) {
341        NodeInBucket->SetNextInBucket(NodeNextPtr);
342        return true;
343      }
344    } else {
345      void **Bucket = GetBucketPtr(Ptr);
346      Ptr = *Bucket;
347
348      // If we found that the bucket points to N, update the bucket to point to
349      // whatever is next.
350      if (Ptr == N) {
351        *Bucket = NodeNextPtr;
352        return true;
353      }
354    }
355  }
356}
357
358/// GetOrInsertNode - If there is an existing simple Node exactly
359/// equal to the specified node, return it.  Otherwise, insert 'N' and it
360/// instead.
361FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
362  FoldingSetNodeID ID;
363  GetNodeProfile(N, ID);
364  void *IP;
365  if (Node *E = FindNodeOrInsertPos(ID, IP))
366    return E;
367  InsertNode(N, IP);
368  return N;
369}
370
371//===----------------------------------------------------------------------===//
372// FoldingSetIteratorImpl Implementation
373
374FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
375  // Skip to the first non-null non-self-cycle bucket.
376  while (*Bucket != reinterpret_cast<void*>(-1) &&
377         (*Bucket == 0 || GetNextPtr(*Bucket) == 0))
378    ++Bucket;
379
380  NodePtr = static_cast<FoldingSetNode*>(*Bucket);
381}
382
383void FoldingSetIteratorImpl::advance() {
384  // If there is another link within this bucket, go to it.
385  void *Probe = NodePtr->getNextInBucket();
386
387  if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
388    NodePtr = NextNodeInBucket;
389  else {
390    // Otherwise, this is the last link in this bucket.
391    void **Bucket = GetBucketPtr(Probe);
392
393    // Skip to the next non-null non-self-cycle bucket.
394    do {
395      ++Bucket;
396    } while (*Bucket != reinterpret_cast<void*>(-1) &&
397             (*Bucket == 0 || GetNextPtr(*Bucket) == 0));
398
399    NodePtr = static_cast<FoldingSetNode*>(*Bucket);
400  }
401}
402
403//===----------------------------------------------------------------------===//
404// FoldingSetBucketIteratorImpl Implementation
405
406FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) {
407  Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket;
408}
409