FoldingSet.cpp revision 9eddc1cf310c49c0f1f90cbde3687b2610a46689
1//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a hash set that can be used to remove duplication of
11// nodes in a graph.  This code was originally created by Chris Lattner for use
12// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code
13// set.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/FoldingSet.h"
18#include "llvm/ADT/Hashing.h"
19#include "llvm/Support/Allocator.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/MathExtras.h"
22#include "llvm/Support/Host.h"
23#include <cassert>
24#include <cstring>
25using namespace llvm;
26
27//===----------------------------------------------------------------------===//
28// FoldingSetNodeIDRef Implementation
29
30/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
31/// used to lookup the node in the FoldingSetImpl.
32unsigned FoldingSetNodeIDRef::ComputeHash() const {
33  return static_cast<unsigned>(hash_combine_range(Data, Data+Size));
34}
35
36bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
37  if (Size != RHS.Size) return false;
38  return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
39}
40
41//===----------------------------------------------------------------------===//
42// FoldingSetNodeID Implementation
43
44void FoldingSetNodeID::AddString(StringRef String) {
45  unsigned Size =  String.size();
46  Bits.push_back(Size);
47  if (!Size) return;
48
49  unsigned Units = Size / 4;
50  unsigned Pos = 0;
51  const unsigned *Base = (const unsigned*) String.data();
52
53  // If the string is aligned do a bulk transfer.
54  if (!((intptr_t)Base & 3)) {
55    Bits.append(Base, Base + Units);
56    Pos = (Units + 1) * 4;
57  } else {
58    // Otherwise do it the hard way.
59    // To be compatible with above bulk transfer, we need to take endianness
60    // into account.
61    if (sys::isBigEndianHost()) {
62      for (Pos += 4; Pos <= Size; Pos += 4) {
63        unsigned V = ((unsigned char)String[Pos - 4] << 24) |
64                     ((unsigned char)String[Pos - 3] << 16) |
65                     ((unsigned char)String[Pos - 2] << 8) |
66                      (unsigned char)String[Pos - 1];
67        Bits.push_back(V);
68      }
69    } else {
70      assert(sys::isLittleEndianHost() && "Unexpected host endianness");
71      for (Pos += 4; Pos <= Size; Pos += 4) {
72        unsigned V = ((unsigned char)String[Pos - 1] << 24) |
73                     ((unsigned char)String[Pos - 2] << 16) |
74                     ((unsigned char)String[Pos - 3] << 8) |
75                      (unsigned char)String[Pos - 4];
76        Bits.push_back(V);
77      }
78    }
79  }
80
81  // With the leftover bits.
82  unsigned V = 0;
83  // Pos will have overshot size by 4 - #bytes left over.
84  // No need to take endianness into account here - this is always executed.
85  switch (Pos - Size) {
86  case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru.
87  case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru.
88  case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
89  default: return; // Nothing left.
90  }
91
92  Bits.push_back(V);
93}
94
95/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
96/// lookup the node in the FoldingSetImpl.
97unsigned FoldingSetNodeID::ComputeHash() const {
98  return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
99}
100
101/// operator== - Used to compare two nodes to each other.
102///
103bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{
104  return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
105}
106
107/// operator== - Used to compare two nodes to each other.
108///
109bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
110  return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
111}
112
113/// Intern - Copy this node's data to a memory region allocated from the
114/// given allocator and return a FoldingSetNodeIDRef describing the
115/// interned data.
116FoldingSetNodeIDRef
117FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
118  unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
119  std::uninitialized_copy(Bits.begin(), Bits.end(), New);
120  return FoldingSetNodeIDRef(New, Bits.size());
121}
122
123//===----------------------------------------------------------------------===//
124/// Helper functions for FoldingSetImpl.
125
126/// GetNextPtr - In order to save space, each bucket is a
127/// singly-linked-list. In order to make deletion more efficient, we make
128/// the list circular, so we can delete a node without computing its hash.
129/// The problem with this is that the start of the hash buckets are not
130/// Nodes.  If NextInBucketPtr is a bucket pointer, this method returns null:
131/// use GetBucketPtr when this happens.
132static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) {
133  // The low bit is set if this is the pointer back to the bucket.
134  if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
135    return 0;
136
137  return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr);
138}
139
140
141/// testing.
142static void **GetBucketPtr(void *NextInBucketPtr) {
143  intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
144  assert((Ptr & 1) && "Not a bucket pointer");
145  return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
146}
147
148/// GetBucketFor - Hash the specified node ID and return the hash bucket for
149/// the specified ID.
150static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
151  // NumBuckets is always a power of 2.
152  unsigned BucketNum = Hash & (NumBuckets-1);
153  return Buckets + BucketNum;
154}
155
156/// AllocateBuckets - Allocated initialized bucket memory.
157static void **AllocateBuckets(unsigned NumBuckets) {
158  void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*)));
159  // Set the very last bucket to be a non-null "pointer".
160  Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
161  return Buckets;
162}
163
164//===----------------------------------------------------------------------===//
165// FoldingSetImpl Implementation
166
167FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
168  assert(5 < Log2InitSize && Log2InitSize < 32 &&
169         "Initial hash table size out of range");
170  NumBuckets = 1 << Log2InitSize;
171  Buckets = AllocateBuckets(NumBuckets);
172  NumNodes = 0;
173}
174FoldingSetImpl::~FoldingSetImpl() {
175  free(Buckets);
176}
177void FoldingSetImpl::clear() {
178  // Set all but the last bucket to null pointers.
179  memset(Buckets, 0, NumBuckets*sizeof(void*));
180
181  // Set the very last bucket to be a non-null "pointer".
182  Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
183
184  // Reset the node count to zero.
185  NumNodes = 0;
186}
187
188/// GrowHashTable - Double the size of the hash table and rehash everything.
189///
190void FoldingSetImpl::GrowHashTable() {
191  void **OldBuckets = Buckets;
192  unsigned OldNumBuckets = NumBuckets;
193  NumBuckets <<= 1;
194
195  // Clear out new buckets.
196  Buckets = AllocateBuckets(NumBuckets);
197  NumNodes = 0;
198
199  // Walk the old buckets, rehashing nodes into their new place.
200  FoldingSetNodeID TempID;
201  for (unsigned i = 0; i != OldNumBuckets; ++i) {
202    void *Probe = OldBuckets[i];
203    if (!Probe) continue;
204    while (Node *NodeInBucket = GetNextPtr(Probe)) {
205      // Figure out the next link, remove NodeInBucket from the old link.
206      Probe = NodeInBucket->getNextInBucket();
207      NodeInBucket->SetNextInBucket(0);
208
209      // Insert the node into the new bucket, after recomputing the hash.
210      InsertNode(NodeInBucket,
211                 GetBucketFor(ComputeNodeHash(NodeInBucket, TempID),
212                              Buckets, NumBuckets));
213      TempID.clear();
214    }
215  }
216
217  free(OldBuckets);
218}
219
220/// FindNodeOrInsertPos - Look up the node specified by ID.  If it exists,
221/// return it.  If not, return the insertion token that will make insertion
222/// faster.
223FoldingSetImpl::Node
224*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
225                                     void *&InsertPos) {
226
227  void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets);
228  void *Probe = *Bucket;
229
230  InsertPos = 0;
231
232  FoldingSetNodeID TempID;
233  while (Node *NodeInBucket = GetNextPtr(Probe)) {
234    if (NodeEquals(NodeInBucket, ID, TempID))
235      return NodeInBucket;
236    TempID.clear();
237
238    Probe = NodeInBucket->getNextInBucket();
239  }
240
241  // Didn't find the node, return null with the bucket as the InsertPos.
242  InsertPos = Bucket;
243  return 0;
244}
245
246/// InsertNode - Insert the specified node into the folding set, knowing that it
247/// is not already in the map.  InsertPos must be obtained from
248/// FindNodeOrInsertPos.
249void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
250  assert(N->getNextInBucket() == 0);
251  // Do we need to grow the hashtable?
252  if (NumNodes+1 > NumBuckets*2) {
253    GrowHashTable();
254    FoldingSetNodeID TempID;
255    InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets);
256  }
257
258  ++NumNodes;
259
260  /// The insert position is actually a bucket pointer.
261  void **Bucket = static_cast<void**>(InsertPos);
262
263  void *Next = *Bucket;
264
265  // If this is the first insertion into this bucket, its next pointer will be
266  // null.  Pretend as if it pointed to itself, setting the low bit to indicate
267  // that it is a pointer to the bucket.
268  if (Next == 0)
269    Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
270
271  // Set the node's next pointer, and make the bucket point to the node.
272  N->SetNextInBucket(Next);
273  *Bucket = N;
274}
275
276/// RemoveNode - Remove a node from the folding set, returning true if one was
277/// removed or false if the node was not in the folding set.
278bool FoldingSetImpl::RemoveNode(Node *N) {
279  // Because each bucket is a circular list, we don't need to compute N's hash
280  // to remove it.
281  void *Ptr = N->getNextInBucket();
282  if (Ptr == 0) return false;  // Not in folding set.
283
284  --NumNodes;
285  N->SetNextInBucket(0);
286
287  // Remember what N originally pointed to, either a bucket or another node.
288  void *NodeNextPtr = Ptr;
289
290  // Chase around the list until we find the node (or bucket) which points to N.
291  while (true) {
292    if (Node *NodeInBucket = GetNextPtr(Ptr)) {
293      // Advance pointer.
294      Ptr = NodeInBucket->getNextInBucket();
295
296      // We found a node that points to N, change it to point to N's next node,
297      // removing N from the list.
298      if (Ptr == N) {
299        NodeInBucket->SetNextInBucket(NodeNextPtr);
300        return true;
301      }
302    } else {
303      void **Bucket = GetBucketPtr(Ptr);
304      Ptr = *Bucket;
305
306      // If we found that the bucket points to N, update the bucket to point to
307      // whatever is next.
308      if (Ptr == N) {
309        *Bucket = NodeNextPtr;
310        return true;
311      }
312    }
313  }
314}
315
316/// GetOrInsertNode - If there is an existing simple Node exactly
317/// equal to the specified node, return it.  Otherwise, insert 'N' and it
318/// instead.
319FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
320  FoldingSetNodeID ID;
321  GetNodeProfile(N, ID);
322  void *IP;
323  if (Node *E = FindNodeOrInsertPos(ID, IP))
324    return E;
325  InsertNode(N, IP);
326  return N;
327}
328
329//===----------------------------------------------------------------------===//
330// FoldingSetIteratorImpl Implementation
331
332FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
333  // Skip to the first non-null non-self-cycle bucket.
334  while (*Bucket != reinterpret_cast<void*>(-1) &&
335         (*Bucket == 0 || GetNextPtr(*Bucket) == 0))
336    ++Bucket;
337
338  NodePtr = static_cast<FoldingSetNode*>(*Bucket);
339}
340
341void FoldingSetIteratorImpl::advance() {
342  // If there is another link within this bucket, go to it.
343  void *Probe = NodePtr->getNextInBucket();
344
345  if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
346    NodePtr = NextNodeInBucket;
347  else {
348    // Otherwise, this is the last link in this bucket.
349    void **Bucket = GetBucketPtr(Probe);
350
351    // Skip to the next non-null non-self-cycle bucket.
352    do {
353      ++Bucket;
354    } while (*Bucket != reinterpret_cast<void*>(-1) &&
355             (*Bucket == 0 || GetNextPtr(*Bucket) == 0));
356
357    NodePtr = static_cast<FoldingSetNode*>(*Bucket);
358  }
359}
360
361//===----------------------------------------------------------------------===//
362// FoldingSetBucketIteratorImpl Implementation
363
364FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) {
365  Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket;
366}
367