FoldingSet.cpp revision 0e5af195f6c54dbf5a24a1ec12ed2d0bd02f5b7f
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//                     The LLVM Compiler Infrastructure
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This file was developed by James M. Laskey and is distributed under
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the University of Illinois Open Source License. See LICENSE.TXT for details.
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----------------------------------------------------------------------===//
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This file implements a hash set that can be used to remove duplication of
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// nodes in a graph.  This code was originally created by Chris Lattner for use
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// set.
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----------------------------------------------------------------------===//
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/ADT/FoldingSet.h"
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/ADT/MathExtras.h"
20
21using namespace llvm;
22
23//===----------------------------------------------------------------------===//
24// FoldingSetImpl::NodeID Implementation
25
26/// Add* - Add various data types to Bit data.
27///
28void FoldingSetImpl::NodeID::AddPointer(const void *Ptr) {
29  // Note: this adds pointers to the hash using sizes and endianness that
30  // depend on the host.  It doesn't matter however, because hashing on
31  // pointer values in inherently unstable.  Nothing  should depend on the
32  // ordering of nodes in the folding set.
33  intptr_t PtrI = (intptr_t)Ptr;
34  Bits.push_back(unsigned(PtrI));
35  if (sizeof(intptr_t) > sizeof(unsigned))
36    Bits.push_back(unsigned(uint64_t(PtrI) >> 32));
37}
38void FoldingSetImpl::NodeID::AddInteger(signed I) {
39  Bits.push_back(I);
40}
41void FoldingSetImpl::NodeID::AddInteger(unsigned I) {
42  Bits.push_back(I);
43}
44void FoldingSetImpl::NodeID::AddInteger(uint64_t I) {
45  Bits.push_back(unsigned(I));
46  Bits.push_back(unsigned(I >> 32));
47}
48void FoldingSetImpl::NodeID::AddFloat(float F) {
49  Bits.push_back(FloatToBits(F));
50}
51void FoldingSetImpl::NodeID::AddDouble(double D) {
52  Bits.push_back(DoubleToBits(D));
53}
54void FoldingSetImpl::NodeID::AddString(const std::string &String) {
55  // Note: An assumption is made here that strings are composed of one byte
56  // chars.
57  unsigned Size = String.size();
58  unsigned Units = Size / sizeof(unsigned);
59  const unsigned *Base = (const unsigned *)String.data();
60  Bits.insert(Bits.end(), Base, Base + Units);
61  if (Size & 3) {
62    unsigned V = 0;
63    for (unsigned i = Units * sizeof(unsigned); i < Size; ++i)
64      V = (V << 8) | String[i];
65    Bits.push_back(V);
66  }
67}
68
69/// ComputeHash - Compute a strong hash value for this NodeID, used to
70/// lookup the node in the FoldingSetImpl.
71unsigned FoldingSetImpl::NodeID::ComputeHash() const {
72  // This is adapted from SuperFastHash by Paul Hsieh.
73  unsigned Hash = Bits.size();
74  for (const unsigned *BP = &Bits[0], *E = BP+Bits.size(); BP != E; ++BP) {
75    unsigned Data = *BP;
76    Hash         += Data & 0xFFFF;
77    unsigned Tmp  = ((Data >> 16) << 11) ^ Hash;
78    Hash          = (Hash << 16) ^ Tmp;
79    Hash         += Hash >> 11;
80  }
81
82  // Force "avalanching" of final 127 bits.
83  Hash ^= Hash << 3;
84  Hash += Hash >> 5;
85  Hash ^= Hash << 4;
86  Hash += Hash >> 17;
87  Hash ^= Hash << 25;
88  Hash += Hash >> 6;
89  return Hash;
90}
91
92/// operator== - Used to compare two nodes to each other.
93///
94bool FoldingSetImpl::NodeID::operator==(const FoldingSetImpl::NodeID &RHS)const{
95  if (Bits.size() != RHS.Bits.size()) return false;
96  return memcmp(&Bits[0], &RHS.Bits[0], Bits.size()*sizeof(Bits[0])) == 0;
97}
98
99
100//===----------------------------------------------------------------------===//
101// FoldingSetImpl Implementation
102
103FoldingSetImpl::FoldingSetImpl() : NumNodes(0) {
104  NumBuckets = 64;
105  Buckets = new void*[NumBuckets];
106  memset(Buckets, 0, NumBuckets*sizeof(void*));
107}
108FoldingSetImpl::~FoldingSetImpl() {
109  delete [] Buckets;
110}
111
112/// GetNextPtr - In order to save space, each bucket is a
113/// singly-linked-list. In order to make deletion more efficient, we make
114/// the list circular, so we can delete a node without computing its hash.
115/// The problem with this is that the start of the hash buckets are not
116/// Nodes.  If NextInBucketPtr is a bucket pointer, this method returns null
117/// : use GetBucketPtr when this happens.
118FoldingSetImpl::Node *FoldingSetImpl::GetNextPtr(void *NextInBucketPtr) {
119  if (NextInBucketPtr >= Buckets && NextInBucketPtr < Buckets+NumBuckets)
120    return 0;
121  return static_cast<Node*>(NextInBucketPtr);
122}
123
124/// GetNextPtr - This is just like the previous GetNextPtr implementation,
125/// but allows a bucket array to be specified.
126FoldingSetImpl::Node *FoldingSetImpl::GetNextPtr(void *NextInBucketPtr,
127                                                 void **Bucks,
128                                                 unsigned NumBuck) {
129  if (NextInBucketPtr >= Bucks && NextInBucketPtr < Bucks+NumBuck)
130    return 0;
131  return static_cast<Node*>(NextInBucketPtr);
132}
133
134/// GetBucketPtr - Provides a casting of a bucket pointer for isNode
135/// testing.
136void **FoldingSetImpl::GetBucketPtr(void *NextInBucketPtr) {
137  return static_cast<void**>(NextInBucketPtr);
138}
139
140/// GetBucketFor - Hash the specified node ID and return the hash bucket for
141/// the specified ID.
142void **FoldingSetImpl::GetBucketFor(const NodeID &ID) const {
143  // NumBuckets is always a power of 2.
144  unsigned BucketNum = ID.ComputeHash() & (NumBuckets-1);
145  return Buckets+BucketNum;
146}
147
148/// GrowHashTable - Double the size of the hash table and rehash everything.
149///
150void FoldingSetImpl::GrowHashTable() {
151  void **OldBuckets = Buckets;
152  unsigned OldNumBuckets = NumBuckets;
153  NumBuckets <<= 1;
154
155  // Reset the node count to zero: we're going to reinsert everything.
156  NumNodes = 0;
157
158  // Clear out new buckets.
159  Buckets = new void*[NumBuckets];
160  memset(Buckets, 0, NumBuckets*sizeof(void*));
161
162  // Walk the old buckets, rehashing nodes into their new place.
163  for (unsigned i = 0; i != OldNumBuckets; ++i) {
164    void *Probe = OldBuckets[i];
165    if (!Probe) continue;
166    while (Node *NodeInBucket = GetNextPtr(Probe, OldBuckets, OldNumBuckets)){
167      // Figure out the next link, remove NodeInBucket from the old link.
168      Probe = NodeInBucket->getNextInBucket();
169      NodeInBucket->SetNextInBucket(0);
170
171      // Insert the node into the new bucket, after recomputing the hash.
172      NodeID ID;
173      GetNodeProfile(ID, NodeInBucket);
174      InsertNode(NodeInBucket, GetBucketFor(ID));
175    }
176  }
177
178  delete[] OldBuckets;
179}
180
181/// FindNodeOrInsertPos - Look up the node specified by ID.  If it exists,
182/// return it.  If not, return the insertion token that will make insertion
183/// faster.
184FoldingSetImpl::Node *FoldingSetImpl::FindNodeOrInsertPos(const NodeID &ID,
185                                                          void *&InsertPos) {
186  void **Bucket = GetBucketFor(ID);
187  void *Probe = *Bucket;
188
189  InsertPos = 0;
190
191  while (Node *NodeInBucket = GetNextPtr(Probe)) {
192    NodeID OtherID;
193    GetNodeProfile(OtherID, NodeInBucket);
194    if (OtherID == ID)
195      return NodeInBucket;
196
197    Probe = NodeInBucket->getNextInBucket();
198  }
199
200  // Didn't find the node, return null with the bucket as the InsertPos.
201  InsertPos = Bucket;
202  return 0;
203}
204
205/// InsertNode - Insert the specified node into the folding set, knowing that it
206/// is not already in the map.  InsertPos must be obtained from
207/// FindNodeOrInsertPos.
208void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
209  ++NumNodes;
210  // Do we need to grow the hashtable?
211  if (NumNodes > NumBuckets*2) {
212    GrowHashTable();
213    NodeID ID;
214    GetNodeProfile(ID, N);
215    InsertPos = GetBucketFor(ID);
216  }
217
218  /// The insert position is actually a bucket pointer.
219  void **Bucket = static_cast<void**>(InsertPos);
220
221  void *Next = *Bucket;
222
223  // If this is the first insertion into this bucket, its next pointer will be
224  // null.  Pretend as if it pointed to itself.
225  if (Next == 0)
226    Next = Bucket;
227
228  // Set the nodes next pointer, and make the bucket point to the node.
229  N->SetNextInBucket(Next);
230  *Bucket = N;
231}
232
233/// RemoveNode - Remove a node from the folding set, returning true if one was
234/// removed or false if the node was not in the folding set.
235bool FoldingSetImpl::RemoveNode(Node *N) {
236  // Because each bucket is a circular list, we don't need to compute N's hash
237  // to remove it.  Chase around the list until we find the node (or bucket)
238  // which points to N.
239  void *Ptr = N->getNextInBucket();
240  if (Ptr == 0) return false;  // Not in folding set.
241
242  --NumNodes;
243
244  void *NodeNextPtr = Ptr;
245  N->SetNextInBucket(0);
246  while (true) {
247    if (Node *NodeInBucket = GetNextPtr(Ptr)) {
248      // Advance pointer.
249      Ptr = NodeInBucket->getNextInBucket();
250
251      // We found a node that points to N, change it to point to N's next node,
252      // removing N from the list.
253      if (Ptr == N) {
254        NodeInBucket->SetNextInBucket(NodeNextPtr);
255        return true;
256      }
257    } else {
258      void **Bucket = GetBucketPtr(Ptr);
259      Ptr = *Bucket;
260
261      // If we found that the bucket points to N, update the bucket to point to
262      // whatever is next.
263      if (Ptr == N) {
264        *Bucket = NodeNextPtr;
265        return true;
266      }
267    }
268  }
269}
270
271/// GetOrInsertNode - If there is an existing simple Node exactly
272/// equal to the specified node, return it.  Otherwise, insert 'N' and it
273/// instead.
274FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
275  NodeID ID;
276  GetNodeProfile(ID, N);
277  void *IP;
278  if (Node *E = FindNodeOrInsertPos(ID, IP))
279    return E;
280  InsertNode(N, IP);
281  return N;
282}
283