ValueEnumerator.cpp revision 0444de0c0e7cfc8d8f8fed6f64cd97812bdd6a41
1//===-- ValueEnumerator.cpp - Number values and types for bitcode writer --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the ValueEnumerator class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ValueEnumerator.h"
15#include "llvm/ADT/SmallPtrSet.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/Constants.h"
18#include "llvm/DerivedTypes.h"
19#include "llvm/Module.h"
20#include "llvm/ValueSymbolTable.h"
21#include "llvm/Instructions.h"
22#include <algorithm>
23using namespace llvm;
24
25static bool isIntegerValue(const std::pair<const Value*, unsigned> &V) {
26  return V.first->getType()->isIntegerTy();
27}
28
29/// ValueEnumerator - Enumerate module-level information.
30ValueEnumerator::ValueEnumerator(const Module *M) {
31  // Enumerate the global variables.
32  for (Module::const_global_iterator I = M->global_begin(),
33         E = M->global_end(); I != E; ++I)
34    EnumerateValue(I);
35
36  // Enumerate the functions.
37  for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
38    EnumerateValue(I);
39    EnumerateAttributes(cast<Function>(I)->getAttributes());
40  }
41
42  // Enumerate the aliases.
43  for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
44       I != E; ++I)
45    EnumerateValue(I);
46
47  // Remember what is the cutoff between globalvalue's and other constants.
48  unsigned FirstConstant = Values.size();
49
50  // Enumerate the global variable initializers.
51  for (Module::const_global_iterator I = M->global_begin(),
52         E = M->global_end(); I != E; ++I)
53    if (I->hasInitializer())
54      EnumerateValue(I->getInitializer());
55
56  // Enumerate the aliasees.
57  for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
58       I != E; ++I)
59    EnumerateValue(I->getAliasee());
60
61  // Insert constants and metadata that are named at module level into the slot
62  // pool so that the module symbol table can refer to them...
63  EnumerateValueSymbolTable(M->getValueSymbolTable());
64  EnumerateNamedMetadata(M);
65
66  SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
67
68  // Enumerate types used by function bodies and argument lists.
69  for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
70
71    for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
72         I != E; ++I)
73      EnumerateType(I->getType());
74
75    for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
76      for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){
77        for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
78             OI != E; ++OI) {
79          if (MDNode *MD = dyn_cast<MDNode>(*OI))
80            if (MD->isFunctionLocal() && MD->getFunction())
81              // These will get enumerated during function-incorporation.
82              continue;
83          EnumerateOperandType(*OI);
84        }
85        EnumerateType(I->getType());
86        if (const CallInst *CI = dyn_cast<CallInst>(I))
87          EnumerateAttributes(CI->getAttributes());
88        else if (const InvokeInst *II = dyn_cast<InvokeInst>(I))
89          EnumerateAttributes(II->getAttributes());
90
91        // Enumerate metadata attached with this instruction.
92        MDs.clear();
93        I->getAllMetadataOtherThanDebugLoc(MDs);
94        for (unsigned i = 0, e = MDs.size(); i != e; ++i)
95          EnumerateMetadata(MDs[i].second);
96
97        if (!I->getDebugLoc().isUnknown()) {
98          MDNode *Scope, *IA;
99          I->getDebugLoc().getScopeAndInlinedAt(Scope, IA, I->getContext());
100          if (Scope) EnumerateMetadata(Scope);
101          if (IA) EnumerateMetadata(IA);
102        }
103      }
104  }
105
106  // Optimize constant ordering.
107  OptimizeConstants(FirstConstant, Values.size());
108}
109
110
111unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
112  InstructionMapType::const_iterator I = InstructionMap.find(Inst);
113  assert(I != InstructionMap.end() && "Instruction is not mapped!");
114  return I->second;
115}
116
117void ValueEnumerator::setInstructionID(const Instruction *I) {
118  InstructionMap[I] = InstructionCount++;
119}
120
121unsigned ValueEnumerator::getValueID(const Value *V) const {
122  if (isa<MDNode>(V) || isa<MDString>(V)) {
123    ValueMapType::const_iterator I = MDValueMap.find(V);
124    assert(I != MDValueMap.end() && "Value not in slotcalculator!");
125    return I->second-1;
126  }
127
128  ValueMapType::const_iterator I = ValueMap.find(V);
129  assert(I != ValueMap.end() && "Value not in slotcalculator!");
130  return I->second-1;
131}
132
133// Optimize constant ordering.
134namespace {
135  struct CstSortPredicate {
136    ValueEnumerator &VE;
137    explicit CstSortPredicate(ValueEnumerator &ve) : VE(ve) {}
138    bool operator()(const std::pair<const Value*, unsigned> &LHS,
139                    const std::pair<const Value*, unsigned> &RHS) {
140      // Sort by plane.
141      if (LHS.first->getType() != RHS.first->getType())
142        return VE.getTypeID(LHS.first->getType()) <
143               VE.getTypeID(RHS.first->getType());
144      // Then by frequency.
145      return LHS.second > RHS.second;
146    }
147  };
148}
149
150/// OptimizeConstants - Reorder constant pool for denser encoding.
151void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
152  if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
153
154  CstSortPredicate P(*this);
155  std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P);
156
157  // Ensure that integer constants are at the start of the constant pool.  This
158  // is important so that GEP structure indices come before gep constant exprs.
159  std::partition(Values.begin()+CstStart, Values.begin()+CstEnd,
160                 isIntegerValue);
161
162  // Rebuild the modified portion of ValueMap.
163  for (; CstStart != CstEnd; ++CstStart)
164    ValueMap[Values[CstStart].first] = CstStart+1;
165}
166
167
168/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
169/// table into the values table.
170void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
171  for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
172       VI != VE; ++VI)
173    EnumerateValue(VI->getValue());
174}
175
176/// EnumerateNamedMetadata - Insert all of the values referenced by
177/// named metadata in the specified module.
178void ValueEnumerator::EnumerateNamedMetadata(const Module *M) {
179  for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
180       E = M->named_metadata_end(); I != E; ++I)
181    EnumerateNamedMDNode(I);
182}
183
184void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
185  for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
186    EnumerateMetadata(MD->getOperand(i));
187}
188
189/// EnumerateMDNodeOperands - Enumerate all non-function-local values
190/// and types referenced by the given MDNode.
191void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) {
192  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
193    if (Value *V = N->getOperand(i)) {
194      if (isa<MDNode>(V) || isa<MDString>(V))
195        EnumerateMetadata(V);
196      else if (!isa<Instruction>(V) && !isa<Argument>(V))
197        EnumerateValue(V);
198    } else
199      EnumerateType(Type::getVoidTy(N->getContext()));
200  }
201}
202
203void ValueEnumerator::EnumerateMetadata(const Value *MD) {
204  assert((isa<MDNode>(MD) || isa<MDString>(MD)) && "Invalid metadata kind");
205
206  // Enumerate the type of this value.
207  EnumerateType(MD->getType());
208
209  const MDNode *N = dyn_cast<MDNode>(MD);
210
211  // In the module-level pass, skip function-local nodes themselves, but
212  // do walk their operands.
213  if (N && N->isFunctionLocal() && N->getFunction()) {
214    EnumerateMDNodeOperands(N);
215    return;
216  }
217
218  // Check to see if it's already in!
219  unsigned &MDValueID = MDValueMap[MD];
220  if (MDValueID) {
221    // Increment use count.
222    MDValues[MDValueID-1].second++;
223    return;
224  }
225  MDValues.push_back(std::make_pair(MD, 1U));
226  MDValueID = MDValues.size();
227
228  // Enumerate all non-function-local operands.
229  if (N)
230    EnumerateMDNodeOperands(N);
231}
232
233/// EnumerateFunctionLocalMetadataa - Incorporate function-local metadata
234/// information reachable from the given MDNode.
235void ValueEnumerator::EnumerateFunctionLocalMetadata(const MDNode *N) {
236  assert(N->isFunctionLocal() && N->getFunction() &&
237         "EnumerateFunctionLocalMetadata called on non-function-local mdnode!");
238
239  // Enumerate the type of this value.
240  EnumerateType(N->getType());
241
242  // Check to see if it's already in!
243  unsigned &MDValueID = MDValueMap[N];
244  if (MDValueID) {
245    // Increment use count.
246    MDValues[MDValueID-1].second++;
247    return;
248  }
249  MDValues.push_back(std::make_pair(N, 1U));
250  MDValueID = MDValues.size();
251
252  // To incoroporate function-local information visit all function-local
253  // MDNodes and all function-local values they reference.
254  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
255    if (Value *V = N->getOperand(i)) {
256      if (MDNode *O = dyn_cast<MDNode>(V)) {
257        if (O->isFunctionLocal() && O->getFunction())
258          EnumerateFunctionLocalMetadata(O);
259      } else if (isa<Instruction>(V) || isa<Argument>(V))
260        EnumerateValue(V);
261    }
262
263  // Also, collect all function-local MDNodes for easy access.
264  FunctionLocalMDs.push_back(N);
265}
266
267void ValueEnumerator::EnumerateValue(const Value *V) {
268  assert(!V->getType()->isVoidTy() && "Can't insert void values!");
269  assert(!isa<MDNode>(V) && !isa<MDString>(V) &&
270         "EnumerateValue doesn't handle Metadata!");
271
272  // Check to see if it's already in!
273  unsigned &ValueID = ValueMap[V];
274  if (ValueID) {
275    // Increment use count.
276    Values[ValueID-1].second++;
277    return;
278  }
279
280  // Enumerate the type of this value.
281  EnumerateType(V->getType());
282
283  if (const Constant *C = dyn_cast<Constant>(V)) {
284    if (isa<GlobalValue>(C)) {
285      // Initializers for globals are handled explicitly elsewhere.
286    //} else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
287      // Do not enumerate the initializers for an array of simple characters.
288      // The initializers just pollute the value table, and we emit the strings
289      // specially.
290    } else if (C->getNumOperands()) {
291      // If a constant has operands, enumerate them.  This makes sure that if a
292      // constant has uses (for example an array of const ints), that they are
293      // inserted also.
294
295      // We prefer to enumerate them with values before we enumerate the user
296      // itself.  This makes it more likely that we can avoid forward references
297      // in the reader.  We know that there can be no cycles in the constants
298      // graph that don't go through a global variable.
299      for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
300           I != E; ++I)
301        if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
302          EnumerateValue(*I);
303
304      // Finally, add the value.  Doing this could make the ValueID reference be
305      // dangling, don't reuse it.
306      Values.push_back(std::make_pair(V, 1U));
307      ValueMap[V] = Values.size();
308      return;
309    }
310  }
311
312  // Add the value.
313  Values.push_back(std::make_pair(V, 1U));
314  ValueID = Values.size();
315}
316
317
318void ValueEnumerator::EnumerateType(Type *Ty) {
319  unsigned *TypeID = &TypeMap[Ty];
320
321  // We've already seen this type.
322  if (*TypeID)
323    return;
324
325  // If it is a non-anonymous struct, mark the type as being visited so that we
326  // don't recursively visit it.  This is safe because we allow forward
327  // references of these in the bitcode reader.
328  if (StructType *STy = dyn_cast<StructType>(Ty))
329    if (!STy->isLiteral())
330      *TypeID = ~0U;
331
332  // Enumerate all of the subtypes before we enumerate this type.  This ensures
333  // that the type will be enumerated in an order that can be directly built.
334  for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
335       I != E; ++I)
336    EnumerateType(*I);
337
338  // Refresh the TypeID pointer in case the table rehashed.
339  TypeID = &TypeMap[Ty];
340
341  // Check to see if we got the pointer another way.  This can happen when
342  // enumerating recursive types that hit the base case deeper than they start.
343  //
344  // If this is actually a struct that we are treating as forward ref'able,
345  // then emit the definition now that all of its contents are available.
346  if (*TypeID && *TypeID != ~0U)
347    return;
348
349  // Add this type now that its contents are all happily enumerated.
350  Types.push_back(Ty);
351
352  *TypeID = Types.size();
353}
354
355// Enumerate the types for the specified value.  If the value is a constant,
356// walk through it, enumerating the types of the constant.
357void ValueEnumerator::EnumerateOperandType(const Value *V) {
358  EnumerateType(V->getType());
359
360  if (const Constant *C = dyn_cast<Constant>(V)) {
361    // If this constant is already enumerated, ignore it, we know its type must
362    // be enumerated.
363    if (ValueMap.count(V)) return;
364
365    // This constant may have operands, make sure to enumerate the types in
366    // them.
367    for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
368      const Value *Op = C->getOperand(i);
369
370      // Don't enumerate basic blocks here, this happens as operands to
371      // blockaddress.
372      if (isa<BasicBlock>(Op)) continue;
373
374      EnumerateOperandType(Op);
375    }
376
377    if (const MDNode *N = dyn_cast<MDNode>(V)) {
378      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
379        if (Value *Elem = N->getOperand(i))
380          EnumerateOperandType(Elem);
381    }
382  } else if (isa<MDString>(V) || isa<MDNode>(V))
383    EnumerateMetadata(V);
384}
385
386void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
387  if (PAL.isEmpty()) return;  // null is always 0.
388  // Do a lookup.
389  unsigned &Entry = AttributeMap[PAL.getRawPointer()];
390  if (Entry == 0) {
391    // Never saw this before, add it.
392    Attributes.push_back(PAL);
393    Entry = Attributes.size();
394  }
395}
396
397void ValueEnumerator::incorporateFunction(const Function &F) {
398  InstructionCount = 0;
399  NumModuleValues = Values.size();
400  NumModuleMDValues = MDValues.size();
401
402  // Adding function arguments to the value table.
403  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
404       I != E; ++I)
405    EnumerateValue(I);
406
407  FirstFuncConstantID = Values.size();
408
409  // Add all function-level constants to the value table.
410  for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
411    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
412      for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
413           OI != E; ++OI) {
414        if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
415            isa<InlineAsm>(*OI))
416          EnumerateValue(*OI);
417      }
418    BasicBlocks.push_back(BB);
419    ValueMap[BB] = BasicBlocks.size();
420  }
421
422  // Optimize the constant layout.
423  OptimizeConstants(FirstFuncConstantID, Values.size());
424
425  // Add the function's parameter attributes so they are available for use in
426  // the function's instruction.
427  EnumerateAttributes(F.getAttributes());
428
429  FirstInstID = Values.size();
430
431  SmallVector<MDNode *, 8> FnLocalMDVector;
432  // Add all of the instructions.
433  for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
434    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
435      for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
436           OI != E; ++OI) {
437        if (MDNode *MD = dyn_cast<MDNode>(*OI))
438          if (MD->isFunctionLocal() && MD->getFunction())
439            // Enumerate metadata after the instructions they might refer to.
440            FnLocalMDVector.push_back(MD);
441      }
442
443      SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
444      I->getAllMetadataOtherThanDebugLoc(MDs);
445      for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
446        MDNode *N = MDs[i].second;
447        if (N->isFunctionLocal() && N->getFunction())
448          FnLocalMDVector.push_back(N);
449      }
450
451      if (!I->getType()->isVoidTy())
452        EnumerateValue(I);
453    }
454  }
455
456  // Add all of the function-local metadata.
457  for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i)
458    EnumerateFunctionLocalMetadata(FnLocalMDVector[i]);
459}
460
461void ValueEnumerator::purgeFunction() {
462  /// Remove purged values from the ValueMap.
463  for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
464    ValueMap.erase(Values[i].first);
465  for (unsigned i = NumModuleMDValues, e = MDValues.size(); i != e; ++i)
466    MDValueMap.erase(MDValues[i].first);
467  for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
468    ValueMap.erase(BasicBlocks[i]);
469
470  Values.resize(NumModuleValues);
471  MDValues.resize(NumModuleMDValues);
472  BasicBlocks.clear();
473  FunctionLocalMDs.clear();
474}
475
476static void IncorporateFunctionInfoGlobalBBIDs(const Function *F,
477                                 DenseMap<const BasicBlock*, unsigned> &IDMap) {
478  unsigned Counter = 0;
479  for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
480    IDMap[BB] = ++Counter;
481}
482
483/// getGlobalBasicBlockID - This returns the function-specific ID for the
484/// specified basic block.  This is relatively expensive information, so it
485/// should only be used by rare constructs such as address-of-label.
486unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {
487  unsigned &Idx = GlobalBasicBlockIDs[BB];
488  if (Idx != 0)
489    return Idx-1;
490
491  IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
492  return getGlobalBasicBlockID(BB);
493}
494
495