1//===-- ValueEnumerator.cpp - Number values and types for bitcode writer --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the ValueEnumerator class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ValueEnumerator.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/SmallPtrSet.h"
17#include "llvm/IR/Constants.h"
18#include "llvm/IR/DebugInfoMetadata.h"
19#include "llvm/IR/DerivedTypes.h"
20#include "llvm/IR/Instructions.h"
21#include "llvm/IR/Module.h"
22#include "llvm/IR/ValueSymbolTable.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/raw_ostream.h"
25#include <algorithm>
26using namespace llvm;
27
28namespace llvm_2_9_func {
29
30static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
31  return V.first->getType()->isIntOrIntVectorTy();
32}
33
34/// ValueEnumerator - Enumerate module-level information.
35ValueEnumerator::ValueEnumerator(const llvm::Module &M)
36    : HasMDString(false), HasDILocation(false) {
37  // Enumerate the global variables.
38  for (llvm::Module::const_global_iterator I = M.global_begin(), E = M.global_end();
39       I != E; ++I)
40    EnumerateValue(&*I);
41
42  // Enumerate the functions.
43  for (llvm::Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
44    EnumerateValue(&*I);
45    EnumerateAttributes(cast<Function>(I)->getAttributes());
46  }
47
48  // Enumerate the aliases.
49  for (llvm::Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
50       I != E; ++I)
51    EnumerateValue(&*I);
52
53  // Remember what is the cutoff between globalvalue's and other constants.
54  unsigned FirstConstant = Values.size();
55
56  // Enumerate the global variable initializers.
57  for (llvm::Module::const_global_iterator I = M.global_begin(), E = M.global_end();
58       I != E; ++I)
59    if (I->hasInitializer())
60      EnumerateValue(I->getInitializer());
61
62  // Enumerate the aliasees.
63  for (llvm::Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
64       I != E; ++I)
65    EnumerateValue(I->getAliasee());
66
67  // Enumerate the metadata type.
68  //
69  // TODO: Move this to ValueEnumerator::EnumerateOperandType() once bitcode
70  // only encodes the metadata type when it's used as a value.
71  EnumerateType(Type::getMetadataTy(M.getContext()));
72
73  // Insert constants and metadata that are named at module level into the slot
74  // pool so that the module symbol table can refer to them...
75  EnumerateValueSymbolTable(M.getValueSymbolTable());
76  EnumerateNamedMetadata(M);
77
78  SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
79
80  // Enumerate types used by function bodies and argument lists.
81  for (const Function &F : M) {
82    for (const Argument &A : F.args())
83      EnumerateType(A.getType());
84
85    for (const BasicBlock &BB : F)
86      for (const Instruction &I : BB) {
87        for (const Use &Op : I.operands()) {
88          auto *MD = dyn_cast<MetadataAsValue>(&Op);
89          if (!MD) {
90            EnumerateOperandType(Op);
91            continue;
92          }
93
94          // Local metadata is enumerated during function-incorporation.
95          if (isa<LocalAsMetadata>(MD->getMetadata()))
96            continue;
97
98          EnumerateMetadata(MD->getMetadata());
99        }
100        EnumerateType(I.getType());
101        if (const CallInst *CI = dyn_cast<CallInst>(&I))
102          EnumerateAttributes(CI->getAttributes());
103        else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I))
104          EnumerateAttributes(II->getAttributes());
105
106        // Enumerate metadata attached with this instruction.
107        MDs.clear();
108        I.getAllMetadataOtherThanDebugLoc(MDs);
109        for (unsigned i = 0, e = MDs.size(); i != e; ++i)
110          EnumerateMetadata(MDs[i].second);
111
112        // Don't enumerate the location directly -- it has a special record
113        // type -- but enumerate its operands.
114        if (DILocation *L = I.getDebugLoc())
115          EnumerateMDNodeOperands(L);
116      }
117  }
118
119  // Optimize constant ordering.
120  OptimizeConstants(FirstConstant, Values.size());
121}
122
123unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
124  InstructionMapType::const_iterator I = InstructionMap.find(Inst);
125  assert(I != InstructionMap.end() && "Instruction is not mapped!");
126  return I->second;
127}
128
129void ValueEnumerator::setInstructionID(const Instruction *I) {
130  InstructionMap[I] = InstructionCount++;
131}
132
133unsigned ValueEnumerator::getValueID(const Value *V) const {
134  if (auto *MD = dyn_cast<MetadataAsValue>(V))
135    return getMetadataID(MD->getMetadata());
136
137  ValueMapType::const_iterator I = ValueMap.find(V);
138  assert(I != ValueMap.end() && "Value not in slotcalculator!");
139  return I->second-1;
140}
141
142void ValueEnumerator::dump() const {
143  print(dbgs(), ValueMap, "Default");
144  dbgs() << '\n';
145  print(dbgs(), MDValueMap, "MetaData");
146  dbgs() << '\n';
147}
148
149void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
150                            const char *Name) const {
151
152  OS << "Map Name: " << Name << "\n";
153  OS << "Size: " << Map.size() << "\n";
154  for (ValueMapType::const_iterator I = Map.begin(),
155         E = Map.end(); I != E; ++I) {
156
157    const Value *V = I->first;
158    if (V->hasName())
159      OS << "Value: " << V->getName();
160    else
161      OS << "Value: [null]\n";
162    V->dump();
163
164    OS << " Uses(" << std::distance(V->use_begin(),V->use_end()) << "):";
165    for (const Use &U : V->uses()) {
166      if (&U != &*V->use_begin())
167        OS << ",";
168      if(U->hasName())
169        OS << " " << U->getName();
170      else
171        OS << " [null]";
172
173    }
174    OS <<  "\n\n";
175  }
176}
177
178void ValueEnumerator::print(llvm::raw_ostream &OS, const MetadataMapType &Map,
179                            const char *Name) const {
180
181  OS << "Map Name: " << Name << "\n";
182  OS << "Size: " << Map.size() << "\n";
183  for (auto I = Map.begin(), E = Map.end(); I != E; ++I) {
184    const llvm::Metadata *MD = I->first;
185    OS << "Metadata: slot = " << I->second << "\n";
186    MD->print(OS);
187  }
188}
189
190
191// Optimize constant ordering.
192namespace {
193  struct CstSortPredicate {
194    ValueEnumerator &VE;
195    explicit CstSortPredicate(ValueEnumerator &ve) : VE(ve) {}
196    bool operator()(const std::pair<const Value*, unsigned> &LHS,
197                    const std::pair<const Value*, unsigned> &RHS) {
198      // Sort by plane.
199      if (LHS.first->getType() != RHS.first->getType())
200        return VE.getTypeID(LHS.first->getType()) <
201               VE.getTypeID(RHS.first->getType());
202      // Then by frequency.
203      return LHS.second > RHS.second;
204    }
205  };
206}
207
208/// OptimizeConstants - Reorder constant pool for denser encoding.
209void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
210  if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
211
212  CstSortPredicate P(*this);
213  std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P);
214
215  // Ensure that integer and vector of integer constants are at the start of the
216  // constant pool.  This is important so that GEP structure indices come before
217  // gep constant exprs.
218  std::partition(Values.begin()+CstStart, Values.begin()+CstEnd,
219                 isIntOrIntVectorValue);
220
221  // Rebuild the modified portion of ValueMap.
222  for (; CstStart != CstEnd; ++CstStart)
223    ValueMap[Values[CstStart].first] = CstStart+1;
224}
225
226
227/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
228/// table into the values table.
229void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
230  for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
231       VI != VE; ++VI)
232    EnumerateValue(VI->getValue());
233}
234
235/// EnumerateNamedMetadata - Insert all of the values referenced by
236/// named metadata in the specified module.
237void ValueEnumerator::EnumerateNamedMetadata(const llvm::Module &M) {
238  for (llvm::Module::const_named_metadata_iterator I = M.named_metadata_begin(),
239                                             E = M.named_metadata_end();
240       I != E; ++I)
241    EnumerateNamedMDNode(&*I);
242}
243
244void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
245  for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
246    EnumerateMetadata(MD->getOperand(i));
247}
248
249/// EnumerateMDNodeOperands - Enumerate all non-function-local values
250/// and types referenced by the given MDNode.
251void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) {
252  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
253    Metadata *MD = N->getOperand(i);
254    if (!MD)
255      continue;
256    assert(!isa<LocalAsMetadata>(MD) && "MDNodes cannot be function-local");
257    EnumerateMetadata(MD);
258  }
259}
260
261void ValueEnumerator::EnumerateMetadata(const llvm::Metadata *MD) {
262  assert(
263      (isa<MDNode>(MD) || isa<MDString>(MD) || isa<ConstantAsMetadata>(MD)) &&
264      "Invalid metadata kind");
265
266  // Insert a dummy ID to block the co-recursive call to
267  // EnumerateMDNodeOperands() from re-visiting MD in a cyclic graph.
268  //
269  // Return early if there's already an ID.
270  if (!MDValueMap.insert(std::make_pair(MD, 0)).second)
271    return;
272
273  // Visit operands first to minimize RAUW.
274  if (auto *N = dyn_cast<MDNode>(MD))
275    EnumerateMDNodeOperands(N);
276  else if (auto *C = dyn_cast<ConstantAsMetadata>(MD))
277    EnumerateValue(C->getValue());
278
279  HasMDString |= isa<MDString>(MD);
280  HasDILocation |= isa<DILocation>(MD);
281
282  // Replace the dummy ID inserted above with the correct one.  MDValueMap may
283  // have changed by inserting operands, so we need a fresh lookup here.
284  MDs.push_back(MD);
285  MDValueMap[MD] = MDs.size();
286}
287
288/// EnumerateFunctionLocalMetadataa - Incorporate function-local metadata
289/// information reachable from the metadata.
290void ValueEnumerator::EnumerateFunctionLocalMetadata(
291    const llvm::LocalAsMetadata *Local) {
292  // Check to see if it's already in!
293  unsigned &MDValueID = MDValueMap[Local];
294  if (MDValueID)
295    return;
296
297  MDs.push_back(Local);
298  MDValueID = MDs.size();
299
300  EnumerateValue(Local->getValue());
301
302  // Also, collect all function-local metadata for easy access.
303  FunctionLocalMDs.push_back(Local);
304}
305
306void ValueEnumerator::EnumerateValue(const Value *V) {
307  assert(!V->getType()->isVoidTy() && "Can't insert void values!");
308  assert(!isa<MetadataAsValue>(V) && "EnumerateValue doesn't handle Metadata!");
309
310  // Check to see if it's already in!
311  unsigned &ValueID = ValueMap[V];
312  if (ValueID) {
313    // Increment use count.
314    Values[ValueID-1].second++;
315    return;
316  }
317
318  // Enumerate the type of this value.
319  EnumerateType(V->getType());
320
321  if (const Constant *C = dyn_cast<Constant>(V)) {
322    if (isa<GlobalValue>(C)) {
323      // Initializers for globals are handled explicitly elsewhere.
324    } else if (C->getNumOperands()) {
325      // If a constant has operands, enumerate them.  This makes sure that if a
326      // constant has uses (for example an array of const ints), that they are
327      // inserted also.
328
329      // We prefer to enumerate them with values before we enumerate the user
330      // itself.  This makes it more likely that we can avoid forward references
331      // in the reader.  We know that there can be no cycles in the constants
332      // graph that don't go through a global variable.
333      for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
334           I != E; ++I)
335        if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
336          EnumerateValue(*I);
337
338      // Finally, add the value.  Doing this could make the ValueID reference be
339      // dangling, don't reuse it.
340      Values.push_back(std::make_pair(V, 1U));
341      ValueMap[V] = Values.size();
342      return;
343    } else if (const ConstantDataSequential *CDS =
344               dyn_cast<ConstantDataSequential>(C)) {
345      // For our legacy handling of the new ConstantDataSequential type, we
346      // need to enumerate the individual elements, as well as mark the
347      // outer constant as used.
348      for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i)
349        EnumerateValue(CDS->getElementAsConstant(i));
350      Values.push_back(std::make_pair(V, 1U));
351      ValueMap[V] = Values.size();
352      return;
353    }
354  }
355
356  // Add the value.
357  Values.push_back(std::make_pair(V, 1U));
358  ValueID = Values.size();
359}
360
361
362void ValueEnumerator::EnumerateType(Type *Ty) {
363  unsigned *TypeID = &TypeMap[Ty];
364
365  // We've already seen this type.
366  if (*TypeID)
367    return;
368
369  // If it is a non-anonymous struct, mark the type as being visited so that we
370  // don't recursively visit it.  This is safe because we allow forward
371  // references of these in the bitcode reader.
372  if (StructType *STy = dyn_cast<StructType>(Ty))
373    if (!STy->isLiteral())
374      *TypeID = ~0U;
375
376  // Enumerate all of the subtypes before we enumerate this type.  This ensures
377  // that the type will be enumerated in an order that can be directly built.
378  for (Type *SubTy : Ty->subtypes())
379    EnumerateType(SubTy);
380
381  // Refresh the TypeID pointer in case the table rehashed.
382  TypeID = &TypeMap[Ty];
383
384  // Check to see if we got the pointer another way.  This can happen when
385  // enumerating recursive types that hit the base case deeper than they start.
386  //
387  // If this is actually a struct that we are treating as forward ref'able,
388  // then emit the definition now that all of its contents are available.
389  if (*TypeID && *TypeID != ~0U)
390    return;
391
392  // Add this type now that its contents are all happily enumerated.
393  Types.push_back(Ty);
394
395  *TypeID = Types.size();
396}
397
398// Enumerate the types for the specified value.  If the value is a constant,
399// walk through it, enumerating the types of the constant.
400void ValueEnumerator::EnumerateOperandType(const Value *V) {
401  EnumerateType(V->getType());
402
403  if (auto *MD = dyn_cast<MetadataAsValue>(V)) {
404    assert(!isa<LocalAsMetadata>(MD->getMetadata()) &&
405           "Function-local metadata should be left for later");
406
407    EnumerateMetadata(MD->getMetadata());
408    return;
409  }
410
411  const Constant *C = dyn_cast<Constant>(V);
412  if (!C)
413    return;
414
415  // If this constant is already enumerated, ignore it, we know its type must
416  // be enumerated.
417  if (ValueMap.count(C))
418    return;
419
420  // This constant may have operands, make sure to enumerate the types in
421  // them.
422  for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
423    const Value *Op = C->getOperand(i);
424
425    // Don't enumerate basic blocks here, this happens as operands to
426    // blockaddress.
427    if (isa<BasicBlock>(Op))
428      continue;
429
430    EnumerateOperandType(Op);
431  }
432}
433
434void ValueEnumerator::EnumerateAttributes(AttributeSet PAL) {
435  if (PAL.isEmpty()) return;  // null is always 0.
436
437  // Do a lookup.
438  unsigned &Entry = AttributeMap[PAL];
439  if (Entry == 0) {
440    // Never saw this before, add it.
441    Attribute.push_back(PAL);
442    Entry = Attribute.size();
443  }
444
445  // Do lookups for all attribute groups.
446  for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) {
447    AttributeSet AS = PAL.getSlotAttributes(i);
448    unsigned &Entry = AttributeGroupMap[AS];
449    if (Entry == 0) {
450      AttributeGroups.push_back(AS);
451      Entry = AttributeGroups.size();
452    }
453  }
454}
455
456void ValueEnumerator::incorporateFunction(const Function &F) {
457  InstructionCount = 0;
458  NumModuleValues = Values.size();
459  NumModuleMDs = MDs.size();
460
461  // Adding function arguments to the value table.
462  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
463       I != E; ++I)
464    EnumerateValue(&*I);
465
466  FirstFuncConstantID = Values.size();
467
468  // Add all function-level constants to the value table.
469  for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
470    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
471      for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
472           OI != E; ++OI) {
473        if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
474            isa<InlineAsm>(*OI))
475          EnumerateValue(*OI);
476      }
477    BasicBlocks.push_back(&*BB);
478    ValueMap[&*BB] = BasicBlocks.size();
479  }
480
481  // Optimize the constant layout.
482  OptimizeConstants(FirstFuncConstantID, Values.size());
483
484  // Add the function's parameter attributes so they are available for use in
485  // the function's instruction.
486  EnumerateAttributes(F.getAttributes());
487
488  FirstInstID = Values.size();
489
490  SmallVector<llvm::LocalAsMetadata *, 8> FnLocalMDVector;
491  // Add all of the instructions.
492  for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
493    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
494      for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
495           OI != E; ++OI) {
496        if (auto *MD = dyn_cast<llvm::MetadataAsValue>(&*OI))
497          if (auto *Local = dyn_cast<LocalAsMetadata>(MD->getMetadata()))
498            // Enumerate metadata after the instructions they might refer to.
499            FnLocalMDVector.push_back(Local);
500      }
501
502      if (!I->getType()->isVoidTy())
503        EnumerateValue(&*I);
504    }
505  }
506
507  // Add all of the function-local metadata.
508  for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i)
509    EnumerateFunctionLocalMetadata(FnLocalMDVector[i]);
510}
511
512void ValueEnumerator::purgeFunction() {
513  /// Remove purged values from the ValueMap.
514  for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
515    ValueMap.erase(Values[i].first);
516  for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i)
517    MDValueMap.erase(MDs[i]);
518  for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
519    ValueMap.erase(BasicBlocks[i]);
520
521  Values.resize(NumModuleValues);
522  MDs.resize(NumModuleMDs);
523  BasicBlocks.clear();
524  FunctionLocalMDs.clear();
525}
526
527static void IncorporateFunctionInfoGlobalBBIDs(const Function *F,
528                                 DenseMap<const BasicBlock*, unsigned> &IDMap) {
529  unsigned Counter = 0;
530  for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
531    IDMap[&*BB] = ++Counter;
532}
533
534/// getGlobalBasicBlockID - This returns the function-specific ID for the
535/// specified basic block.  This is relatively expensive information, so it
536/// should only be used by rare constructs such as address-of-label.
537unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {
538  unsigned &Idx = GlobalBasicBlockIDs[BB];
539  if (Idx != 0)
540    return Idx-1;
541
542  IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
543  return getGlobalBasicBlockID(BB);
544}
545
546}  // end llvm_2_9_func namespace
547