GVN.cpp revision f4177aa0193d9bedc4f919b9322464bf3dee83ab
1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass performs global value numbering to eliminate fully redundant
11// instructions.  It also performs simple dead load elimination.
12//
13// Note that this pass does the value numbering itself; it does not use the
14// ValueNumbering analysis passes.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "gvn"
19#include "llvm/Transforms/Scalar.h"
20#include "llvm/Constants.h"
21#include "llvm/DerivedTypes.h"
22#include "llvm/GlobalVariable.h"
23#include "llvm/Function.h"
24#include "llvm/IntrinsicInst.h"
25#include "llvm/LLVMContext.h"
26#include "llvm/Operator.h"
27#include "llvm/Analysis/AliasAnalysis.h"
28#include "llvm/Analysis/ConstantFolding.h"
29#include "llvm/Analysis/Dominators.h"
30#include "llvm/Analysis/InstructionSimplify.h"
31#include "llvm/Analysis/Loads.h"
32#include "llvm/Analysis/MemoryBuiltins.h"
33#include "llvm/Analysis/MemoryDependenceAnalysis.h"
34#include "llvm/Analysis/PHITransAddr.h"
35#include "llvm/Analysis/ValueTracking.h"
36#include "llvm/Target/TargetData.h"
37#include "llvm/Transforms/Utils/BasicBlockUtils.h"
38#include "llvm/Transforms/Utils/Local.h"
39#include "llvm/Transforms/Utils/SSAUpdater.h"
40#include "llvm/ADT/DenseMap.h"
41#include "llvm/ADT/DepthFirstIterator.h"
42#include "llvm/ADT/PostOrderIterator.h"
43#include "llvm/ADT/SmallPtrSet.h"
44#include "llvm/ADT/Statistic.h"
45#include "llvm/Support/Allocator.h"
46#include "llvm/Support/CFG.h"
47#include "llvm/Support/CommandLine.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/ErrorHandling.h"
50#include "llvm/Support/GetElementPtrTypeIterator.h"
51#include "llvm/Support/IRBuilder.h"
52#include <list>
53using namespace llvm;
54
55STATISTIC(NumGVNInstr,  "Number of instructions deleted");
56STATISTIC(NumGVNLoad,   "Number of loads deleted");
57STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
58STATISTIC(NumGVNBlocks, "Number of blocks merged");
59STATISTIC(NumPRELoad,   "Number of loads PRE'd");
60
61static cl::opt<bool> EnablePRE("enable-pre",
62                               cl::init(true), cl::Hidden);
63static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
64
65//===----------------------------------------------------------------------===//
66//                         ValueTable Class
67//===----------------------------------------------------------------------===//
68
69/// This class holds the mapping between values and value numbers.  It is used
70/// as an efficient mechanism to determine the expression-wise equivalence of
71/// two values.
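///
/// For illustration (hypothetical IR names), two instructions that compute the
/// same expression over the same value numbers receive the same number:
///
///   %a = add i32 %x, %y    ; gets value number N
///   %b = add i32 %x, %y    ; also gets N -- same opcode, type, and operands
///   %c = add i32 %y, %x    ; may get a different number: operand order is
///                          ; part of the expression key in this table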
72namespace {
73  struct Expression {
74    enum ExpressionOpcode {
75      ADD = Instruction::Add,
76      FADD = Instruction::FAdd,
77      SUB = Instruction::Sub,
78      FSUB = Instruction::FSub,
79      MUL = Instruction::Mul,
80      FMUL = Instruction::FMul,
81      UDIV = Instruction::UDiv,
82      SDIV = Instruction::SDiv,
83      FDIV = Instruction::FDiv,
84      UREM = Instruction::URem,
85      SREM = Instruction::SRem,
86      FREM = Instruction::FRem,
87      SHL = Instruction::Shl,
88      LSHR = Instruction::LShr,
89      ASHR = Instruction::AShr,
90      AND = Instruction::And,
91      OR = Instruction::Or,
92      XOR = Instruction::Xor,
93      TRUNC = Instruction::Trunc,
94      ZEXT = Instruction::ZExt,
95      SEXT = Instruction::SExt,
96      FPTOUI = Instruction::FPToUI,
97      FPTOSI = Instruction::FPToSI,
98      UITOFP = Instruction::UIToFP,
99      SITOFP = Instruction::SIToFP,
100      FPTRUNC = Instruction::FPTrunc,
101      FPEXT = Instruction::FPExt,
102      PTRTOINT = Instruction::PtrToInt,
103      INTTOPTR = Instruction::IntToPtr,
104      BITCAST = Instruction::BitCast,
105      ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
106      ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
107      FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
108      FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
109      FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
110      SHUFFLE, SELECT, GEP, CALL, CONSTANT,
111      INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
112
113    ExpressionOpcode opcode;
114    const Type* type;
115    SmallVector<uint32_t, 4> varargs;
116    Value *function;
117
118    Expression() { }
119    Expression(ExpressionOpcode o) : opcode(o) { }
120
121    bool operator==(const Expression &other) const {
122      if (opcode != other.opcode)
123        return false;
124      else if (opcode == EMPTY || opcode == TOMBSTONE)
125        return true;
126      else if (type != other.type)
127        return false;
128      else if (function != other.function)
129        return false;
130      else {
131        if (varargs.size() != other.varargs.size())
132          return false;
133
134        for (size_t i = 0; i < varargs.size(); ++i)
135          if (varargs[i] != other.varargs[i])
136            return false;
137
138        return true;
139      }
140    }
141
142    /*bool operator!=(const Expression &other) const {
143      return !(*this == other);
144    }*/
145  };
146
147  class ValueTable {
148    private:
149      DenseMap<Value*, uint32_t> valueNumbering;
150      DenseMap<Expression, uint32_t> expressionNumbering;
151      AliasAnalysis* AA;
152      MemoryDependenceAnalysis* MD;
153      DominatorTree* DT;
154
155      uint32_t nextValueNumber;
156
157      Expression::ExpressionOpcode getOpcode(CmpInst* C);
158      Expression create_expression(BinaryOperator* BO);
159      Expression create_expression(CmpInst* C);
160      Expression create_expression(ShuffleVectorInst* V);
161      Expression create_expression(ExtractElementInst* C);
162      Expression create_expression(InsertElementInst* V);
163      Expression create_expression(SelectInst* V);
164      Expression create_expression(CastInst* C);
165      Expression create_expression(GetElementPtrInst* G);
166      Expression create_expression(CallInst* C);
167      Expression create_expression(ExtractValueInst* C);
168      Expression create_expression(InsertValueInst* C);
169
170      uint32_t lookup_or_add_call(CallInst* C);
171    public:
172      ValueTable() : nextValueNumber(1) { }
173      uint32_t lookup_or_add(Value *V);
174      uint32_t lookup(Value *V) const;
175      void add(Value *V, uint32_t num);
176      void clear();
177      void erase(Value *v);
178      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
179      AliasAnalysis *getAliasAnalysis() const { return AA; }
180      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
181      void setDomTree(DominatorTree* D) { DT = D; }
182      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
183      void verifyRemoved(const Value *) const;
184  };
185}
186
187namespace llvm {
188template <> struct DenseMapInfo<Expression> {
189  static inline Expression getEmptyKey() {
190    return Expression(Expression::EMPTY);
191  }
192
193  static inline Expression getTombstoneKey() {
194    return Expression(Expression::TOMBSTONE);
195  }
196
197  static unsigned getHashValue(const Expression e) {
198    unsigned hash = e.opcode;
199
200    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
201            (unsigned)((uintptr_t)e.type >> 9)) + hash * 37;
202
203    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
204         E = e.varargs.end(); I != E; ++I)
205      hash = *I + hash * 37;
206
207    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
208            (unsigned)((uintptr_t)e.function >> 9)) +
209           hash * 37;
210
211    return hash;
212  }
213  static bool isEqual(const Expression &LHS, const Expression &RHS) {
214    return LHS == RHS;
215  }
216};
217
218template <>
219struct isPodLike<Expression> { static const bool value = true; };
220
221}
222
223//===----------------------------------------------------------------------===//
224//                     ValueTable Internal Functions
225//===----------------------------------------------------------------------===//
226
227Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
228  if (isa<ICmpInst>(C)) {
229    switch (C->getPredicate()) {
230    default:  // THIS SHOULD NEVER HAPPEN
231      llvm_unreachable("Comparison with unknown predicate?");
232    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
233    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
234    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
235    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
236    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
237    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
238    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
239    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
240    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
241    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
242    }
243  } else {
244    switch (C->getPredicate()) {
245    default: // THIS SHOULD NEVER HAPPEN
246      llvm_unreachable("Comparison with unknown predicate?");
247    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
248    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
249    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
250    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
251    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
252    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
253    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
254    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
255    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
256    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
257    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
258    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
259    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
260    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
261    }
262  }
263}
264
265Expression ValueTable::create_expression(CallInst* C) {
266  Expression e;
267
268  e.type = C->getType();
269  e.function = C->getCalledFunction();
270  e.opcode = Expression::CALL;
271
272  CallSite CS(C);
273  for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
274       I != E; ++I)
275    e.varargs.push_back(lookup_or_add(*I));
276
277  return e;
278}
279
280Expression ValueTable::create_expression(BinaryOperator* BO) {
281  Expression e;
282  e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
283  e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
284  e.function = 0;
285  e.type = BO->getType();
286  e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode());
287
288  return e;
289}
290
291Expression ValueTable::create_expression(CmpInst* C) {
292  Expression e;
293
294  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
295  e.varargs.push_back(lookup_or_add(C->getOperand(1)));
296  e.function = 0;
297  e.type = C->getType();
298  e.opcode = getOpcode(C);
299
300  return e;
301}
302
303Expression ValueTable::create_expression(CastInst* C) {
304  Expression e;
305
306  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
307  e.function = 0;
308  e.type = C->getType();
309  e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode());
310
311  return e;
312}
313
314Expression ValueTable::create_expression(ShuffleVectorInst* S) {
315  Expression e;
316
317  e.varargs.push_back(lookup_or_add(S->getOperand(0)));
318  e.varargs.push_back(lookup_or_add(S->getOperand(1)));
319  e.varargs.push_back(lookup_or_add(S->getOperand(2)));
320  e.function = 0;
321  e.type = S->getType();
322  e.opcode = Expression::SHUFFLE;
323
324  return e;
325}
326
327Expression ValueTable::create_expression(ExtractElementInst* E) {
328  Expression e;
329
330  e.varargs.push_back(lookup_or_add(E->getOperand(0)));
331  e.varargs.push_back(lookup_or_add(E->getOperand(1)));
332  e.function = 0;
333  e.type = E->getType();
334  e.opcode = Expression::EXTRACT;
335
336  return e;
337}
338
339Expression ValueTable::create_expression(InsertElementInst* I) {
340  Expression e;
341
342  e.varargs.push_back(lookup_or_add(I->getOperand(0)));
343  e.varargs.push_back(lookup_or_add(I->getOperand(1)));
344  e.varargs.push_back(lookup_or_add(I->getOperand(2)));
345  e.function = 0;
346  e.type = I->getType();
347  e.opcode = Expression::INSERT;
348
349  return e;
350}
351
352Expression ValueTable::create_expression(SelectInst* I) {
353  Expression e;
354
355  e.varargs.push_back(lookup_or_add(I->getCondition()));
356  e.varargs.push_back(lookup_or_add(I->getTrueValue()));
357  e.varargs.push_back(lookup_or_add(I->getFalseValue()));
358  e.function = 0;
359  e.type = I->getType();
360  e.opcode = Expression::SELECT;
361
362  return e;
363}
364
365Expression ValueTable::create_expression(GetElementPtrInst* G) {
366  Expression e;
367
368  e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
369  e.function = 0;
370  e.type = G->getType();
371  e.opcode = Expression::GEP;
372
373  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
374       I != E; ++I)
375    e.varargs.push_back(lookup_or_add(*I));
376
377  return e;
378}
379
380Expression ValueTable::create_expression(ExtractValueInst* E) {
381  Expression e;
382
383  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
384  for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
385       II != IE; ++II)
386    e.varargs.push_back(*II);
387  e.function = 0;
388  e.type = E->getType();
389  e.opcode = Expression::EXTRACTVALUE;
390
391  return e;
392}
393
394Expression ValueTable::create_expression(InsertValueInst* E) {
395  Expression e;
396
397  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
398  e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
399  for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
400       II != IE; ++II)
401    e.varargs.push_back(*II);
402  e.function = 0;
403  e.type = E->getType();
404  e.opcode = Expression::INSERTVALUE;
405
406  return e;
407}
408
409//===----------------------------------------------------------------------===//
410//                     ValueTable External Functions
411//===----------------------------------------------------------------------===//
412
413/// add - Insert a value into the table with a specified value number.
414void ValueTable::add(Value *V, uint32_t num) {
415  valueNumbering.insert(std::make_pair(V, num));
416}
417
418uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
419  if (AA->doesNotAccessMemory(C)) {
420    Expression exp = create_expression(C);
421    uint32_t& e = expressionNumbering[exp];
422    if (!e) e = nextValueNumber++;
423    valueNumbering[C] = e;
424    return e;
425  } else if (AA->onlyReadsMemory(C)) {
426    Expression exp = create_expression(C);
427    uint32_t& e = expressionNumbering[exp];
428    if (!e) {
429      e = nextValueNumber++;
430      valueNumbering[C] = e;
431      return e;
432    }
433    if (!MD) {
434      e = nextValueNumber++;
435      valueNumbering[C] = e;
436      return e;
437    }
438
439    MemDepResult local_dep = MD->getDependency(C);
440
441    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
442      valueNumbering[C] =  nextValueNumber;
443      return nextValueNumber++;
444    }
445
446    if (local_dep.isDef()) {
447      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
448
449      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
450        valueNumbering[C] = nextValueNumber;
451        return nextValueNumber++;
452      }
453
454      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
455        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
456        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
457        if (c_vn != cd_vn) {
458          valueNumbering[C] = nextValueNumber;
459          return nextValueNumber++;
460        }
461      }
462
463      uint32_t v = lookup_or_add(local_cdep);
464      valueNumbering[C] = v;
465      return v;
466    }
467
468    // Non-local case.
469    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
470      MD->getNonLocalCallDependency(CallSite(C));
471    // FIXME: call/call dependencies for readonly calls should return def, not
472    // clobber!  Move the checking logic to MemDep!
473    CallInst* cdep = 0;
474
475    // Check to see if we have a single dominating call instruction that is
476    // identical to C.
477    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
478      const NonLocalDepEntry *I = &deps[i];
479      // Ignore non-local dependencies.
480      if (I->getResult().isNonLocal())
481        continue;
482
483      // We don't handle non-dependencies.  If we already have a call, reject
484      // instruction dependencies.
485      if (I->getResult().isClobber() || cdep != 0) {
486        cdep = 0;
487        break;
488      }
489
490      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
491      // FIXME: All duplicated with non-local case.
492      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
493        cdep = NonLocalDepCall;
494        continue;
495      }
496
497      cdep = 0;
498      break;
499    }
500
501    if (!cdep) {
502      valueNumbering[C] = nextValueNumber;
503      return nextValueNumber++;
504    }
505
506    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
507      valueNumbering[C] = nextValueNumber;
508      return nextValueNumber++;
509    }
510    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
511      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
512      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
513      if (c_vn != cd_vn) {
514        valueNumbering[C] = nextValueNumber;
515        return nextValueNumber++;
516      }
517    }
518
519    uint32_t v = lookup_or_add(cdep);
520    valueNumbering[C] = v;
521    return v;
522
523  } else {
524    valueNumbering[C] = nextValueNumber;
525    return nextValueNumber++;
526  }
527}
528
529/// lookup_or_add - Returns the value number for the specified value, assigning
530/// it a new number if it did not have one before.
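///
/// A rough sketch of how the GVN pass below drives the table while scanning a
/// block (hypothetical names; see processInstruction for the real flow):
///
///   uint32_t Num = VN.lookup_or_add(I);      // number I, creating one if new
///   if (Value *Repl = lookupNumber(BB, Num)) // a dominating leader exists?
///     I->replaceAllUsesWith(Repl);           //   then I is fully redundant
///   else
///     insert_table(Num, I);                  //   else I becomes the leader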
531uint32_t ValueTable::lookup_or_add(Value *V) {
532  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
533  if (VI != valueNumbering.end())
534    return VI->second;
535
536  if (!isa<Instruction>(V)) {
537    valueNumbering[V] = nextValueNumber;
538    return nextValueNumber++;
539  }
540
541  Instruction* I = cast<Instruction>(V);
542  Expression exp;
543  switch (I->getOpcode()) {
544    case Instruction::Call:
545      return lookup_or_add_call(cast<CallInst>(I));
546    case Instruction::Add:
547    case Instruction::FAdd:
548    case Instruction::Sub:
549    case Instruction::FSub:
550    case Instruction::Mul:
551    case Instruction::FMul:
552    case Instruction::UDiv:
553    case Instruction::SDiv:
554    case Instruction::FDiv:
555    case Instruction::URem:
556    case Instruction::SRem:
557    case Instruction::FRem:
558    case Instruction::Shl:
559    case Instruction::LShr:
560    case Instruction::AShr:
561    case Instruction::And:
562    case Instruction::Or :
563    case Instruction::Xor:
564      exp = create_expression(cast<BinaryOperator>(I));
565      break;
566    case Instruction::ICmp:
567    case Instruction::FCmp:
568      exp = create_expression(cast<CmpInst>(I));
569      break;
570    case Instruction::Trunc:
571    case Instruction::ZExt:
572    case Instruction::SExt:
573    case Instruction::FPToUI:
574    case Instruction::FPToSI:
575    case Instruction::UIToFP:
576    case Instruction::SIToFP:
577    case Instruction::FPTrunc:
578    case Instruction::FPExt:
579    case Instruction::PtrToInt:
580    case Instruction::IntToPtr:
581    case Instruction::BitCast:
582      exp = create_expression(cast<CastInst>(I));
583      break;
584    case Instruction::Select:
585      exp = create_expression(cast<SelectInst>(I));
586      break;
587    case Instruction::ExtractElement:
588      exp = create_expression(cast<ExtractElementInst>(I));
589      break;
590    case Instruction::InsertElement:
591      exp = create_expression(cast<InsertElementInst>(I));
592      break;
593    case Instruction::ShuffleVector:
594      exp = create_expression(cast<ShuffleVectorInst>(I));
595      break;
596    case Instruction::ExtractValue:
597      exp = create_expression(cast<ExtractValueInst>(I));
598      break;
599    case Instruction::InsertValue:
600      exp = create_expression(cast<InsertValueInst>(I));
601      break;
602    case Instruction::GetElementPtr:
603      exp = create_expression(cast<GetElementPtrInst>(I));
604      break;
605    default:
606      valueNumbering[V] = nextValueNumber;
607      return nextValueNumber++;
608  }
609
610  uint32_t& e = expressionNumbering[exp];
611  if (!e) e = nextValueNumber++;
612  valueNumbering[V] = e;
613  return e;
614}
615
616/// lookup - Returns the value number of the specified value. Fails if
617/// the value has not yet been numbered.
618uint32_t ValueTable::lookup(Value *V) const {
619  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
620  assert(VI != valueNumbering.end() && "Value not numbered?");
621  return VI->second;
622}
623
624/// clear - Remove all entries from the ValueTable
625void ValueTable::clear() {
626  valueNumbering.clear();
627  expressionNumbering.clear();
628  nextValueNumber = 1;
629}
630
631/// erase - Remove a value from the value numbering
632void ValueTable::erase(Value *V) {
633  valueNumbering.erase(V);
634}
635
636/// verifyRemoved - Verify that the value is removed from all internal data
637/// structures.
638void ValueTable::verifyRemoved(const Value *V) const {
639  for (DenseMap<Value*, uint32_t>::const_iterator
640         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
641    assert(I->first != V && "Inst still occurs in value numbering map!");
642  }
643}
644
645//===----------------------------------------------------------------------===//
646//                                GVN Pass
647//===----------------------------------------------------------------------===//
648
649namespace {
650  struct ValueNumberScope {
651    ValueNumberScope* parent;
652    DenseMap<uint32_t, Value*> table;
653
654    ValueNumberScope(ValueNumberScope* p) : parent(p) { }
655  };
656}
657
658namespace {
659
660  class GVN : public FunctionPass {
661    bool runOnFunction(Function &F);
662  public:
663    static char ID; // Pass identification, replacement for typeid
664    explicit GVN(bool noloads = false)
665        : FunctionPass(ID), NoLoads(noloads), MD(0) {
666      initializeGVNPass(*PassRegistry::getPassRegistry());
667    }
668
669  private:
670    bool NoLoads;
671    MemoryDependenceAnalysis *MD;
672    DominatorTree *DT;
673    const TargetData* TD;
674
675    ValueTable VN;
676
677    /// NumberTable - A mapping from value numbers to lists of Value*'s that
678    /// have that value number.  Use lookupNumber to query it.
679    DenseMap<uint32_t, std::pair<Value*, void*> > NumberTable;
680    BumpPtrAllocator TableAllocator;
681
682    /// insert_table - Push a new Value onto the NumberTable list for its
683    /// value number.
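    ///
    /// The list is an intrusive chain of (Value*, next) pairs: the map entry
    /// itself holds the first value, and later values are chained behind it.
    /// For example, after insert_table(N, V1) and then insert_table(N, V2):
    ///
    ///   NumberTable[N] == { V1, * } --> { V2, NULL }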
684    void insert_table(uint32_t N, Value *V) {
685      std::pair<Value*, void*>& Curr = NumberTable[N];
686      if (!Curr.first) {
687        Curr.first = V;
688        return;
689      }
690
691      std::pair<Value*, void*>* Node =
692        TableAllocator.Allocate<std::pair<Value*, void*> >();
693      Node->first = V;
694      Node->second = Curr.second;
695      Curr.second = Node;
696    }
697
698    /// erase_table - Scan the list of values corresponding to a given value
699    /// number, and remove the given value if encountered.
700    void erase_table(uint32_t N, Value *V) {
701      std::pair<Value*, void*>* Prev = 0;
702      std::pair<Value*, void*>* Curr = &NumberTable[N];
703
704      while (Curr->first != V) {
705        Prev = Curr;
706        Curr = static_cast<std::pair<Value*, void*>*>(Curr->second);
707      }
708
709      if (Prev) {
710        Prev->second = Curr->second;
711      } else {
712        if (!Curr->second) {
713          Curr->first = 0;
714        } else {
715          std::pair<Value*, void*>* Next =
716            static_cast<std::pair<Value*, void*>*>(Curr->second);
717          Curr->first = Next->first;
718          Curr->second = Next->second;
719        }
720      }
721    }
722
723    // List of critical edges to be split between iterations.
724    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
725
726    // This transformation requires dominator info.
727    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
728      AU.addRequired<DominatorTree>();
729      if (!NoLoads)
730        AU.addRequired<MemoryDependenceAnalysis>();
731      AU.addRequired<AliasAnalysis>();
732
733      AU.addPreserved<DominatorTree>();
734      AU.addPreserved<AliasAnalysis>();
735    }
736
737    // Helper functions
738    // FIXME: eliminate or document these better
739    bool processLoad(LoadInst* L,
740                     SmallVectorImpl<Instruction*> &toErase);
741    bool processInstruction(Instruction *I,
742                            SmallVectorImpl<Instruction*> &toErase);
743    bool processNonLocalLoad(LoadInst* L,
744                             SmallVectorImpl<Instruction*> &toErase);
745    bool processBlock(BasicBlock *BB);
746    void dump(DenseMap<uint32_t, Value*>& d);
747    bool iterateOnFunction(Function &F);
748    bool performPRE(Function& F);
749    Value *lookupNumber(BasicBlock *BB, uint32_t num);
750    void cleanupGlobalSets();
751    void verifyRemoved(const Instruction *I) const;
752    bool splitCriticalEdges();
753  };
754
755  char GVN::ID = 0;
756}
757
758// createGVNPass - The public interface to this file...
759FunctionPass *llvm::createGVNPass(bool NoLoads) {
760  return new GVN(NoLoads);
761}
762
763INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
764INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
765INITIALIZE_PASS_DEPENDENCY(DominatorTree)
766INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
767INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
768
769void GVN::dump(DenseMap<uint32_t, Value*>& d) {
770  errs() << "{\n";
771  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
772       E = d.end(); I != E; ++I) {
773      errs() << I->first << "\n";
774      I->second->dump();
775  }
776  errs() << "}\n";
777}
778
779/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
780/// we're analyzing is fully available in the specified block.  As we go, keep
781/// track of which blocks we know are fully available in FullyAvailableBlocks.
782/// This map holds one of the following values for each block:
783///   0) we know the block *is not* fully available.
784///   1) we know the block *is* fully available.
785///   2) we do not know whether the block is fully available or not, but we are
786///      currently speculating that it will be.
787///   3) we are speculating for this block and have used that to speculate for
788///      other blocks.
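///
/// A worked example (hypothetical CFG): querying a block L inside a loop first
/// inserts L with state 2 (speculatively available) and recurses into its
/// predecessors.  A back edge that reaches L again finds the speculative entry,
/// bumps it to state 3, and returns true rather than recursing forever.  If the
/// remaining predecessors are available the speculation stands; otherwise the
/// SpeculationFailure path below walks L's (transitive) successors and
/// conservatively re-marks them as unavailable.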
789static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
790                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
791  // Optimistically assume that the block is fully available and check to see
792  // if we already know about this block in one lookup.
793  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
794    FullyAvailableBlocks.insert(std::make_pair(BB, 2));
795
796  // If the entry already existed for this block, return the precomputed value.
797  if (!IV.second) {
798    // If this is a speculative "available" value, mark it as being used for
799    // speculation of other blocks.
800    if (IV.first->second == 2)
801      IV.first->second = 3;
802    return IV.first->second != 0;
803  }
804
805  // Otherwise, see if it is fully available in all predecessors.
806  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
807
808  // If this block has no predecessors, it isn't live-in here.
809  if (PI == PE)
810    goto SpeculationFailure;
811
812  for (; PI != PE; ++PI)
813    // If the value isn't fully available in one of our predecessors, then it
814    // isn't fully available in this block either.  Undo our previous
815    // optimistic assumption and bail out.
816    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
817      goto SpeculationFailure;
818
819  return true;
820
821// SpeculationFailure - If we get here, we found out that this is not, after
822// all, a fully-available block.  We have a problem if we speculated on this and
823// used the speculation to mark other blocks as available.
824SpeculationFailure:
825  char &BBVal = FullyAvailableBlocks[BB];
826
827  // If we didn't speculate on this, just return with it set to false.
828  if (BBVal == 2) {
829    BBVal = 0;
830    return false;
831  }
832
833  // If we did speculate on this value, we could have blocks set to 1 that are
834  // incorrect.  Walk the (transitive) successors of this block and mark them as
835  // 0 if set to one.
836  SmallVector<BasicBlock*, 32> BBWorklist;
837  BBWorklist.push_back(BB);
838
839  do {
840    BasicBlock *Entry = BBWorklist.pop_back_val();
841    // Note that this sets blocks to 0 (unavailable) if they happen to not
842    // already be in FullyAvailableBlocks.  This is safe.
843    char &EntryVal = FullyAvailableBlocks[Entry];
844    if (EntryVal == 0) continue;  // Already unavailable.
845
846    // Mark as unavailable.
847    EntryVal = 0;
848
849    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
850      BBWorklist.push_back(*I);
851  } while (!BBWorklist.empty());
852
853  return false;
854}
855
856
857/// CanCoerceMustAliasedValueToLoad - Return true if
858/// CoerceAvailableValueToLoadType will succeed.
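///
/// E.g. (assuming a 64-bit TargetData) an i64 or i8* store can feed an i32
/// load, an i16 store cannot, and first-class aggregates are never coerced.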
859static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
860                                            const Type *LoadTy,
861                                            const TargetData &TD) {
862  // If the loaded or stored value is a first-class array or struct, don't try
863  // to transform them.  We need to be able to bitcast to integer.
864  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
865      StoredVal->getType()->isStructTy() ||
866      StoredVal->getType()->isArrayTy())
867    return false;
868
869  // The store has to be at least as big as the load.
870  if (TD.getTypeSizeInBits(StoredVal->getType()) <
871        TD.getTypeSizeInBits(LoadTy))
872    return false;
873
874  return true;
875}
876
877
878/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
879/// then a load from a must-aliased pointer of a different type, try to coerce
880/// the stored value.  LoadedTy is the type of the load we want to replace and
881/// InsertPt is the place to insert new instructions.
882///
883/// If we can't do it, return null.
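///
/// For illustration (hypothetical IR on a 64-bit target), forwarding a stored
/// pointer to a same-sized integer load emits a ptrtoint, and the reverse
/// direction an inttoptr:
///
///   store i8* %p, i8** %slot
///   %v = load i64* %slot.as.i64     ; becomes:  %v = ptrtoint i8* %p to i64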
884static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
885                                             const Type *LoadedTy,
886                                             Instruction *InsertPt,
887                                             const TargetData &TD) {
888  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
889    return 0;
890
891  const Type *StoredValTy = StoredVal->getType();
892
893  uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
894  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
895
896  // If the store and reload are the same size, we can always reuse it.
897  if (StoreSize == LoadSize) {
898    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
899      // Pointer to Pointer -> use bitcast.
900      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
901    }
902
903    // Convert source pointers to integers, which can be bitcast.
904    if (StoredValTy->isPointerTy()) {
905      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
906      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
907    }
908
909    const Type *TypeToCastTo = LoadedTy;
910    if (TypeToCastTo->isPointerTy())
911      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
912
913    if (StoredValTy != TypeToCastTo)
914      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
915
916    // Cast to pointer if the load needs a pointer type.
917    if (LoadedTy->isPointerTy())
918      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
919
920    return StoredVal;
921  }
922
923  // If the loaded value is smaller than the available value, then we can
924  // extract out a piece from it.  If the available value is too small, then we
925  // can't do anything.
926  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
927
928  // Convert source pointers to integers, which can be manipulated.
929  if (StoredValTy->isPointerTy()) {
930    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
931    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
932  }
933
934  // Convert vectors and fp to integer, which can be manipulated.
935  if (!StoredValTy->isIntegerTy()) {
936    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
937    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
938  }
939
940  // If this is a big-endian system, we need to shift the value down to the low
941  // bits so that a truncate will work.
942  if (TD.isBigEndian()) {
943    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
944    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
945  }
946
947  // Truncate the integer to the right size now.
948  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
949  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
950
951  if (LoadedTy == NewIntTy)
952    return StoredVal;
953
954  // If the result is a pointer, inttoptr.
955  if (LoadedTy->isPointerTy())
956    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
957
958  // Otherwise, bitcast.
959  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
960}
961
962/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
963/// memdep query of a load that ends up being a clobbering memory write (store,
964/// memset, memcpy, memmove).  This means that the write *may* provide bits used
965/// by the load but we can't be sure because the pointers don't mustalias.
966///
967/// Check this case to see if there is anything more we can do before we give
968/// up.  This returns -1 if we have to give up, or a byte number in the stored
969/// value of the piece that feeds the load.
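///
/// A worked example (hypothetical pointers into the same base object): an
/// 8-byte store covering bytes [0,8) and a 2-byte load starting at byte 4 are
/// fully covered, so this returns 4; if the load instead started at byte 7 it
/// would run past the stored bytes and the result would be -1.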
970static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
971                                          Value *WritePtr,
972                                          uint64_t WriteSizeInBits,
973                                          const TargetData &TD) {
974  // If the loaded or stored value is a first-class array or struct, don't try
975  // to transform them.  We need to be able to bitcast to integer.
976  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
977    return -1;
978
979  int64_t StoreOffset = 0, LoadOffset = 0;
980  Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
981  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
982  if (StoreBase != LoadBase)
983    return -1;
984
985  // If the load and store are to the exact same address, they should have been
986  // a must alias.  AA must have gotten confused.
987  // FIXME: Study to see if/when this happens.  One case is forwarding a memset
988  // to a load from the base of the memset.
989#if 0
990  if (LoadOffset == StoreOffset) {
991    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
992    << "Base       = " << *StoreBase << "\n"
993    << "Store Ptr  = " << *WritePtr << "\n"
994    << "Store Offs = " << StoreOffset << "\n"
995    << "Load Ptr   = " << *LoadPtr << "\n";
996    abort();
997  }
998#endif
999
1000  // If the load and store don't overlap at all, the store doesn't provide
1001  // anything to the load.  In this case, they really don't alias at all; AA
1002  // must have gotten confused.
1003  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
1004
1005  if ((WriteSizeInBits & 7) | (LoadSize & 7))
1006    return -1;
1007  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
1008  LoadSize >>= 3;
1009
1010
1011  bool isAAFailure = false;
1012  if (StoreOffset < LoadOffset)
1013    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
1014  else
1015    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
1016
1017  if (isAAFailure) {
1018#if 0
1019    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
1020    << "Base       = " << *StoreBase << "\n"
1021    << "Store Ptr  = " << *WritePtr << "\n"
1022    << "Store Offs = " << StoreOffset << "\n"
1023    << "Load Ptr   = " << *LoadPtr << "\n";
1024    abort();
1025#endif
1026    return -1;
1027  }
1028
1029  // If the Load isn't completely contained within the stored bits, we don't
1030  // have all the bits to feed it.  We could do something crazy in the future
1031  // (issue a smaller load, then merge the bits in) but this seems unlikely to be
1032  // valuable.
1033  if (StoreOffset > LoadOffset ||
1034      StoreOffset+StoreSize < LoadOffset+LoadSize)
1035    return -1;
1036
1037  // Okay, we can do this transformation.  Return the number of bytes into the
1038  // store that the load is.
1039  return LoadOffset-StoreOffset;
1040}
1041
1042/// AnalyzeLoadFromClobberingStore - This function is called when we have a
1043/// memdep query of a load that ends up being a clobbering store.
1044static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
1045                                          StoreInst *DepSI,
1046                                          const TargetData &TD) {
1047  // Cannot handle reading from store of first-class aggregate yet.
1048  if (DepSI->getValueOperand()->getType()->isStructTy() ||
1049      DepSI->getValueOperand()->getType()->isArrayTy())
1050    return -1;
1051
1052  Value *StorePtr = DepSI->getPointerOperand();
1053  uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
1054  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
1055                                        StorePtr, StoreSize, TD);
1056}
1057
1058static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
1059                                            MemIntrinsic *MI,
1060                                            const TargetData &TD) {
1061  // If the mem operation is a non-constant size, we can't handle it.
1062  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
1063  if (SizeCst == 0) return -1;
1064  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
1065
1066  // If this is a memset, we just need to see if the offset is valid in the
1067  // size of the memset.
1068  if (MI->getIntrinsicID() == Intrinsic::memset)
1069    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
1070                                          MemSizeInBits, TD);
1071
1072  // If we have a memcpy/memmove, the only case we can handle is if this is a
1073  // copy from constant memory.  In that case, we can read directly from the
1074  // constant memory.
1075  MemTransferInst *MTI = cast<MemTransferInst>(MI);
1076
1077  Constant *Src = dyn_cast<Constant>(MTI->getSource());
1078  if (Src == 0) return -1;
1079
1080  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src));
1081  if (GV == 0 || !GV->isConstant()) return -1;
1082
1083  // See if the access is within the bounds of the transfer.
1084  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
1085                                              MI->getDest(), MemSizeInBits, TD);
1086  if (Offset == -1)
1087    return Offset;
1088
1089  // Otherwise, see if we can constant fold a load from the constant with the
1090  // offset applied as appropriate.
1091  Src = ConstantExpr::getBitCast(Src,
1092                                 llvm::Type::getInt8PtrTy(Src->getContext()));
1093  Constant *OffsetCst =
1094    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
1095  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
1096  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
1097  if (ConstantFoldLoadFromConstPtr(Src, &TD))
1098    return Offset;
1099  return -1;
1100}
1101
1102
1103/// GetStoreValueForLoad - This function is called when we have a
1104/// memdep query of a load that ends up being a clobbering store.  This means
1105/// that the store *may* provide bits used by the load but we can't be sure
1106/// because the pointers don't mustalias.  Check this case to see if there is
1107/// anything more we can do before we give up.
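///
/// E.g. (hypothetical IR, little-endian target), reusing byte offset 4 of a
/// stored i64 for an i16 load emits roughly:
///
///   %s = lshr i64 %stored, 32      ; shift the interesting bytes down
///   %v = trunc i64 %s to i16       ; then truncate to the load's size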
1108static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
1109                                   const Type *LoadTy,
1110                                   Instruction *InsertPt, const TargetData &TD){
1111  LLVMContext &Ctx = SrcVal->getType()->getContext();
1112
1113  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
1114  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
1115
1116  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
1117
1118  // Compute which bits of the stored value are being used by the load.  Convert
1119  // to an integer type to start with.
1120  if (SrcVal->getType()->isPointerTy())
1121    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
1122  if (!SrcVal->getType()->isIntegerTy())
1123    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
1124                                   "tmp");
1125
1126  // Shift the bits to the least significant depending on endianness.
1127  unsigned ShiftAmt;
1128  if (TD.isLittleEndian())
1129    ShiftAmt = Offset*8;
1130  else
1131    ShiftAmt = (StoreSize-LoadSize-Offset)*8;
1132
1133  if (ShiftAmt)
1134    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");
1135
1136  if (LoadSize != StoreSize)
1137    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
1138                                 "tmp");
1139
1140  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
1141}
1142
1143/// GetMemInstValueForLoad - This function is called when we have a
1144/// memdep query of a load that ends up being a clobbering mem intrinsic.
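///
/// E.g. an i32 load fully covered by memset(%p, 42, 16) is rewritten, via the
/// byte-splat loop below, to 42 | 42<<8 | 42<<16 | 42<<24 (0x2A2A2A2A), while a
/// load covered by a memcpy from a constant global is constant-folded directly
/// from the global's initializer.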
1145static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
1146                                     const Type *LoadTy, Instruction *InsertPt,
1147                                     const TargetData &TD){
1148  LLVMContext &Ctx = LoadTy->getContext();
1149  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
1150
1151  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
1152
1153  // We know that this method is only called when the mem transfer fully
1154  // provides the bits for the load.
1155  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
1156    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
1157    // regardless of what the offset is.
1158    Value *Val = MSI->getValue();
1159    if (LoadSize != 1)
1160      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
1161
1162    Value *OneElt = Val;
1163
1164    // Splat the value out to the right number of bits.
1165    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
1166      // If we can double the number of bytes set, do it.
1167      if (NumBytesSet*2 <= LoadSize) {
1168        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
1169        Val = Builder.CreateOr(Val, ShVal);
1170        NumBytesSet <<= 1;
1171        continue;
1172      }
1173
1174      // Otherwise insert one byte at a time.
1175      Value *ShVal = Builder.CreateShl(Val, 1*8);
1176      Val = Builder.CreateOr(OneElt, ShVal);
1177      ++NumBytesSet;
1178    }
1179
1180    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
1181  }
1182
1183  // Otherwise, this is a memcpy/memmove from a constant global.
1184  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
1185  Constant *Src = cast<Constant>(MTI->getSource());
1186
1187  // Otherwise, see if we can constant fold a load from the constant with the
1188  // offset applied as appropriate.
1189  Src = ConstantExpr::getBitCast(Src,
1190                                 llvm::Type::getInt8PtrTy(Src->getContext()));
1191  Constant *OffsetCst =
1192    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
1193  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
1194  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
1195  return ConstantFoldLoadFromConstPtr(Src, &TD);
1196}
1197
1198namespace {
1199
1200struct AvailableValueInBlock {
1201  /// BB - The basic block in question.
1202  BasicBlock *BB;
1203  enum ValType {
1204    SimpleVal,  // A simple value that is accessed, possibly at an offset.
1205    MemIntrin   // A memory intrinsic which is loaded from.
1206  };
1207
1208  /// V - The value that is live out of the block.
1209  PointerIntPair<Value *, 1, ValType> Val;
1210
1211  /// Offset - The byte offset in Val that is interesting for the load query.
1212  unsigned Offset;
1213
1214  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
1215                                   unsigned Offset = 0) {
1216    AvailableValueInBlock Res;
1217    Res.BB = BB;
1218    Res.Val.setPointer(V);
1219    Res.Val.setInt(SimpleVal);
1220    Res.Offset = Offset;
1221    return Res;
1222  }
1223
1224  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
1225                                     unsigned Offset = 0) {
1226    AvailableValueInBlock Res;
1227    Res.BB = BB;
1228    Res.Val.setPointer(MI);
1229    Res.Val.setInt(MemIntrin);
1230    Res.Offset = Offset;
1231    return Res;
1232  }
1233
1234  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
1235  Value *getSimpleValue() const {
1236    assert(isSimpleValue() && "Wrong accessor");
1237    return Val.getPointer();
1238  }
1239
1240  MemIntrinsic *getMemIntrinValue() const {
1241    assert(!isSimpleValue() && "Wrong accessor");
1242    return cast<MemIntrinsic>(Val.getPointer());
1243  }
1244
1245  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
1246  /// defined here to the specified type.  This handles various coercion cases.
1247  Value *MaterializeAdjustedValue(const Type *LoadTy,
1248                                  const TargetData *TD) const {
1249    Value *Res;
1250    if (isSimpleValue()) {
1251      Res = getSimpleValue();
1252      if (Res->getType() != LoadTy) {
1253        assert(TD && "Need target data to handle type mismatch case");
1254        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
1255                                   *TD);
1256
1257        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
1258                     << *getSimpleValue() << '\n'
1259                     << *Res << '\n' << "\n\n\n");
1260      }
1261    } else {
1262      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
1263                                   LoadTy, BB->getTerminator(), *TD);
1264      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1265                   << "  " << *getMemIntrinValue() << '\n'
1266                   << *Res << '\n' << "\n\n\n");
1267    }
1268    return Res;
1269  }
1270};
1271
1272}
1273
1274/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
1275/// construct SSA form, allowing us to eliminate LI.  This returns the value
1276/// that should be used at LI's definition site.
1277static Value *ConstructSSAForLoadSet(LoadInst *LI,
1278                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
1279                                     const TargetData *TD,
1280                                     const DominatorTree &DT,
1281                                     AliasAnalysis *AA) {
1282  // Check for the fully redundant, dominating load case.  In this case, we can
1283  // just use the dominating value directly.
1284  if (ValuesPerBlock.size() == 1 &&
1285      DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
1286    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);
1287
1288  // Otherwise, we have to construct SSA form.
1289  SmallVector<PHINode*, 8> NewPHIs;
1290  SSAUpdater SSAUpdate(&NewPHIs);
1291  SSAUpdate.Initialize(LI->getType(), LI->getName());
1292
1293  const Type *LoadTy = LI->getType();
1294
1295  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
1296    const AvailableValueInBlock &AV = ValuesPerBlock[i];
1297    BasicBlock *BB = AV.BB;
1298
1299    if (SSAUpdate.HasValueForBlock(BB))
1300      continue;
1301
1302    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
1303  }
1304
1305  // Perform PHI construction.
1306  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
1307
1308  // If new PHI nodes were created, notify alias analysis.
1309  if (V->getType()->isPointerTy())
1310    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
1311      AA->copyValue(LI, NewPHIs[i]);
1312
1313  return V;
1314}
1315
1316static bool isLifetimeStart(const Instruction *Inst) {
1317  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
1318    return II->getIntrinsicID() == Intrinsic::lifetime_start;
1319  return false;
1320}
1321
1322/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
1323/// non-local by performing PHI construction.
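///
/// For illustration (hypothetical IR): if both predecessors of %join store to
/// %p, the load below is fully redundant and is replaced by a phi of the two
/// stored values:
///
///   join:
///     %v = load i32* %p
///       ==>  %v = phi i32 [ %a, %then ], [ %b, %else ]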
1324bool GVN::processNonLocalLoad(LoadInst *LI,
1325                              SmallVectorImpl<Instruction*> &toErase) {
1326  // Find the non-local dependencies of the load.
1327  SmallVector<NonLocalDepResult, 64> Deps;
1328  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
1329  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
1330  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
1331  //             << Deps.size() << *LI << '\n');
1332
1333  // If we had to process more than one hundred blocks to find the
1334  // dependencies, this load isn't worth worrying about.  Optimizing
1335  // it will be too expensive.
1336  if (Deps.size() > 100)
1337    return false;
1338
1339  // If we had a phi translation failure, we'll have a single entry which is a
1340  // clobber in the current block.  Reject this early.
1341  if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
1342    DEBUG(
1343      dbgs() << "GVN: non-local load ";
1344      WriteAsOperand(dbgs(), LI);
1345      dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
1346    );
1347    return false;
1348  }
1349
1350  // Filter out useless results (non-locals, etc).  Keep track of the blocks
1351  // where we have a value available (ValuesPerBlock), and of whether we see
1352  // dependencies that produce an unknown value for the load (such as a call
1353  // that could potentially clobber the load).
1354  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
1355  SmallVector<BasicBlock*, 16> UnavailableBlocks;
1356
1357  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
1358    BasicBlock *DepBB = Deps[i].getBB();
1359    MemDepResult DepInfo = Deps[i].getResult();
1360
1361    if (DepInfo.isClobber()) {
1362      // The address being loaded in this non-local block may not be the same as
1363      // the pointer operand of the load if PHI translation occurs.  Make sure
1364      // to consider the right address.
1365      Value *Address = Deps[i].getAddress();
1366
1367      // If the dependence is to a store that writes to a superset of the bits
1368      // read by the load, we can extract the bits we need for the load from the
1369      // stored value.
1370      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
1371        if (TD && Address) {
1372          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
1373                                                      DepSI, *TD);
1374          if (Offset != -1) {
1375            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1376                                                       DepSI->getValueOperand(),
1377                                                                Offset));
1378            continue;
1379          }
1380        }
1381      }
1382
1383      // If the clobbering value is a memset/memcpy/memmove, see if we can
1384      // forward a value on from it.
1385      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
1386        if (TD && Address) {
1387          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
1388                                                        DepMI, *TD);
1389          if (Offset != -1) {
1390            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
1391                                                                  Offset));
1392            continue;
1393          }
1394        }
1395      }
1396
1397      UnavailableBlocks.push_back(DepBB);
1398      continue;
1399    }
1400
1401    Instruction *DepInst = DepInfo.getInst();
1402
1403    // Loading the allocation -> undef.
1404    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
1405        // Loading immediately after lifetime begin -> undef.
1406        isLifetimeStart(DepInst)) {
1407      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1408                                             UndefValue::get(LI->getType())));
1409      continue;
1410    }
1411
1412    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
1413      // Reject loads and stores that are to the same address but are of
1414      // different types if we have to.
1415      if (S->getValueOperand()->getType() != LI->getType()) {
1416        // If the stored value is larger than or equal to the loaded value,
1417        // we can reuse it.
1418        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
1419                                                        LI->getType(), *TD)) {
1420          UnavailableBlocks.push_back(DepBB);
1421          continue;
1422        }
1423      }
1424
1425      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1426                                                         S->getValueOperand()));
1427      continue;
1428    }
1429
1430    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
1431      // If the types mismatch and we can't handle it, reject reuse of the load.
1432      if (LD->getType() != LI->getType()) {
1433        // If the stored value is larger than or equal to the loaded value,
1434        // we can reuse it.
1435        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
1436          UnavailableBlocks.push_back(DepBB);
1437          continue;
1438        }
1439      }
1440      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
1441      continue;
1442    }
1443
1444    UnavailableBlocks.push_back(DepBB);
1445    continue;
1446  }
1447
1448  // If we have no predecessors that produce a known value for this load, exit
1449  // early.
1450  if (ValuesPerBlock.empty()) return false;
1451
1452  // If all of the instructions we depend on produce a known value for this
1453  // load, then it is fully redundant and we can use PHI insertion to compute
1454  // its value.  Insert PHIs and remove the fully redundant value now.
1455  if (UnavailableBlocks.empty()) {
1456    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
1457
1458    // Perform PHI construction.
1459    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
1460                                      VN.getAliasAnalysis());
1461    LI->replaceAllUsesWith(V);
1462
1463    if (isa<PHINode>(V))
1464      V->takeName(LI);
1465    if (V->getType()->isPointerTy())
1466      MD->invalidateCachedPointerInfo(V);
1467    VN.erase(LI);
1468    toErase.push_back(LI);
1469    ++NumGVNLoad;
1470    return true;
1471  }
1472
1473  if (!EnablePRE || !EnableLoadPRE)
1474    return false;
1475
1476  // Okay, we have *some* definitions of the value.  This means that the value
1477  // is available in some of our (transitive) predecessors.  Let's think about
1478  // doing PRE of this load.  This will involve inserting a new load into a
1479  // predecessor where the value is not available.  We could do this in general,
1480  // but prefer not to increase code size.  As such, we only do this when we
1481  // know that we only have to insert *one* load (which means we're basically
1482  // moving the load, not inserting a new one).
1483
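  // Record the blocks where the load is known to be unavailable; the walk below
  // will refuse to hoist the load backwards past any of them.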
1484  SmallPtrSet<BasicBlock *, 4> Blockers;
1485  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
1486    Blockers.insert(UnavailableBlocks[i]);
1487
1488  // Let's find the first basic block with more than one predecessor.  Walk
1489  // backwards through predecessors if needed.
1490  BasicBlock *LoadBB = LI->getParent();
1491  BasicBlock *TmpBB = LoadBB;
1492
1493  bool isSinglePred = false;
1494  bool allSingleSucc = true;
1495  while (TmpBB->getSinglePredecessor()) {
1496    isSinglePred = true;
1497    TmpBB = TmpBB->getSinglePredecessor();
1498    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1499      return false;
1500    if (Blockers.count(TmpBB))
1501      return false;
1502
1503    // If any of these blocks has more than one successor (i.e. if the edge we
1504    // just traversed was critical), then there are other paths through this
1505    // block along which the load may not be anticipated.  Hoisting the load
1506    // above this block would be adding the load to execution paths along
1507    // which it was not previously executed.
1508    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1509      return false;
1510  }
1511
1512  assert(TmpBB);
1513  LoadBB = TmpBB;
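  // LoadBB is now the earliest block, walking up the chain of single-predecessor
  // blocks, into whose predecessors we may try to hoist the load.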
1514
1515  // FIXME: It is extremely unclear what this loop is doing, other than
1516  // artificially restricting load PRE.
1517  if (isSinglePred) {
1518    bool isHot = false;
1519    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
1520      const AvailableValueInBlock &AV = ValuesPerBlock[i];
1521      if (AV.isSimpleValue())
1522        // The load is considered "hot" when it dominates the instruction that
1523        // would supply its value; that can only happen inside a loop.
1524        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
1525          if (DT->dominates(LI, I)) {
1526            isHot = true;
1527            break;
1528          }
1529    }
1530
1531    // We are only interested in the "hot" case; otherwise bail out rather than
1532    // risk pessimizing the code.
1533    if (!isHot)
1534      return false;
1535  }
1536
1537  // Check to see how many predecessors have the loaded value fully
1538  // available.
1539  DenseMap<BasicBlock*, Value*> PredLoads;
1540  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
1541  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
1542    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
1543  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
1544    FullyAvailableBlocks[UnavailableBlocks[i]] = false;
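  // The map seeded above is the availability state consulted by
  // IsValueFullyAvailableInBlock below: blocks that produce the value start out
  // available, blocks where it is known to be missing start out unavailable.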
1545
1546  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
1547  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
1548       PI != E; ++PI) {
1549    BasicBlock *Pred = *PI;
1550    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
1551      continue;
1552    }
1553    PredLoads[Pred] = 0;
1554
1555    if (Pred->getTerminator()->getNumSuccessors() != 1) {
1556      if (isa<IndirectBrInst>(Pred->getTerminator())) {
1557        DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1558              << Pred->getName() << "': " << *LI << '\n');
1559        return false;
1560      }
1561      unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
1562      NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
1563    }
1564  }
1565  if (!NeedToSplit.empty()) {
1566    toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
1567    return false;
1568  }
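  // Note that when we bail out here, splitCriticalEdges() will split the queued
  // edges and the pass will iterate again, giving this load another chance once
  // the CFG has been cleaned up.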
1569
1570  // Decide whether PRE is profitable for this load.
1571  unsigned NumUnavailablePreds = PredLoads.size();
1572  assert(NumUnavailablePreds != 0 &&
1573         "Fully available value should be eliminated above!");
1574
1575  // If this load is unavailable in multiple predecessors, reject it.
1576  // FIXME: If we could restructure the CFG, we could make a common pred with
1577  // all the preds that don't have an available LI and insert a new load into
1578  // that one block.
1579  if (NumUnavailablePreds != 1)
1580      return false;
1581
1582  // Check if the load can safely be moved to all the unavailable predecessors.
1583  bool CanDoPRE = true;
1584  SmallVector<Instruction*, 8> NewInsts;
1585  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
1586         E = PredLoads.end(); I != E; ++I) {
1587    BasicBlock *UnavailablePred = I->first;
1588
1589    // Do PHI translation to get its value in the predecessor if necessary.  The
1590    // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1591
1592    // If all preds have a single successor, then we assume it is safe to insert
1593    // the load in the pred (but see the FIXME below), so we can insert code to
1594    // materialize the pointer if it is not available.
1595    PHITransAddr Address(LI->getPointerOperand(), TD);
1596    Value *LoadPtr = 0;
1597    if (allSingleSucc) {
1598      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
1599                                                  *DT, NewInsts);
1600    } else {
1601      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
1602      LoadPtr = Address.getAddr();
1603    }
1604
1605    // If we couldn't find or insert a computation of this phi translated value,
1606    // we fail PRE.
1607    if (LoadPtr == 0) {
1608      DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1609            << *LI->getPointerOperand() << "\n");
1610      CanDoPRE = false;
1611      break;
1612    }
1613
1614    // Make sure it is valid to move this load here.  We have to watch out for:
1615    //  %1 = getelementptr i8* %p, ...
1616    //  test %p and branch if == 0
1617    //  load %1
1618    // It is valid to have the getelementptr before the test, even if %p can be
1619    // null, as getelementptr only does address arithmetic.
1620    // If we are not pushing the value through any multiple-successor blocks
1621    // we do not have this case.  Otherwise, check that the load is safe to
1622    // put anywhere; this can be improved, but should be conservatively safe.
1623    if (!allSingleSucc &&
1624        // FIXME: REEVALUATE THIS.
1625        !isSafeToLoadUnconditionally(LoadPtr,
1626                                     UnavailablePred->getTerminator(),
1627                                     LI->getAlignment(), TD)) {
1628      CanDoPRE = false;
1629      break;
1630    }
1631
1632    I->second = LoadPtr;
1633  }
1634
1635  if (!CanDoPRE) {
1636    while (!NewInsts.empty())
1637      NewInsts.pop_back_val()->eraseFromParent();
1638    return false;
1639  }
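  // Every entry in PredLoads now maps an unavailable predecessor to an address
  // for the load that is known to be available in that predecessor.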
1640
1641  // Okay, we can eliminate this load by inserting a reload in the predecessor
1642  // and using PHI construction to get the value in the other predecessors.
1643  // Do it now.
1644  DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
1645  DEBUG(if (!NewInsts.empty())
1646          dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
1647                 << *NewInsts.back() << '\n');
1648
1649  // Assign value numbers to the new instructions.
1650  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
1651    // FIXME: We really _ought_ to insert these value numbers into their
1652    // parent's availability map.  However, in doing so, we risk getting into
1653    // ordering issues.  If a block hasn't been processed yet, we would be
1654    // marking a value as AVAIL-IN, which isn't what we intend.
1655    VN.lookup_or_add(NewInsts[i]);
1656  }
1657
1658  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
1659         E = PredLoads.end(); I != E; ++I) {
1660    BasicBlock *UnavailablePred = I->first;
1661    Value *LoadPtr = I->second;
1662
1663    Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
1664                                        LI->getAlignment(),
1665                                        UnavailablePred->getTerminator());
1666
1667    // Transfer the old load's TBAA tag to the new load.
1668    if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
1669      NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
1670
1671    // Add the newly created load.
1672    ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
1673                                                        NewLoad));
1674    MD->invalidateCachedPointerInfo(LoadPtr);
1675    DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1676  }
1677
1678  // Perform PHI construction.
1679  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
1680                                    VN.getAliasAnalysis());
1681  LI->replaceAllUsesWith(V);
1682  if (isa<PHINode>(V))
1683    V->takeName(LI);
1684  if (V->getType()->isPointerTy())
1685    MD->invalidateCachedPointerInfo(V);
1686  VN.erase(LI);
1687  toErase.push_back(LI);
1688  ++NumPRELoad;
1689  return true;
1690}
1691
1692/// processLoad - Attempt to eliminate a load, first by eliminating it
1693/// locally, and then attempting non-local elimination if that fails.
1694bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
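  // MD is only set up when load elimination is enabled (see the NoLoads check in
  // runOnFunction); without memory dependence information there is nothing we
  // can do for this load.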
1695  if (!MD)
1696    return false;
1697
1698  if (L->isVolatile())
1699    return false;
1700
1701  // Ask memory dependence analysis what this load depends on.
1702  MemDepResult Dep = MD->getDependency(L);
1703
1704  // If the value isn't available, don't do anything!
1705  if (Dep.isClobber()) {
1706    // Check to see if we have something like this:
1707    //   store i32 123, i32* %P
1708    //   %A = bitcast i32* %P to i8*
1709    //   %B = gep i8* %A, i32 1
1710    //   %C = load i8* %B
1711    //
1712    // We can still forward the value if the clobbering instruction and the load
1713    // obviously share a common base + constant offset, and the previous store
1714    // (or memset) completely covers this load.  This sort of thing can happen in
1715    // bitfield access code.
1716    Value *AvailVal = 0;
1717    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
1718      if (TD) {
1719        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
1720                                                    L->getPointerOperand(),
1721                                                    DepSI, *TD);
1722        if (Offset != -1)
1723          AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
1724                                          L->getType(), L, *TD);
1725      }
1726
1727    // If the clobbering value is a memset/memcpy/memmove, see if we can forward
1728    // a value on from it.
1729    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
1730      if (TD) {
1731        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
1732                                                      L->getPointerOperand(),
1733                                                      DepMI, *TD);
1734        if (Offset != -1)
1735          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
1736      }
1737    }
1738
1739    if (AvailVal) {
1740      DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
1741            << *AvailVal << '\n' << *L << "\n\n\n");
1742
1743      // Replace the load!
1744      L->replaceAllUsesWith(AvailVal);
1745      if (AvailVal->getType()->isPointerTy())
1746        MD->invalidateCachedPointerInfo(AvailVal);
1747      VN.erase(L);
1748      toErase.push_back(L);
1749      ++NumGVNLoad;
1750      return true;
1751    }
1752
1753    DEBUG(
1754      // fast print dep, using operator<< on instruction would be too slow
1755      dbgs() << "GVN: load ";
1756      WriteAsOperand(dbgs(), L);
1757      Instruction *I = Dep.getInst();
1758      dbgs() << " is clobbered by " << *I << '\n';
1759    );
1760    return false;
1761  }
1762
1763  // If it is defined in another block, try harder.
1764  if (Dep.isNonLocal())
1765    return processNonLocalLoad(L, toErase);
1766
1767  Instruction *DepInst = Dep.getInst();
1768  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
1769    Value *StoredVal = DepSI->getValueOperand();
1770
1771    // The store and load are to a must-aliased pointer, but they may not
1772    // actually have the same type.  See if we know how to reuse the stored
1773    // value (depending on its type).
1774    if (StoredVal->getType() != L->getType()) {
1775      if (TD) {
1776        StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
1777                                                   L, *TD);
1778        if (StoredVal == 0)
1779          return false;
1780
1781        DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
1782                     << '\n' << *L << "\n\n\n");
1783      }
1784      else
1785        return false;
1786    }
1787
1788    // Remove it!
1789    L->replaceAllUsesWith(StoredVal);
1790    if (StoredVal->getType()->isPointerTy())
1791      MD->invalidateCachedPointerInfo(StoredVal);
1792    VN.erase(L);
1793    toErase.push_back(L);
1794    ++NumGVNLoad;
1795    return true;
1796  }
1797
1798  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
1799    Value *AvailableVal = DepLI;
1800
1801    // The loads are of a must-aliased pointer, but they may not actually have
1802    // the same type.  See if we know how to reuse the previously loaded value
1803    // (depending on its type).
1804    if (DepLI->getType() != L->getType()) {
1805      if (TD) {
1806        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
1807        if (AvailableVal == 0)
1808          return false;
1809
1810        DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
1811                     << "\n" << *L << "\n\n\n");
1812      }
1813      else
1814        return false;
1815    }
1816
1817    // Remove it!
1818    L->replaceAllUsesWith(AvailableVal);
1819    if (DepLI->getType()->isPointerTy())
1820      MD->invalidateCachedPointerInfo(DepLI);
1821    VN.erase(L);
1822    toErase.push_back(L);
1823    ++NumGVNLoad;
1824    return true;
1825  }
1826
1827  // If this load really doesn't depend on anything, then we must be loading an
1828  // undef value.  This can happen when loading from a fresh allocation with no
1829  // intervening stores, for example.
1830  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
1831    L->replaceAllUsesWith(UndefValue::get(L->getType()));
1832    VN.erase(L);
1833    toErase.push_back(L);
1834    ++NumGVNLoad;
1835    return true;
1836  }
1837
1838  // If this load occurs right after a lifetime.start intrinsic, then the loaded
1839  // value is undefined.
1840  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
1841    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1842      L->replaceAllUsesWith(UndefValue::get(L->getType()));
1843      VN.erase(L);
1844      toErase.push_back(L);
1845      ++NumGVNLoad;
1846      return true;
1847    }
1848  }
1849
1850  return false;
1851}
1852
1853// lookupNumber - In order to find a leader for a given value number at a
1854// specific basic block, we first obtain the list of all Values for that number,
1855// and then scan the list to find one whose block dominates the block in
1856// question.  This is fast because dominator tree queries consist of only
1857// a few comparisons of DFS numbers.
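// Each NumberTable entry is the head of a singly linked list of
// (Value*, next-pointer) pairs: the void* field points at the next node
// (presumably allocated from TableAllocator, which cleanupGlobalSets resets
// together with the table).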
1858Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
1859  std::pair<Value*, void*> Vals = NumberTable[num];
1860  if (!Vals.first) return 0;
1861  Instruction *Inst = dyn_cast<Instruction>(Vals.first);
1862  if (!Inst) return Vals.first;
1863  BasicBlock *Parent = Inst->getParent();
1864  if (DT->dominates(Parent, BB))
1865    return Inst;
1866
1867  std::pair<Value*, void*>* Next =
1868    static_cast<std::pair<Value*, void*>*>(Vals.second);
1869  while (Next) {
1870    Instruction *CurrInst = dyn_cast<Instruction>(Next->first);
1871    if (!CurrInst) return Next->first;
1872
1873    BasicBlock *Parent = CurrInst->getParent();
1874    if (DT->dominates(Parent, BB))
1875      return CurrInst;
1876
1877    Next = static_cast<std::pair<Value*, void*>*>(Next->second);
1878  }
1879
1880  return 0;
1881}
1882
1883
1884/// processInstruction - When calculating availability, handle an instruction
1885/// by inserting it into the appropriate sets.
1886bool GVN::processInstruction(Instruction *I,
1887                             SmallVectorImpl<Instruction*> &toErase) {
1888  // Ignore dbg info intrinsics.
1889  if (isa<DbgInfoIntrinsic>(I))
1890    return false;
1891
1892  // If the instruction can be easily simplified then do so now in preference
1893  // to value numbering it.  Value numbering often exposes redundancies, for
1894  // example if it determines that %y is equal to %x then the instruction
1895  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
1896  if (Value *V = SimplifyInstruction(I, TD, DT)) {
1897    I->replaceAllUsesWith(V);
1898    if (MD && V->getType()->isPointerTy())
1899      MD->invalidateCachedPointerInfo(V);
1900    VN.erase(I);
1901    toErase.push_back(I);
1902    return true;
1903  }
1904
1905  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1906    bool Changed = processLoad(LI, toErase);
1907
1908    if (!Changed) {
1909      unsigned Num = VN.lookup_or_add(LI);
1910      insert_table(Num, LI);
1911    }
1912
1913    return Changed;
1914  }
1915
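  // Remember the next unused value number before numbering I, so we can tell
  // below whether I was assigned a brand new number.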
1916  uint32_t NextNum = VN.getNextUnusedValueNumber();
1917  unsigned Num = VN.lookup_or_add(I);
1918
1919  // Allocations and terminator instructions are never eliminated, so we can
1920  // save time and memory by fast-failing them.
1921  if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
1922    insert_table(Num, I);
1923    return false;
1924  }
1925
1926  if (isa<PHINode>(I)) {
1927    insert_table(Num, I);
1928
1929  // If the number we were assigned was a brand new VN, then we don't
1930  // need to do a lookup to see if the number already exists
1931  // somewhere in the domtree: it can't!
1932  } else if (Num == NextNum) {
1933    insert_table(Num, I);
1934
1935  // Perform fast-path value-number based elimination of values inherited from
1936  // dominators.
1937  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
1938    // Remove it!
1939    VN.erase(I);
1940    I->replaceAllUsesWith(repl);
1941    if (MD && repl->getType()->isPointerTy())
1942      MD->invalidateCachedPointerInfo(repl);
1943    toErase.push_back(I);
1944    return true;
1945
1946  } else {
1947    insert_table(Num, I);
1948  }
1949
1950  return false;
1951}
1952
1953/// runOnFunction - This is the main transformation entry point for a function.
1954bool GVN::runOnFunction(Function& F) {
1955  if (!NoLoads)
1956    MD = &getAnalysis<MemoryDependenceAnalysis>();
1957  DT = &getAnalysis<DominatorTree>();
1958  TD = getAnalysisIfAvailable<TargetData>();
1959  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
1960  VN.setMemDep(MD);
1961  VN.setDomTree(DT);
1962
1963  bool Changed = false;
1964  bool ShouldContinue = true;
1965
1966  // Merge unconditional branches, allowing PRE to catch more
1967  // optimization opportunities.
1968  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
1969    BasicBlock *BB = FI;
1970    ++FI;
1971    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
1972    if (removedBlock) ++NumGVNBlocks;
1973
1974    Changed |= removedBlock;
1975  }
1976
1977  unsigned Iteration = 0;
1978
1979  while (ShouldContinue) {
1980    DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
1981    ShouldContinue = iterateOnFunction(F);
1982    if (splitCriticalEdges())
1983      ShouldContinue = true;
1984    Changed |= ShouldContinue;
1985    ++Iteration;
1986  }
1987
1988  if (EnablePRE) {
1989    bool PREChanged = true;
1990    while (PREChanged) {
1991      PREChanged = performPRE(F);
1992      Changed |= PREChanged;
1993    }
1994  }
1995  // FIXME: Should perform GVN again after PRE does something.  PRE can move
1996  // computations into blocks where they become fully redundant.  Note that
1997  // we can't do this until PRE's critical edge splitting updates memdep.
1998  // Actually, when this happens, we should just fully integrate PRE into GVN.
1999
2000  cleanupGlobalSets();
2001
2002  return Changed;
2003}
2004
2005
2006bool GVN::processBlock(BasicBlock *BB) {
2007  // FIXME: Kill off toErase by erasing eagerly in a helper function (and
2008  // incrementing BI before processing an instruction).
2009  SmallVector<Instruction*, 8> toErase;
2010  bool ChangedFunction = false;
2011
2012  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
2013       BI != BE;) {
2014    ChangedFunction |= processInstruction(BI, toErase);
2015    if (toErase.empty()) {
2016      ++BI;
2017      continue;
2018    }
2019
2020    // If we need some instructions deleted, do it now.
2021    NumGVNInstr += toErase.size();
2022
2023    // Avoid iterator invalidation.
2024    bool AtStart = BI == BB->begin();
2025    if (!AtStart)
2026      --BI;
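    // Whether we backed BI up or will reset it to begin(), re-advancing it after
    // the deletions below lands on the first instruction that survives.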
2027
2028    for (SmallVectorImpl<Instruction*>::iterator I = toErase.begin(),
2029         E = toErase.end(); I != E; ++I) {
2030      DEBUG(dbgs() << "GVN removed: " << **I << '\n');
2031      if (MD) MD->removeInstruction(*I);
2032      (*I)->eraseFromParent();
2033      DEBUG(verifyRemoved(*I));
2034    }
2035    toErase.clear();
2036
2037    if (AtStart)
2038      BI = BB->begin();
2039    else
2040      ++BI;
2041  }
2042
2043  return ChangedFunction;
2044}
2045
2046/// performPRE - Perform a purely local form of PRE that looks for diamond
2047/// control flow patterns and attempts to perform simple PRE at the join point.
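///
/// For illustration only (a hypothetical input), given a diamond where just one
/// predecessor computes the expression:
///
///   then:
///     %t = add i32 %a, %b
///     br label %merge
///   else:
///     br label %merge
///   merge:                            ; preds = %then, %else
///     %m = add i32 %a, %b             ; partially redundant
///
/// PRE clones the add into %else and replaces %m with a phi of the two copies.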
2048bool GVN::performPRE(Function &F) {
2049  bool Changed = false;
2050  DenseMap<BasicBlock*, Value*> predMap;
2051  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
2052       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
2053    BasicBlock *CurrentBlock = *DI;
2054
2055    // Nothing to PRE in the entry block.
2056    if (CurrentBlock == &F.getEntryBlock()) continue;
2057
2058    for (BasicBlock::iterator BI = CurrentBlock->begin(),
2059         BE = CurrentBlock->end(); BI != BE; ) {
2060      Instruction *CurInst = BI++;
2061
2062      if (isa<AllocaInst>(CurInst) ||
2063          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
2064          CurInst->getType()->isVoidTy() ||
2065          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
2066          isa<DbgInfoIntrinsic>(CurInst))
2067        continue;
2068
2069      // We don't currently value number ANY inline asm calls.
2070      if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
2071        if (CallI->isInlineAsm())
2072          continue;
2073
2074      uint32_t ValNo = VN.lookup(CurInst);
2075
2076      // Look for the predecessors for PRE opportunities.  We're
2077      // only trying to solve the basic diamond case, where
2078      // a value is computed in the successor and one predecessor,
2079      // but not the other.  We also explicitly disallow cases
2080      // where the block is its own predecessor, because such cases are
2081      // more complicated to get right.
2082      unsigned NumWith = 0;
2083      unsigned NumWithout = 0;
2084      BasicBlock *PREPred = 0;
2085      predMap.clear();
2086
2087      for (pred_iterator PI = pred_begin(CurrentBlock),
2088           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
2089        BasicBlock *P = *PI;
2090        // We're not interested in PRE where the block is its
2091        // own predecessor, or in blocks with predecessors
2092        // that are not reachable.
2093        if (P == CurrentBlock) {
2094          NumWithout = 2;
2095          break;
2096        } else if (!DT->dominates(&F.getEntryBlock(), P))  {
2097          NumWithout = 2;
2098          break;
2099        }
2100
2101        Value* predV = lookupNumber(P, ValNo);
2102        if (predV == 0) {
2103          PREPred = P;
2104          ++NumWithout;
2105        } else if (predV == CurInst) {
2106          NumWithout = 2;
2107        } else {
2108          predMap[P] = predV;
2109          ++NumWith;
2110        }
2111      }
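      // After the scan: NumWith counts predecessors that already compute the
      // value, NumWithout counts those that do not (with 2 used as a sentinel
      // meaning "give up"), and PREPred records a predecessor missing the value.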
2112
2113      // Don't do PRE when it might increase code size, i.e. when
2114      // we would need to insert instructions in more than one pred.
2115      if (NumWithout != 1 || NumWith == 0)
2116        continue;
2117
2118      // Don't do PRE across an indirect branch.
2119      if (isa<IndirectBrInst>(PREPred->getTerminator()))
2120        continue;
2121
2122      // We can't do PRE safely on a critical edge, so instead we schedule
2123      // the edge to be split and perform the PRE the next time we iterate
2124      // on the function.
2125      unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
2126      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
2127        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
2128        continue;
2129      }
2130
2131      // Instantiate the expression in the predecessor that lacked it.
2132      // Because we are going top-down through the block, all value numbers
2133      // will be available in the predecessor by the time we need them.  Any
2134      // that weren't originally present will have been instantiated earlier
2135      // in this loop.
2136      Instruction *PREInstr = CurInst->clone();
2137      bool success = true;
2138      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
2139        Value *Op = PREInstr->getOperand(i);
2140        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
2141          continue;
2142
2143        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
2144          PREInstr->setOperand(i, V);
2145        } else {
2146          success = false;
2147          break;
2148        }
2149      }
2150
2151      // Fail out if we encounter an operand that is not available in
2152      // the PRE predecessor.  This is typically because of loads which
2153      // are not value numbered precisely.
2154      if (!success) {
2155        delete PREInstr;
2156        DEBUG(verifyRemoved(PREInstr));
2157        continue;
2158      }
2159
2160      PREInstr->insertBefore(PREPred->getTerminator());
2161      PREInstr->setName(CurInst->getName() + ".pre");
2162      predMap[PREPred] = PREInstr;
2163      VN.add(PREInstr, ValNo);
2164      ++NumGVNPRE;
2165
2166      // Update the availability map to include the new instruction.
2167      insert_table(ValNo, PREInstr);
2168
2169      // Create a PHI to make the value available in this block.
2170      PHINode* Phi = PHINode::Create(CurInst->getType(),
2171                                     CurInst->getName() + ".pre-phi",
2172                                     CurrentBlock->begin());
2173      for (pred_iterator PI = pred_begin(CurrentBlock),
2174           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
2175        BasicBlock *P = *PI;
2176        Phi->addIncoming(predMap[P], P);
2177      }
2178
2179      VN.add(Phi, ValNo);
2180      insert_table(ValNo, Phi);
2181
2182      CurInst->replaceAllUsesWith(Phi);
2183      if (MD && Phi->getType()->isPointerTy())
2184        MD->invalidateCachedPointerInfo(Phi);
2185      VN.erase(CurInst);
2186      erase_table(ValNo, CurInst);
2187
2188      DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
2189      if (MD) MD->removeInstruction(CurInst);
2190      CurInst->eraseFromParent();
2191      DEBUG(verifyRemoved(CurInst));
2192      Changed = true;
2193    }
2194  }
2195
2196  if (splitCriticalEdges())
2197    Changed = true;
2198
2199  return Changed;
2200}
2201
2202/// splitCriticalEdges - Split critical edges found during the previous
2203/// iteration that may enable further optimization.
2204bool GVN::splitCriticalEdges() {
2205  if (toSplit.empty())
2206    return false;
2207  do {
2208    std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
2209    SplitCriticalEdge(Edge.first, Edge.second, this);
2210  } while (!toSplit.empty());
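  // Splitting edges changes predecessor lists, so any predecessor information
  // cached by memory dependence analysis is now stale.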
2211  if (MD) MD->invalidateCachedPredecessors();
2212  return true;
2213}
2214
2215/// iterateOnFunction - Executes one iteration of GVN
2216bool GVN::iterateOnFunction(Function &F) {
2217  cleanupGlobalSets();
2218
2219  // Top-down walk of the dominator tree
2220  bool Changed = false;
2221#if 0
2222  // Needed for value numbering with phi construction to work.
2223  ReversePostOrderTraversal<Function*> RPOT(&F);
2224  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
2225       RE = RPOT.end(); RI != RE; ++RI)
2226    Changed |= processBlock(*RI);
2227#else
2228  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
2229       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
2230    Changed |= processBlock(DI->getBlock());
2231#endif
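  // The depth-first walk over the dominator tree visits every block after all
  // of its dominators, so lookupNumber can find leaders recorded earlier in
  // this iteration.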
2232
2233  return Changed;
2234}
2235
2236void GVN::cleanupGlobalSets() {
2237  VN.clear();
2238  NumberTable.clear();
2239  TableAllocator.Reset();
2240}
2241
2242/// verifyRemoved - Verify that the specified instruction does not occur in our
2243/// internal data structures.
2244void GVN::verifyRemoved(const Instruction *Inst) const {
2245  VN.verifyRemoved(Inst);
2246
2247  // Walk through the value number scope to make sure the instruction isn't
2248  // ferreted away in it.
2249  for (DenseMap<uint32_t, std::pair<Value*, void*> >::const_iterator
2250       I = NumberTable.begin(), E = NumberTable.end(); I != E; ++I) {
2251    std::pair<Value*, void*> const * Node = &I->second;
2252    assert(Node->first != Inst && "Inst still in value numbering scope!");
2253
2254    while (Node->second) {
2255      Node = static_cast<std::pair<Value*, void*>*>(Node->second);
2256      assert(Node->first != Inst && "Inst still in value numbering scope!");
2257    }
2258  }
2259}
2260