GVN.cpp revision 88c3df7afb8e067daab12dbbbc9ce898a52e9624
1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass performs global value numbering to eliminate fully redundant
11// instructions.  It also performs simple dead load elimination.
12//
13// Note that this pass does the value numbering itself; it does not use the
14// ValueNumbering analysis passes.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "gvn"
19#include "llvm/Transforms/Scalar.h"
20#include "llvm/BasicBlock.h"
21#include "llvm/Constants.h"
22#include "llvm/DerivedTypes.h"
23#include "llvm/GlobalVariable.h"
24#include "llvm/Function.h"
25#include "llvm/IntrinsicInst.h"
26#include "llvm/LLVMContext.h"
27#include "llvm/Operator.h"
28#include "llvm/Value.h"
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/DepthFirstIterator.h"
31#include "llvm/ADT/PostOrderIterator.h"
32#include "llvm/ADT/SmallPtrSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/Analysis/AliasAnalysis.h"
36#include "llvm/Analysis/ConstantFolding.h"
37#include "llvm/Analysis/Dominators.h"
38#include "llvm/Analysis/InstructionSimplify.h"
39#include "llvm/Analysis/Loads.h"
40#include "llvm/Analysis/MemoryBuiltins.h"
41#include "llvm/Analysis/MemoryDependenceAnalysis.h"
42#include "llvm/Analysis/PHITransAddr.h"
43#include "llvm/Support/CFG.h"
44#include "llvm/Support/CommandLine.h"
45#include "llvm/Support/Debug.h"
46#include "llvm/Support/ErrorHandling.h"
47#include "llvm/Support/GetElementPtrTypeIterator.h"
48#include "llvm/Support/IRBuilder.h"
49#include "llvm/Support/raw_ostream.h"
50#include "llvm/Target/TargetData.h"
51#include "llvm/Transforms/Utils/BasicBlockUtils.h"
52#include "llvm/Transforms/Utils/Local.h"
53#include "llvm/Transforms/Utils/SSAUpdater.h"
54using namespace llvm;
55
56STATISTIC(NumGVNInstr,  "Number of instructions deleted");
57STATISTIC(NumGVNLoad,   "Number of loads deleted");
58STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
59STATISTIC(NumGVNBlocks, "Number of blocks merged");
60STATISTIC(NumPRELoad,   "Number of loads PRE'd");
61
62static cl::opt<bool> EnablePRE("enable-pre",
63                               cl::init(true), cl::Hidden);
64static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
65
66//===----------------------------------------------------------------------===//
67//                         ValueTable Class
68//===----------------------------------------------------------------------===//
69
70/// This class holds the mapping between values and value numbers.  It is used
71/// as an efficient mechanism to determine the expression-wise equivalence of
72/// two values.
73namespace {
74  struct Expression {
75    enum ExpressionOpcode {
76      ADD = Instruction::Add,
77      FADD = Instruction::FAdd,
78      SUB = Instruction::Sub,
79      FSUB = Instruction::FSub,
80      MUL = Instruction::Mul,
81      FMUL = Instruction::FMul,
82      UDIV = Instruction::UDiv,
83      SDIV = Instruction::SDiv,
84      FDIV = Instruction::FDiv,
85      UREM = Instruction::URem,
86      SREM = Instruction::SRem,
87      FREM = Instruction::FRem,
88      SHL = Instruction::Shl,
89      LSHR = Instruction::LShr,
90      ASHR = Instruction::AShr,
91      AND = Instruction::And,
92      OR = Instruction::Or,
93      XOR = Instruction::Xor,
94      TRUNC = Instruction::Trunc,
95      ZEXT = Instruction::ZExt,
96      SEXT = Instruction::SExt,
97      FPTOUI = Instruction::FPToUI,
98      FPTOSI = Instruction::FPToSI,
99      UITOFP = Instruction::UIToFP,
100      SITOFP = Instruction::SIToFP,
101      FPTRUNC = Instruction::FPTrunc,
102      FPEXT = Instruction::FPExt,
103      PTRTOINT = Instruction::PtrToInt,
104      INTTOPTR = Instruction::IntToPtr,
105      BITCAST = Instruction::BitCast,
106      ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
107      ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
108      FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
109      FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
110      FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
111      SHUFFLE, SELECT, GEP, CALL, CONSTANT,
112      INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
113
114    ExpressionOpcode opcode;
115    const Type* type;
116    SmallVector<uint32_t, 4> varargs;
117    Value *function;
118
119    Expression() { }
120    Expression(ExpressionOpcode o) : opcode(o) { }
121
122    bool operator==(const Expression &other) const {
123      if (opcode != other.opcode)
124        return false;
125      else if (opcode == EMPTY || opcode == TOMBSTONE)
126        return true;
127      else if (type != other.type)
128        return false;
129      else if (function != other.function)
130        return false;
131      else {
132        if (varargs.size() != other.varargs.size())
133          return false;
134
135        for (size_t i = 0; i < varargs.size(); ++i)
136          if (varargs[i] != other.varargs[i])
137            return false;
138
139        return true;
140      }
141    }
142
143    /*bool operator!=(const Expression &other) const {
144      return !(*this == other);
145    }*/
146  };
147
148  class ValueTable {
149    private:
150      DenseMap<Value*, uint32_t> valueNumbering;
151      DenseMap<Expression, uint32_t> expressionNumbering;
152      AliasAnalysis* AA;
153      MemoryDependenceAnalysis* MD;
154      DominatorTree* DT;
155
156      uint32_t nextValueNumber;
157
158      Expression::ExpressionOpcode getOpcode(CmpInst* C);
159      Expression create_expression(BinaryOperator* BO);
160      Expression create_expression(CmpInst* C);
161      Expression create_expression(ShuffleVectorInst* V);
162      Expression create_expression(ExtractElementInst* C);
163      Expression create_expression(InsertElementInst* V);
164      Expression create_expression(SelectInst* V);
165      Expression create_expression(CastInst* C);
166      Expression create_expression(GetElementPtrInst* G);
167      Expression create_expression(CallInst* C);
168      Expression create_expression(ExtractValueInst* C);
169      Expression create_expression(InsertValueInst* C);
170
171      uint32_t lookup_or_add_call(CallInst* C);
172    public:
173      ValueTable() : nextValueNumber(1) { }
174      uint32_t lookup_or_add(Value *V);
175      uint32_t lookup(Value *V) const;
176      void add(Value *V, uint32_t num);
177      void clear();
178      void erase(Value *v);
179      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
180      AliasAnalysis *getAliasAnalysis() const { return AA; }
181      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
182      void setDomTree(DominatorTree* D) { DT = D; }
183      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
184      void verifyRemoved(const Value *) const;
185  };
186}
187
188namespace llvm {
189template <> struct DenseMapInfo<Expression> {
190  static inline Expression getEmptyKey() {
191    return Expression(Expression::EMPTY);
192  }
193
194  static inline Expression getTombstoneKey() {
195    return Expression(Expression::TOMBSTONE);
196  }
197
198  static unsigned getHashValue(const Expression e) {
199    unsigned hash = e.opcode;
200
201    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
202            (unsigned)((uintptr_t)e.type >> 9)) + hash * 37;
203
204    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
205         E = e.varargs.end(); I != E; ++I)
206      hash = *I + hash * 37;
207
208    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
209            (unsigned)((uintptr_t)e.function >> 9)) +
210           hash * 37;
211
212    return hash;
213  }
214  static bool isEqual(const Expression &LHS, const Expression &RHS) {
215    return LHS == RHS;
216  }
217};
218
219template <>
220struct isPodLike<Expression> { static const bool value = true; };
221
222}
223
224//===----------------------------------------------------------------------===//
225//                     ValueTable Internal Functions
226//===----------------------------------------------------------------------===//
227
228Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
229  if (isa<ICmpInst>(C)) {
230    switch (C->getPredicate()) {
231    default:  // THIS SHOULD NEVER HAPPEN
232      llvm_unreachable("Comparison with unknown predicate?");
233    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
234    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
235    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
236    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
237    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
238    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
239    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
240    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
241    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
242    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
243    }
244  } else {
245    switch (C->getPredicate()) {
246    default: // THIS SHOULD NEVER HAPPEN
247      llvm_unreachable("Comparison with unknown predicate?");
248    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
249    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
250    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
251    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
252    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
253    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
254    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
255    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
256    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
257    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
258    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
259    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
260    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
261    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
262    }
263  }
264}
265
266Expression ValueTable::create_expression(CallInst* C) {
267  Expression e;
268
269  e.type = C->getType();
270  e.function = C->getCalledFunction();
271  e.opcode = Expression::CALL;
272
273  CallSite CS(C);
274  for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
275       I != E; ++I)
276    e.varargs.push_back(lookup_or_add(*I));
277
278  return e;
279}
280
281Expression ValueTable::create_expression(BinaryOperator* BO) {
282  Expression e;
283  e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
284  e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
285  e.function = 0;
286  e.type = BO->getType();
287  e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode());
288
289  return e;
290}
291
292Expression ValueTable::create_expression(CmpInst* C) {
293  Expression e;
294
295  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
296  e.varargs.push_back(lookup_or_add(C->getOperand(1)));
297  e.function = 0;
298  e.type = C->getType();
299  e.opcode = getOpcode(C);
300
301  return e;
302}
303
304Expression ValueTable::create_expression(CastInst* C) {
305  Expression e;
306
307  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
308  e.function = 0;
309  e.type = C->getType();
310  e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode());
311
312  return e;
313}
314
315Expression ValueTable::create_expression(ShuffleVectorInst* S) {
316  Expression e;
317
318  e.varargs.push_back(lookup_or_add(S->getOperand(0)));
319  e.varargs.push_back(lookup_or_add(S->getOperand(1)));
320  e.varargs.push_back(lookup_or_add(S->getOperand(2)));
321  e.function = 0;
322  e.type = S->getType();
323  e.opcode = Expression::SHUFFLE;
324
325  return e;
326}
327
328Expression ValueTable::create_expression(ExtractElementInst* E) {
329  Expression e;
330
331  e.varargs.push_back(lookup_or_add(E->getOperand(0)));
332  e.varargs.push_back(lookup_or_add(E->getOperand(1)));
333  e.function = 0;
334  e.type = E->getType();
335  e.opcode = Expression::EXTRACT;
336
337  return e;
338}
339
340Expression ValueTable::create_expression(InsertElementInst* I) {
341  Expression e;
342
343  e.varargs.push_back(lookup_or_add(I->getOperand(0)));
344  e.varargs.push_back(lookup_or_add(I->getOperand(1)));
345  e.varargs.push_back(lookup_or_add(I->getOperand(2)));
346  e.function = 0;
347  e.type = I->getType();
348  e.opcode = Expression::INSERT;
349
350  return e;
351}
352
353Expression ValueTable::create_expression(SelectInst* I) {
354  Expression e;
355
356  e.varargs.push_back(lookup_or_add(I->getCondition()));
357  e.varargs.push_back(lookup_or_add(I->getTrueValue()));
358  e.varargs.push_back(lookup_or_add(I->getFalseValue()));
359  e.function = 0;
360  e.type = I->getType();
361  e.opcode = Expression::SELECT;
362
363  return e;
364}
365
366Expression ValueTable::create_expression(GetElementPtrInst* G) {
367  Expression e;
368
369  e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
370  e.function = 0;
371  e.type = G->getType();
372  e.opcode = Expression::GEP;
373
374  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
375       I != E; ++I)
376    e.varargs.push_back(lookup_or_add(*I));
377
378  return e;
379}
380
381Expression ValueTable::create_expression(ExtractValueInst* E) {
382  Expression e;
383
384  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
385  for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
386       II != IE; ++II)
387    e.varargs.push_back(*II);
388  e.function = 0;
389  e.type = E->getType();
390  e.opcode = Expression::EXTRACTVALUE;
391
392  return e;
393}
394
395Expression ValueTable::create_expression(InsertValueInst* E) {
396  Expression e;
397
398  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
399  e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
400  for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
401       II != IE; ++II)
402    e.varargs.push_back(*II);
403  e.function = 0;
404  e.type = E->getType();
405  e.opcode = Expression::INSERTVALUE;
406
407  return e;
408}
409
410//===----------------------------------------------------------------------===//
411//                     ValueTable External Functions
412//===----------------------------------------------------------------------===//
413
414/// add - Insert a value into the table with a specified value number.
415void ValueTable::add(Value *V, uint32_t num) {
416  valueNumbering.insert(std::make_pair(V, num));
417}
418
419uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
420  if (AA->doesNotAccessMemory(C)) {
421    Expression exp = create_expression(C);
422    uint32_t& e = expressionNumbering[exp];
423    if (!e) e = nextValueNumber++;
424    valueNumbering[C] = e;
425    return e;
426  } else if (AA->onlyReadsMemory(C)) {
427    Expression exp = create_expression(C);
428    uint32_t& e = expressionNumbering[exp];
429    if (!e) {
430      e = nextValueNumber++;
431      valueNumbering[C] = e;
432      return e;
433    }
434    if (!MD) {
435      e = nextValueNumber++;
436      valueNumbering[C] = e;
437      return e;
438    }
439
440    MemDepResult local_dep = MD->getDependency(C);
441
442    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
443      valueNumbering[C] = nextValueNumber;
444      return nextValueNumber++;
445    }
446
447    if (local_dep.isDef()) {
448      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
449
450      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
451        valueNumbering[C] = nextValueNumber;
452        return nextValueNumber++;
453      }
454
455      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
456        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
457        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
458        if (c_vn != cd_vn) {
459          valueNumbering[C] = nextValueNumber;
460          return nextValueNumber++;
461        }
462      }
463
464      uint32_t v = lookup_or_add(local_cdep);
465      valueNumbering[C] = v;
466      return v;
467    }
468
469    // Non-local case.
470    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
471      MD->getNonLocalCallDependency(CallSite(C));
472    // FIXME: call/call dependencies for readonly calls should return def, not
473    // clobber!  Move the checking logic to MemDep!
474    CallInst* cdep = 0;
475
476    // Check to see if we have a single dominating call instruction that is
477    // identical to C.
478    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
479      const NonLocalDepEntry *I = &deps[i];
480      // Ignore non-local dependencies.
481      if (I->getResult().isNonLocal())
482        continue;
483
484      // We don't handle non-dependencies.  If we already have a call, reject
485      // instruction dependencies.
486      if (I->getResult().isClobber() || cdep != 0) {
487        cdep = 0;
488        break;
489      }
490
491      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
492      // FIXME: All duplicated with non-local case.
493      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
494        cdep = NonLocalDepCall;
495        continue;
496      }
497
498      cdep = 0;
499      break;
500    }
501
502    if (!cdep) {
503      valueNumbering[C] = nextValueNumber;
504      return nextValueNumber++;
505    }
506
507    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
508      valueNumbering[C] = nextValueNumber;
509      return nextValueNumber++;
510    }
511    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
512      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
513      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
514      if (c_vn != cd_vn) {
515        valueNumbering[C] = nextValueNumber;
516        return nextValueNumber++;
517      }
518    }
519
520    uint32_t v = lookup_or_add(cdep);
521    valueNumbering[C] = v;
522    return v;
523
524  } else {
525    valueNumbering[C] = nextValueNumber;
526    return nextValueNumber++;
527  }
528}
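// For illustration, a hypothetical example (assuming @f is a readonly
// function and nothing between the two calls writes memory):
//
//   %c1 = call i32 @f(i32 %x)
//   ...
//   %c2 = call i32 @f(i32 %x)
//
// Memory dependence reports the first call as the definition for the second,
// and the argument value numbers match, so %c2 is given the same value number
// as %c1 and can later be eliminated in favor of it.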
529
530/// lookup_or_add - Returns the value number for the specified value, assigning
531/// it a new number if it did not have one before.
532uint32_t ValueTable::lookup_or_add(Value *V) {
533  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
534  if (VI != valueNumbering.end())
535    return VI->second;
536
537  if (!isa<Instruction>(V)) {
538    valueNumbering[V] = nextValueNumber;
539    return nextValueNumber++;
540  }
541
542  Instruction* I = cast<Instruction>(V);
543  Expression exp;
544  switch (I->getOpcode()) {
545    case Instruction::Call:
546      return lookup_or_add_call(cast<CallInst>(I));
547    case Instruction::Add:
548    case Instruction::FAdd:
549    case Instruction::Sub:
550    case Instruction::FSub:
551    case Instruction::Mul:
552    case Instruction::FMul:
553    case Instruction::UDiv:
554    case Instruction::SDiv:
555    case Instruction::FDiv:
556    case Instruction::URem:
557    case Instruction::SRem:
558    case Instruction::FRem:
559    case Instruction::Shl:
560    case Instruction::LShr:
561    case Instruction::AShr:
562    case Instruction::And:
563    case Instruction::Or :
564    case Instruction::Xor:
565      exp = create_expression(cast<BinaryOperator>(I));
566      break;
567    case Instruction::ICmp:
568    case Instruction::FCmp:
569      exp = create_expression(cast<CmpInst>(I));
570      break;
571    case Instruction::Trunc:
572    case Instruction::ZExt:
573    case Instruction::SExt:
574    case Instruction::FPToUI:
575    case Instruction::FPToSI:
576    case Instruction::UIToFP:
577    case Instruction::SIToFP:
578    case Instruction::FPTrunc:
579    case Instruction::FPExt:
580    case Instruction::PtrToInt:
581    case Instruction::IntToPtr:
582    case Instruction::BitCast:
583      exp = create_expression(cast<CastInst>(I));
584      break;
585    case Instruction::Select:
586      exp = create_expression(cast<SelectInst>(I));
587      break;
588    case Instruction::ExtractElement:
589      exp = create_expression(cast<ExtractElementInst>(I));
590      break;
591    case Instruction::InsertElement:
592      exp = create_expression(cast<InsertElementInst>(I));
593      break;
594    case Instruction::ShuffleVector:
595      exp = create_expression(cast<ShuffleVectorInst>(I));
596      break;
597    case Instruction::ExtractValue:
598      exp = create_expression(cast<ExtractValueInst>(I));
599      break;
600    case Instruction::InsertValue:
601      exp = create_expression(cast<InsertValueInst>(I));
602      break;
603    case Instruction::GetElementPtr:
604      exp = create_expression(cast<GetElementPtrInst>(I));
605      break;
606    default:
607      valueNumbering[V] = nextValueNumber;
608      return nextValueNumber++;
609  }
610
611  uint32_t& e = expressionNumbering[exp];
612  if (!e) e = nextValueNumber++;
613  valueNumbering[V] = e;
614  return e;
615}
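// For illustration, a hypothetical example: given
//
//   %a = add i32 %x, %y
//   %b = add i32 %x, %y
//
// both instructions build identical Expressions (same opcode, type, and
// operand value numbers), so expressionNumbering maps them to a single entry
// and %b receives the same value number as %a.  Note that "%c = add i32 %y, %x"
// would get a different number, since this table does not canonicalize
// commutative operands.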
616
617/// lookup - Returns the value number of the specified value. Fails if
618/// the value has not yet been numbered.
619uint32_t ValueTable::lookup(Value *V) const {
620  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
621  assert(VI != valueNumbering.end() && "Value not numbered?");
622  return VI->second;
623}
624
625/// clear - Remove all entries from the ValueTable
626void ValueTable::clear() {
627  valueNumbering.clear();
628  expressionNumbering.clear();
629  nextValueNumber = 1;
630}
631
632/// erase - Remove a value from the value numbering
633void ValueTable::erase(Value *V) {
634  valueNumbering.erase(V);
635}
636
637/// verifyRemoved - Verify that the value is removed from all internal data
638/// structures.
639void ValueTable::verifyRemoved(const Value *V) const {
640  for (DenseMap<Value*, uint32_t>::const_iterator
641         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
642    assert(I->first != V && "Inst still occurs in value numbering map!");
643  }
644}
645
646//===----------------------------------------------------------------------===//
647//                                GVN Pass
648//===----------------------------------------------------------------------===//
649
650namespace {
651  struct ValueNumberScope {
652    ValueNumberScope* parent;
653    DenseMap<uint32_t, Value*> table;
654
655    ValueNumberScope(ValueNumberScope* p) : parent(p) { }
656  };
657}
658
659namespace {
660
661  class GVN : public FunctionPass {
662    bool runOnFunction(Function &F);
663  public:
664    static char ID; // Pass identification, replacement for typeid
665    explicit GVN(bool noloads = false)
666        : FunctionPass(ID), NoLoads(noloads), MD(0) {
667      initializeGVNPass(*PassRegistry::getPassRegistry());
668    }
669
670  private:
671    bool NoLoads;
672    MemoryDependenceAnalysis *MD;
673    DominatorTree *DT;
674    const TargetData* TD;
675
676    ValueTable VN;
677    DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
678
679    // List of critical edges to be split between iterations.
680    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
681
682    // This transformation requires dominator info
683    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
684      AU.addRequired<DominatorTree>();
685      if (!NoLoads)
686        AU.addRequired<MemoryDependenceAnalysis>();
687      AU.addRequired<AliasAnalysis>();
688
689      AU.addPreserved<DominatorTree>();
690      AU.addPreserved<AliasAnalysis>();
691    }
692
693    // Helper functions
694    // FIXME: eliminate or document these better
695    bool processLoad(LoadInst* L,
696                     SmallVectorImpl<Instruction*> &toErase);
697    bool processInstruction(Instruction *I,
698                            SmallVectorImpl<Instruction*> &toErase);
699    bool processNonLocalLoad(LoadInst* L,
700                             SmallVectorImpl<Instruction*> &toErase);
701    bool processBlock(BasicBlock *BB);
702    void dump(DenseMap<uint32_t, Value*>& d);
703    bool iterateOnFunction(Function &F);
704    Value *CollapsePhi(PHINode* p);
705    bool performPRE(Function& F);
706    Value *lookupNumber(BasicBlock *BB, uint32_t num);
707    void cleanupGlobalSets();
708    void verifyRemoved(const Instruction *I) const;
709    bool splitCriticalEdges();
710  };
711
712  char GVN::ID = 0;
713}
714
715// createGVNPass - The public interface to this file...
716FunctionPass *llvm::createGVNPass(bool NoLoads) {
717  return new GVN(NoLoads);
718}
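// A minimal usage sketch (hypothetical; TheModule and F stand for an existing
// Module* and Function& in client code of this LLVM version):
//
//   FunctionPassManager FPM(TheModule);
//   FPM.add(new TargetData(TheModule));
//   FPM.add(createGVNPass(/*NoLoads=*/false));
//   FPM.run(F);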
719
720INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
721INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
722INITIALIZE_PASS_DEPENDENCY(DominatorTree)
723INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
724INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
725
726void GVN::dump(DenseMap<uint32_t, Value*>& d) {
727  errs() << "{\n";
728  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
729       E = d.end(); I != E; ++I) {
730      errs() << I->first << "\n";
731      I->second->dump();
732  }
733  errs() << "}\n";
734}
735
736static bool isSafeReplacement(PHINode* p, Instruction *inst) {
737  if (!isa<PHINode>(inst))
738    return true;
739
740  for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
741       UI != E; ++UI)
742    if (PHINode* use_phi = dyn_cast<PHINode>(*UI))
743      if (use_phi->getParent() == inst->getParent())
744        return false;
745
746  return true;
747}
748
749Value *GVN::CollapsePhi(PHINode *PN) {
750  Value *ConstVal = PN->hasConstantValue(DT);
751  if (!ConstVal) return 0;
752
753  Instruction *Inst = dyn_cast<Instruction>(ConstVal);
754  if (!Inst)
755    return ConstVal;
756
757  if (DT->dominates(Inst, PN))
758    if (isSafeReplacement(PN, Inst))
759      return Inst;
760  return 0;
761}
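// For illustration, a hypothetical example: in
//
//   %p = phi i32 [ %v, %bb1 ], [ %v, %bb2 ]
//
// hasConstantValue() reports %v; if %v is a non-instruction (e.g. a constant
// or argument) it is returned directly, and if it is an instruction it is
// returned only when it dominates the PHI and isSafeReplacement() holds.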
762
763/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
764/// we're analyzing is fully available in the specified block.  As we go, keep
765/// track of which blocks we know are fully available in FullyAvailableBlocks.
766/// This map holds one of the following states for each block:
767///   0) we know the block *is not* fully available.
768///   1) we know the block *is* fully available.
769///   2) we do not know whether the block is fully available or not, but we are
770///      currently speculating that it will be.
771///   3) we are speculating for this block and have used that to speculate for
772///      other blocks.
773static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
774                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
775  // Optimistically assume that the block is fully available and check to see
776  // if we already know about this block in one lookup.
777  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
778    FullyAvailableBlocks.insert(std::make_pair(BB, 2));
779
780  // If the entry already existed for this block, return the precomputed value.
781  if (!IV.second) {
782    // If this is a speculative "available" value, mark it as being used for
783    // speculation of other blocks.
784    if (IV.first->second == 2)
785      IV.first->second = 3;
786    return IV.first->second != 0;
787  }
788
789  // Otherwise, see if it is fully available in all predecessors.
790  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
791
792  // If this block has no predecessors, it isn't live-in here.
793  if (PI == PE)
794    goto SpeculationFailure;
795
796  for (; PI != PE; ++PI)
797    // If the value isn't fully available in one of our predecessors, then it
798    // isn't fully available in this block either.  Undo our previous
799    // optimistic assumption and bail out.
800    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
801      goto SpeculationFailure;
802
803  return true;
804
805// SpeculationFailure - If we get here, we found out that this is not, after
806// all, a fully-available block.  We have a problem if we speculated on this and
807// used the speculation to mark other blocks as available.
808SpeculationFailure:
809  char &BBVal = FullyAvailableBlocks[BB];
810
811  // If we didn't speculate on this, just return with it set to false.
812  if (BBVal == 2) {
813    BBVal = 0;
814    return false;
815  }
816
817  // If we did speculate on this value, we could have blocks set to 1 that are
818  // incorrect.  Walk the (transitive) successors of this block and mark them as
819  // 0 if set to one.
820  SmallVector<BasicBlock*, 32> BBWorklist;
821  BBWorklist.push_back(BB);
822
823  do {
824    BasicBlock *Entry = BBWorklist.pop_back_val();
825    // Note that this sets blocks to 0 (unavailable) if they happen to not
826    // already be in FullyAvailableBlocks.  This is safe.
827    char &EntryVal = FullyAvailableBlocks[Entry];
828    if (EntryVal == 0) continue;  // Already unavailable.
829
830    // Mark as unavailable.
831    EntryVal = 0;
832
833    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
834      BBWorklist.push_back(*I);
835  } while (!BBWorklist.empty());
836
837  return false;
838}
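// For illustration, a hypothetical CFG where the value is known available in
// blocks A and B (pre-seeded as 1 in FullyAvailableBlocks), both of which
// branch to C, and C branches to D:
//
//   A ---> C ---> D
//   B --/
//
// A query on D optimistically marks D (state 2) and recurses into its lone
// predecessor C, which in turn finds the value available in A and B, so the
// query succeeds for C and therefore for D.  If B instead lacked the value,
// the SpeculationFailure path would run and clear any blocks that had been
// optimistically marked.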
839
840
841/// CanCoerceMustAliasedValueToLoad - Return true if
842/// CoerceAvailableValueToLoadType will succeed.
843static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
844                                            const Type *LoadTy,
845                                            const TargetData &TD) {
846  // If the loaded or stored value is a first-class array or struct, don't try
847  // to transform them.  We need to be able to bitcast to integer.
848  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
849      StoredVal->getType()->isStructTy() ||
850      StoredVal->getType()->isArrayTy())
851    return false;
852
853  // The store has to be at least as big as the load.
854  if (TD.getTypeSizeInBits(StoredVal->getType()) <
855        TD.getTypeSizeInBits(LoadTy))
856    return false;
857
858  return true;
859}
860
861
862/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
863/// then a load from a must-aliased pointer of a different type, try to coerce
864/// the stored value.  LoadedTy is the type of the load we want to replace and
865/// InsertPt is the place to insert new instructions.
866///
867/// If we can't do it, return null.
868static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
869                                             const Type *LoadedTy,
870                                             Instruction *InsertPt,
871                                             const TargetData &TD) {
872  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
873    return 0;
874
875  const Type *StoredValTy = StoredVal->getType();
876
877  uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
878  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
879
880  // If the store and reload are the same size, we can always reuse it.
881  if (StoreSize == LoadSize) {
882    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
883      // Pointer to Pointer -> use bitcast.
884      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
885    }
886
887    // Convert source pointers to integers, which can be bitcast.
888    if (StoredValTy->isPointerTy()) {
889      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
890      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
891    }
892
893    const Type *TypeToCastTo = LoadedTy;
894    if (TypeToCastTo->isPointerTy())
895      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
896
897    if (StoredValTy != TypeToCastTo)
898      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
899
900    // Cast to pointer if the load needs a pointer type.
901    if (LoadedTy->isPointerTy())
902      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
903
904    return StoredVal;
905  }
906
907  // If the loaded value is smaller than the available value, then we can
908  // extract out a piece from it.  If the available value is too small, then we
909  // can't do anything.
910  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
911
912  // Convert source pointers to integers, which can be manipulated.
913  if (StoredValTy->isPointerTy()) {
914    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
915    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
916  }
917
918  // Convert vectors and fp to integer, which can be manipulated.
919  if (!StoredValTy->isIntegerTy()) {
920    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
921    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
922  }
923
924  // If this is a big-endian system, we need to shift the value down to the low
925  // bits so that a truncate will work.
926  if (TD.isBigEndian()) {
927    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
928    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
929  }
930
931  // Truncate the integer to the right size now.
932  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
933  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
934
935  if (LoadedTy == NewIntTy)
936    return StoredVal;
937
938  // If the result is a pointer, inttoptr.
939  if (LoadedTy->isPointerTy())
940    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
941
942  // Otherwise, bitcast.
943  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
944}
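// For illustration, a hypothetical example on a little-endian target: if an
// i64 was stored and an i32 is reloaded from a must-aliased pointer,
//
//   store i64 %v, i64* %p
//   %x = load i32* %q          ; %q must-aliases %p
//
// the stored value can be coerced instead of re-loading, effectively giving
//
//   %x = trunc i64 %v to i32
//
// (on a big-endian target an lshr by 32 bits is inserted before the trunc so
// that the bytes the load would have read end up in the low bits).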
945
946/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
947/// be expressed as a base pointer plus a constant offset.  Return the base and
948/// offset to the caller.
949static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
950                                        const TargetData &TD) {
951  Operator *PtrOp = dyn_cast<Operator>(Ptr);
952  if (PtrOp == 0) return Ptr;
953
954  // Just look through bitcasts.
955  if (PtrOp->getOpcode() == Instruction::BitCast)
956    return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
957
958  // If this is a GEP with constant indices, we can look through it.
959  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
960  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
961
962  gep_type_iterator GTI = gep_type_begin(GEP);
963  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
964       ++I, ++GTI) {
965    ConstantInt *OpC = cast<ConstantInt>(*I);
966    if (OpC->isZero()) continue;
967
968    // Handle struct and array indices, which add their offset to the pointer.
969    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
970      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
971    } else {
972      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
973      Offset += OpC->getSExtValue()*Size;
974    }
975  }
976
977  // Re-sign extend from the pointer size if needed to get overflow edge cases
978  // right.
979  unsigned PtrSize = TD.getPointerSizeInBits();
980  if (PtrSize < 64)
981    Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
982
983  return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
984}
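// For illustration, a hypothetical example (assuming a typical layout where
// i32 is 4 bytes and i16 is 2 bytes):
//
//   %a = getelementptr { i32, [4 x i16] }* %base, i32 0, i32 1, i32 2
//
// Every index is a ConstantInt, so the pointer decomposes into %base plus a
// constant byte offset of 4 (the struct field at index 1) + 2*2 (two i16
// array elements) = 8, which is what gets accumulated into Offset.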
985
986
987/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
988/// memdep query of a load that ends up being a clobbering memory write (store,
989/// memset, memcpy, memmove).  This means that the write *may* provide bits used
990/// by the load but we can't be sure because the pointers don't mustalias.
991///
992/// Check this case to see if there is anything more we can do before we give
993/// up.  This returns -1 if we have to give up, or a byte number in the stored
994/// value of the piece that feeds the load.
995static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
996                                          Value *WritePtr,
997                                          uint64_t WriteSizeInBits,
998                                          const TargetData &TD) {
999  // If the loaded or stored value is a first-class array or struct, don't try
1000  // to transform them.  We need to be able to bitcast to integer.
1001  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
1002    return -1;
1003
1004  int64_t StoreOffset = 0, LoadOffset = 0;
1005  Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
1006  Value *LoadBase =
1007    GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
1008  if (StoreBase != LoadBase)
1009    return -1;
1010
1011  // If the load and store are to the exact same address, they should have been
1012  // a must alias.  AA must have gotten confused.
1013  // FIXME: Study to see if/when this happens.  One case is forwarding a memset
1014  // to a load from the base of the memset.
1015#if 0
1016  if (LoadOffset == StoreOffset) {
1017    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
1018    << "Base       = " << *StoreBase << "\n"
1019    << "Store Ptr  = " << *WritePtr << "\n"
1020    << "Store Offs = " << StoreOffset << "\n"
1021    << "Load Ptr   = " << *LoadPtr << "\n";
1022    abort();
1023  }
1024#endif
1025
1026  // If the load and store don't overlap at all, the store doesn't provide
1027  // anything to the load.  In this case, they really don't alias at all; AA
1028  // must have gotten confused.
1029  // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
1030  // remove this check, as it is duplicated with what we have below.
1031  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
1032
1033  if ((WriteSizeInBits & 7) | (LoadSize & 7))
1034    return -1;
1035  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
1036  LoadSize >>= 3;
1037
1038
1039  bool isAAFailure = false;
1040  if (StoreOffset < LoadOffset)
1041    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
1042  else
1043    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
1044
1045  if (isAAFailure) {
1046#if 0
1047    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
1048    << "Base       = " << *StoreBase << "\n"
1049    << "Store Ptr  = " << *WritePtr << "\n"
1050    << "Store Offs = " << StoreOffset << "\n"
1051    << "Load Ptr   = " << *LoadPtr << "\n";
1052    abort();
1053#endif
1054    return -1;
1055  }
1056
1057  // If the Load isn't completely contained within the stored bits, we don't
1058  // have all the bits to feed it.  We could do something crazy in the future
1059  // (issue a smaller load then merge the bits in) but this seems unlikely to be
1060  // valuable.
1061  if (StoreOffset > LoadOffset ||
1062      StoreOffset+StoreSize < LoadOffset+LoadSize)
1063    return -1;
1064
1065  // Okay, we can do this transformation.  Return the number of bytes into the
1066  // store that the load is.
1067  return LoadOffset-StoreOffset;
1068}
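// For illustration, a worked example in bytes: if the clobbering store covers
// bytes [0,8) of some common base and the load reads bytes [4,8) of the same
// base, the load is fully contained in the stored bits and the function
// returns 4, the byte offset of the loaded piece within the stored value.  A
// load of bytes [6,10) would not be fully covered and would return -1.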
1069
1070/// AnalyzeLoadFromClobberingStore - This function is called when we have a
1071/// memdep query of a load that ends up being a clobbering store.
1072static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
1073                                          StoreInst *DepSI,
1074                                          const TargetData &TD) {
1075  // Cannot handle reading from store of first-class aggregate yet.
1076  if (DepSI->getValueOperand()->getType()->isStructTy() ||
1077      DepSI->getValueOperand()->getType()->isArrayTy())
1078    return -1;
1079
1080  Value *StorePtr = DepSI->getPointerOperand();
1081  uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
1082  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
1083                                        StorePtr, StoreSize, TD);
1084}
1085
1086static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
1087                                            MemIntrinsic *MI,
1088                                            const TargetData &TD) {
1089  // If the mem operation is a non-constant size, we can't handle it.
1090  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
1091  if (SizeCst == 0) return -1;
1092  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
1093
1094  // If this is memset, we just need to see if the offset is valid in the size
1095  // of the memset.
1096  if (MI->getIntrinsicID() == Intrinsic::memset)
1097    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
1098                                          MemSizeInBits, TD);
1099
1100  // If we have a memcpy/memmove, the only case we can handle is if this is a
1101  // copy from constant memory.  In that case, we can read directly from the
1102  // constant memory.
1103  MemTransferInst *MTI = cast<MemTransferInst>(MI);
1104
1105  Constant *Src = dyn_cast<Constant>(MTI->getSource());
1106  if (Src == 0) return -1;
1107
1108  GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
1109  if (GV == 0 || !GV->isConstant()) return -1;
1110
1111  // See if the access is within the bounds of the transfer.
1112  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
1113                                              MI->getDest(), MemSizeInBits, TD);
1114  if (Offset == -1)
1115    return Offset;
1116
1117  // Otherwise, see if we can constant fold a load from the constant with the
1118  // offset applied as appropriate.
1119  Src = ConstantExpr::getBitCast(Src,
1120                                 llvm::Type::getInt8PtrTy(Src->getContext()));
1121  Constant *OffsetCst =
1122    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
1123  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
1124  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
1125  if (ConstantFoldLoadFromConstPtr(Src, &TD))
1126    return Offset;
1127  return -1;
1128}
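// For illustration, a hypothetical example: a 16-byte memcpy whose source is
// a constant global @cst clobbers a later 4-byte load at destination offset 8.
// Because the copy reads constant memory and the loaded bytes lie inside the
// copied range, the function returns 8, provided a load from @cst at offset 8
// constant folds; a non-constant source or an out-of-range access returns -1.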
1129
1130
1131/// GetStoreValueForLoad - This function is called when we have a
1132/// memdep query of a load that ends up being a clobbering store.  This means
1133/// that the store *may* provide bits used by the load but we can't be sure
1134/// because the pointers don't mustalias.  Check this case to see if there is
1135/// anything more we can do before we give up.
1136static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
1137                                   const Type *LoadTy,
1138                                   Instruction *InsertPt, const TargetData &TD){
1139  LLVMContext &Ctx = SrcVal->getType()->getContext();
1140
1141  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
1142  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
1143
1144  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
1145
1146  // Compute which bits of the stored value are being used by the load.  Convert
1147  // to an integer type to start with.
1148  if (SrcVal->getType()->isPointerTy())
1149    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
1150  if (!SrcVal->getType()->isIntegerTy())
1151    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
1152                                   "tmp");
1153
1154  // Shift the bits to the least significant depending on endianness.
1155  unsigned ShiftAmt;
1156  if (TD.isLittleEndian())
1157    ShiftAmt = Offset*8;
1158  else
1159    ShiftAmt = (StoreSize-LoadSize-Offset)*8;
1160
1161  if (ShiftAmt)
1162    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");
1163
1164  if (LoadSize != StoreSize)
1165    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
1166                                 "tmp");
1167
1168  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
1169}
1170
1171/// GetMemInstValueForLoad - This function is called when we have a
1172/// memdep query of a load that ends up being a clobbering mem intrinsic.
1173static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
1174                                     const Type *LoadTy, Instruction *InsertPt,
1175                                     const TargetData &TD){
1176  LLVMContext &Ctx = LoadTy->getContext();
1177  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
1178
1179  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
1180
1181  // We know that this method is only called when the mem transfer fully
1182  // provides the bits for the load.
1183  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
1184    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable,
1185    // regardless of what the offset is.
1186    Value *Val = MSI->getValue();
1187    if (LoadSize != 1)
1188      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
1189
1190    Value *OneElt = Val;
1191
1192    // Splat the value out to the right number of bits.
1193    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
1194      // If we can double the number of bytes set, do it.
1195      if (NumBytesSet*2 <= LoadSize) {
1196        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
1197        Val = Builder.CreateOr(Val, ShVal);
1198        NumBytesSet <<= 1;
1199        continue;
1200      }
1201
1202      // Otherwise insert one byte at a time.
1203      Value *ShVal = Builder.CreateShl(Val, 1*8);
1204      Val = Builder.CreateOr(OneElt, ShVal);
1205      ++NumBytesSet;
1206    }
1207
1208    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
1209  }
1210
1211  // Otherwise, this is a memcpy/memmove from a constant global.
1212  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
1213  Constant *Src = cast<Constant>(MTI->getSource());
1214
1215  // Otherwise, see if we can constant fold a load from the constant with the
1216  // offset applied as appropriate.
1217  Src = ConstantExpr::getBitCast(Src,
1218                                 llvm::Type::getInt8PtrTy(Src->getContext()));
1219  Constant *OffsetCst =
1220  ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
1221  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
1222  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
1223  return ConstantFoldLoadFromConstPtr(Src, &TD);
1224}
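// Worked example (hypothetical): forwarding memset(P, 0xAB, n) to a 4-byte
// load of P.  Val starts as the i8 0xAB, is zero-extended to i32 0x000000AB,
// and is then widened by doubling the number of set bytes each iteration:
//
//   0x000000AB | (0x000000AB << 8)  = 0x0000ABAB
//   0x0000ABAB | (0x0000ABAB << 16) = 0xABABABAB
//
// so the load sees the splatted value 0xABABABAB (the same shift/or sequence
// is emitted as instructions when the memset value is not a constant).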
1225
1226namespace {
1227
1228struct AvailableValueInBlock {
1229  /// BB - The basic block in question.
1230  BasicBlock *BB;
1231  enum ValType {
1232    SimpleVal,  // A simple offsetted value that is accessed.
1233    MemIntrin   // A memory intrinsic which is loaded from.
1234  };
1235
1236  /// V - The value that is live out of the block.
1237  PointerIntPair<Value *, 1, ValType> Val;
1238
1239  /// Offset - The byte offset in Val that is interesting for the load query.
1240  unsigned Offset;
1241
1242  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
1243                                   unsigned Offset = 0) {
1244    AvailableValueInBlock Res;
1245    Res.BB = BB;
1246    Res.Val.setPointer(V);
1247    Res.Val.setInt(SimpleVal);
1248    Res.Offset = Offset;
1249    return Res;
1250  }
1251
1252  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
1253                                     unsigned Offset = 0) {
1254    AvailableValueInBlock Res;
1255    Res.BB = BB;
1256    Res.Val.setPointer(MI);
1257    Res.Val.setInt(MemIntrin);
1258    Res.Offset = Offset;
1259    return Res;
1260  }
1261
1262  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
1263  Value *getSimpleValue() const {
1264    assert(isSimpleValue() && "Wrong accessor");
1265    return Val.getPointer();
1266  }
1267
1268  MemIntrinsic *getMemIntrinValue() const {
1269    assert(!isSimpleValue() && "Wrong accessor");
1270    return cast<MemIntrinsic>(Val.getPointer());
1271  }
1272
1273  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
1274  /// defined here to the specified type.  This handles various coercion cases.
1275  Value *MaterializeAdjustedValue(const Type *LoadTy,
1276                                  const TargetData *TD) const {
1277    Value *Res;
1278    if (isSimpleValue()) {
1279      Res = getSimpleValue();
1280      if (Res->getType() != LoadTy) {
1281        assert(TD && "Need target data to handle type mismatch case");
1282        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
1283                                   *TD);
1284
1285        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
1286                     << *getSimpleValue() << '\n'
1287                     << *Res << '\n' << "\n\n\n");
1288      }
1289    } else {
1290      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
1291                                   LoadTy, BB->getTerminator(), *TD);
1292      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1293                   << "  " << *getMemIntrinValue() << '\n'
1294                   << *Res << '\n' << "\n\n\n");
1295    }
1296    return Res;
1297  }
1298};
1299
1300}
1301
1302/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
1303/// construct SSA form, allowing us to eliminate LI.  This returns the value
1304/// that should be used at LI's definition site.
1305static Value *ConstructSSAForLoadSet(LoadInst *LI,
1306                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
1307                                     const TargetData *TD,
1308                                     const DominatorTree &DT,
1309                                     AliasAnalysis *AA) {
1310  // Check for the fully redundant, dominating load case.  In this case, we can
1311  // just use the dominating value directly.
1312  if (ValuesPerBlock.size() == 1 &&
1313      DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
1314    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);
1315
1316  // Otherwise, we have to construct SSA form.
1317  SmallVector<PHINode*, 8> NewPHIs;
1318  SSAUpdater SSAUpdate(&NewPHIs);
1319  SSAUpdate.Initialize(LI->getType(), LI->getName());
1320
1321  const Type *LoadTy = LI->getType();
1322
1323  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
1324    const AvailableValueInBlock &AV = ValuesPerBlock[i];
1325    BasicBlock *BB = AV.BB;
1326
1327    if (SSAUpdate.HasValueForBlock(BB))
1328      continue;
1329
1330    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
1331  }
1332
1333  // Perform PHI construction.
1334  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
1335
1336  // If new PHI nodes were created, notify alias analysis.
1337  if (V->getType()->isPointerTy())
1338    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
1339      AA->copyValue(LI, NewPHIs[i]);
1340
1341  return V;
1342}
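// For illustration, a hypothetical example: if the load in block %merge is
// available as %v1 coming from predecessor %left and as %v2 coming from
// %right, SSAUpdater materializes
//
//   merge:
//     %ld = phi i32 [ %v1, %left ], [ %v2, %right ]
//
// and that PHI is returned as the value to use in place of the load.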
1343
1344static bool isLifetimeStart(const Instruction *Inst) {
1345  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
1346    return II->getIntrinsicID() == Intrinsic::lifetime_start;
1347  return false;
1348}
1349
1350/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
1351/// non-local by performing PHI construction.
1352bool GVN::processNonLocalLoad(LoadInst *LI,
1353                              SmallVectorImpl<Instruction*> &toErase) {
1354  // Find the non-local dependencies of the load.
1355  SmallVector<NonLocalDepResult, 64> Deps;
1356  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
1357  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
1358  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
1359  //             << Deps.size() << *LI << '\n');
1360
1361  // If we had to process more than one hundred blocks to find the
1362  // dependencies, this load isn't worth worrying about.  Optimizing
1363  // it will be too expensive.
1364  if (Deps.size() > 100)
1365    return false;
1366
1367  // If we had a phi translation failure, we'll have a single entry which is a
1368  // clobber in the current block.  Reject this early.
1369  if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
1370    DEBUG(
1371      dbgs() << "GVN: non-local load ";
1372      WriteAsOperand(dbgs(), LI);
1373      dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
1374    );
1375    return false;
1376  }
1377
1378  // Filter out useless results (non-locals, etc).  Keep track of the blocks
1379  // where we have a value available in ValuesPerBlock; also track whether we
1380  // see dependencies that produce an unknown value for the load (such as a call
1381  // that could potentially clobber the load).
1382  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
1383  SmallVector<BasicBlock*, 16> UnavailableBlocks;
1384
1385  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
1386    BasicBlock *DepBB = Deps[i].getBB();
1387    MemDepResult DepInfo = Deps[i].getResult();
1388
1389    if (DepInfo.isClobber()) {
1390      // The address being loaded in this non-local block may not be the same as
1391      // the pointer operand of the load if PHI translation occurs.  Make sure
1392      // to consider the right address.
1393      Value *Address = Deps[i].getAddress();
1394
1395      // If the dependence is to a store that writes to a superset of the bits
1396      // read by the load, we can extract the bits we need for the load from the
1397      // stored value.
1398      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
1399        if (TD && Address) {
1400          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
1401                                                      DepSI, *TD);
1402          if (Offset != -1) {
1403            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1404                                                       DepSI->getValueOperand(),
1405                                                                Offset));
1406            continue;
1407          }
1408        }
1409      }
1410
1411      // If the clobbering value is a memset/memcpy/memmove, see if we can
1412      // forward a value on from it.
1413      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
1414        if (TD && Address) {
1415          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
1416                                                        DepMI, *TD);
1417          if (Offset != -1) {
1418            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
1419                                                                  Offset));
1420            continue;
1421          }
1422        }
1423      }
1424
1425      UnavailableBlocks.push_back(DepBB);
1426      continue;
1427    }
1428
1429    Instruction *DepInst = DepInfo.getInst();
1430
1431    // Loading the allocation -> undef.
1432    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
1433        // Loading immediately after lifetime begin -> undef.
1434        isLifetimeStart(DepInst)) {
1435      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1436                                             UndefValue::get(LI->getType())));
1437      continue;
1438    }
1439
1440    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
1441      // Reject loads and stores that are to the same address but are of
1442      // different types if we have to.
1443      if (S->getValueOperand()->getType() != LI->getType()) {
1444        // If the stored value is larger than or equal to the loaded value, we can
1445        // reuse it.
1446        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
1447                                                        LI->getType(), *TD)) {
1448          UnavailableBlocks.push_back(DepBB);
1449          continue;
1450        }
1451      }
1452
1453      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1454                                                         S->getValueOperand()));
1455      continue;
1456    }
1457
1458    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
1459      // If the types mismatch and we can't handle it, reject reuse of the load.
1460      if (LD->getType() != LI->getType()) {
1461        // If the stored value is larger than or equal to the loaded value, we can
1462        // reuse it.
1463        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
1464          UnavailableBlocks.push_back(DepBB);
1465          continue;
1466        }
1467      }
1468      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
1469      continue;
1470    }
1471
1472    UnavailableBlocks.push_back(DepBB);
1473    continue;
1474  }
1475
1476  // If we have no predecessors that produce a known value for this load, exit
1477  // early.
1478  if (ValuesPerBlock.empty()) return false;
1479
1480  // If all of the instructions we depend on produce a known value for this
1481  // load, then it is fully redundant and we can use PHI insertion to compute
1482  // its value.  Insert PHIs and remove the fully redundant value now.
1483  if (UnavailableBlocks.empty()) {
1484    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
1485
1486    // Perform PHI construction.
1487    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
1488                                      VN.getAliasAnalysis());
1489    LI->replaceAllUsesWith(V);
1490
1491    if (isa<PHINode>(V))
1492      V->takeName(LI);
1493    if (V->getType()->isPointerTy())
1494      MD->invalidateCachedPointerInfo(V);
1495    VN.erase(LI);
1496    toErase.push_back(LI);
1497    ++NumGVNLoad;
1498    return true;
1499  }
1500
1501  if (!EnablePRE || !EnableLoadPRE)
1502    return false;
1503
1504  // Okay, we have *some* definitions of the value.  This means that the value
1505  // is available in some of our (transitive) predecessors.  Let's think about
1506  // doing PRE of this load.  This will involve inserting a new load into the
1507  // predecessor when it's not available.  We could do this in general, but
1508  // prefer not to increase code size.  As such, we only do this when we know
1509  // that we only have to insert *one* load (which means we're basically moving
1510  // the load, not inserting a new one).
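      // Schematically (a hedged illustration, not taken from a test case):
      //
      //     BB1: store i32 %x, i32* %P       BB2: ; no def of *%P
      //              \                        /
      //               v                      v
      //            LoadBB: %v = load i32* %P
      //
      // The value is available from BB1 but not from BB2; load PRE inserts one
      // reload at the end of BB2 and merges it with %x via a PHI in LoadBB.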
1511
1512  SmallPtrSet<BasicBlock *, 4> Blockers;
1513  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
1514    Blockers.insert(UnavailableBlocks[i]);
1515
1516  // Let's find the first basic block with more than one predecessor, walking
1517  // backwards through single-predecessor blocks if needed.
1518  BasicBlock *LoadBB = LI->getParent();
1519  BasicBlock *TmpBB = LoadBB;
1520
1521  bool isSinglePred = false;
1522  bool allSingleSucc = true;
1523  while (TmpBB->getSinglePredecessor()) {
1524    isSinglePred = true;
1525    TmpBB = TmpBB->getSinglePredecessor();
1526    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1527      return false;
1528    if (Blockers.count(TmpBB))
1529      return false;
1530
1531    // If any of these blocks has more than one successor (i.e. if the edge we
1532    // just traversed was critical), then there are other paths through this
1533    // block along which the load may not be anticipated.  Hoisting the load
1534    // above this block would be adding the load to execution paths along
1535    // which it was not previously executed.
1536    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1537      return false;
1538  }
1539
1540  assert(TmpBB);
1541  LoadBB = TmpBB;
1542
1543  // FIXME: It is extremely unclear what this loop is doing, other than
1544  // artificially restricting loadpre.
1545  if (isSinglePred) {
1546    bool isHot = false;
1547    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
1548      const AvailableValueInBlock &AV = ValuesPerBlock[i];
1549      if (AV.isSimpleValue())
1550        // The load is "hot": it dominates the instruction that produces its
1551        // value, which can only happen if both are inside a loop.
1552        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
1553          if (DT->dominates(LI, I)) {
1554            isHot = true;
1555            break;
1556          }
1557    }
1558
1559    // We are only interested in "hot" loads; otherwise PRE here is more
1560    // likely to pessimize the code than to help it.
1561    if (!isHot)
1562      return false;
1563  }
1564
1565  // Check to see how many predecessors have the loaded value fully
1566  // available.
1567  DenseMap<BasicBlock*, Value*> PredLoads;
1568  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
1569  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
1570    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
1571  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
1572    FullyAvailableBlocks[UnavailableBlocks[i]] = false;
1573
1574  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
1575  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
1576       PI != E; ++PI) {
1577    BasicBlock *Pred = *PI;
1578    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
1579      continue;
1580    }
1581    PredLoads[Pred] = 0;
1582
1583    if (Pred->getTerminator()->getNumSuccessors() != 1) {
1584      if (isa<IndirectBrInst>(Pred->getTerminator())) {
1585        DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1586              << Pred->getName() << "': " << *LI << '\n');
1587        return false;
1588      }
1589      unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
1590      NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
1591    }
1592  }
1593  if (!NeedToSplit.empty()) {
1594    toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
1595    return false;
1596  }
1597
1598  // Decide whether PRE is profitable for this load.
1599  unsigned NumUnavailablePreds = PredLoads.size();
1600  assert(NumUnavailablePreds != 0 &&
1601         "Fully available value should be eliminated above!");
1602
1603  // If this load is unavailable in multiple predecessors, reject it.
1604  // FIXME: If we could restructure the CFG, we could make a common pred with
1605  // all the preds that don't have an available LI and insert a new load into
1606  // that one block.
1607  if (NumUnavailablePreds != 1)
1608    return false;
1609
1610  // Check if the load can safely be moved to all the unavailable predecessors.
1611  bool CanDoPRE = true;
1612  SmallVector<Instruction*, 8> NewInsts;
1613  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
1614         E = PredLoads.end(); I != E; ++I) {
1615    BasicBlock *UnavailablePred = I->first;
1616
1617    // Do PHI translation to get its value in the predecessor if necessary.  The
1618    // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1619
1620    // If all preds have a single successor, then we know it is safe to insert
1621    // the load on the pred (?!?), so we can insert code to materialize the
1622    // pointer if it is not available.
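        // Sketch of PHI translation (illustrative names): if the loaded pointer
        // in LoadBB is
        //   %addr = phi i8* [ %a, %Pred1 ], [ %b, %Pred2 ]
        // then its translation into %Pred2 is simply %b; more complex addresses
        // (e.g. GEPs of PHIs) may require inserting new instructions.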
1623    PHITransAddr Address(LI->getPointerOperand(), TD);
1624    Value *LoadPtr = 0;
1625    if (allSingleSucc) {
1626      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
1627                                                  *DT, NewInsts);
1628    } else {
1629      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
1630      LoadPtr = Address.getAddr();
1631    }
1632
1633    // If we couldn't find or insert a computation of this phi translated value,
1634    // we fail PRE.
1635    if (LoadPtr == 0) {
1636      DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1637            << *LI->getPointerOperand() << "\n");
1638      CanDoPRE = false;
1639      break;
1640    }
1641
1642    // Make sure it is valid to move this load here.  We have to watch out for:
1643    //  @1 = getelementptr (i8* p, ...
1644    //  test p and branch if == 0
1645    //  load @1
1646    // It is valid to have the getelementptr before the test, even if p can be 0,
1647    // as getelementptr only does address arithmetic.
1648    // If we are not pushing the value through any multiple-successor blocks
1649    // we do not have this case.  Otherwise, check that the load is safe to
1650    // put anywhere; this can be improved, but should be conservatively safe.
1651    if (!allSingleSucc &&
1652        // FIXME: REEVALUATE THIS.
1653        !isSafeToLoadUnconditionally(LoadPtr,
1654                                     UnavailablePred->getTerminator(),
1655                                     LI->getAlignment(), TD)) {
1656      CanDoPRE = false;
1657      break;
1658    }
1659
1660    I->second = LoadPtr;
1661  }
1662
1663  if (!CanDoPRE) {
1664    while (!NewInsts.empty())
1665      NewInsts.pop_back_val()->eraseFromParent();
1666    return false;
1667  }
1668
1669  // Okay, we can eliminate this load by inserting a reload in the predecessor
1670  // and using PHI construction to get the value in the other predecessors.
1671  // Do it now.
1672  DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
1673  DEBUG(if (!NewInsts.empty())
1674          dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
1675                 << *NewInsts.back() << '\n');
1676
1677  // Assign value numbers to the new instructions.
1678  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
1679    // FIXME: We really _ought_ to insert these value numbers into their
1680    // parent's availability map.  However, in doing so, we risk getting into
1681    // ordering issues.  If a block hasn't been processed yet, we would be
1682    // marking a value as AVAIL-IN, which isn't what we intend.
1683    VN.lookup_or_add(NewInsts[i]);
1684  }
1685
1686  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
1687         E = PredLoads.end(); I != E; ++I) {
1688    BasicBlock *UnavailablePred = I->first;
1689    Value *LoadPtr = I->second;
1690
1691    Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
1692                                  LI->getAlignment(),
1693                                  UnavailablePred->getTerminator());
1694
1695    // Add the newly created load.
1696    ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
1697                                                        NewLoad));
1698    MD->invalidateCachedPointerInfo(LoadPtr);
1699    DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1700  }
1701
1702  // Perform PHI construction.
1703  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
1704                                    VN.getAliasAnalysis());
1705  LI->replaceAllUsesWith(V);
1706  if (isa<PHINode>(V))
1707    V->takeName(LI);
1708  if (V->getType()->isPointerTy())
1709    MD->invalidateCachedPointerInfo(V);
1710  VN.erase(LI);
1711  toErase.push_back(LI);
1712  ++NumPRELoad;
1713  return true;
1714}
1715
1716/// processLoad - Attempt to eliminate a load, first by eliminating it
1717/// locally, and then attempting non-local elimination if that fails.
1718bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
1719  if (!MD)
1720    return false;
1721
1722  if (L->isVolatile())
1723    return false;
1724
1725  // Get this load's local memory dependency.
1726  MemDepResult Dep = MD->getDependency(L);
1727
1728  // If the value isn't available, don't do anything!
1729  if (Dep.isClobber()) {
1730    // Check to see if we have something like this:
1731    //   store i32 123, i32* %P
1732    //   %A = bitcast i32* %P to i8*
1733    //   %B = gep i8* %A, i32 1
1734    //   %C = load i8* %B
1735    //
1736    // We could do that by recognizing if the clobber instructions are obviously
1737    // a common base + constant offset, and if the previous store (or memset)
1738    // completely covers this load.  This sort of thing can happen in bitfield
1739    // access code.
1740    Value *AvailVal = 0;
1741    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
1742      if (TD) {
1743        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
1744                                                    L->getPointerOperand(),
1745                                                    DepSI, *TD);
1746        if (Offset != -1)
1747          AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
1748                                          L->getType(), L, *TD);
1749      }
1750
1751    // If the clobbering value is a memset/memcpy/memmove, see if we can forward
1752    // a value on from it.
1753    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
1754      if (TD) {
1755        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
1756                                                      L->getPointerOperand(),
1757                                                      DepMI, *TD);
1758        if (Offset != -1)
1759          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
1760      }
1761    }
1762
1763    if (AvailVal) {
1764      DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
1765            << *AvailVal << '\n' << *L << "\n\n\n");
1766
1767      // Replace the load!
1768      L->replaceAllUsesWith(AvailVal);
1769      if (AvailVal->getType()->isPointerTy())
1770        MD->invalidateCachedPointerInfo(AvailVal);
1771      VN.erase(L);
1772      toErase.push_back(L);
1773      ++NumGVNLoad;
1774      return true;
1775    }
1776
1777    DEBUG(
1778      // fast print dep, using operator<< on instruction would be too slow
1779      dbgs() << "GVN: load ";
1780      WriteAsOperand(dbgs(), L);
1781      Instruction *I = Dep.getInst();
1782      dbgs() << " is clobbered by " << *I << '\n';
1783    );
1784    return false;
1785  }
1786
1787  // If it is defined in another block, try harder.
1788  if (Dep.isNonLocal())
1789    return processNonLocalLoad(L, toErase);
1790
1791  Instruction *DepInst = Dep.getInst();
1792  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
1793    Value *StoredVal = DepSI->getValueOperand();
1794
1795    // The store and load are to a must-aliased pointer, but they may not
1796    // actually have the same type.  See if we know how to reuse the stored
1797    // value (depending on its type).
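        // E.g. (illustrative): "store i32 %x" followed by "load float" of the
        // same address can be satisfied by bitcasting %x to float; a wider
        // stored value can be shifted and truncated down to the loaded width.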
1798    if (StoredVal->getType() != L->getType()) {
1799      if (TD) {
1800        StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
1801                                                   L, *TD);
1802        if (StoredVal == 0)
1803          return false;
1804
1805        DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
1806                     << '\n' << *L << "\n\n\n");
1807      }
1808      else
1809        return false;
1810    }
1811
1812    // Remove it!
1813    L->replaceAllUsesWith(StoredVal);
1814    if (StoredVal->getType()->isPointerTy())
1815      MD->invalidateCachedPointerInfo(StoredVal);
1816    VN.erase(L);
1817    toErase.push_back(L);
1818    ++NumGVNLoad;
1819    return true;
1820  }
1821
1822  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
1823    Value *AvailableVal = DepLI;
1824
1825    // The loads are of a must-aliased pointer, but they may not actually have
1826    // the same type.  See if we know how to reuse the previously loaded value
1827    // (depending on its type).
1828    if (DepLI->getType() != L->getType()) {
1829      if (TD) {
1830        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
1831        if (AvailableVal == 0)
1832          return false;
1833
1834        DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
1835                     << "\n" << *L << "\n\n\n");
1836      }
1837      else
1838        return false;
1839    }
1840
1841    // Remove it!
1842    L->replaceAllUsesWith(AvailableVal);
1843    if (DepLI->getType()->isPointerTy())
1844      MD->invalidateCachedPointerInfo(DepLI);
1845    VN.erase(L);
1846    toErase.push_back(L);
1847    ++NumGVNLoad;
1848    return true;
1849  }
1850
1851  // If this load really doesn't depend on anything, then we must be loading an
1852  // undef value.  This can happen when loading from a fresh allocation with no
1853  // intervening stores, for example.
1854  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
1855    L->replaceAllUsesWith(UndefValue::get(L->getType()));
1856    VN.erase(L);
1857    toErase.push_back(L);
1858    ++NumGVNLoad;
1859    return true;
1860  }
1861
1862  // If this load occurs right after a lifetime begin intrinsic, then the
1863  // loaded value is undefined.
1864  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
1865    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1866      L->replaceAllUsesWith(UndefValue::get(L->getType()));
1867      VN.erase(L);
1868      toErase.push_back(L);
1869      ++NumGVNLoad;
1870      return true;
1871    }
1872  }
1873
1874  return false;
1875}
1876
1877Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
1878  DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
1879  if (I == localAvail.end())
1880    return 0;
1881
1882  ValueNumberScope *Locals = I->second;
1883  while (Locals) {
1884    DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
1885    if (I != Locals->table.end())
1886      return I->second;
1887    Locals = Locals->parent;
1888  }
1889
1890  return 0;
1891}
1892
1893
1894/// processInstruction - When calculating availability, handle an instruction
1895/// by inserting it into the appropriate sets.
1896bool GVN::processInstruction(Instruction *I,
1897                             SmallVectorImpl<Instruction*> &toErase) {
1898  // Ignore dbg info intrinsics.
1899  if (isa<DbgInfoIntrinsic>(I))
1900    return false;
1901
1902  // If the instruction can be easily simplified then do so now in preference
1903  // to value numbering it.  Value numbering often exposes redundancies, for
1904  // example if it determines that %y is equal to %x then the instruction
1905  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
1906  if (Value *V = SimplifyInstruction(I, TD)) {
1907    I->replaceAllUsesWith(V);
1908    if (MD && V->getType()->isPointerTy())
1909      MD->invalidateCachedPointerInfo(V);
1910    VN.erase(I);
1911    toErase.push_back(I);
1912    return true;
1913  }
1914
1915  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1916    bool Changed = processLoad(LI, toErase);
1917
1918    if (!Changed) {
1919      unsigned Num = VN.lookup_or_add(LI);
1920      localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
1921    }
1922
1923    return Changed;
1924  }
1925
1926  uint32_t NextNum = VN.getNextUnusedValueNumber();
1927  unsigned Num = VN.lookup_or_add(I);
1928
1929  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
1930    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
1931
1932    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
1933      return false;
1934
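        // Record what the branch condition must be along each successor edge.
        // E.g. (illustrative), after
        //   br i1 %c, label %T, label %F
        // %c is known to be true in %T and false in %F, provided each successor
        // has this block as its sole predecessor.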
1935    Value *BranchCond = BI->getCondition();
1936    uint32_t CondVN = VN.lookup_or_add(BranchCond);
1937
1938    BasicBlock *TrueSucc = BI->getSuccessor(0);
1939    BasicBlock *FalseSucc = BI->getSuccessor(1);
1940
1941    if (TrueSucc->getSinglePredecessor())
1942      localAvail[TrueSucc]->table[CondVN] =
1943        ConstantInt::getTrue(TrueSucc->getContext());
1944    if (FalseSucc->getSinglePredecessor())
1945      localAvail[FalseSucc]->table[CondVN] =
1946        ConstantInt::getFalse(FalseSucc->getContext());
1947
1948    return false;
1949
1950  // Allocations are always uniquely numbered, so we can save time and memory
1951  // by fast failing them.
1952  } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
1953    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
1954    return false;
1955  }
1956
1957  // Collapse PHI nodes
1958  if (PHINode* p = dyn_cast<PHINode>(I)) {
1959    Value *constVal = CollapsePhi(p);
1960
1961    if (constVal) {
1962      p->replaceAllUsesWith(constVal);
1963      if (MD && constVal->getType()->isPointerTy())
1964        MD->invalidateCachedPointerInfo(constVal);
1965      VN.erase(p);
1966
1967      toErase.push_back(p);
1968    } else {
1969      localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
1970    }
1971
1972  // If the number we were assigned was a brand new VN, then we don't
1973  // need to do a lookup to see if the number already exists
1974  // somewhere in the domtree: it can't!
1975  } else if (Num == NextNum) {
1976    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
1977
1978  // Perform fast-path value-number based elimination of values inherited from
1979  // dominators.
1980  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
1981    // Remove it!
1982    VN.erase(I);
1983    I->replaceAllUsesWith(repl);
1984    if (MD && repl->getType()->isPointerTy())
1985      MD->invalidateCachedPointerInfo(repl);
1986    toErase.push_back(I);
1987    return true;
1988
1989  } else {
1990    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
1991  }
1992
1993  return false;
1994}
1995
1996/// runOnFunction - This is the main transformation entry point for a function.
1997bool GVN::runOnFunction(Function& F) {
1998  if (!NoLoads)
1999    MD = &getAnalysis<MemoryDependenceAnalysis>();
2000  DT = &getAnalysis<DominatorTree>();
2001  TD = getAnalysisIfAvailable<TargetData>();
2002  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
2003  VN.setMemDep(MD);
2004  VN.setDomTree(DT);
2005
2006  bool Changed = false;
2007  bool ShouldContinue = true;
2008
2009  // Merge unconditional branches, allowing PRE to catch more
2010  // optimization opportunities.
2011  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
2012    BasicBlock *BB = FI;
2013    ++FI;
2014    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
2015    if (removedBlock) ++NumGVNBlocks;
2016
2017    Changed |= removedBlock;
2018  }
2019
2020  unsigned Iteration = 0;
2021
2022  while (ShouldContinue) {
2023    DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
2024    ShouldContinue = iterateOnFunction(F);
2025    if (splitCriticalEdges())
2026      ShouldContinue = true;
2027    Changed |= ShouldContinue;
2028    ++Iteration;
2029  }
2030
2031  if (EnablePRE) {
2032    bool PREChanged = true;
2033    while (PREChanged) {
2034      PREChanged = performPRE(F);
2035      Changed |= PREChanged;
2036    }
2037  }
2038  // FIXME: Should perform GVN again after PRE does something.  PRE can move
2039  // computations into blocks where they become fully redundant.  Note that
2040  // we can't do this until PRE's critical edge splitting updates memdep.
2041  // Actually, when this happens, we should just fully integrate PRE into GVN.
2042
2043  cleanupGlobalSets();
2044
2045  return Changed;
2046}
2047
2048
2049bool GVN::processBlock(BasicBlock *BB) {
2050  // FIXME: Kill off toErase by erasing eagerly in a helper function (and
2051  // incrementing BI before processing an instruction).
2052  SmallVector<Instruction*, 8> toErase;
2053  bool ChangedFunction = false;
2054
2055  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
2056       BI != BE;) {
2057    ChangedFunction |= processInstruction(BI, toErase);
2058    if (toErase.empty()) {
2059      ++BI;
2060      continue;
2061    }
2062
2063    // If we need some instructions deleted, do it now.
2064    NumGVNInstr += toErase.size();
2065
2066    // Avoid iterator invalidation.
2067    bool AtStart = BI == BB->begin();
2068    if (!AtStart)
2069      --BI;
2070
2071    for (SmallVectorImpl<Instruction*>::iterator I = toErase.begin(),
2072         E = toErase.end(); I != E; ++I) {
2073      DEBUG(dbgs() << "GVN removed: " << **I << '\n');
2074      if (MD) MD->removeInstruction(*I);
2075      (*I)->eraseFromParent();
2076      DEBUG(verifyRemoved(*I));
2077    }
2078    toErase.clear();
2079
2080    if (AtStart)
2081      BI = BB->begin();
2082    else
2083      ++BI;
2084  }
2085
2086  return ChangedFunction;
2087}
2088
2089/// performPRE - Perform a purely local form of PRE that looks for diamond
2090/// control flow patterns and attempts to perform simple PRE at the join point.
2091bool GVN::performPRE(Function &F) {
2092  bool Changed = false;
2093  DenseMap<BasicBlock*, Value*> predMap;
2094  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
2095       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
2096    BasicBlock *CurrentBlock = *DI;
2097
2098    // Nothing to PRE in the entry block.
2099    if (CurrentBlock == &F.getEntryBlock()) continue;
2100
2101    for (BasicBlock::iterator BI = CurrentBlock->begin(),
2102         BE = CurrentBlock->end(); BI != BE; ) {
2103      Instruction *CurInst = BI++;
2104
2105      if (isa<AllocaInst>(CurInst) ||
2106          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
2107          CurInst->getType()->isVoidTy() ||
2108          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
2109          isa<DbgInfoIntrinsic>(CurInst))
2110        continue;
2111
2112      // We don't currently value number ANY inline asm calls.
2113      if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
2114        if (CallI->isInlineAsm())
2115          continue;
2116
2117      uint32_t ValNo = VN.lookup(CurInst);
2118
2119      // Look at the predecessors for PRE opportunities.  We're
2120      // only trying to solve the basic diamond case, where
2121      // a value is computed in the successor and one predecessor,
2122      // but not the other.  We also explicitly disallow cases
2123      // where the successor is its own predecessor, because they're
2124      // more complicated to get right.
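          // The diamond we are looking for is roughly (hedged sketch):
          //
          //                 BB0
          //                /    \
          //      P1: %a = add    P2: (no add)
          //                \    /
          //         CurrentBlock: %b = add
          //
          // We clone the add into P2, then replace %b with a PHI of %a and the
          // clone.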
2125      unsigned NumWith = 0;
2126      unsigned NumWithout = 0;
2127      BasicBlock *PREPred = 0;
2128      predMap.clear();
2129
2130      for (pred_iterator PI = pred_begin(CurrentBlock),
2131           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
2132        BasicBlock *P = *PI;
2133        // We're not interested in PRE where the block is its
2134        // own predecessor, or in blocks with predecessors
2135        // that are not reachable.
2136        if (P == CurrentBlock) {
2137          NumWithout = 2;
2138          break;
2139        } else if (!localAvail.count(P))  {
2140          NumWithout = 2;
2141          break;
2142        }
2143
2144        DenseMap<uint32_t, Value*>::iterator predV =
2145                                            localAvail[P]->table.find(ValNo);
2146        if (predV == localAvail[P]->table.end()) {
2147          PREPred = P;
2148          ++NumWithout;
2149        } else if (predV->second == CurInst) {
2150          NumWithout = 2;
2151        } else {
2152          predMap[P] = predV->second;
2153          ++NumWith;
2154        }
2155      }
2156
2157      // Don't do PRE when it might increase code size, i.e. when
2158      // we would need to insert instructions in more than one pred.
2159      if (NumWithout != 1 || NumWith == 0)
2160        continue;
2161
2162      // Don't do PRE across an indirect branch.
2163      if (isa<IndirectBrInst>(PREPred->getTerminator()))
2164        continue;
2165
2166      // We can't do PRE safely on a critical edge, so instead we schedule
2167      // the edge to be split and perform the PRE the next time we iterate
2168      // on the function.
2169      unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
2170      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
2171        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
2172        continue;
2173      }
2174
2175      // Instantiate the expression in the predecessor that lacked it.
2176      // Because we are going top-down through the block, all value numbers
2177      // will be available in the predecessor by the time we need them.  Any
2178      // that weren't originally present will have been instantiated earlier
2179      // in this loop.
2180      Instruction *PREInstr = CurInst->clone();
2181      bool success = true;
2182      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
2183        Value *Op = PREInstr->getOperand(i);
2184        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
2185          continue;
2186
2187        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
2188          PREInstr->setOperand(i, V);
2189        } else {
2190          success = false;
2191          break;
2192        }
2193      }
2194
2195      // Fail out if we encounter an operand that is not available in
2196      // the PRE predecessor.  This is typically because of loads which
2197      // are not value numbered precisely.
2198      if (!success) {
2199        delete PREInstr;
2200        DEBUG(verifyRemoved(PREInstr));
2201        continue;
2202      }
2203
2204      PREInstr->insertBefore(PREPred->getTerminator());
2205      PREInstr->setName(CurInst->getName() + ".pre");
2206      predMap[PREPred] = PREInstr;
2207      VN.add(PREInstr, ValNo);
2208      ++NumGVNPRE;
2209
2210      // Update the availability map to include the new instruction.
2211      localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
2212
2213      // Create a PHI to make the value available in this block.
2214      PHINode* Phi = PHINode::Create(CurInst->getType(),
2215                                     CurInst->getName() + ".pre-phi",
2216                                     CurrentBlock->begin());
2217      for (pred_iterator PI = pred_begin(CurrentBlock),
2218           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
2219        BasicBlock *P = *PI;
2220        Phi->addIncoming(predMap[P], P);
2221      }
2222
2223      VN.add(Phi, ValNo);
2224      localAvail[CurrentBlock]->table[ValNo] = Phi;
2225
2226      CurInst->replaceAllUsesWith(Phi);
2227      if (MD && Phi->getType()->isPointerTy())
2228        MD->invalidateCachedPointerInfo(Phi);
2229      VN.erase(CurInst);
2230
2231      DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
2232      if (MD) MD->removeInstruction(CurInst);
2233      CurInst->eraseFromParent();
2234      DEBUG(verifyRemoved(CurInst));
2235      Changed = true;
2236    }
2237  }
2238
2239  if (splitCriticalEdges())
2240    Changed = true;
2241
2242  return Changed;
2243}
2244
2245/// splitCriticalEdges - Split critical edges found during the previous
2246/// iteration that may enable further optimization.
2247bool GVN::splitCriticalEdges() {
2248  if (toSplit.empty())
2249    return false;
2250  do {
2251    std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
2252    SplitCriticalEdge(Edge.first, Edge.second, this);
2253  } while (!toSplit.empty());
2254  if (MD) MD->invalidateCachedPredecessors();
2255  return true;
2256}
2257
2258/// iterateOnFunction - Executes one iteration of GVN.
2259bool GVN::iterateOnFunction(Function &F) {
2260  cleanupGlobalSets();
2261
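      // Build the per-block availability scopes: each block's ValueNumberScope
      // chains to the scope of its immediate dominator, so lookupNumber can
      // walk up the dominator tree to find values made available there.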
2262  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
2263       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
2264    if (DI->getIDom())
2265      localAvail[DI->getBlock()] =
2266                   new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
2267    else
2268      localAvail[DI->getBlock()] = new ValueNumberScope(0);
2269  }
2270
2271  // Top-down walk of the dominator tree
2272  bool Changed = false;
2273#if 0
2274  // Needed for value numbering with phi construction to work.
2275  ReversePostOrderTraversal<Function*> RPOT(&F);
2276  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
2277       RE = RPOT.end(); RI != RE; ++RI)
2278    Changed |= processBlock(*RI);
2279#else
2280  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
2281       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
2282    Changed |= processBlock(DI->getBlock());
2283#endif
2284
2285  return Changed;
2286}
2287
2288void GVN::cleanupGlobalSets() {
2289  VN.clear();
2290
2291  for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
2292       I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
2293    delete I->second;
2294  localAvail.clear();
2295}
2296
2297/// verifyRemoved - Verify that the specified instruction does not occur in our
2298/// internal data structures.
2299void GVN::verifyRemoved(const Instruction *Inst) const {
2300  VN.verifyRemoved(Inst);
2301
2302  // Walk through the value number scope to make sure the instruction isn't
2303  // ferreted away in it.
2304  for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
2305         I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
2306    const ValueNumberScope *VNS = I->second;
2307
2308    while (VNS) {
2309      for (DenseMap<uint32_t, Value*>::const_iterator
2310             II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
2311        assert(II->second != Inst && "Inst still in value numbering scope!");
2312      }
2313
2314      VNS = VNS->parent;
2315    }
2316  }
2317}
2318