GVN.cpp revision ed58a6f96f605901adc0df3ca76499d52b2d1a1a
1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass performs global value numbering to eliminate fully redundant
11// instructions.  It also performs simple dead load elimination.
12//
13// Note that this pass does the value numbering itself; it does not use the
14// ValueNumbering analysis passes.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "gvn"
19#include "llvm/Transforms/Scalar.h"
20#include "llvm/Constants.h"
21#include "llvm/DerivedTypes.h"
22#include "llvm/GlobalVariable.h"
23#include "llvm/Function.h"
24#include "llvm/IntrinsicInst.h"
25#include "llvm/Operator.h"
26#include "llvm/Analysis/AliasAnalysis.h"
27#include "llvm/Analysis/ConstantFolding.h"
28#include "llvm/Analysis/Dominators.h"
29#include "llvm/Analysis/InstructionSimplify.h"
30#include "llvm/Analysis/Loads.h"
31#include "llvm/Analysis/MemoryBuiltins.h"
32#include "llvm/Analysis/MemoryDependenceAnalysis.h"
33#include "llvm/Analysis/PHITransAddr.h"
34#include "llvm/Analysis/ValueTracking.h"
35#include "llvm/Target/TargetData.h"
36#include "llvm/Transforms/Utils/BasicBlockUtils.h"
37#include "llvm/Transforms/Utils/Local.h"
38#include "llvm/Transforms/Utils/SSAUpdater.h"
39#include "llvm/ADT/DenseMap.h"
40#include "llvm/ADT/DepthFirstIterator.h"
41#include "llvm/ADT/PostOrderIterator.h"
42#include "llvm/ADT/SmallPtrSet.h"
43#include "llvm/ADT/Statistic.h"
44#include "llvm/Support/Allocator.h"
45#include "llvm/Support/CFG.h"
46#include "llvm/Support/CommandLine.h"
47#include "llvm/Support/Debug.h"
48#include "llvm/Support/ErrorHandling.h"
49#include "llvm/Support/GetElementPtrTypeIterator.h"
50#include "llvm/Support/IRBuilder.h"
51#include <list>
52using namespace llvm;
53
54STATISTIC(NumGVNInstr,  "Number of instructions deleted");
55STATISTIC(NumGVNLoad,   "Number of loads deleted");
56STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
57STATISTIC(NumGVNBlocks, "Number of blocks merged");
58STATISTIC(NumPRELoad,   "Number of loads PRE'd");
59
60static cl::opt<bool> EnablePRE("enable-pre",
61                               cl::init(true), cl::Hidden);
62static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
63
64//===----------------------------------------------------------------------===//
65//                         ValueTable Class
66//===----------------------------------------------------------------------===//
67
68/// This class holds the mapping between values and value numbers.  It is used
69/// as an efficient mechanism to determine the expression-wise equivalence of
70/// two values.
71namespace {
72  struct Expression {
73    enum ExpressionOpcode {
74      ADD = Instruction::Add,
75      FADD = Instruction::FAdd,
76      SUB = Instruction::Sub,
77      FSUB = Instruction::FSub,
78      MUL = Instruction::Mul,
79      FMUL = Instruction::FMul,
80      UDIV = Instruction::UDiv,
81      SDIV = Instruction::SDiv,
82      FDIV = Instruction::FDiv,
83      UREM = Instruction::URem,
84      SREM = Instruction::SRem,
85      FREM = Instruction::FRem,
86      SHL = Instruction::Shl,
87      LSHR = Instruction::LShr,
88      ASHR = Instruction::AShr,
89      AND = Instruction::And,
90      OR = Instruction::Or,
91      XOR = Instruction::Xor,
92      TRUNC = Instruction::Trunc,
93      ZEXT = Instruction::ZExt,
94      SEXT = Instruction::SExt,
95      FPTOUI = Instruction::FPToUI,
96      FPTOSI = Instruction::FPToSI,
97      UITOFP = Instruction::UIToFP,
98      SITOFP = Instruction::SIToFP,
99      FPTRUNC = Instruction::FPTrunc,
100      FPEXT = Instruction::FPExt,
101      PTRTOINT = Instruction::PtrToInt,
102      INTTOPTR = Instruction::IntToPtr,
103      BITCAST = Instruction::BitCast,
104      ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
105      ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
106      FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
107      FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
108      FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
109      SHUFFLE, SELECT, GEP, CALL, CONSTANT,
110      INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
111
112    ExpressionOpcode opcode;
113    const Type* type;
114    SmallVector<uint32_t, 4> varargs;
115    Value *function;
116
117    Expression() { }
118    Expression(ExpressionOpcode o) : opcode(o) { }
119
120    bool operator==(const Expression &other) const {
121      if (opcode != other.opcode)
122        return false;
123      else if (opcode == EMPTY || opcode == TOMBSTONE)
124        return true;
125      else if (type != other.type)
126        return false;
127      else if (function != other.function)
128        return false;
129      else {
130        if (varargs.size() != other.varargs.size())
131          return false;
132
133        for (size_t i = 0; i < varargs.size(); ++i)
134          if (varargs[i] != other.varargs[i])
135            return false;
136
137        return true;
138      }
139    }
140
141    /*bool operator!=(const Expression &other) const {
142      return !(*this == other);
143    }*/
144  };
145
146  class ValueTable {
147    private:
148      DenseMap<Value*, uint32_t> valueNumbering;
149      DenseMap<Expression, uint32_t> expressionNumbering;
150      AliasAnalysis* AA;
151      MemoryDependenceAnalysis* MD;
152      DominatorTree* DT;
153
154      uint32_t nextValueNumber;
155
156      Expression::ExpressionOpcode getOpcode(CmpInst* C);
157      Expression create_expression(BinaryOperator* BO);
158      Expression create_expression(CmpInst* C);
159      Expression create_expression(ShuffleVectorInst* V);
160      Expression create_expression(ExtractElementInst* C);
161      Expression create_expression(InsertElementInst* V);
162      Expression create_expression(SelectInst* V);
163      Expression create_expression(CastInst* C);
164      Expression create_expression(GetElementPtrInst* G);
165      Expression create_expression(CallInst* C);
166      Expression create_expression(ExtractValueInst* C);
167      Expression create_expression(InsertValueInst* C);
168
169      uint32_t lookup_or_add_call(CallInst* C);
170    public:
171      ValueTable() : nextValueNumber(1) { }
172      uint32_t lookup_or_add(Value *V);
173      uint32_t lookup(Value *V) const;
174      void add(Value *V, uint32_t num);
175      void clear();
176      void erase(Value *v);
177      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
178      AliasAnalysis *getAliasAnalysis() const { return AA; }
179      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
180      void setDomTree(DominatorTree* D) { DT = D; }
181      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
182      void verifyRemoved(const Value *) const;
183  };
184}
185
186namespace llvm {
187template <> struct DenseMapInfo<Expression> {
188  static inline Expression getEmptyKey() {
189    return Expression(Expression::EMPTY);
190  }
191
192  static inline Expression getTombstoneKey() {
193    return Expression(Expression::TOMBSTONE);
194  }
195
196  static unsigned getHashValue(const Expression e) {
197    unsigned hash = e.opcode;
198
199    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
200            (unsigned)((uintptr_t)e.type >> 9));
201
202    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
203         E = e.varargs.end(); I != E; ++I)
204      hash = *I + hash * 37;
205
206    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
207            (unsigned)((uintptr_t)e.function >> 9)) +
208           hash * 37;
209
210    return hash;
211  }
212  static bool isEqual(const Expression &LHS, const Expression &RHS) {
213    return LHS == RHS;
214  }
215};
216
217template <>
218struct isPodLike<Expression> { static const bool value = true; };
219
220}
221
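// Illustrative example (editorial addition, not part of the original file):
// two instructions receive the same value number when they reduce to equal
// Expressions.  For example, given
//
//   %a = add i32 %x, %y
//   %b = add i32 %x, %y
//
// both adds build an Expression with opcode ADD, type i32, and the value
// numbers of %x and %y as varargs, so lookup_or_add returns the same number
// for %a and %b, marking the second add as redundant with the first.
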
222//===----------------------------------------------------------------------===//
223//                     ValueTable Internal Functions
224//===----------------------------------------------------------------------===//
225
226Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
227  if (isa<ICmpInst>(C)) {
228    switch (C->getPredicate()) {
229    default:  // THIS SHOULD NEVER HAPPEN
230      llvm_unreachable("Comparison with unknown predicate?");
231    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
232    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
233    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
234    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
235    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
236    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
237    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
238    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
239    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
240    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
241    }
242  } else {
243    switch (C->getPredicate()) {
244    default: // THIS SHOULD NEVER HAPPEN
245      llvm_unreachable("Comparison with unknown predicate?");
246    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
247    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
248    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
249    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
250    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
251    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
252    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
253    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
254    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
255    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
256    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
257    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
258    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
259    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
260    }
261  }
262}
263
264Expression ValueTable::create_expression(CallInst* C) {
265  Expression e;
266
267  e.type = C->getType();
268  e.function = C->getCalledFunction();
269  e.opcode = Expression::CALL;
270
271  CallSite CS(C);
272  for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
273       I != E; ++I)
274    e.varargs.push_back(lookup_or_add(*I));
275
276  return e;
277}
278
279Expression ValueTable::create_expression(BinaryOperator* BO) {
280  Expression e;
281  e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
282  e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
283  e.function = 0;
284  e.type = BO->getType();
285  e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode());
286
287  return e;
288}
289
290Expression ValueTable::create_expression(CmpInst* C) {
291  Expression e;
292
293  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
294  e.varargs.push_back(lookup_or_add(C->getOperand(1)));
295  e.function = 0;
296  e.type = C->getType();
297  e.opcode = getOpcode(C);
298
299  return e;
300}
301
302Expression ValueTable::create_expression(CastInst* C) {
303  Expression e;
304
305  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
306  e.function = 0;
307  e.type = C->getType();
308  e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode());
309
310  return e;
311}
312
313Expression ValueTable::create_expression(ShuffleVectorInst* S) {
314  Expression e;
315
316  e.varargs.push_back(lookup_or_add(S->getOperand(0)));
317  e.varargs.push_back(lookup_or_add(S->getOperand(1)));
318  e.varargs.push_back(lookup_or_add(S->getOperand(2)));
319  e.function = 0;
320  e.type = S->getType();
321  e.opcode = Expression::SHUFFLE;
322
323  return e;
324}
325
326Expression ValueTable::create_expression(ExtractElementInst* E) {
327  Expression e;
328
329  e.varargs.push_back(lookup_or_add(E->getOperand(0)));
330  e.varargs.push_back(lookup_or_add(E->getOperand(1)));
331  e.function = 0;
332  e.type = E->getType();
333  e.opcode = Expression::EXTRACT;
334
335  return e;
336}
337
338Expression ValueTable::create_expression(InsertElementInst* I) {
339  Expression e;
340
341  e.varargs.push_back(lookup_or_add(I->getOperand(0)));
342  e.varargs.push_back(lookup_or_add(I->getOperand(1)));
343  e.varargs.push_back(lookup_or_add(I->getOperand(2)));
344  e.function = 0;
345  e.type = I->getType();
346  e.opcode = Expression::INSERT;
347
348  return e;
349}
350
351Expression ValueTable::create_expression(SelectInst* I) {
352  Expression e;
353
354  e.varargs.push_back(lookup_or_add(I->getCondition()));
355  e.varargs.push_back(lookup_or_add(I->getTrueValue()));
356  e.varargs.push_back(lookup_or_add(I->getFalseValue()));
357  e.function = 0;
358  e.type = I->getType();
359  e.opcode = Expression::SELECT;
360
361  return e;
362}
363
364Expression ValueTable::create_expression(GetElementPtrInst* G) {
365  Expression e;
366
367  e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
368  e.function = 0;
369  e.type = G->getType();
370  e.opcode = Expression::GEP;
371
372  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
373       I != E; ++I)
374    e.varargs.push_back(lookup_or_add(*I));
375
376  return e;
377}
378
379Expression ValueTable::create_expression(ExtractValueInst* E) {
380  Expression e;
381
382  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
383  for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
384       II != IE; ++II)
385    e.varargs.push_back(*II);
386  e.function = 0;
387  e.type = E->getType();
388  e.opcode = Expression::EXTRACTVALUE;
389
390  return e;
391}
392
393Expression ValueTable::create_expression(InsertValueInst* E) {
394  Expression e;
395
396  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
397  e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
398  for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
399       II != IE; ++II)
400    e.varargs.push_back(*II);
401  e.function = 0;
402  e.type = E->getType();
403  e.opcode = Expression::INSERTVALUE;
404
405  return e;
406}
407
408//===----------------------------------------------------------------------===//
409//                     ValueTable External Functions
410//===----------------------------------------------------------------------===//
411
412/// add - Insert a value into the table with a specified value number.
413void ValueTable::add(Value *V, uint32_t num) {
414  valueNumbering.insert(std::make_pair(V, num));
415}
416
417uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
418  if (AA->doesNotAccessMemory(C)) {
419    Expression exp = create_expression(C);
420    uint32_t& e = expressionNumbering[exp];
421    if (!e) e = nextValueNumber++;
422    valueNumbering[C] = e;
423    return e;
424  } else if (AA->onlyReadsMemory(C)) {
425    Expression exp = create_expression(C);
426    uint32_t& e = expressionNumbering[exp];
427    if (!e) {
428      e = nextValueNumber++;
429      valueNumbering[C] = e;
430      return e;
431    }
432    if (!MD) {
433      e = nextValueNumber++;
434      valueNumbering[C] = e;
435      return e;
436    }
437
438    MemDepResult local_dep = MD->getDependency(C);
439
440    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
441      valueNumbering[C] = nextValueNumber;
442      return nextValueNumber++;
443    }
444
445    if (local_dep.isDef()) {
446      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
447
448      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
449        valueNumbering[C] = nextValueNumber;
450        return nextValueNumber++;
451      }
452
453      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
454        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
455        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
456        if (c_vn != cd_vn) {
457          valueNumbering[C] = nextValueNumber;
458          return nextValueNumber++;
459        }
460      }
461
462      uint32_t v = lookup_or_add(local_cdep);
463      valueNumbering[C] = v;
464      return v;
465    }
466
467    // Non-local case.
468    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
469      MD->getNonLocalCallDependency(CallSite(C));
470    // FIXME: call/call dependencies for readonly calls should return def, not
471    // clobber!  Move the checking logic to MemDep!
472    CallInst* cdep = 0;
473
474    // Check to see if we have a single dominating call instruction that is
475    // identical to C.
476    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
477      const NonLocalDepEntry *I = &deps[i];
478      // Ignore non-local dependencies.
479      if (I->getResult().isNonLocal())
480        continue;
481
482      // We don't handle non-dependencies.  If we already have a call, reject
483      // instruction dependencies.
484      if (I->getResult().isClobber() || cdep != 0) {
485        cdep = 0;
486        break;
487      }
488
489      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
490      // FIXME: All duplicated with the local case above.
491      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
492        cdep = NonLocalDepCall;
493        continue;
494      }
495
496      cdep = 0;
497      break;
498    }
499
500    if (!cdep) {
501      valueNumbering[C] = nextValueNumber;
502      return nextValueNumber++;
503    }
504
505    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
506      valueNumbering[C] = nextValueNumber;
507      return nextValueNumber++;
508    }
509    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
510      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
511      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
512      if (c_vn != cd_vn) {
513        valueNumbering[C] = nextValueNumber;
514        return nextValueNumber++;
515      }
516    }
517
518    uint32_t v = lookup_or_add(cdep);
519    valueNumbering[C] = v;
520    return v;
521
522  } else {
523    valueNumbering[C] = nextValueNumber;
524    return nextValueNumber++;
525  }
526}
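// Illustrative example (editorial addition): lookup_or_add_call can give two
// readonly calls the same value number when memdep proves the first call is
// the defining dependency of the second, e.g.
//
//   %v1 = call i32 @f(i32 %x)      ; @f only reads memory
//   ...                            ; no intervening write to memory @f reads
//   %v2 = call i32 @f(i32 %x)
//
// Here the argument value numbers match, so %v2 is assigned %v1's number.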
527
528/// lookup_or_add - Returns the value number for the specified value, assigning
529/// it a new number if it did not have one before.
530uint32_t ValueTable::lookup_or_add(Value *V) {
531  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
532  if (VI != valueNumbering.end())
533    return VI->second;
534
535  if (!isa<Instruction>(V)) {
536    valueNumbering[V] = nextValueNumber;
537    return nextValueNumber++;
538  }
539
540  Instruction* I = cast<Instruction>(V);
541  Expression exp;
542  switch (I->getOpcode()) {
543    case Instruction::Call:
544      return lookup_or_add_call(cast<CallInst>(I));
545    case Instruction::Add:
546    case Instruction::FAdd:
547    case Instruction::Sub:
548    case Instruction::FSub:
549    case Instruction::Mul:
550    case Instruction::FMul:
551    case Instruction::UDiv:
552    case Instruction::SDiv:
553    case Instruction::FDiv:
554    case Instruction::URem:
555    case Instruction::SRem:
556    case Instruction::FRem:
557    case Instruction::Shl:
558    case Instruction::LShr:
559    case Instruction::AShr:
560    case Instruction::And:
561    case Instruction::Or:
562    case Instruction::Xor:
563      exp = create_expression(cast<BinaryOperator>(I));
564      break;
565    case Instruction::ICmp:
566    case Instruction::FCmp:
567      exp = create_expression(cast<CmpInst>(I));
568      break;
569    case Instruction::Trunc:
570    case Instruction::ZExt:
571    case Instruction::SExt:
572    case Instruction::FPToUI:
573    case Instruction::FPToSI:
574    case Instruction::UIToFP:
575    case Instruction::SIToFP:
576    case Instruction::FPTrunc:
577    case Instruction::FPExt:
578    case Instruction::PtrToInt:
579    case Instruction::IntToPtr:
580    case Instruction::BitCast:
581      exp = create_expression(cast<CastInst>(I));
582      break;
583    case Instruction::Select:
584      exp = create_expression(cast<SelectInst>(I));
585      break;
586    case Instruction::ExtractElement:
587      exp = create_expression(cast<ExtractElementInst>(I));
588      break;
589    case Instruction::InsertElement:
590      exp = create_expression(cast<InsertElementInst>(I));
591      break;
592    case Instruction::ShuffleVector:
593      exp = create_expression(cast<ShuffleVectorInst>(I));
594      break;
595    case Instruction::ExtractValue:
596      exp = create_expression(cast<ExtractValueInst>(I));
597      break;
598    case Instruction::InsertValue:
599      exp = create_expression(cast<InsertValueInst>(I));
600      break;
601    case Instruction::GetElementPtr:
602      exp = create_expression(cast<GetElementPtrInst>(I));
603      break;
604    default:
605      valueNumbering[V] = nextValueNumber;
606      return nextValueNumber++;
607  }
608
609  uint32_t& e = expressionNumbering[exp];
610  if (!e) e = nextValueNumber++;
611  valueNumbering[V] = e;
612  return e;
613}
614
615/// lookup - Returns the value number of the specified value. Fails if
616/// the value has not yet been numbered.
617uint32_t ValueTable::lookup(Value *V) const {
618  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
619  assert(VI != valueNumbering.end() && "Value not numbered?");
620  return VI->second;
621}
622
623/// clear - Remove all entries from the ValueTable
624void ValueTable::clear() {
625  valueNumbering.clear();
626  expressionNumbering.clear();
627  nextValueNumber = 1;
628}
629
630/// erase - Remove a value from the value numbering
631void ValueTable::erase(Value *V) {
632  valueNumbering.erase(V);
633}
634
635/// verifyRemoved - Verify that the value is removed from all internal data
636/// structures.
637void ValueTable::verifyRemoved(const Value *V) const {
638  for (DenseMap<Value*, uint32_t>::const_iterator
639         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
640    assert(I->first != V && "Inst still occurs in value numbering map!");
641  }
642}
643
644//===----------------------------------------------------------------------===//
645//                                GVN Pass
646//===----------------------------------------------------------------------===//
647
648namespace {
649  struct ValueNumberScope {
650    ValueNumberScope* parent;
651    DenseMap<uint32_t, Value*> table;
652
653    ValueNumberScope(ValueNumberScope* p) : parent(p) { }
654  };
655}
656
657namespace {
658
659  class GVN : public FunctionPass {
660    bool runOnFunction(Function &F);
661  public:
662    static char ID; // Pass identification, replacement for typeid
663    explicit GVN(bool noloads = false)
664        : FunctionPass(ID), NoLoads(noloads), MD(0) {
665      initializeGVNPass(*PassRegistry::getPassRegistry());
666    }
667
668  private:
669    bool NoLoads;
670    MemoryDependenceAnalysis *MD;
671    DominatorTree *DT;
672    const TargetData* TD;
673
674    ValueTable VN;
675
676    /// NumberTable - A mapping from value numbers to lists of Value*'s that
677    /// have that value number.  Use lookupNumber to query it.
678    DenseMap<uint32_t, std::pair<Value*, void*> > NumberTable;
679    BumpPtrAllocator TableAllocator;
680
681    /// insert_table - Push a new Value onto the NumberTable list for its
682    /// value number.
683    void insert_table(uint32_t N, Value *V) {
684      std::pair<Value*, void*>& Curr = NumberTable[N];
685      if (!Curr.first) {
686        Curr.first = V;
687        return;
688      }
689
690      std::pair<Value*, void*>* Node =
691        TableAllocator.Allocate<std::pair<Value*, void*> >();
692      Node->first = V;
693      Node->second = Curr.second;
694      Curr.second = Node;
695    }
696
697    /// erase_table - Scan the list of values corresponding to a given value
698    /// number, and remove the given value if encountered.
699    void erase_table(uint32_t N, Value *V) {
700      std::pair<Value*, void*>* Prev = 0;
701      std::pair<Value*, void*>* Curr = &NumberTable[N];
702
703      while (Curr->first != V) {
704        Prev = Curr;
705        Curr = static_cast<std::pair<Value*, void*>*>(Curr->second);
706      }
707
708      if (Prev) {
709        Prev->second = Curr->second;
710      } else {
711        if (!Curr->second) {
712          Curr->first = 0;
713        } else {
714          std::pair<Value*, void*>* Next =
715            static_cast<std::pair<Value*, void*>*>(Curr->second);
716          Curr->first = Next->first;
717          Curr->second = Next->second;
718        }
719      }
720    }
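    /// Illustrative note (editorial addition): the NumberTable entry for a
    /// value number is an inline singly linked list of (Value*, next) pairs.
    /// After insert_table(7, A), insert_table(7, B), insert_table(7, C) the
    /// entry looks like
    ///
    ///   NumberTable[7] = {A, *} -> {C, *} -> {B, null}
    ///
    /// i.e. the first value stays in the DenseMap head pair and later values
    /// are spliced in right behind it using nodes from TableAllocator.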
721
722    // List of critical edges to be split between iterations.
723    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
724
725    // This transformation requires dominator info
726    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
727      AU.addRequired<DominatorTree>();
728      if (!NoLoads)
729        AU.addRequired<MemoryDependenceAnalysis>();
730      AU.addRequired<AliasAnalysis>();
731
732      AU.addPreserved<DominatorTree>();
733      AU.addPreserved<AliasAnalysis>();
734    }
735
736    // Helper functions
737    // FIXME: eliminate or document these better
738    bool processLoad(LoadInst* L,
739                     SmallVectorImpl<Instruction*> &toErase);
740    bool processInstruction(Instruction *I,
741                            SmallVectorImpl<Instruction*> &toErase);
742    bool processNonLocalLoad(LoadInst* L,
743                             SmallVectorImpl<Instruction*> &toErase);
744    bool processBlock(BasicBlock *BB);
745    void dump(DenseMap<uint32_t, Value*>& d);
746    bool iterateOnFunction(Function &F);
747    bool performPRE(Function& F);
748    Value *lookupNumber(BasicBlock *BB, uint32_t num);
749    void cleanupGlobalSets();
750    void verifyRemoved(const Instruction *I) const;
751    bool splitCriticalEdges();
752  };
753
754  char GVN::ID = 0;
755}
756
757// createGVNPass - The public interface to this file...
758FunctionPass *llvm::createGVNPass(bool NoLoads) {
759  return new GVN(NoLoads);
760}
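// Illustrative usage (editorial addition): the pass is normally scheduled by
// a pass manager, e.g.
//
//   PM.add(llvm::createGVNPass(false));  // loads enabled
//   PM.add(llvm::createGVNPass(true));   // NoLoads: skip load elimination
//
// or from the command line with "opt -gvn".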
761
762INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
763INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
764INITIALIZE_PASS_DEPENDENCY(DominatorTree)
765INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
766INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
767
768void GVN::dump(DenseMap<uint32_t, Value*>& d) {
769  errs() << "{\n";
770  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
771       E = d.end(); I != E; ++I) {
772      errs() << I->first << "\n";
773      I->second->dump();
774  }
775  errs() << "}\n";
776}
777
778/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
779/// we're analyzing is fully available in the specified block.  As we go, keep
780/// track of which blocks we know are fully available in FullyAvailableBlocks.  This
781/// map is actually a four-state map with the following values:
782///   0) we know the block *is not* fully available.
783///   1) we know the block *is* fully available.
784///   2) we do not know whether the block is fully available or not, but we are
785///      currently speculating that it will be.
786///   3) we are speculating for this block and have used that to speculate for
787///      other blocks.
788static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
789                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
790  // Optimistically assume that the block is fully available and check to see
791  // if we already know about this block in one lookup.
792  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
793    FullyAvailableBlocks.insert(std::make_pair(BB, 2));
794
795  // If the entry already existed for this block, return the precomputed value.
796  if (!IV.second) {
797    // If this is a speculative "available" value, mark it as being used for
798    // speculation of other blocks.
799    if (IV.first->second == 2)
800      IV.first->second = 3;
801    return IV.first->second != 0;
802  }
803
804  // Otherwise, see if it is fully available in all predecessors.
805  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
806
807  // If this block has no predecessors, it isn't live-in here.
808  if (PI == PE)
809    goto SpeculationFailure;
810
811  for (; PI != PE; ++PI)
812    // If the value isn't fully available in one of our predecessors, then it
813    // isn't fully available in this block either.  Undo our previous
814    // optimistic assumption and bail out.
815    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
816      goto SpeculationFailure;
817
818  return true;
819
820// SpeculationFailure - If we get here, we found out that this is not, after
821// all, a fully-available block.  We have a problem if we speculated on this and
822// used the speculation to mark other blocks as available.
823SpeculationFailure:
824  char &BBVal = FullyAvailableBlocks[BB];
825
826  // If we didn't speculate on this, just return with it set to false.
827  if (BBVal == 2) {
828    BBVal = 0;
829    return false;
830  }
831
832  // If we did speculate on this value, we could have blocks set to 1 that are
833  // incorrect.  Walk the (transitive) successors of this block and mark them as
834  // 0 if set to one.
835  SmallVector<BasicBlock*, 32> BBWorklist;
836  BBWorklist.push_back(BB);
837
838  do {
839    BasicBlock *Entry = BBWorklist.pop_back_val();
840    // Note that this sets blocks to 0 (unavailable) if they happen to not
841    // already be in FullyAvailableBlocks.  This is safe.
842    char &EntryVal = FullyAvailableBlocks[Entry];
843    if (EntryVal == 0) continue;  // Already unavailable.
844
845    // Mark as unavailable.
846    EntryVal = 0;
847
848    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
849      BBWorklist.push_back(*I);
850  } while (!BBWorklist.empty());
851
852  return false;
853}
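// Illustrative example (editorial addition): for a loop
//
//   Preheader (value available) -> Header <-> Latch
//
// a query on Header first marks it 2 (speculatively available) and recurses
// into its predecessors; re-reaching Header through Latch promotes it to 3
// and optimistically returns true.  If some block reached during the walk
// turns out to be unavailable, SpeculationFailure resets the speculated
// blocks to 0 by walking successors, undoing the optimistic answer.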
854
855
856/// CanCoerceMustAliasedValueToLoad - Return true if
857/// CoerceAvailableValueToLoadType will succeed.
858static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
859                                            const Type *LoadTy,
860                                            const TargetData &TD) {
861  // If the loaded or stored value is a first class array or struct, don't try
862  // to transform it.  We need to be able to bitcast to integer.
863  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
864      StoredVal->getType()->isStructTy() ||
865      StoredVal->getType()->isArrayTy())
866    return false;
867
868  // The store has to be at least as big as the load.
869  if (TD.getTypeSizeInBits(StoredVal->getType()) <
870        TD.getTypeSizeInBits(LoadTy))
871    return false;
872
873  return true;
874}
875
876
877/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
878/// then a load from a must-aliased pointer of a different type, try to coerce
879/// the stored value.  LoadedTy is the type of the load we want to replace and
880/// InsertPt is the place to insert new instructions.
881///
882/// If we can't do it, return null.
883static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
884                                             const Type *LoadedTy,
885                                             Instruction *InsertPt,
886                                             const TargetData &TD) {
887  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
888    return 0;
889
890  const Type *StoredValTy = StoredVal->getType();
891
892  uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
893  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
894
895  // If the store and reload are the same size, we can always reuse it.
896  if (StoreSize == LoadSize) {
897    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
898      // Pointer to Pointer -> use bitcast.
899      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
900    }
901
902    // Convert source pointers to integers, which can be bitcast.
903    if (StoredValTy->isPointerTy()) {
904      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
905      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
906    }
907
908    const Type *TypeToCastTo = LoadedTy;
909    if (TypeToCastTo->isPointerTy())
910      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
911
912    if (StoredValTy != TypeToCastTo)
913      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
914
915    // Cast to pointer if the load needs a pointer type.
916    if (LoadedTy->isPointerTy())
917      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
918
919    return StoredVal;
920  }
921
922  // If the loaded value is smaller than the available value, then we can
923  // extract out a piece from it.  If the available value is too small, then we
924  // can't do anything.
925  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
926
927  // Convert source pointers to integers, which can be manipulated.
928  if (StoredValTy->isPointerTy()) {
929    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
930    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
931  }
932
933  // Convert vectors and fp to integer, which can be manipulated.
934  if (!StoredValTy->isIntegerTy()) {
935    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
936    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
937  }
938
939  // If this is a big-endian system, we need to shift the value down to the low
940  // bits so that a truncate will work.
941  if (TD.isBigEndian()) {
942    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
943    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
944  }
945
946  // Truncate the integer to the right size now.
947  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
948  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
949
950  if (LoadedTy == NewIntTy)
951    return StoredVal;
952
953  // If the result is a pointer, inttoptr.
954  if (LoadedTy->isPointerTy())
955    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
956
957  // Otherwise, bitcast.
958  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
959}
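// Illustrative example (editorial addition): on a 64-bit target, a pointer
// stored through a must-aliased location and reloaded as an i64 is coerced
// with a single cast,
//
//   %t = ptrtoint i8* %stored to i64
//
// while an i64 store reloaded as an i32 takes the "smaller load" path: shift
// right by 32 on big-endian targets, then truncate to i32.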
960
961/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
962/// memdep query of a load that ends up being a clobbering memory write (store,
963/// memset, memcpy, memmove).  This means that the write *may* provide bits used
964/// by the load but we can't be sure because the pointers don't mustalias.
965///
966/// Check this case to see if there is anything more we can do before we give
967/// up.  This returns -1 if we have to give up, or a byte number in the stored
968/// value of the piece that feeds the load.
969static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
970                                          Value *WritePtr,
971                                          uint64_t WriteSizeInBits,
972                                          const TargetData &TD) {
973  // If the loaded or stored value is a first class array or struct, don't try
974  // to transform it.  We need to be able to bitcast to integer.
975  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
976    return -1;
977
978  int64_t StoreOffset = 0, LoadOffset = 0;
979  Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
980  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
981  if (StoreBase != LoadBase)
982    return -1;
983
984  // If the load and store are to the exact same address, they should have been
985  // a must alias.  AA must have gotten confused.
986  // FIXME: Study to see if/when this happens.  One case is forwarding a memset
987  // to a load from the base of the memset.
988#if 0
989  if (LoadOffset == StoreOffset) {
990    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
991    << "Base       = " << *StoreBase << "\n"
992    << "Store Ptr  = " << *WritePtr << "\n"
993    << "Store Offs = " << StoreOffset << "\n"
994    << "Load Ptr   = " << *LoadPtr << "\n";
995    abort();
996  }
997#endif
998
999  // If the load and store don't overlap at all, the store doesn't provide
1000  // anything to the load.  In this case, they really don't alias at all; AA
1001  // must have gotten confused.
1002  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
1003
1004  if ((WriteSizeInBits & 7) | (LoadSize & 7))
1005    return -1;
1006  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
1007  LoadSize >>= 3;
1008
1009
1010  bool isAAFailure = false;
1011  if (StoreOffset < LoadOffset)
1012    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
1013  else
1014    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
1015
1016  if (isAAFailure) {
1017#if 0
1018    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
1019    << "Base       = " << *StoreBase << "\n"
1020    << "Store Ptr  = " << *WritePtr << "\n"
1021    << "Store Offs = " << StoreOffset << "\n"
1022    << "Load Ptr   = " << *LoadPtr << "\n";
1023    abort();
1024#endif
1025    return -1;
1026  }
1027
1028  // If the Load isn't completely contained within the stored bits, we don't
1029  // have all the bits to feed it.  We could do something crazy in the future
1030  // (issue a smaller load then merge the bits in) but this seems unlikely to be
1031  // valuable.
1032  if (StoreOffset > LoadOffset ||
1033      StoreOffset+StoreSize < LoadOffset+LoadSize)
1034    return -1;
1035
1036  // Okay, we can do this transformation.  Return the number of bytes into the
1037  // store that the load is.
1038  return LoadOffset-StoreOffset;
1039}
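// Illustrative example (editorial addition): given
//
//   store i32 %x, i32* %P
//   %v = load i8* %Q            ; where %Q is (i8*)%P plus a constant 2
//
// both pointers share the base %P, so StoreOffset = 0 and LoadOffset = 2; the
// single loaded byte is contained in the four stored bytes and the function
// returns 2.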
1040
1041/// AnalyzeLoadFromClobberingStore - This function is called when we have a
1042/// memdep query of a load that ends up being a clobbering store.
1043static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
1044                                          StoreInst *DepSI,
1045                                          const TargetData &TD) {
1046  // Cannot handle reading from store of first-class aggregate yet.
1047  if (DepSI->getValueOperand()->getType()->isStructTy() ||
1048      DepSI->getValueOperand()->getType()->isArrayTy())
1049    return -1;
1050
1051  Value *StorePtr = DepSI->getPointerOperand();
1052  uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
1053  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
1054                                        StorePtr, StoreSize, TD);
1055}
1056
1057static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
1058                                            MemIntrinsic *MI,
1059                                            const TargetData &TD) {
1060  // If the mem operation is a non-constant size, we can't handle it.
1061  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
1062  if (SizeCst == 0) return -1;
1063  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
1064
1065  // If this is memset, we just need to see if the offset is valid in the size
1066  // of the memset.
1067  if (MI->getIntrinsicID() == Intrinsic::memset)
1068    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
1069                                          MemSizeInBits, TD);
1070
1071  // If we have a memcpy/memmove, the only case we can handle is if this is a
1072  // copy from constant memory.  In that case, we can read directly from the
1073  // constant memory.
1074  MemTransferInst *MTI = cast<MemTransferInst>(MI);
1075
1076  Constant *Src = dyn_cast<Constant>(MTI->getSource());
1077  if (Src == 0) return -1;
1078
1079  GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
1080  if (GV == 0 || !GV->isConstant()) return -1;
1081
1082  // See if the access is within the bounds of the transfer.
1083  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
1084                                              MI->getDest(), MemSizeInBits, TD);
1085  if (Offset == -1)
1086    return Offset;
1087
1088  // Otherwise, see if we can constant fold a load from the constant with the
1089  // offset applied as appropriate.
1090  Src = ConstantExpr::getBitCast(Src,
1091                                 llvm::Type::getInt8PtrTy(Src->getContext()));
1092  Constant *OffsetCst =
1093    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
1094  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
1095  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
1096  if (ConstantFoldLoadFromConstPtr(Src, &TD))
1097    return Offset;
1098  return -1;
1099}
1100
1101
1102/// GetStoreValueForLoad - This function is called when we have a
1103/// memdep query of a load that ends up being a clobbering store.  This means
1104/// that the store *may* provide bits used by the load but we can't be sure
1105/// because the pointers don't mustalias.  Check this case to see if there is
1106/// anything more we can do before we give up.
1107static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
1108                                   const Type *LoadTy,
1109                                   Instruction *InsertPt, const TargetData &TD){
1110  LLVMContext &Ctx = SrcVal->getType()->getContext();
1111
1112  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
1113  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
1114
1115  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
1116
1117  // Compute which bits of the stored value are being used by the load.  Convert
1118  // to an integer type to start with.
1119  if (SrcVal->getType()->isPointerTy())
1120    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
1121  if (!SrcVal->getType()->isIntegerTy())
1122    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
1123                                   "tmp");
1124
1125  // Shift the bits to the least significant depending on endianness.
1126  unsigned ShiftAmt;
1127  if (TD.isLittleEndian())
1128    ShiftAmt = Offset*8;
1129  else
1130    ShiftAmt = (StoreSize-LoadSize-Offset)*8;
1131
1132  if (ShiftAmt)
1133    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");
1134
1135  if (LoadSize != StoreSize)
1136    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
1137                                 "tmp");
1138
1139  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
1140}
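// Illustrative example (editorial addition): for a 4-byte i32 store of %x
// feeding a 1-byte load at byte offset 2 on a little-endian target, the
// emitted code is roughly
//
//   %s = lshr i32 %x, 16
//   %b = trunc i32 %s to i8
//
// and %b is what CoerceAvailableValueToLoadType finally hands back.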
1141
1142/// GetMemInstValueForLoad - This function is called when we have a
1143/// memdep query of a load that ends up being a clobbering mem intrinsic.
1144static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
1145                                     const Type *LoadTy, Instruction *InsertPt,
1146                                     const TargetData &TD){
1147  LLVMContext &Ctx = LoadTy->getContext();
1148  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
1149
1150  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
1151
1152  // We know that this method is only called when the mem transfer fully
1153  // provides the bits for the load.
1154  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
1155    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
1156    // regardless of what the offset is.
1157    Value *Val = MSI->getValue();
1158    if (LoadSize != 1)
1159      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
1160
1161    Value *OneElt = Val;
1162
1163    // Splat the value out to the right number of bits.
1164    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
1165      // If we can double the number of bytes set, do it.
1166      if (NumBytesSet*2 <= LoadSize) {
1167        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
1168        Val = Builder.CreateOr(Val, ShVal);
1169        NumBytesSet <<= 1;
1170        continue;
1171      }
1172
1173      // Otherwise insert one byte at a time.
1174      Value *ShVal = Builder.CreateShl(Val, 1*8);
1175      Val = Builder.CreateOr(OneElt, ShVal);
1176      ++NumBytesSet;
1177    }
1178
1179    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
1180  }
1181
1182  // Otherwise, this is a memcpy/memmove from a constant global.
1183  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
1184  Constant *Src = cast<Constant>(MTI->getSource());
1185
1186  // Otherwise, see if we can constant fold a load from the constant with the
1187  // offset applied as appropriate.
1188  Src = ConstantExpr::getBitCast(Src,
1189                                 llvm::Type::getInt8PtrTy(Src->getContext()));
1190  Constant *OffsetCst =
1191  ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
1192  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
1193  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
1194  return ConstantFoldLoadFromConstPtr(Src, &TD);
1195}
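// Illustrative example (editorial addition): forwarding a memset to a wider
// load, e.g.
//
//   call void @llvm.memset...(i8* %P, i8 %c, i64 16, ...)
//   %v = load i32* %P
//
// zero-extends %c to i32 and ors together byte-shifted copies, producing the
// splat %c * 0x01010101, which then stands in for the load.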
1196
1197namespace {
1198
1199struct AvailableValueInBlock {
1200  /// BB - The basic block in question.
1201  BasicBlock *BB;
1202  enum ValType {
1203    SimpleVal,  // A simple offsetted value that is accessed.
1204    MemIntrin   // A memory intrinsic which is loaded from.
1205  };
1206
1207  /// V - The value that is live out of the block.
1208  PointerIntPair<Value *, 1, ValType> Val;
1209
1210  /// Offset - The byte offset in Val that is interesting for the load query.
1211  unsigned Offset;
1212
1213  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
1214                                   unsigned Offset = 0) {
1215    AvailableValueInBlock Res;
1216    Res.BB = BB;
1217    Res.Val.setPointer(V);
1218    Res.Val.setInt(SimpleVal);
1219    Res.Offset = Offset;
1220    return Res;
1221  }
1222
1223  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
1224                                     unsigned Offset = 0) {
1225    AvailableValueInBlock Res;
1226    Res.BB = BB;
1227    Res.Val.setPointer(MI);
1228    Res.Val.setInt(MemIntrin);
1229    Res.Offset = Offset;
1230    return Res;
1231  }
1232
1233  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
1234  Value *getSimpleValue() const {
1235    assert(isSimpleValue() && "Wrong accessor");
1236    return Val.getPointer();
1237  }
1238
1239  MemIntrinsic *getMemIntrinValue() const {
1240    assert(!isSimpleValue() && "Wrong accessor");
1241    return cast<MemIntrinsic>(Val.getPointer());
1242  }
1243
1244  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
1245  /// defined here to the specified type.  This handles various coercion cases.
1246  Value *MaterializeAdjustedValue(const Type *LoadTy,
1247                                  const TargetData *TD) const {
1248    Value *Res;
1249    if (isSimpleValue()) {
1250      Res = getSimpleValue();
1251      if (Res->getType() != LoadTy) {
1252        assert(TD && "Need target data to handle type mismatch case");
1253        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
1254                                   *TD);
1255
1256        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
1257                     << *getSimpleValue() << '\n'
1258                     << *Res << '\n' << "\n\n\n");
1259      }
1260    } else {
1261      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
1262                                   LoadTy, BB->getTerminator(), *TD);
1263      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1264                   << "  " << *getMemIntrinValue() << '\n'
1265                   << *Res << '\n' << "\n\n\n");
1266    }
1267    return Res;
1268  }
1269};
1270
1271}
1272
1273/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
1274/// construct SSA form, allowing us to eliminate LI.  This returns the value
1275/// that should be used at LI's definition site.
1276static Value *ConstructSSAForLoadSet(LoadInst *LI,
1277                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
1278                                     const TargetData *TD,
1279                                     const DominatorTree &DT,
1280                                     AliasAnalysis *AA) {
1281  // Check for the fully redundant, dominating load case.  In this case, we can
1282  // just use the dominating value directly.
1283  if (ValuesPerBlock.size() == 1 &&
1284      DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
1285    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);
1286
1287  // Otherwise, we have to construct SSA form.
1288  SmallVector<PHINode*, 8> NewPHIs;
1289  SSAUpdater SSAUpdate(&NewPHIs);
1290  SSAUpdate.Initialize(LI->getType(), LI->getName());
1291
1292  const Type *LoadTy = LI->getType();
1293
1294  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
1295    const AvailableValueInBlock &AV = ValuesPerBlock[i];
1296    BasicBlock *BB = AV.BB;
1297
1298    if (SSAUpdate.HasValueForBlock(BB))
1299      continue;
1300
1301    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
1302  }
1303
1304  // Perform PHI construction.
1305  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
1306
1307  // If new PHI nodes were created, notify alias analysis.
1308  if (V->getType()->isPointerTy())
1309    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
1310      AA->copyValue(LI, NewPHIs[i]);
1311
1312  return V;
1313}
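// Illustrative example (editorial addition): if the load's block has two
// predecessors and a different value is available out of each (say %a and
// %b), SSAUpdater places
//
//   %phi = phi i32 [ %a, %Pred1 ], [ %b, %Pred2 ]
//
// in the load's block, and that PHI is returned to replace the load.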
1314
1315static bool isLifetimeStart(const Instruction *Inst) {
1316  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
1317    return II->getIntrinsicID() == Intrinsic::lifetime_start;
1318  return false;
1319}
1320
1321/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
1322/// non-local by performing PHI construction.
1323bool GVN::processNonLocalLoad(LoadInst *LI,
1324                              SmallVectorImpl<Instruction*> &toErase) {
1325  // Find the non-local dependencies of the load.
1326  SmallVector<NonLocalDepResult, 64> Deps;
1327  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
1328  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
1329  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
1330  //             << Deps.size() << *LI << '\n');
1331
1332  // If we had to process more than one hundred blocks to find the
1333  // dependencies, this load isn't worth worrying about.  Optimizing
1334  // it will be too expensive.
1335  if (Deps.size() > 100)
1336    return false;
1337
1338  // If we had a phi translation failure, we'll have a single entry which is a
1339  // clobber in the current block.  Reject this early.
1340  if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
1341    DEBUG(
1342      dbgs() << "GVN: non-local load ";
1343      WriteAsOperand(dbgs(), LI);
1344      dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
1345    );
1346    return false;
1347  }
1348
1349  // Filter out useless results (non-locals, etc).  Keep track of the blocks
1350  // where we have a value available in repl, also keep track of whether we see
1351  // dependencies that produce an unknown value for the load (such as a call
1352  // that could potentially clobber the load).
1353  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
1354  SmallVector<BasicBlock*, 16> UnavailableBlocks;
1355
1356  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
1357    BasicBlock *DepBB = Deps[i].getBB();
1358    MemDepResult DepInfo = Deps[i].getResult();
1359
1360    if (DepInfo.isClobber()) {
1361      // The address being loaded in this non-local block may not be the same as
1362      // the pointer operand of the load if PHI translation occurs.  Make sure
1363      // to consider the right address.
1364      Value *Address = Deps[i].getAddress();
1365
1366      // If the dependence is to a store that writes to a superset of the bits
1367      // read by the load, we can extract the bits we need for the load from the
1368      // stored value.
1369      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
1370        if (TD && Address) {
1371          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
1372                                                      DepSI, *TD);
1373          if (Offset != -1) {
1374            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1375                                                       DepSI->getValueOperand(),
1376                                                                Offset));
1377            continue;
1378          }
1379        }
1380      }
1381
1382      // If the clobbering value is a memset/memcpy/memmove, see if we can
1383      // forward a value on from it.
1384      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
1385        if (TD && Address) {
1386          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
1387                                                        DepMI, *TD);
1388          if (Offset != -1) {
1389            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
1390                                                                  Offset));
1391            continue;
1392          }
1393        }
1394      }
1395
1396      UnavailableBlocks.push_back(DepBB);
1397      continue;
1398    }
1399
1400    Instruction *DepInst = DepInfo.getInst();
1401
1402    // Loading the allocation -> undef.
1403    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
1404        // Loading immediately after lifetime begin -> undef.
1405        isLifetimeStart(DepInst)) {
1406      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1407                                             UndefValue::get(LI->getType())));
1408      continue;
1409    }
1410
1411    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
1412      // Reject loads and stores that are to the same address but are of
1413      // different types if we have to.
1414      if (S->getValueOperand()->getType() != LI->getType()) {
1415        // If the stored value is larger than or equal to the loaded value, we can
1416        // reuse it.
1417        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
1418                                                        LI->getType(), *TD)) {
1419          UnavailableBlocks.push_back(DepBB);
1420          continue;
1421        }
1422      }
1423
1424      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1425                                                         S->getValueOperand()));
1426      continue;
1427    }
1428
1429    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
1430      // If the types mismatch and we can't handle it, reject reuse of the load.
1431      if (LD->getType() != LI->getType()) {
1432        // If the previously loaded value is at least as large as this load, we
1433        // can reuse it.
1434        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
1435          UnavailableBlocks.push_back(DepBB);
1436          continue;
1437        }
1438      }
1439      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
1440      continue;
1441    }
1442
1443    UnavailableBlocks.push_back(DepBB);
1444    continue;
1445  }
1446
1447  // If we have no predecessors that produce a known value for this load, exit
1448  // early.
1449  if (ValuesPerBlock.empty()) return false;
1450
1451  // If all of the instructions we depend on produce a known value for this
1452  // load, then it is fully redundant and we can use PHI insertion to compute
1453  // its value.  Insert PHIs and remove the fully redundant value now.
1454  if (UnavailableBlocks.empty()) {
1455    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
1456
1457    // Perform PHI construction.
1458    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
1459                                      VN.getAliasAnalysis());
1460    LI->replaceAllUsesWith(V);
1461
1462    if (isa<PHINode>(V))
1463      V->takeName(LI);
1464    if (V->getType()->isPointerTy())
1465      MD->invalidateCachedPointerInfo(V);
1466    VN.erase(LI);
1467    toErase.push_back(LI);
1468    ++NumGVNLoad;
1469    return true;
1470  }
1471
1472  if (!EnablePRE || !EnableLoadPRE)
1473    return false;
1474
1475  // Okay, we have *some* definitions of the value.  This means that the value
1476  // is available in some of our (transitive) predecessors.  Let's think about
1477  // doing PRE of this load.  This will involve inserting a new load into the
1478  // predecessor when it's not available.  We could do this in general, but
1479  // prefer to not increase code size.  As such, we only do this when we know
1480  // that we only have to insert *one* load (which means we're basically moving
1481  // the load, not inserting a new one).
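  //
  // An illustrative sketch of the single-insertion case (hypothetical IR, not
  // taken from a real test case):
  //
  //   BB1:                            BB2:
  //     store i32 7, i32* %P            ... (no access to %P)
  //     br label %Merge                 br label %Merge
  //   Merge:
  //     %v = load i32* %P
  //
  // The load is available from BB1 but not from BB2, so load PRE inserts
  // "%v.pre = load i32* %P" at the end of BB2 and replaces %v with a phi of
  // the forwarded store value (i32 7) and %v.pre.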
1482
1483  SmallPtrSet<BasicBlock *, 4> Blockers;
1484  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
1485    Blockers.insert(UnavailableBlocks[i]);
1486
1487  // Let's find the first basic block with more than one predecessor.  Walk
1488  // backwards through predecessors if needed.
1489  BasicBlock *LoadBB = LI->getParent();
1490  BasicBlock *TmpBB = LoadBB;
1491
1492  bool isSinglePred = false;
1493  bool allSingleSucc = true;
1494  while (TmpBB->getSinglePredecessor()) {
1495    isSinglePred = true;
1496    TmpBB = TmpBB->getSinglePredecessor();
1497    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1498      return false;
1499    if (Blockers.count(TmpBB))
1500      return false;
1501
1502    // If any of these blocks has more than one successor (i.e. if the edge we
1503    // just traversed was critical), then there are other paths through this
1504    // block along which the load may not be anticipated.  Hoisting the load
1505    // above this block would be adding the load to execution paths along
1506    // which it was not previously executed.
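    //
    // A hypothetical sketch of the shape being rejected:
    //
    //          TmpBB
    //          /    \
    //     LoadBB    OtherBB
    //
    // The load originally runs only on the TmpBB -> LoadBB path; hoisting it
    // into TmpBB would also execute it on the TmpBB -> OtherBB path.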
1507    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1508      return false;
1509  }
1510
1511  assert(TmpBB);
1512  LoadBB = TmpBB;
1513
1514  // FIXME: It is extremely unclear what this loop is doing, other than
1515  // artificially restricting loadpre.
1516  if (isSinglePred) {
1517    bool isHot = false;
1518    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
1519      const AvailableValueInBlock &AV = ValuesPerBlock[i];
1520      if (AV.isSimpleValue())
1521        // The load is "hot" (it sits in some loop) because it dominates the
1522        // instruction that produces the value it depends on.
1523        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
1524          if (DT->dominates(LI, I)) {
1525            isHot = true;
1526            break;
1527          }
1528    }
1529
1530    // We are only interested in "hot" instructions; otherwise we risk
1531    // pessimizing the code here.
1532    if (!isHot)
1533      return false;
1534  }
1535
1536  // Check to see how many predecessors have the loaded value fully
1537  // available.
1538  DenseMap<BasicBlock*, Value*> PredLoads;
1539  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
1540  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
1541    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
1542  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
1543    FullyAvailableBlocks[UnavailableBlocks[i]] = false;
1544
1545  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
1546  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
1547       PI != E; ++PI) {
1548    BasicBlock *Pred = *PI;
1549    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
1550      continue;
1551    }
1552    PredLoads[Pred] = 0;
1553
1554    if (Pred->getTerminator()->getNumSuccessors() != 1) {
1555      if (isa<IndirectBrInst>(Pred->getTerminator())) {
1556        DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1557              << Pred->getName() << "': " << *LI << '\n');
1558        return false;
1559      }
1560      unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
1561      NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
1562    }
1563  }
1564  if (!NeedToSplit.empty()) {
1565    toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
1566    return false;
1567  }
1568
1569  // Decide whether PRE is profitable for this load.
1570  unsigned NumUnavailablePreds = PredLoads.size();
1571  assert(NumUnavailablePreds != 0 &&
1572         "Fully available value should be eliminated above!");
1573
1574  // If this load is unavailable in multiple predecessors, reject it.
1575  // FIXME: If we could restructure the CFG, we could make a common pred with
1576  // all the preds that don't have an available LI and insert a new load into
1577  // that one block.
1578  if (NumUnavailablePreds != 1)
1579      return false;
1580
1581  // Check if the load can safely be moved to all the unavailable predecessors.
1582  bool CanDoPRE = true;
1583  SmallVector<Instruction*, 8> NewInsts;
1584  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
1585         E = PredLoads.end(); I != E; ++I) {
1586    BasicBlock *UnavailablePred = I->first;
1587
1588    // Do PHI translation to get its value in the predecessor if necessary.  The
1589    // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1590
1591    // If all preds have a single successor, then we know it is safe to insert
1592    // the load on the pred (?!?), so we can insert code to materialize the
1593    // pointer if it is not available.
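    //
    // For example (hypothetical IR): if LoadBB starts with
    //   %p = phi i8* [ %a, %Pred1 ], [ %b, %Pred2 ]
    //   %v = load i8* %p
    // then the address %p PHI-translates to %a in %Pred1 and to %b in %Pred2,
    // and the reload inserted below uses the translated pointer in the
    // unavailable predecessor.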
1594    PHITransAddr Address(LI->getPointerOperand(), TD);
1595    Value *LoadPtr = 0;
1596    if (allSingleSucc) {
1597      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
1598                                                  *DT, NewInsts);
1599    } else {
1600      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
1601      LoadPtr = Address.getAddr();
1602    }
1603
1604    // If we couldn't find or insert a computation of this phi translated value,
1605    // we fail PRE.
1606    if (LoadPtr == 0) {
1607      DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1608            << *LI->getPointerOperand() << "\n");
1609      CanDoPRE = false;
1610      break;
1611    }
1612
1613    // Make sure it is valid to move this load here.  We have to watch out for:
1614    //  @1 = getelementptr (i8* p, ...
1615    //  test p and branch if == 0
1616    //  load @1
1617    // It is valid to have the getelementptr before the test, even if p can be 0,
1618    // as getelementptr only does address arithmetic.
1619    // If we are not pushing the value through any multiple-successor blocks
1620    // we do not have this case.  Otherwise, check that the load is safe to
1621    // put anywhere; this can be improved, but should be conservatively safe.
1622    if (!allSingleSucc &&
1623        // FIXME: REEVALUATE THIS.
1624        !isSafeToLoadUnconditionally(LoadPtr,
1625                                     UnavailablePred->getTerminator(),
1626                                     LI->getAlignment(), TD)) {
1627      CanDoPRE = false;
1628      break;
1629    }
1630
1631    I->second = LoadPtr;
1632  }
1633
1634  if (!CanDoPRE) {
1635    while (!NewInsts.empty())
1636      NewInsts.pop_back_val()->eraseFromParent();
1637    return false;
1638  }
1639
1640  // Okay, we can eliminate this load by inserting a reload in the predecessor
1641  // and using PHI construction to get the value in the other predecessors, do
1642  // it.
1643  DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
1644  DEBUG(if (!NewInsts.empty())
1645          dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
1646                 << *NewInsts.back() << '\n');
1647
1648  // Assign value numbers to the new instructions.
1649  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
1650    // FIXME: We really _ought_ to insert these value numbers into their
1651    // parent's availability map.  However, in doing so, we risk getting into
1652    // ordering issues.  If a block hasn't been processed yet, we would be
1653    // marking a value as AVAIL-IN, which isn't what we intend.
1654    VN.lookup_or_add(NewInsts[i]);
1655  }
1656
1657  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
1658         E = PredLoads.end(); I != E; ++I) {
1659    BasicBlock *UnavailablePred = I->first;
1660    Value *LoadPtr = I->second;
1661
1662    Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
1663                                  LI->getAlignment(),
1664                                  UnavailablePred->getTerminator());
1665
1666    // Add the newly created load.
1667    ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
1668                                                        NewLoad));
1669    MD->invalidateCachedPointerInfo(LoadPtr);
1670    DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1671  }
1672
1673  // Perform PHI construction.
1674  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
1675                                    VN.getAliasAnalysis());
1676  LI->replaceAllUsesWith(V);
1677  if (isa<PHINode>(V))
1678    V->takeName(LI);
1679  if (V->getType()->isPointerTy())
1680    MD->invalidateCachedPointerInfo(V);
1681  VN.erase(LI);
1682  toErase.push_back(LI);
1683  ++NumPRELoad;
1684  return true;
1685}
1686
1687/// processLoad - Attempt to eliminate a load, first by trying local
1688/// elimination, and then attempting non-local elimination if that fails.
1689bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
1690  if (!MD)
1691    return false;
1692
1693  if (L->isVolatile())
1694    return false;
1695
1696  // Find the local memory dependency of this load.
1697  MemDepResult Dep = MD->getDependency(L);
1698
1699  // If the value isn't available, don't do anything!
1700  if (Dep.isClobber()) {
1701    // Check to see if we have something like this:
1702    //   store i32 123, i32* %P
1703    //   %A = bitcast i32* %P to i8*
1704    //   %B = gep i8* %A, i32 1
1705    //   %C = load i8* %B
1706    //
1707    // We can forward the value in cases like this by recognizing that the
1708    // clobbering access and the load share an obvious common base plus a
1709    // constant offset, and that the previous store (or memset) completely
1710    // covers this load.  This sort of thing can happen in bitfield access code.
1711    Value *AvailVal = 0;
1712    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
1713      if (TD) {
1714        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
1715                                                    L->getPointerOperand(),
1716                                                    DepSI, *TD);
1717        if (Offset != -1)
1718          AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
1719                                          L->getType(), L, *TD);
1720      }
1721
1722    // If the clobbering value is a memset/memcpy/memmove, see if we can forward
1723    // a value on from it.
1724    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
1725      if (TD) {
1726        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
1727                                                      L->getPointerOperand(),
1728                                                      DepMI, *TD);
1729        if (Offset != -1)
1730          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
1731      }
1732    }
1733
1734    if (AvailVal) {
1735      DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
1736            << *AvailVal << '\n' << *L << "\n\n\n");
1737
1738      // Replace the load!
1739      L->replaceAllUsesWith(AvailVal);
1740      if (AvailVal->getType()->isPointerTy())
1741        MD->invalidateCachedPointerInfo(AvailVal);
1742      VN.erase(L);
1743      toErase.push_back(L);
1744      ++NumGVNLoad;
1745      return true;
1746    }
1747
1748    DEBUG(
1749      // fast print dep, using operator<< on instruction would be too slow
1750      dbgs() << "GVN: load ";
1751      WriteAsOperand(dbgs(), L);
1752      Instruction *I = Dep.getInst();
1753      dbgs() << " is clobbered by " << *I << '\n';
1754    );
1755    return false;
1756  }
1757
1758  // If it is defined in another block, try harder.
1759  if (Dep.isNonLocal())
1760    return processNonLocalLoad(L, toErase);
1761
1762  Instruction *DepInst = Dep.getInst();
1763  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
1764    Value *StoredVal = DepSI->getValueOperand();
1765
1766    // The store and load are to a must-aliased pointer, but they may not
1767    // actually have the same type.  See if we know how to reuse the stored
1768    // value (depending on its type).
1769    if (StoredVal->getType() != L->getType()) {
1770      if (TD) {
1771        StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
1772                                                   L, *TD);
1773        if (StoredVal == 0)
1774          return false;
1775
1776        DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
1777                     << '\n' << *L << "\n\n\n");
1778      }
1779      else
1780        return false;
1781    }
1782
1783    // Remove it!
1784    L->replaceAllUsesWith(StoredVal);
1785    if (StoredVal->getType()->isPointerTy())
1786      MD->invalidateCachedPointerInfo(StoredVal);
1787    VN.erase(L);
1788    toErase.push_back(L);
1789    ++NumGVNLoad;
1790    return true;
1791  }
1792
1793  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
1794    Value *AvailableVal = DepLI;
1795
1796    // The loads are of a must-aliased pointer, but they may not actually have
1797    // the same type.  See if we know how to reuse the previously loaded value
1798    // (depending on its type).
1799    if (DepLI->getType() != L->getType()) {
1800      if (TD) {
1801        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
1802        if (AvailableVal == 0)
1803          return false;
1804
1805        DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
1806                     << "\n" << *L << "\n\n\n");
1807      }
1808      else
1809        return false;
1810    }
1811
1812    // Remove it!
1813    L->replaceAllUsesWith(AvailableVal);
1814    if (DepLI->getType()->isPointerTy())
1815      MD->invalidateCachedPointerInfo(DepLI);
1816    VN.erase(L);
1817    toErase.push_back(L);
1818    ++NumGVNLoad;
1819    return true;
1820  }
1821
1822  // If this load really doesn't depend on anything, then we must be loading an
1823  // undef value.  This can happen when loading from a fresh allocation with no
1824  // intervening stores, for example.
1825  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
1826    L->replaceAllUsesWith(UndefValue::get(L->getType()));
1827    VN.erase(L);
1828    toErase.push_back(L);
1829    ++NumGVNLoad;
1830    return true;
1831  }
1832
1833  // If this load occurs right after a lifetime.start intrinsic,
1834  // then the loaded value is undefined.
1835  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
1836    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1837      L->replaceAllUsesWith(UndefValue::get(L->getType()));
1838      VN.erase(L);
1839      toErase.push_back(L);
1840      ++NumGVNLoad;
1841      return true;
1842    }
1843  }
1844
1845  return false;
1846}
1847
1848// lookupNumber - In order to find a leader for a given value number at a
1849// specific basic block, we first obtain the list of all Values for that number,
1850// and then scan the list to find one whose block dominates the block in
1851// question.  This is fast because dominator tree queries consist of only
1852// a few comparisons of DFS numbers.
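//
// Conceptually (a sketch of the in-memory layout, not a separate API),
// NumberTable maps a value number to the head of an intrusive singly linked
// list of (Value*, next) pairs allocated from TableAllocator:
//
//   NumberTable[num] --> {V1, next} --> {V2, next} --> ... --> {Vk, null}
//
// The walk below returns the first entry whose defining block dominates BB.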
1853Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
1854  std::pair<Value*, void*> Vals = NumberTable[num];
1855  if (!Vals.first) return 0;
1856  Instruction *Inst = dyn_cast<Instruction>(Vals.first);
1857  if (!Inst) return Vals.first;
1858  BasicBlock *Parent = Inst->getParent();
1859  if (DT->dominates(Parent, BB))
1860    return Inst;
1861
1862  std::pair<Value*, void*>* Next =
1863    static_cast<std::pair<Value*, void*>*>(Vals.second);
1864  while (Next) {
1865    Instruction *CurrInst = dyn_cast<Instruction>(Next->first);
1866    if (!CurrInst) return Next->first;
1867
1868    BasicBlock *Parent = CurrInst->getParent();
1869    if (DT->dominates(Parent, BB))
1870      return CurrInst;
1871
1872    Next = static_cast<std::pair<Value*, void*>*>(Next->second);
1873  }
1874
1875  return 0;
1876}
1877
1878
1879/// processInstruction - When calculating availability, handle an instruction
1880/// by inserting it into the appropriate sets.
1881bool GVN::processInstruction(Instruction *I,
1882                             SmallVectorImpl<Instruction*> &toErase) {
1883  // Ignore dbg info intrinsics.
1884  if (isa<DbgInfoIntrinsic>(I))
1885    return false;
1886
1887  // If the instruction can be easily simplified then do so now in preference
1888  // to value numbering it.  Value numbering often exposes redundancies, for
1889  // example if it determines that %y is equal to %x then the instruction
1890  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
1891  if (Value *V = SimplifyInstruction(I, TD, DT)) {
1892    I->replaceAllUsesWith(V);
1893    if (MD && V->getType()->isPointerTy())
1894      MD->invalidateCachedPointerInfo(V);
1895    VN.erase(I);
1896    toErase.push_back(I);
1897    return true;
1898  }
1899
1900  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1901    bool Changed = processLoad(LI, toErase);
1902
1903    if (!Changed) {
1904      unsigned Num = VN.lookup_or_add(LI);
1905      insert_table(Num, LI);
1906    }
1907
1908    return Changed;
1909  }
1910
1911  uint32_t NextNum = VN.getNextUnusedValueNumber();
1912  unsigned Num = VN.lookup_or_add(I);
1913
1914  // Allocations and terminators are always uniquely numbered, so we can save
1915  // time and memory by failing fast for them.
1916  if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
1917    insert_table(Num, I);
1918    return false;
1919  }
1920
1921  if (isa<PHINode>(I)) {
1922    insert_table(Num, I);
1923
1924  // If the number we were assigned was a brand new VN, then we don't
1925  // need to do a lookup to see if the number already exists
1926  // somewhere in the domtree: it can't!
1927  } else if (Num == NextNum) {
1928    insert_table(Num, I);
1929
1930  // Perform fast-path value-number based elimination of values inherited from
1931  // dominators.
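  //
  // For example (hypothetical IR): if a dominating block already computed
  //   %x = add i32 %a, %b
  // and this block computes
  //   %y = add i32 %a, %b
  // then %y receives the same value number, lookupNumber finds %x, and all
  // uses of %y are replaced with %x.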
1932  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
1933    // Remove it!
1934    VN.erase(I);
1935    I->replaceAllUsesWith(repl);
1936    if (MD && repl->getType()->isPointerTy())
1937      MD->invalidateCachedPointerInfo(repl);
1938    toErase.push_back(I);
1939    return true;
1940
1941  } else {
1942    insert_table(Num, I);
1943  }
1944
1945  return false;
1946}
1947
1948/// runOnFunction - This is the main transformation entry point for a function.
1949bool GVN::runOnFunction(Function& F) {
1950  if (!NoLoads)
1951    MD = &getAnalysis<MemoryDependenceAnalysis>();
1952  DT = &getAnalysis<DominatorTree>();
1953  TD = getAnalysisIfAvailable<TargetData>();
1954  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
1955  VN.setMemDep(MD);
1956  VN.setDomTree(DT);
1957
1958  bool Changed = false;
1959  bool ShouldContinue = true;
1960
1961  // Merge unconditional branches, allowing PRE to catch more
1962  // optimization opportunities.
1963  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
1964    BasicBlock *BB = FI;
1965    ++FI;
1966    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
1967    if (removedBlock) ++NumGVNBlocks;
1968
1969    Changed |= removedBlock;
1970  }
1971
1972  unsigned Iteration = 0;
1973
1974  while (ShouldContinue) {
1975    DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
1976    ShouldContinue = iterateOnFunction(F);
1977    if (splitCriticalEdges())
1978      ShouldContinue = true;
1979    Changed |= ShouldContinue;
1980    ++Iteration;
1981  }
1982
1983  if (EnablePRE) {
1984    bool PREChanged = true;
1985    while (PREChanged) {
1986      PREChanged = performPRE(F);
1987      Changed |= PREChanged;
1988    }
1989  }
1990  // FIXME: Should perform GVN again after PRE does something.  PRE can move
1991  // computations into blocks where they become fully redundant.  Note that
1992  // we can't do this until PRE's critical edge splitting updates memdep.
1993  // Actually, when this happens, we should just fully integrate PRE into GVN.
1994
1995  cleanupGlobalSets();
1996
1997  return Changed;
1998}
1999
2000
2001bool GVN::processBlock(BasicBlock *BB) {
2002  // FIXME: Kill off toErase by erasing eagerly in a helper function (and
2003  // incrementing BI before processing an instruction).
2004  SmallVector<Instruction*, 8> toErase;
2005  bool ChangedFunction = false;
2006
2007  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
2008       BI != BE;) {
2009    ChangedFunction |= processInstruction(BI, toErase);
2010    if (toErase.empty()) {
2011      ++BI;
2012      continue;
2013    }
2014
2015    // If we need some instructions deleted, do it now.
2016    NumGVNInstr += toErase.size();
2017
2018    // Avoid iterator invalidation.
2019    bool AtStart = BI == BB->begin();
2020    if (!AtStart)
2021      --BI;
2022
2023    for (SmallVector<Instruction*, 8>::iterator I = toErase.begin(),
2024         E = toErase.end(); I != E; ++I) {
2025      DEBUG(dbgs() << "GVN removed: " << **I << '\n');
2026      if (MD) MD->removeInstruction(*I);
2027      (*I)->eraseFromParent();
2028      DEBUG(verifyRemoved(*I));
2029    }
2030    toErase.clear();
2031
2032    if (AtStart)
2033      BI = BB->begin();
2034    else
2035      ++BI;
2036  }
2037
2038  return ChangedFunction;
2039}
2040
2041/// performPRE - Perform a purely local form of PRE that looks for diamond
2042/// control flow patterns and attempts to perform simple PRE at the join point.
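//
// An illustrative sketch of the diamond being targeted (hypothetical IR):
//
//          Entry
//          /   \
//       Left   Right
//          \   /
//          Merge
//
// If "%v = add i32 %a, %b" is computed in Left and recomputed in Merge but not
// in Right, PRE clones the add into Right and replaces the computation in
// Merge with a phi over the two predecessor values.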
2043bool GVN::performPRE(Function &F) {
2044  bool Changed = false;
2045  DenseMap<BasicBlock*, Value*> predMap;
2046  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
2047       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
2048    BasicBlock *CurrentBlock = *DI;
2049
2050    // Nothing to PRE in the entry block.
2051    if (CurrentBlock == &F.getEntryBlock()) continue;
2052
2053    for (BasicBlock::iterator BI = CurrentBlock->begin(),
2054         BE = CurrentBlock->end(); BI != BE; ) {
2055      Instruction *CurInst = BI++;
2056
2057      if (isa<AllocaInst>(CurInst) ||
2058          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
2059          CurInst->getType()->isVoidTy() ||
2060          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
2061          isa<DbgInfoIntrinsic>(CurInst))
2062        continue;
2063
2064      // We don't currently value number ANY inline asm calls.
2065      if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
2066        if (CallI->isInlineAsm())
2067          continue;
2068
2069      uint32_t ValNo = VN.lookup(CurInst);
2070
2071      // Look for the predecessors for PRE opportunities.  We're
2072      // only trying to solve the basic diamond case, where
2073      // a value is computed in the successor and one predecessor,
2074      // but not the other.  We also explicitly disallow cases
2075      // where the successor is its own predecessor, because they're
2076      // more complicated to get right.
2077      unsigned NumWith = 0;
2078      unsigned NumWithout = 0;
2079      BasicBlock *PREPred = 0;
2080      predMap.clear();
2081
2082      for (pred_iterator PI = pred_begin(CurrentBlock),
2083           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
2084        BasicBlock *P = *PI;
2085        // We're not interested in PRE where the block is its
2086        // own predecessor, or in blocks with predecessors
2087        // that are not reachable.
2088        if (P == CurrentBlock) {
2089          NumWithout = 2;
2090          break;
2091        } else if (!DT->dominates(&F.getEntryBlock(), P))  {
2092          NumWithout = 2;
2093          break;
2094        }
2095
2096        Value* predV = lookupNumber(P, ValNo);
2097        if (predV == 0) {
2098          PREPred = P;
2099          ++NumWithout;
2100        } else if (predV == CurInst) {
2101          NumWithout = 2;
2102        } else {
2103          predMap[P] = predV;
2104          ++NumWith;
2105        }
2106      }
2107
2108      // Don't do PRE when it might increase code size, i.e. when
2109      // we would need to insert instructions in more than one pred.
2110      if (NumWithout != 1 || NumWith == 0)
2111        continue;
2112
2113      // Don't do PRE across an indirect branch.
2114      if (isa<IndirectBrInst>(PREPred->getTerminator()))
2115        continue;
2116
2117      // We can't do PRE safely on a critical edge, so instead we schedule
2118      // the edge to be split and perform the PRE the next time we iterate
2119      // on the function.
2120      unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
2121      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
2122        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
2123        continue;
2124      }
2125
2126      // Instantiate the expression in the predecessor that lacked it.
2127      // Because we are going top-down through the block, all value numbers
2128      // will be available in the predecessor by the time we need them.  Any
2129      // that weren't originally present will have been instantiated earlier
2130      // in this loop.
2131      Instruction *PREInstr = CurInst->clone();
2132      bool success = true;
2133      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
2134        Value *Op = PREInstr->getOperand(i);
2135        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
2136          continue;
2137
2138        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
2139          PREInstr->setOperand(i, V);
2140        } else {
2141          success = false;
2142          break;
2143        }
2144      }
2145
2146      // Fail out if we encounter an operand that is not available in
2147      // the PRE predecessor.  This is typically because of loads which
2148      // are not value numbered precisely.
2149      if (!success) {
2150        delete PREInstr;
2151        DEBUG(verifyRemoved(PREInstr));
2152        continue;
2153      }
2154
2155      PREInstr->insertBefore(PREPred->getTerminator());
2156      PREInstr->setName(CurInst->getName() + ".pre");
2157      predMap[PREPred] = PREInstr;
2158      VN.add(PREInstr, ValNo);
2159      ++NumGVNPRE;
2160
2161      // Update the availability map to include the new instruction.
2162      insert_table(ValNo, PREInstr);
2163
2164      // Create a PHI to make the value available in this block.
2165      PHINode* Phi = PHINode::Create(CurInst->getType(),
2166                                     CurInst->getName() + ".pre-phi",
2167                                     CurrentBlock->begin());
2168      for (pred_iterator PI = pred_begin(CurrentBlock),
2169           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
2170        BasicBlock *P = *PI;
2171        Phi->addIncoming(predMap[P], P);
2172      }
2173
2174      VN.add(Phi, ValNo);
2175      insert_table(ValNo, Phi);
2176
2177      CurInst->replaceAllUsesWith(Phi);
2178      if (MD && Phi->getType()->isPointerTy())
2179        MD->invalidateCachedPointerInfo(Phi);
2180      VN.erase(CurInst);
2181      erase_table(ValNo, CurInst);
2182
2183      DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
2184      if (MD) MD->removeInstruction(CurInst);
2185      CurInst->eraseFromParent();
2186      DEBUG(verifyRemoved(CurInst));
2187      Changed = true;
2188    }
2189  }
2190
2191  if (splitCriticalEdges())
2192    Changed = true;
2193
2194  return Changed;
2195}
2196
2197/// splitCriticalEdges - Split critical edges found during the previous
2198/// iteration that may enable further optimization.
2199bool GVN::splitCriticalEdges() {
2200  if (toSplit.empty())
2201    return false;
2202  do {
2203    std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
2204    SplitCriticalEdge(Edge.first, Edge.second, this);
2205  } while (!toSplit.empty());
2206  if (MD) MD->invalidateCachedPredecessors();
2207  return true;
2208}
2209
2210/// iterateOnFunction - Executes one iteration of GVN.
2211bool GVN::iterateOnFunction(Function &F) {
2212  cleanupGlobalSets();
2213
2214  // Top-down walk of the dominator tree
2215  bool Changed = false;
2216#if 0
2217  // Needed for value numbering with phi construction to work.
2218  ReversePostOrderTraversal<Function*> RPOT(&F);
2219  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
2220       RE = RPOT.end(); RI != RE; ++RI)
2221    Changed |= processBlock(*RI);
2222#else
2223  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
2224       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
2225    Changed |= processBlock(DI->getBlock());
2226#endif
2227
2228  return Changed;
2229}
2230
2231void GVN::cleanupGlobalSets() {
2232  VN.clear();
2233  NumberTable.clear();
2234  TableAllocator.Reset();
2235}
2236
2237/// verifyRemoved - Verify that the specified instruction does not occur in our
2238/// internal data structures.
2239void GVN::verifyRemoved(const Instruction *Inst) const {
2240  VN.verifyRemoved(Inst);
2241
2242  // Walk through the value number scope to make sure the instruction isn't
2243  // ferreted away in it.
2244  for (DenseMap<uint32_t, std::pair<Value*, void*> >::const_iterator
2245       I = NumberTable.begin(), E = NumberTable.end(); I != E; ++I) {
2246    std::pair<Value*, void*> const * Node = &I->second;
2247    assert(Node->first != Inst && "Inst still in value numbering scope!");
2248
2249    while (Node->second) {
2250      Node = static_cast<std::pair<Value*, void*>*>(Node->second);
2251      assert(Node->first != Inst && "Inst still in value numbering scope!");
2252    }
2253  }
2254}
2255