GVN.cpp revision cb9cbc494975dc827465af32d806e7b9ae17090c
1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass performs global value numbering to eliminate fully redundant
11// instructions.  It also performs simple dead load elimination.
12//
13// Note that this pass does the value numbering itself; it does not use the
14// ValueNumbering analysis passes.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "gvn"
19#include "llvm/Transforms/Scalar.h"
20#include "llvm/BasicBlock.h"
21#include "llvm/Constants.h"
22#include "llvm/DerivedTypes.h"
23#include "llvm/Function.h"
24#include "llvm/IntrinsicInst.h"
25#include "llvm/LLVMContext.h"
26#include "llvm/Operator.h"
27#include "llvm/Value.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/DepthFirstIterator.h"
30#include "llvm/ADT/PostOrderIterator.h"
31#include "llvm/ADT/SmallPtrSet.h"
32#include "llvm/ADT/SmallVector.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/Analysis/Dominators.h"
35#include "llvm/Analysis/AliasAnalysis.h"
36#include "llvm/Analysis/MemoryBuiltins.h"
37#include "llvm/Analysis/MemoryDependenceAnalysis.h"
38#include "llvm/Support/CFG.h"
39#include "llvm/Support/CommandLine.h"
40#include "llvm/Support/Debug.h"
41#include "llvm/Support/ErrorHandling.h"
42#include "llvm/Support/GetElementPtrTypeIterator.h"
43#include "llvm/Support/IRBuilder.h"
44#include "llvm/Support/raw_ostream.h"
45#include "llvm/Target/TargetData.h"
46#include "llvm/Transforms/Utils/BasicBlockUtils.h"
47#include "llvm/Transforms/Utils/Local.h"
48#include "llvm/Transforms/Utils/SSAUpdater.h"
49#include <cstdio>
50using namespace llvm;
51
52STATISTIC(NumGVNInstr,  "Number of instructions deleted");
53STATISTIC(NumGVNLoad,   "Number of loads deleted");
54STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
55STATISTIC(NumGVNBlocks, "Number of blocks merged");
56STATISTIC(NumPRELoad,   "Number of loads PRE'd");
57
58static cl::opt<bool> EnablePRE("enable-pre",
59                               cl::init(true), cl::Hidden);
60static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
61
62//===----------------------------------------------------------------------===//
63//                         ValueTable Class
64//===----------------------------------------------------------------------===//
65
66/// This class holds the mapping between values and value numbers.  It is used
67/// as an efficient mechanism to determine the expression-wise equivalence of
68/// two values.
69namespace {
70  struct Expression {
71    enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,
72                            UDIV, SDIV, FDIV, UREM, SREM,
73                            FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
74                            ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
75                            ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
76                            FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
77                            FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
78                            FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
79                            SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
80                            FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
81                            PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT,
82                            INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
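    // Note: EMPTY and TOMBSTONE never correspond to real instructions; they
    // exist only so that Expression can be used as a DenseMap key (see the
    // DenseMapInfo<Expression> specialization below).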
83
84    ExpressionOpcode opcode;
85    const Type* type;
86    SmallVector<uint32_t, 4> varargs;
87    Value *function;
88
89    Expression() { }
90    Expression(ExpressionOpcode o) : opcode(o) { }
91
92    bool operator==(const Expression &other) const {
93      if (opcode != other.opcode)
94        return false;
95      else if (opcode == EMPTY || opcode == TOMBSTONE)
96        return true;
97      else if (type != other.type)
98        return false;
99      else if (function != other.function)
100        return false;
101      else {
102        if (varargs.size() != other.varargs.size())
103          return false;
104
105        for (size_t i = 0; i < varargs.size(); ++i)
106          if (varargs[i] != other.varargs[i])
107            return false;
108
109        return true;
110      }
111    }
112
113    bool operator!=(const Expression &other) const {
114      return !(*this == other);
115    }
116  };
117
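  /// ValueTable - Assigns a small integer number to each value.  Two
  /// instructions receive the same number when they reduce to the same
  /// Expression (same opcode, type, and operand value numbers); for example,
  /// two identical "add i32 %a, %b" instructions in different blocks share a
  /// number.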
118  class ValueTable {
119    private:
120      DenseMap<Value*, uint32_t> valueNumbering;
121      DenseMap<Expression, uint32_t> expressionNumbering;
122      AliasAnalysis* AA;
123      MemoryDependenceAnalysis* MD;
124      DominatorTree* DT;
125
126      uint32_t nextValueNumber;
127
128      Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);
129      Expression::ExpressionOpcode getOpcode(CmpInst* C);
130      Expression::ExpressionOpcode getOpcode(CastInst* C);
131      Expression create_expression(BinaryOperator* BO);
132      Expression create_expression(CmpInst* C);
133      Expression create_expression(ShuffleVectorInst* V);
134      Expression create_expression(ExtractElementInst* C);
135      Expression create_expression(InsertElementInst* V);
136      Expression create_expression(SelectInst* V);
137      Expression create_expression(CastInst* C);
138      Expression create_expression(GetElementPtrInst* G);
139      Expression create_expression(CallInst* C);
140      Expression create_expression(Constant* C);
141      Expression create_expression(ExtractValueInst* C);
142      Expression create_expression(InsertValueInst* C);
143
144      uint32_t lookup_or_add_call(CallInst* C);
145    public:
146      ValueTable() : nextValueNumber(1) { }
147      uint32_t lookup_or_add(Value *V);
148      uint32_t lookup(Value *V) const;
149      void add(Value *V, uint32_t num);
150      void clear();
151      void erase(Value *v);
152      unsigned size();
153      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
154      AliasAnalysis *getAliasAnalysis() const { return AA; }
155      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
156      void setDomTree(DominatorTree* D) { DT = D; }
157      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
158      void verifyRemoved(const Value *) const;
159  };
160}
161
162namespace llvm {
163template <> struct DenseMapInfo<Expression> {
164  static inline Expression getEmptyKey() {
165    return Expression(Expression::EMPTY);
166  }
167
168  static inline Expression getTombstoneKey() {
169    return Expression(Expression::TOMBSTONE);
170  }
171
172  static unsigned getHashValue(const Expression e) {
173    unsigned hash = e.opcode;
174
175    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
176            (unsigned)((uintptr_t)e.type >> 9));
177
178    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
179         E = e.varargs.end(); I != E; ++I)
180      hash = *I + hash * 37;
181
182    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
183            (unsigned)((uintptr_t)e.function >> 9)) +
184           hash * 37;
185
186    return hash;
187  }
188  static bool isEqual(const Expression &LHS, const Expression &RHS) {
189    return LHS == RHS;
190  }
191  static bool isPod() { return true; }
192};
193}
194
195//===----------------------------------------------------------------------===//
196//                     ValueTable Internal Functions
197//===----------------------------------------------------------------------===//
198Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
199  switch(BO->getOpcode()) {
200  default: // THIS SHOULD NEVER HAPPEN
201    llvm_unreachable("Binary operator with unknown opcode?");
202  case Instruction::Add:  return Expression::ADD;
203  case Instruction::FAdd: return Expression::FADD;
204  case Instruction::Sub:  return Expression::SUB;
205  case Instruction::FSub: return Expression::FSUB;
206  case Instruction::Mul:  return Expression::MUL;
207  case Instruction::FMul: return Expression::FMUL;
208  case Instruction::UDiv: return Expression::UDIV;
209  case Instruction::SDiv: return Expression::SDIV;
210  case Instruction::FDiv: return Expression::FDIV;
211  case Instruction::URem: return Expression::UREM;
212  case Instruction::SRem: return Expression::SREM;
213  case Instruction::FRem: return Expression::FREM;
214  case Instruction::Shl:  return Expression::SHL;
215  case Instruction::LShr: return Expression::LSHR;
216  case Instruction::AShr: return Expression::ASHR;
217  case Instruction::And:  return Expression::AND;
218  case Instruction::Or:   return Expression::OR;
219  case Instruction::Xor:  return Expression::XOR;
220  }
221}
222
223Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
224  if (isa<ICmpInst>(C)) {
225    switch (C->getPredicate()) {
226    default:  // THIS SHOULD NEVER HAPPEN
227      llvm_unreachable("Comparison with unknown predicate?");
228    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
229    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
230    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
231    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
232    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
233    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
234    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
235    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
236    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
237    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
238    }
239  } else {
240    switch (C->getPredicate()) {
241    default: // THIS SHOULD NEVER HAPPEN
242      llvm_unreachable("Comparison with unknown predicate?");
243    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
244    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
245    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
246    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
247    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
248    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
249    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
250    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
251    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
252    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
253    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
254    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
255    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
256    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
257    }
258  }
259}
260
261Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
262  switch(C->getOpcode()) {
263  default: // THIS SHOULD NEVER HAPPEN
264    llvm_unreachable("Cast operator with unknown opcode?");
265  case Instruction::Trunc:    return Expression::TRUNC;
266  case Instruction::ZExt:     return Expression::ZEXT;
267  case Instruction::SExt:     return Expression::SEXT;
268  case Instruction::FPToUI:   return Expression::FPTOUI;
269  case Instruction::FPToSI:   return Expression::FPTOSI;
270  case Instruction::UIToFP:   return Expression::UITOFP;
271  case Instruction::SIToFP:   return Expression::SITOFP;
272  case Instruction::FPTrunc:  return Expression::FPTRUNC;
273  case Instruction::FPExt:    return Expression::FPEXT;
274  case Instruction::PtrToInt: return Expression::PTRTOINT;
275  case Instruction::IntToPtr: return Expression::INTTOPTR;
276  case Instruction::BitCast:  return Expression::BITCAST;
277  }
278}
279
280Expression ValueTable::create_expression(CallInst* C) {
281  Expression e;
282
283  e.type = C->getType();
284  e.function = C->getCalledFunction();
285  e.opcode = Expression::CALL;
286
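  // Operand 0 of the call is the callee here, so start at op_begin()+1 and
  // number only the actual arguments.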
287  for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
288       I != E; ++I)
289    e.varargs.push_back(lookup_or_add(*I));
290
291  return e;
292}
293
294Expression ValueTable::create_expression(BinaryOperator* BO) {
295  Expression e;
296  e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
297  e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
298  e.function = 0;
299  e.type = BO->getType();
300  e.opcode = getOpcode(BO);
301
302  return e;
303}
304
305Expression ValueTable::create_expression(CmpInst* C) {
306  Expression e;
307
308  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
309  e.varargs.push_back(lookup_or_add(C->getOperand(1)));
310  e.function = 0;
311  e.type = C->getType();
312  e.opcode = getOpcode(C);
313
314  return e;
315}
316
317Expression ValueTable::create_expression(CastInst* C) {
318  Expression e;
319
320  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
321  e.function = 0;
322  e.type = C->getType();
323  e.opcode = getOpcode(C);
324
325  return e;
326}
327
328Expression ValueTable::create_expression(ShuffleVectorInst* S) {
329  Expression e;
330
331  e.varargs.push_back(lookup_or_add(S->getOperand(0)));
332  e.varargs.push_back(lookup_or_add(S->getOperand(1)));
333  e.varargs.push_back(lookup_or_add(S->getOperand(2)));
334  e.function = 0;
335  e.type = S->getType();
336  e.opcode = Expression::SHUFFLE;
337
338  return e;
339}
340
341Expression ValueTable::create_expression(ExtractElementInst* E) {
342  Expression e;
343
344  e.varargs.push_back(lookup_or_add(E->getOperand(0)));
345  e.varargs.push_back(lookup_or_add(E->getOperand(1)));
346  e.function = 0;
347  e.type = E->getType();
348  e.opcode = Expression::EXTRACT;
349
350  return e;
351}
352
353Expression ValueTable::create_expression(InsertElementInst* I) {
354  Expression e;
355
356  e.varargs.push_back(lookup_or_add(I->getOperand(0)));
357  e.varargs.push_back(lookup_or_add(I->getOperand(1)));
358  e.varargs.push_back(lookup_or_add(I->getOperand(2)));
359  e.function = 0;
360  e.type = I->getType();
361  e.opcode = Expression::INSERT;
362
363  return e;
364}
365
366Expression ValueTable::create_expression(SelectInst* I) {
367  Expression e;
368
369  e.varargs.push_back(lookup_or_add(I->getCondition()));
370  e.varargs.push_back(lookup_or_add(I->getTrueValue()));
371  e.varargs.push_back(lookup_or_add(I->getFalseValue()));
372  e.function = 0;
373  e.type = I->getType();
374  e.opcode = Expression::SELECT;
375
376  return e;
377}
378
379Expression ValueTable::create_expression(GetElementPtrInst* G) {
380  Expression e;
381
382  e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
383  e.function = 0;
384  e.type = G->getType();
385  e.opcode = Expression::GEP;
386
387  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
388       I != E; ++I)
389    e.varargs.push_back(lookup_or_add(*I));
390
391  return e;
392}
393
394Expression ValueTable::create_expression(ExtractValueInst* E) {
395  Expression e;
396
397  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
398  for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
399       II != IE; ++II)
400    e.varargs.push_back(*II);
401  e.function = 0;
402  e.type = E->getType();
403  e.opcode = Expression::EXTRACTVALUE;
404
405  return e;
406}
407
408Expression ValueTable::create_expression(InsertValueInst* E) {
409  Expression e;
410
411  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
412  e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
413  for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
414       II != IE; ++II)
415    e.varargs.push_back(*II);
416  e.function = 0;
417  e.type = E->getType();
418  e.opcode = Expression::INSERTVALUE;
419
420  return e;
421}
422
423//===----------------------------------------------------------------------===//
424//                     ValueTable External Functions
425//===----------------------------------------------------------------------===//
426
427/// add - Insert a value into the table with a specified value number.
428void ValueTable::add(Value *V, uint32_t num) {
429  valueNumbering.insert(std::make_pair(V, num));
430}
431
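/// lookup_or_add_call - Value-number a call.  Roughly: a call that does not
/// access memory is numbered purely by its expression; a readonly call may in
/// addition reuse the number of an identical call that MemoryDependenceAnalysis
/// proves reaches it unclobbered; any other call always gets a fresh number.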
432uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
433  if (AA->doesNotAccessMemory(C)) {
434    Expression exp = create_expression(C);
435    uint32_t& e = expressionNumbering[exp];
436    if (!e) e = nextValueNumber++;
437    valueNumbering[C] = e;
438    return e;
439  } else if (AA->onlyReadsMemory(C)) {
440    Expression exp = create_expression(C);
441    uint32_t& e = expressionNumbering[exp];
442    if (!e) {
443      e = nextValueNumber++;
444      valueNumbering[C] = e;
445      return e;
446    }
447    if (!MD) {
448      e = nextValueNumber++;
449      valueNumbering[C] = e;
450      return e;
451    }
452
453    MemDepResult local_dep = MD->getDependency(C);
454
455    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
456      valueNumbering[C] =  nextValueNumber;
457      return nextValueNumber++;
458    }
459
460    if (local_dep.isDef()) {
461      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
462
463      if (local_cdep->getNumOperands() != C->getNumOperands()) {
464        valueNumbering[C] = nextValueNumber;
465        return nextValueNumber++;
466      }
467
468      for (unsigned i = 1; i < C->getNumOperands(); ++i) {
469        uint32_t c_vn = lookup_or_add(C->getOperand(i));
470        uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
471        if (c_vn != cd_vn) {
472          valueNumbering[C] = nextValueNumber;
473          return nextValueNumber++;
474        }
475      }
476
477      uint32_t v = lookup_or_add(local_cdep);
478      valueNumbering[C] = v;
479      return v;
480    }
481
482    // Non-local case.
483    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
484      MD->getNonLocalCallDependency(CallSite(C));
485    // FIXME: call/call dependencies for readonly calls should return def, not
486    // clobber!  Move the checking logic to MemDep!
487    CallInst* cdep = 0;
488
489    // Check to see if we have a single dominating call instruction that is
490    // identical to C.
491    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
492      const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i];
493      // Ignore non-local dependencies.
494      if (I->second.isNonLocal())
495        continue;
496
497      // We don't handle non-dependencies.  If we already have a call, reject
498      // instruction dependencies.
499      if (I->second.isClobber() || cdep != 0) {
500        cdep = 0;
501        break;
502      }
503
504      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst());
505      // FIXME: All duplicated with non-local case.
506      if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){
507        cdep = NonLocalDepCall;
508        continue;
509      }
510
511      cdep = 0;
512      break;
513    }
514
515    if (!cdep) {
516      valueNumbering[C] = nextValueNumber;
517      return nextValueNumber++;
518    }
519
520    if (cdep->getNumOperands() != C->getNumOperands()) {
521      valueNumbering[C] = nextValueNumber;
522      return nextValueNumber++;
523    }
524    for (unsigned i = 1; i < C->getNumOperands(); ++i) {
525      uint32_t c_vn = lookup_or_add(C->getOperand(i));
526      uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
527      if (c_vn != cd_vn) {
528        valueNumbering[C] = nextValueNumber;
529        return nextValueNumber++;
530      }
531    }
532
533    uint32_t v = lookup_or_add(cdep);
534    valueNumbering[C] = v;
535    return v;
536
537  } else {
538    valueNumbering[C] = nextValueNumber;
539    return nextValueNumber++;
540  }
541}
542
543/// lookup_or_add - Returns the value number for the specified value, assigning
544/// it a new number if it did not have one before.
545uint32_t ValueTable::lookup_or_add(Value *V) {
546  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
547  if (VI != valueNumbering.end())
548    return VI->second;
549
550  if (!isa<Instruction>(V)) {
551    valueNumbering[V] = nextValueNumber;
552    return nextValueNumber++;
553  }
554
555  Instruction* I = cast<Instruction>(V);
556  Expression exp;
557  switch (I->getOpcode()) {
558    case Instruction::Call:
559      return lookup_or_add_call(cast<CallInst>(I));
560    case Instruction::Add:
561    case Instruction::FAdd:
562    case Instruction::Sub:
563    case Instruction::FSub:
564    case Instruction::Mul:
565    case Instruction::FMul:
566    case Instruction::UDiv:
567    case Instruction::SDiv:
568    case Instruction::FDiv:
569    case Instruction::URem:
570    case Instruction::SRem:
571    case Instruction::FRem:
572    case Instruction::Shl:
573    case Instruction::LShr:
574    case Instruction::AShr:
575    case Instruction::And:
576    case Instruction::Or :
577    case Instruction::Xor:
578      exp = create_expression(cast<BinaryOperator>(I));
579      break;
580    case Instruction::ICmp:
581    case Instruction::FCmp:
582      exp = create_expression(cast<CmpInst>(I));
583      break;
584    case Instruction::Trunc:
585    case Instruction::ZExt:
586    case Instruction::SExt:
587    case Instruction::FPToUI:
588    case Instruction::FPToSI:
589    case Instruction::UIToFP:
590    case Instruction::SIToFP:
591    case Instruction::FPTrunc:
592    case Instruction::FPExt:
593    case Instruction::PtrToInt:
594    case Instruction::IntToPtr:
595    case Instruction::BitCast:
596      exp = create_expression(cast<CastInst>(I));
597      break;
598    case Instruction::Select:
599      exp = create_expression(cast<SelectInst>(I));
600      break;
601    case Instruction::ExtractElement:
602      exp = create_expression(cast<ExtractElementInst>(I));
603      break;
604    case Instruction::InsertElement:
605      exp = create_expression(cast<InsertElementInst>(I));
606      break;
607    case Instruction::ShuffleVector:
608      exp = create_expression(cast<ShuffleVectorInst>(I));
609      break;
610    case Instruction::ExtractValue:
611      exp = create_expression(cast<ExtractValueInst>(I));
612      break;
613    case Instruction::InsertValue:
614      exp = create_expression(cast<InsertValueInst>(I));
615      break;
616    case Instruction::GetElementPtr:
617      exp = create_expression(cast<GetElementPtrInst>(I));
618      break;
619    default:
620      valueNumbering[V] = nextValueNumber;
621      return nextValueNumber++;
622  }
623
624  uint32_t& e = expressionNumbering[exp];
625  if (!e) e = nextValueNumber++;
626  valueNumbering[V] = e;
627  return e;
628}
629
630/// lookup - Returns the value number of the specified value. Fails if
631/// the value has not yet been numbered.
632uint32_t ValueTable::lookup(Value *V) const {
633  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
634  assert(VI != valueNumbering.end() && "Value not numbered?");
635  return VI->second;
636}
637
638/// clear - Remove all entries from the ValueTable
639void ValueTable::clear() {
640  valueNumbering.clear();
641  expressionNumbering.clear();
642  nextValueNumber = 1;
643}
644
645/// erase - Remove a value from the value numbering
646void ValueTable::erase(Value *V) {
647  valueNumbering.erase(V);
648}
649
650/// verifyRemoved - Verify that the value is removed from all internal data
651/// structures.
652void ValueTable::verifyRemoved(const Value *V) const {
653  for (DenseMap<Value*, uint32_t>::const_iterator
654         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
655    assert(I->first != V && "Inst still occurs in value numbering map!");
656  }
657}
658
659//===----------------------------------------------------------------------===//
660//                                GVN Pass
661//===----------------------------------------------------------------------===//
662
663namespace {
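  /// ValueNumberScope - Per-block map from value numbers to the value that is
  /// available for that number in the block.  The parent link is intended to
  /// point at the scope of the block's immediate dominator, so a lookup can
  /// walk up the dominator tree (see GVN::lookupNumber).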
664  struct ValueNumberScope {
665    ValueNumberScope* parent;
666    DenseMap<uint32_t, Value*> table;
667
668    ValueNumberScope(ValueNumberScope* p) : parent(p) { }
669  };
670}
671
672namespace {
673
674  class GVN : public FunctionPass {
675    bool runOnFunction(Function &F);
676  public:
677    static char ID; // Pass identification, replacement for typeid
678    explicit GVN(bool nopre = false, bool noloads = false)
679      : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { }
680
681  private:
682    bool NoPRE;
683    bool NoLoads;
684    MemoryDependenceAnalysis *MD;
685    DominatorTree *DT;
686
687    ValueTable VN;
688    DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
689
690    // This transformation requires dominator info
691    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
692      AU.addRequired<DominatorTree>();
693      if (!NoLoads)
694        AU.addRequired<MemoryDependenceAnalysis>();
695      AU.addRequired<AliasAnalysis>();
696
697      AU.addPreserved<DominatorTree>();
698      AU.addPreserved<AliasAnalysis>();
699    }
700
701    // Helper functions
702    // FIXME: eliminate or document these better
703    bool processLoad(LoadInst* L,
704                     SmallVectorImpl<Instruction*> &toErase);
705    bool processInstruction(Instruction *I,
706                            SmallVectorImpl<Instruction*> &toErase);
707    bool processNonLocalLoad(LoadInst* L,
708                             SmallVectorImpl<Instruction*> &toErase);
709    bool processBlock(BasicBlock *BB);
710    void dump(DenseMap<uint32_t, Value*>& d);
711    bool iterateOnFunction(Function &F);
712    Value *CollapsePhi(PHINode* p);
713    bool performPRE(Function& F);
714    Value *lookupNumber(BasicBlock *BB, uint32_t num);
715    void cleanupGlobalSets();
716    void verifyRemoved(const Instruction *I) const;
717  };
718
719  char GVN::ID = 0;
720}
721
722// createGVNPass - The public interface to this file...
723FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) {
724  return new GVN(NoPRE, NoLoads);
725}
726
727static RegisterPass<GVN> X("gvn",
728                           "Global Value Numbering");
729
730void GVN::dump(DenseMap<uint32_t, Value*>& d) {
731  printf("{\n");
732  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
733       E = d.end(); I != E; ++I) {
734      printf("%d\n", I->first);
735      I->second->dump();
736  }
737  printf("}\n");
738}
739
740static bool isSafeReplacement(PHINode* p, Instruction *inst) {
741  if (!isa<PHINode>(inst))
742    return true;
743
744  for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
745       UI != E; ++UI)
746    if (PHINode* use_phi = dyn_cast<PHINode>(UI))
747      if (use_phi->getParent() == inst->getParent())
748        return false;
749
750  return true;
751}
752
753Value *GVN::CollapsePhi(PHINode *PN) {
754  Value *ConstVal = PN->hasConstantValue(DT);
755  if (!ConstVal) return 0;
756
757  Instruction *Inst = dyn_cast<Instruction>(ConstVal);
758  if (!Inst)
759    return ConstVal;
760
761  if (DT->dominates(Inst, PN))
762    if (isSafeReplacement(PN, Inst))
763      return Inst;
764  return 0;
765}
766
767/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
768/// we're analyzing is fully available in the specified block.  As we go, keep
769/// track of which blocks we know are fully available in FullyAvailableBlocks.
770/// This map actually holds one of four states for each block:
771///   0) we know the block *is not* fully available.
772///   1) we know the block *is* fully available.
773///   2) we do not know whether the block is fully available or not, but we are
774///      currently speculating that it will be.
775///   3) we are speculating for this block and have used that to speculate for
776///      other blocks.
777static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
778                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
779  // Optimistically assume that the block is fully available and check to see
780  // if we already know about this block in one lookup.
781  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
782    FullyAvailableBlocks.insert(std::make_pair(BB, 2));
783
784  // If the entry already existed for this block, return the precomputed value.
785  if (!IV.second) {
786    // If this is a speculative "available" value, mark it as being used for
787    // speculation of other blocks.
788    if (IV.first->second == 2)
789      IV.first->second = 3;
790    return IV.first->second != 0;
791  }
792
793  // Otherwise, see if it is fully available in all predecessors.
794  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
795
796  // If this block has no predecessors, it isn't live-in here.
797  if (PI == PE)
798    goto SpeculationFailure;
799
800  for (; PI != PE; ++PI)
801    // If the value isn't fully available in one of our predecessors, then it
802    // isn't fully available in this block either.  Undo our previous
803    // optimistic assumption and bail out.
804    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
805      goto SpeculationFailure;
806
807  return true;
808
809// SpeculationFailure - If we get here, we found out that this is not, after
810// all, a fully-available block.  We have a problem if we speculated on this and
811// used the speculation to mark other blocks as available.
812SpeculationFailure:
813  char &BBVal = FullyAvailableBlocks[BB];
814
815  // If we didn't speculate on this, just return with it set to false.
816  if (BBVal == 2) {
817    BBVal = 0;
818    return false;
819  }
820
821  // If we did speculate on this value, we could have blocks set to 1 that are
822  // incorrect.  Walk the (transitive) successors of this block and mark them as
823  // 0 if set to one.
824  SmallVector<BasicBlock*, 32> BBWorklist;
825  BBWorklist.push_back(BB);
826
827  while (!BBWorklist.empty()) {
828    BasicBlock *Entry = BBWorklist.pop_back_val();
829    // Note that this sets blocks to 0 (unavailable) if they happen to not
830    // already be in FullyAvailableBlocks.  This is safe.
831    char &EntryVal = FullyAvailableBlocks[Entry];
832    if (EntryVal == 0) continue;  // Already unavailable.
833
834    // Mark as unavailable.
835    EntryVal = 0;
836
837    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
838      BBWorklist.push_back(*I);
839  }
840
841  return false;
842}
843
844
845/// CanCoerceMustAliasedValueToLoad - Return true if
846/// CoerceAvailableValueToLoadType will succeed.
847static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
848                                            const Type *LoadTy,
849                                            const TargetData &TD) {
850  // If the loaded or stored value is a first-class array or struct, don't try
851  // to transform it.  We need to be able to bitcast to integer.
852  if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy) ||
853      isa<StructType>(StoredVal->getType()) ||
854      isa<ArrayType>(StoredVal->getType()))
855    return false;
856
857  // The store has to be at least as big as the load.
858  if (TD.getTypeSizeInBits(StoredVal->getType()) <
859        TD.getTypeSizeInBits(LoadTy))
860    return false;
861
862  return true;
863}
864
865
866/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
867/// then a load from a must-aliased pointer of a different type, try to coerce
868/// the stored value.  LoadedTy is the type of the load we want to replace and
869/// InsertPt is the place to insert new instructions.
870///
871/// If we can't do it, return null.
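///
/// Illustrative examples: a stored i64 can feed a later i32 load (truncated,
/// with a shift first on big-endian targets), and a stored <2 x i32> can feed
/// an i64 load via bitcast; a stored i16 can never feed an i64 load, because
/// the store is smaller than the load.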
872static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
873                                             const Type *LoadedTy,
874                                             Instruction *InsertPt,
875                                             const TargetData &TD) {
876  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
877    return 0;
878
879  const Type *StoredValTy = StoredVal->getType();
880
881  uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
882  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
883
884  // If the store and reload are the same size, we can always reuse it.
885  if (StoreSize == LoadSize) {
886    if (isa<PointerType>(StoredValTy) && isa<PointerType>(LoadedTy)) {
887      // Pointer to Pointer -> use bitcast.
888      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
889    }
890
891    // Convert source pointers to integers, which can be bitcast.
892    if (isa<PointerType>(StoredValTy)) {
893      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
894      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
895    }
896
897    const Type *TypeToCastTo = LoadedTy;
898    if (isa<PointerType>(TypeToCastTo))
899      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
900
901    if (StoredValTy != TypeToCastTo)
902      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
903
904    // Cast to pointer if the load needs a pointer type.
905    if (isa<PointerType>(LoadedTy))
906      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
907
908    return StoredVal;
909  }
910
911  // If the loaded value is smaller than the available value, then we can
912  // extract out a piece from it.  If the available value is too small, then we
913  // can't do anything.
914  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
915
916  // Convert source pointers to integers, which can be manipulated.
917  if (isa<PointerType>(StoredValTy)) {
918    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
919    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
920  }
921
922  // Convert vectors and fp to integer, which can be manipulated.
923  if (!isa<IntegerType>(StoredValTy)) {
924    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
925    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
926  }
927
928  // If this is a big-endian system, we need to shift the value down to the low
929  // bits so that a truncate will work.
930  if (TD.isBigEndian()) {
931    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
932    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
933  }
934
935  // Truncate the integer to the right size now.
936  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
937  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);
938
939  if (LoadedTy == NewIntTy)
940    return StoredVal;
941
942  // If the result is a pointer, inttoptr.
943  if (isa<PointerType>(LoadedTy))
944    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
945
946  // Otherwise, bitcast.
947  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
948}
949
950/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
951/// be expressed as a base pointer plus a constant offset.  Return the base and
952/// offset to the caller.
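///
/// For example (illustrative): %p = getelementptr {i32, i32}* %b, i32 0, i32 1
/// decomposes into base %b with Offset advanced by 4 under the usual struct
/// layout.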
953static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
954                                        const TargetData &TD) {
955  Operator *PtrOp = dyn_cast<Operator>(Ptr);
956  if (PtrOp == 0) return Ptr;
957
958  // Just look through bitcasts.
959  if (PtrOp->getOpcode() == Instruction::BitCast)
960    return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
961
962  // If this is a GEP with constant indices, we can look through it.
963  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
964  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
965
966  gep_type_iterator GTI = gep_type_begin(GEP);
967  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
968       ++I, ++GTI) {
969    ConstantInt *OpC = cast<ConstantInt>(*I);
970    if (OpC->isZero()) continue;
971
972    // Handle struct and array indices, which add their offset to the pointer.
973    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
974      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
975    } else {
976      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
977      Offset += OpC->getSExtValue()*Size;
978    }
979  }
980
981  // Re-sign extend from the pointer size if needed to get overflow edge cases
982  // right.
983  unsigned PtrSize = TD.getPointerSizeInBits();
984  if (PtrSize < 64)
985    Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
986
987  return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
988}
989
990
991/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
992/// memdep query of a load that ends up being a clobbering memory write (store,
993/// memset, memcpy, memmove).  This means that the write *may* provide bits used
994/// by the load, but we can't be sure because the pointers don't must-alias.
995///
996/// Check this case to see if there is anything more we can do before we give
997/// up.  This returns -1 if we have to give up, or a byte number in the stored
998/// value of the piece that feeds the load.
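///
/// Illustrative example: an i64 store at byte offset 0 from some base followed
/// by an i32 load at byte offset 4 from the same base returns 4 here, since
/// the load's bytes are fully covered by the store starting four bytes in.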
999static int AnalyzeLoadFromClobberingWrite(LoadInst *L, Value *WritePtr,
1000                                          uint64_t WriteSizeInBits,
1001                                          const TargetData &TD) {
1002  // If the loaded or stored value is a first-class array or struct, don't try
1003  // to transform it.  We need to be able to bitcast to integer.
1004  if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()))
1005    return -1;
1006
1007  int64_t StoreOffset = 0, LoadOffset = 0;
1008  Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
1009  Value *LoadBase =
1010    GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD);
1011  if (StoreBase != LoadBase)
1012    return -1;
1013
1014  // If the load and store are to the exact same address, they should have been
1015  // a must alias.  AA must have gotten confused.
1016  // FIXME: Study to see if/when this happens.
1017  if (LoadOffset == StoreOffset) {
1018#if 0
1019    errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
1020    << "Base       = " << *StoreBase << "\n"
1021    << "Store Ptr  = " << *WritePtr << "\n"
1022    << "Store Offs = " << StoreOffset << "\n"
1023    << "Load Ptr   = " << *L->getPointerOperand() << "\n"
1024    << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n";
1025    errs() << "'" << L->getParent()->getParent()->getName() << "'"
1026    << *L->getParent();
1027#endif
1028    return -1;
1029  }
1030
1031  // If the load and store don't overlap at all, the store doesn't provide
1032  // anything to the load.  In this case, they really don't alias at all, AA
1033  // must have gotten confused.
1034  // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
1035  // remove this check, as it is duplicated with what we have below.
1036  uint64_t LoadSize = TD.getTypeSizeInBits(L->getType());
1037
1038  if ((WriteSizeInBits & 7) | (LoadSize & 7))
1039    return -1;
1040  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
1041  LoadSize >>= 3;
1042
1043
1044  bool isAAFailure = false;
1045  if (StoreOffset < LoadOffset) {
1046    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
1047  } else {
1048    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
1049  }
1050  if (isAAFailure) {
1051#if 0
1052    errs() << "STORE LOAD DEP WITH COMMON BASE:\n"
1053    << "Base       = " << *StoreBase << "\n"
1054    << "Store Ptr  = " << *WritePtr << "\n"
1055    << "Store Offs = " << StoreOffset << "\n"
1056    << "Load Ptr   = " << *L->getPointerOperand() << "\n"
1057    << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n";
1058    errs() << "'" << L->getParent()->getParent()->getName() << "'"
1059    << *L->getParent();
1060#endif
1061    return -1;
1062  }
1063
1064  // If the Load isn't completely contained within the stored bits, we don't
1065  // have all the bits to feed it.  We could do something crazy in the future
1066  // (issue a smaller load, then merge the bits in) but this seems unlikely to be
1067  // valuable.
1068  if (StoreOffset > LoadOffset ||
1069      StoreOffset+StoreSize < LoadOffset+LoadSize)
1070    return -1;
1071
1072  // Okay, we can do this transformation.  Return the number of bytes into the
1073  // store that the load is.
1074  return LoadOffset-StoreOffset;
1075}
1076
1077/// AnalyzeLoadFromClobberingStore - This function is called when we have a
1078/// memdep query of a load that ends up being a clobbering store.
1079static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
1080                                          const TargetData &TD) {
1081  // Cannot handle reading from store of first-class aggregate yet.
1082  if (isa<StructType>(DepSI->getOperand(0)->getType()) ||
1083      isa<ArrayType>(DepSI->getOperand(0)->getType()))
1084    return -1;
1085
1086  Value *StorePtr = DepSI->getPointerOperand();
1087  uint64_t StoreSize = TD.getTypeSizeInBits(StorePtr->getType());
1088  return AnalyzeLoadFromClobberingWrite(L, StorePtr, StoreSize, TD);
1089}
1090
1091static int AnalyzeLoadFromClobberingMemInst(LoadInst *L, MemIntrinsic *MI,
1092                                            const TargetData &TD) {
1093  // If the mem operation is a non-constant size, we can't handle it.
1094  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
1095  if (SizeCst == 0) return -1;
1096  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
1097
1098  if (MI->getIntrinsicID() == Intrinsic::memset)
1099    return AnalyzeLoadFromClobberingWrite(L, MI->getDest(), MemSizeInBits, TD);
1100
1101  // Unhandled memcpy/memmove.
1102  return -1;
1103}
1104
1105
1106/// GetStoreValueForLoad - This function is called when we have a
1107/// memdep query of a load that ends up being a clobbering store.  This means
1108/// that the store *may* provide bits used by the load but we can't be sure
1109/// because the pointers don't must-alias.  Check this case to see if there is
1110/// anything more we can do before we give up.
1111static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
1112                                   const Type *LoadTy,
1113                                   Instruction *InsertPt, const TargetData &TD){
1114  LLVMContext &Ctx = SrcVal->getType()->getContext();
1115
1116  uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
1117  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
1118
1119
1120  // Compute which bits of the stored value are being used by the load.  Convert
1121  // to an integer type to start with.
1122  if (isa<PointerType>(SrcVal->getType()))
1123    SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt);
1124  if (!isa<IntegerType>(SrcVal->getType()))
1125    SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8),
1126                             "tmp", InsertPt);
1127
1128  // Shift the bits to the least significant depending on endianness.
1129  unsigned ShiftAmt;
1130  if (TD.isLittleEndian())
1131    ShiftAmt = Offset*8;
1132  else
1133    ShiftAmt = (StoreSize-LoadSize-Offset)*8;
1134
1135  if (ShiftAmt)
1136    SrcVal = BinaryOperator::CreateLShr(SrcVal,
1137                ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt);
1138
1139  if (LoadSize != StoreSize)
1140    SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8),
1141                           "tmp", InsertPt);
1142
1143  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
1144}
1145
1146/// GetMemInstValueForLoad - This function is called when we have a
1147/// memdep query of a load that ends up being a clobbering mem intrinsic.
1148static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
1149                                     const Type *LoadTy, Instruction *InsertPt,
1150                                     const TargetData &TD){
1151  LLVMContext &Ctx = LoadTy->getContext();
1152  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
1153
1154  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
1155
1156  // We know that this method is only called when the mem transfer fully
1157  // provides the bits for the load.
1158  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
1159    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
1160    // independently of what the offset is.
1161    Value *Val = MSI->getValue();
1162    if (LoadSize != 1)
1163      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
1164
1165    Value *OneElt = Val;
1166
1167    // Splat the value out to the right number of bits.
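    // Illustration: for a 4-byte load of byte value B this builds
    // B -> BB -> BBBB by repeated shift-and-or doubling (one byte per letter).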
1168    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
1169      // If we can double the number of bytes set, do it.
1170      if (NumBytesSet*2 <= LoadSize) {
1171        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
1172        Val = Builder.CreateOr(Val, ShVal);
1173        NumBytesSet <<= 1;
1174        continue;
1175      }
1176
1177      // Otherwise insert one byte at a time.
1178      Value *ShVal = Builder.CreateShl(Val, 1*8);
1179      Val = Builder.CreateOr(OneElt, ShVal);
1180      ++NumBytesSet;
1181    }
1182
1183    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
1184  }
1185
1186  // ABORT;
1187  return 0;
1188}
1189
1190
1191
1192struct AvailableValueInBlock {
1193  /// BB - The basic block in question.
1194  BasicBlock *BB;
1195  enum ValType {
1196    SimpleVal,  // A simple offsetted value that is accessed.
1197    MemIntrin   // A memory intrinsic which is loaded from.
1198  };
1199
1200  /// V - The value that is live out of the block.
1201  PointerIntPair<Value *, 1, ValType> Val;
1202
1203  /// Offset - The byte offset in Val that is interesting for the load query.
1204  unsigned Offset;
1205
1206  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
1207                                   unsigned Offset = 0) {
1208    AvailableValueInBlock Res;
1209    Res.BB = BB;
1210    Res.Val.setPointer(V);
1211    Res.Val.setInt(SimpleVal);
1212    Res.Offset = Offset;
1213    return Res;
1214  }
1215
1216  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
1217                                     unsigned Offset = 0) {
1218    AvailableValueInBlock Res;
1219    Res.BB = BB;
1220    Res.Val.setPointer(MI);
1221    Res.Val.setInt(MemIntrin);
1222    Res.Offset = Offset;
1223    return Res;
1224  }
1225
1226  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
1227  Value *getSimpleValue() const {
1228    assert(isSimpleValue() && "Wrong accessor");
1229    return Val.getPointer();
1230  }
1231
1232  MemIntrinsic *getMemIntrinValue() const {
1233    assert(!isSimpleValue() && "Wrong accessor");
1234    return cast<MemIntrinsic>(Val.getPointer());
1235  }
1236};
1237
1238/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
1239/// construct SSA form, allowing us to eliminate LI.  This returns the value
1240/// that should be used at LI's definition site.
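///
/// Roughly: each block's available value is registered with SSAUpdater, which
/// then inserts whatever PHI nodes are needed so that a single value reaches
/// the block containing LI.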
1241static Value *ConstructSSAForLoadSet(LoadInst *LI,
1242                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
1243                                     const TargetData *TD,
1244                                     AliasAnalysis *AA) {
1245  SmallVector<PHINode*, 8> NewPHIs;
1246  SSAUpdater SSAUpdate(&NewPHIs);
1247  SSAUpdate.Initialize(LI);
1248
1249  const Type *LoadTy = LI->getType();
1250
1251  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
1252    const AvailableValueInBlock &AV = ValuesPerBlock[i];
1253    BasicBlock *BB = AV.BB;
1254
1255    if (SSAUpdate.HasValueForBlock(BB))
1256      continue;
1257
1258    unsigned Offset = AV.Offset;
1259
1260    Value *AvailableVal;
1261    if (AV.isSimpleValue()) {
1262      AvailableVal = AV.getSimpleValue();
1263      if (AvailableVal->getType() != LoadTy) {
1264        assert(TD && "Need target data to handle type mismatch case");
1265        AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
1266                                            BB->getTerminator(), *TD);
1267
1268        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
1269              << *AV.getSimpleValue() << '\n'
1270              << *AvailableVal << '\n' << "\n\n\n");
1271      }
1272    } else {
1273      AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset,
1274                                            LoadTy, BB->getTerminator(), *TD);
1275      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1276            << "  " << *AV.getMemIntrinValue() << '\n'
1277            << *AvailableVal << '\n' << "\n\n\n");
1278    }
1279    SSAUpdate.AddAvailableValue(BB, AvailableVal);
1280  }
1281
1282  // Perform PHI construction.
1283  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
1284
1285  // If new PHI nodes were created, notify alias analysis.
1286  if (isa<PointerType>(V->getType()))
1287    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
1288      AA->copyValue(LI, NewPHIs[i]);
1289
1290  return V;
1291}
1292
1293static bool isLifetimeStart(Instruction *Inst) {
1294  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
1295    return II->getIntrinsicID() == Intrinsic::lifetime_start;
1296  return false;
1297}
1298
1299/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
1300/// non-local by performing PHI construction.
1301bool GVN::processNonLocalLoad(LoadInst *LI,
1302                              SmallVectorImpl<Instruction*> &toErase) {
1303  // Find the non-local dependencies of the load.
1304  SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps;
1305  MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
1306                                   Deps);
1307  //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: "
1308  //             << Deps.size() << *LI << '\n');
1309
1310  // If we had to process more than one hundred blocks to find the
1311  // dependencies, this load isn't worth worrying about.  Optimizing
1312  // it will be too expensive.
1313  if (Deps.size() > 100)
1314    return false;
1315
1316  // If we had a phi translation failure, we'll have a single entry which is a
1317  // clobber in the current block.  Reject this early.
1318  if (Deps.size() == 1 && Deps[0].second.isClobber()) {
1319    DEBUG(
1320      errs() << "GVN: non-local load ";
1321      WriteAsOperand(errs(), LI);
1322      errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n';
1323    );
1324    return false;
1325  }
1326
1327  // Filter out useless results (non-locals, etc).  Keep track of the blocks
1328  // where we have a value available (ValuesPerBlock), and also keep track of
1329  // whether we see dependencies that produce an unknown value for the load
1330  // (such as a call that could potentially clobber the load).
1331  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
1332  SmallVector<BasicBlock*, 16> UnavailableBlocks;
1333
1334  const TargetData *TD = 0;
1335
1336  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
1337    BasicBlock *DepBB = Deps[i].first;
1338    MemDepResult DepInfo = Deps[i].second;
1339
1340    if (DepInfo.isClobber()) {
1341      // If the dependence is to a store that writes to a superset of the bits
1342      // read by the load, we can extract the bits we need for the load from the
1343      // stored value.
1344      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
1345        if (TD == 0)
1346          TD = getAnalysisIfAvailable<TargetData>();
1347        if (TD) {
1348          int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD);
1349          if (Offset != -1) {
1350            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1351                                                           DepSI->getOperand(0),
1352                                                                Offset));
1353            continue;
1354          }
1355        }
1356      }
1357
1358      // If the clobbering value is a memset/memcpy/memmove, see if we can
1359      // forward a value on from it.
1360      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
1361        if (TD == 0)
1362          TD = getAnalysisIfAvailable<TargetData>();
1363        if (TD) {
1364          int Offset = AnalyzeLoadFromClobberingMemInst(LI, DepMI, *TD);
1365          if (Offset != -1) {
1366            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
1367                                                                  Offset));
1368            continue;
1369          }
1370        }
1371      }
1372
1373      UnavailableBlocks.push_back(DepBB);
1374      continue;
1375    }
1376
1377    Instruction *DepInst = DepInfo.getInst();
1378
1379    // Loading the allocation -> undef.
1380    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
1381        // Loading immediately after lifetime begin -> undef.
1382        isLifetimeStart(DepInst)) {
1383      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1384                                             UndefValue::get(LI->getType())));
1385      continue;
1386    }
1387
1388    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
1389      // Reject loads and stores that are to the same address but are of
1390      // different types if we have to.
1391      if (S->getOperand(0)->getType() != LI->getType()) {
1392        if (TD == 0)
1393          TD = getAnalysisIfAvailable<TargetData>();
1394
1395        // If the stored value is at least as large as the loaded value, we can
1396        // reuse it.
1397        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0),
1398                                                        LI->getType(), *TD)) {
1399          UnavailableBlocks.push_back(DepBB);
1400          continue;
1401        }
1402      }
1403
1404      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1405                                                          S->getOperand(0)));
1406      continue;
1407    }
1408
1409    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
1410      // If the types mismatch and we can't handle it, reject reuse of the load.
1411      if (LD->getType() != LI->getType()) {
1412        if (TD == 0)
1413          TD = getAnalysisIfAvailable<TargetData>();
1414
1415        // If the previously loaded value is at least as large as this load, we
1416        // can reuse it.
1417        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
1418          UnavailableBlocks.push_back(DepBB);
1419          continue;
1420        }
1421      }
1422      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
1423      continue;
1424    }
1425
1426    UnavailableBlocks.push_back(DepBB);
1427    continue;
1428  }
1429
1430  // If we have no predecessors that produce a known value for this load, exit
1431  // early.
1432  if (ValuesPerBlock.empty()) return false;
1433
1434  // If all of the instructions we depend on produce a known value for this
1435  // load, then it is fully redundant and we can use PHI insertion to compute
1436  // its value.  Insert PHIs and remove the fully redundant value now.
1437  if (UnavailableBlocks.empty()) {
1438    DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
1439
1440    // Perform PHI construction.
1441    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD,
1442                                      VN.getAliasAnalysis());
1443    LI->replaceAllUsesWith(V);
1444
1445    if (isa<PHINode>(V))
1446      V->takeName(LI);
1447    if (isa<PointerType>(V->getType()))
1448      MD->invalidateCachedPointerInfo(V);
1449    toErase.push_back(LI);
1450    NumGVNLoad++;
1451    return true;
1452  }
1453
1454  if (!EnablePRE || !EnableLoadPRE)
1455    return false;
1456
1457  // Okay, we have *some* definitions of the value.  This means that the value
1458  // is available in some of our (transitive) predecessors.  Let's think about
1459  // doing PRE of this load.  This will involve inserting a new load into the
1460  // predecessor when it's not available.  We could do this in general, but
1461  // prefer to not increase code size.  As such, we only do this when we know
1462  // that we only have to insert *one* load (which means we're basically moving
1463  // the load, not inserting a new one).
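  //
  // Illustrative example: if the loaded value is produced by a store in one
  // predecessor but is unavailable in the other, we insert a copy of the load
  // only into the predecessor that lacks it and merge the two with a PHI.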
1464
1465  SmallPtrSet<BasicBlock *, 4> Blockers;
1466  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
1467    Blockers.insert(UnavailableBlocks[i]);
1468
1469  // Let's find the first basic block with more than one predecessor.  Walk
1470  // backwards through predecessors if needed.
1471  BasicBlock *LoadBB = LI->getParent();
1472  BasicBlock *TmpBB = LoadBB;
1473
1474  bool isSinglePred = false;
1475  bool allSingleSucc = true;
1476  while (TmpBB->getSinglePredecessor()) {
1477    isSinglePred = true;
1478    TmpBB = TmpBB->getSinglePredecessor();
1479    if (!TmpBB) // If we haven't found one, bail now.
1480      return false;
1481    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1482      return false;
1483    if (Blockers.count(TmpBB))
1484      return false;
1485    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1486      allSingleSucc = false;
1487  }
1488
1489  assert(TmpBB);
1490  LoadBB = TmpBB;
1491
1492  // If the available-value set contains LI itself, we are in a loop where at
1493  // least one of the incoming values is LI.  Since we then could not eliminate
1494  // LI even after inserting loads in the other predecessors, PRE would only
1495  // increase code size.  Reject this by scanning for LI.
1496  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
1497    if (ValuesPerBlock[i].isSimpleValue() &&
1498        ValuesPerBlock[i].getSimpleValue() == LI)
1499      return false;
1500
1501  // FIXME: It is extremely unclear what this loop is doing, other than
1502  // artificially restricting load PRE.
1503  if (isSinglePred) {
1504    bool isHot = false;
1505    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
1506      const AvailableValueInBlock &AV = ValuesPerBlock[i];
1507      if (AV.isSimpleValue())
1508        // The dependence is "hot" when the load dominates the instruction
1509        // that provides its value, which can only happen inside a loop.
1510        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
1511          if (DT->dominates(LI, I)) {
1512            isHot = true;
1513            break;
1514          }
1515    }
1516
1517    // We are interested only in "hot" instructions; doing this elsewhere is
1518    // more likely a pessimization than an optimization.
1519    if (!isHot)
1520      return false;
1521  }
1522
1523  // Okay, we have some hope :).  Check to see if the loaded value is fully
1524  // available in all but one predecessor.
1525  // FIXME: If we could restructure the CFG, we could make a common pred with
1526  // all the preds that don't have an available LI and insert a new load into
1527  // that one block.
1528  BasicBlock *UnavailablePred = 0;
1529
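  // Seed the availability map with what we already know: blocks that produce a
  // usable value for the load are fully available, and the blocks we rejected
  // above are not.  The IsValueFullyAvailableInBlock queries below start from
  // this map.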
1530  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
1531  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
1532    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
1533  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
1534    FullyAvailableBlocks[UnavailableBlocks[i]] = false;
1535
1536  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
1537       PI != E; ++PI) {
1538    if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
1539      continue;
1540
1541    // If the load is unavailable in more than one predecessor, reject it.
1542    if (UnavailablePred && UnavailablePred != *PI)
1543      return false;
1544    UnavailablePred = *PI;
1545  }
1546
1547  assert(UnavailablePred != 0 &&
1548         "Fully available value should be eliminated above!");
1549
1550  // We don't currently handle critical edges :(
1551  if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) {
1552    DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
1553                 << UnavailablePred->getName() << "': " << *LI << '\n');
1554    return false;
1555  }
1556
1557  // Do PHI translation to get its value in the predecessor if necessary.  The
1558  // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1559  //
1560  // FIXME: This may insert a computation, but we don't tell scalar GVN
1561  // optimization stuff about it.  How do we do this?
1562  SmallVector<Instruction*, 8> NewInsts;
1563  Value *LoadPtr = 0;
1564
1565  // If all of the predecessors we walked through have a single successor, then
1566  // we assume it is safe to insert the load in the predecessor, so we can
1567  // insert code to materialize the pointer if it is not available.
1568  if (allSingleSucc) {
1569    LoadPtr = MD->InsertPHITranslatedPointer(LI->getOperand(0), LoadBB,
1570                                             UnavailablePred, TD, *DT,NewInsts);
1571  } else {
1572    LoadPtr = MD->GetAvailablePHITranslatedValue(LI->getOperand(0), LoadBB,
1573                                                 UnavailablePred, TD, *DT);
1574  }
1575
1576  // Assign value numbers to these new instructions.
1577  for (SmallVector<Instruction*, 8>::iterator NI = NewInsts.begin(),
1578       NE = NewInsts.end(); NI != NE; ++NI) {
1579    // FIXME: We really _ought_ to insert these value numbers into their
1580    // parent's availability map.  However, in doing so, we risk getting into
1581    // ordering issues.  If a block hasn't been processed yet, we would be
1582    // marking a value as AVAIL-IN, which isn't what we intend.
1583    VN.lookup_or_add(*NI);
1584  }
1585
1586  // If we couldn't find or insert a computation of this phi translated value,
1587  // we fail PRE.
1588  if (LoadPtr == 0) {
1589    DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1590                 << *LI->getOperand(0) << "\n");
1591    return false;
1592  }
1593
1594  // Make sure it is valid to move this load here.  We have to watch out for:
1595  //  %1 = getelementptr i8* %p, ...
1596  //  test %p and branch if == 0
1597  //  load %1
1598  // It is valid to have the getelementptr before the test, even if %p can be
1599  // null, as getelementptr only does address arithmetic.
1600  // If we are not pushing the value through any multiple-successor blocks
1601  // we do not have this case.  Otherwise, check that the load is safe to
1602  // put anywhere; this can be improved, but should be conservatively safe.
1603  if (!allSingleSucc &&
1604      // FIXME: REEVALUATE THIS.
1605      !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator())) {
1606    assert(NewInsts.empty() && "Should not have inserted instructions");
1607    return false;
1608  }
1609
1610  // Okay, we can eliminate this load by inserting a reload in the predecessor
1611  // and using PHI construction to get the value in the other predecessors.
1612  // Do it.
1613  DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
1614  DEBUG(if (!NewInsts.empty())
1615          errs() << "INSERTED " << NewInsts.size() << " INSTS: "
1616                 << *NewInsts.back() << '\n');
1617
1618  Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
1619                                LI->getAlignment(),
1620                                UnavailablePred->getTerminator());
1621
1622  // Add the newly created load.
1623  ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,NewLoad));
1624
1625  // Perform PHI construction.
1626  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD,
1627                                    VN.getAliasAnalysis());
1628  LI->replaceAllUsesWith(V);
1629  if (isa<PHINode>(V))
1630    V->takeName(LI);
1631  if (isa<PointerType>(V->getType()))
1632    MD->invalidateCachedPointerInfo(V);
1633  toErase.push_back(LI);
1634  NumPRELoad++;
1635  return true;
1636}
1637
1638/// processLoad - Attempt to eliminate a load, first by eliminating it
1639/// locally, and then attempting non-local elimination if that fails.
1640bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
1641  if (!MD)
1642    return false;
1643
1644  if (L->isVolatile())
1645    return false;
1646
1647  // Find this load's local memory dependency.
1648  MemDepResult Dep = MD->getDependency(L);
1649
1650  // If the value isn't available, don't do anything!
1651  if (Dep.isClobber()) {
1652    // Check to see if we have something like this:
1653    //   store i32 123, i32* %P
1654    //   %A = bitcast i32* %P to i8*
1655    //   %B = gep i8* %A, i32 1
1656    //   %C = load i8* %B
1657    //
1658    // We handle this by recognizing when the load and the clobbering store
1659    // share an obvious common base plus constant offsets, and the previous
1660    // store (or memset) completely covers this load.  This sort of thing can
1661    // happen in bitfield access code.
1662    Value *AvailVal = 0;
1663    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
1664      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
1665        int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD);
1666        if (Offset != -1)
1667          AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
1668                                          L->getType(), L, *TD);
1669      }
1670
1671    // If the clobbering value is a memset/memcpy/memmove, see if we can
1672    // forward a value from it.
1673    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
1674      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
1675        int Offset = AnalyzeLoadFromClobberingMemInst(L, DepMI, *TD);
1676        if (Offset != -1)
1677          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
1678      }
1679    }
1680
1681    if (AvailVal) {
1682      DEBUG(errs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
1683            << *AvailVal << '\n' << *L << "\n\n\n");
1684
1685      // Replace the load!
1686      L->replaceAllUsesWith(AvailVal);
1687      if (isa<PointerType>(AvailVal->getType()))
1688        MD->invalidateCachedPointerInfo(AvailVal);
1689      toErase.push_back(L);
1690      NumGVNLoad++;
1691      return true;
1692    }
1693
1694    DEBUG(
1695      // Fast-print the dep; using operator<< on the instruction would be too slow.
1696      errs() << "GVN: load ";
1697      WriteAsOperand(errs(), L);
1698      Instruction *I = Dep.getInst();
1699      errs() << " is clobbered by " << *I << '\n';
1700    );
1701    return false;
1702  }
1703
1704  // If it is defined in another block, try harder.
1705  if (Dep.isNonLocal())
1706    return processNonLocalLoad(L, toErase);
1707
1708  Instruction *DepInst = Dep.getInst();
1709  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
1710    Value *StoredVal = DepSI->getOperand(0);
1711
1712    // The store and load are to a must-aliased pointer, but they may not
1713    // actually have the same type.  See if we know how to reuse the stored
1714    // value (depending on its type).
1715    const TargetData *TD = 0;
1716    if (StoredVal->getType() != L->getType()) {
1717      if ((TD = getAnalysisIfAvailable<TargetData>())) {
1718        StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
1719                                                   L, *TD);
1720        if (StoredVal == 0)
1721          return false;
1722
1723        DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
1724                     << '\n' << *L << "\n\n\n");
1725      }
1726      else
1727        return false;
1728    }
1729
1730    // Remove it!
1731    L->replaceAllUsesWith(StoredVal);
1732    if (isa<PointerType>(StoredVal->getType()))
1733      MD->invalidateCachedPointerInfo(StoredVal);
1734    toErase.push_back(L);
1735    NumGVNLoad++;
1736    return true;
1737  }
1738
1739  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
1740    Value *AvailableVal = DepLI;
1741
1742    // The loads are of a must-aliased pointer, but they may not actually have
1743    // the same type.  See if we know how to reuse the previously loaded value
1744    // (depending on its type).
1745    const TargetData *TD = 0;
1746    if (DepLI->getType() != L->getType()) {
1747      if ((TD = getAnalysisIfAvailable<TargetData>())) {
1748        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
1749        if (AvailableVal == 0)
1750          return false;
1751
1752        DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
1753                     << "\n" << *L << "\n\n\n");
1754      }
1755      else
1756        return false;
1757    }
1758
1759    // Remove it!
1760    L->replaceAllUsesWith(AvailableVal);
1761    if (isa<PointerType>(DepLI->getType()))
1762      MD->invalidateCachedPointerInfo(DepLI);
1763    toErase.push_back(L);
1764    NumGVNLoad++;
1765    return true;
1766  }
1767
1768  // If this load really doesn't depend on anything, then we must be loading an
1769  // undef value.  This can happen when loading from a fresh allocation with no
1770  // intervening stores, for example.
1771  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
1772    L->replaceAllUsesWith(UndefValue::get(L->getType()));
1773    toErase.push_back(L);
1774    NumGVNLoad++;
1775    return true;
1776  }
1777
1778  // If this load occurs right after a lifetime.start intrinsic,
1779  // then the loaded value is undefined.
1780  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
1781    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1782      L->replaceAllUsesWith(UndefValue::get(L->getType()));
1783      toErase.push_back(L);
1784      NumGVNLoad++;
1785      return true;
1786    }
1787  }
1788
1789  return false;
1790}
1791
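/// lookupNumber - Look up the given value number in the scope for BB, walking
/// up through the parent scopes (which correspond to BB's dominators) until a
/// value is found or the chain ends.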
1792Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
1793  DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
1794  if (I == localAvail.end())
1795    return 0;
1796
1797  ValueNumberScope *Locals = I->second;
1798  while (Locals) {
1799    DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
1800    if (I != Locals->table.end())
1801      return I->second;
1802    Locals = Locals->parent;
1803  }
1804
1805  return 0;
1806}
1807
1808
1809/// processInstruction - When calculating availability, handle an instruction
1810/// by inserting it into the appropriate sets.
1811bool GVN::processInstruction(Instruction *I,
1812                             SmallVectorImpl<Instruction*> &toErase) {
1813  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1814    bool Changed = processLoad(LI, toErase);
1815
1816    if (!Changed) {
1817      unsigned Num = VN.lookup_or_add(LI);
1818      localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
1819    }
1820
1821    return Changed;
1822  }
1823
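  // Record the next unused value number before numbering I, so we can tell
  // below whether lookup_or_add assigned I a brand-new number.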
1824  uint32_t NextNum = VN.getNextUnusedValueNumber();
1825  unsigned Num = VN.lookup_or_add(I);
1826
1827  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
1828    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
1829
1830    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
1831      return false;
1832
1833    Value *BranchCond = BI->getCondition();
1834    uint32_t CondVN = VN.lookup_or_add(BranchCond);
1835
1836    BasicBlock *TrueSucc = BI->getSuccessor(0);
1837    BasicBlock *FalseSucc = BI->getSuccessor(1);
1838
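    // If a successor has this block as its only predecessor, then the branch
    // condition has a known constant value on entry to that successor; record
    // it in the successor's scope so lookups from dominated blocks find it.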
1839    if (TrueSucc->getSinglePredecessor())
1840      localAvail[TrueSucc]->table[CondVN] =
1841        ConstantInt::getTrue(TrueSucc->getContext());
1842    if (FalseSucc->getSinglePredecessor())
1843      localAvail[FalseSucc]->table[CondVN] =
1844        ConstantInt::getFalse(FalseSucc->getContext());
1845
1846    return false;
1847
1848  // Allocations are always uniquely numbered, so we can save time and memory
1849  // by fast-failing them.
1850  } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
1851    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
1852    return false;
1853  }
1854
1855  // Collapse PHI nodes
1856  if (PHINode* p = dyn_cast<PHINode>(I)) {
1857    Value *constVal = CollapsePhi(p);
1858
1859    if (constVal) {
1860      p->replaceAllUsesWith(constVal);
1861      if (MD && isa<PointerType>(constVal->getType()))
1862        MD->invalidateCachedPointerInfo(constVal);
1863      VN.erase(p);
1864
1865      toErase.push_back(p);
1866    } else {
1867      localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
1868    }
1869
1870  // If the number we were assigned was a brand new VN, then we don't
1871  // need to do a lookup to see if the number already exists
1872  // somewhere in the domtree: it can't!
1873  } else if (Num == NextNum) {
1874    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
1875
1876  // Perform fast-path value-number based elimination of values inherited from
1877  // dominators.
1878  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
1879    // Remove it!
1880    VN.erase(I);
1881    I->replaceAllUsesWith(repl);
1882    if (MD && isa<PointerType>(repl->getType()))
1883      MD->invalidateCachedPointerInfo(repl);
1884    toErase.push_back(I);
1885    return true;
1886
1887  } else {
1888    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
1889  }
1890
1891  return false;
1892}
1893
1894/// runOnFunction - This is the main transformation entry point for a function.
1895bool GVN::runOnFunction(Function& F) {
1896  if (!NoLoads)
1897    MD = &getAnalysis<MemoryDependenceAnalysis>();
1898  DT = &getAnalysis<DominatorTree>();
1899  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
1900  VN.setMemDep(MD);
1901  VN.setDomTree(DT);
1902
1903  bool Changed = false;
1904  bool ShouldContinue = true;
1905
1906  // Merge unconditional branches, allowing PRE to catch more
1907  // optimization opportunities.
1908  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
1909    BasicBlock *BB = FI;
1910    ++FI;
1911    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
1912    if (removedBlock) NumGVNBlocks++;
1913
1914    Changed |= removedBlock;
1915  }
1916
1917  unsigned Iteration = 0;
1918
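  // Iterate until GVN converges; each pass over the function may expose new
  // redundancies to the next one.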
1919  while (ShouldContinue) {
1920    DEBUG(errs() << "GVN iteration: " << Iteration << "\n");
1921    ShouldContinue = iterateOnFunction(F);
1922    Changed |= ShouldContinue;
1923    ++Iteration;
1924  }
1925
1926  if (EnablePRE) {
1927    bool PREChanged = true;
1928    while (PREChanged) {
1929      PREChanged = performPRE(F);
1930      Changed |= PREChanged;
1931    }
1932  }
1933  // FIXME: Should perform GVN again after PRE does something.  PRE can move
1934  // computations into blocks where they become fully redundant.  Note that
1935  // we can't do this until PRE's critical edge splitting updates memdep.
1936  // Actually, when this happens, we should just fully integrate PRE into GVN.
1937
1938  cleanupGlobalSets();
1939
1940  return Changed;
1941}
1942
1943
1944bool GVN::processBlock(BasicBlock *BB) {
1945  // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
1946  // incrementing BI before processing an instruction).
1947  SmallVector<Instruction*, 8> toErase;
1948  bool ChangedFunction = false;
1949
1950  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
1951       BI != BE;) {
1952    ChangedFunction |= processInstruction(BI, toErase);
1953    if (toErase.empty()) {
1954      ++BI;
1955      continue;
1956    }
1957
1958    // If we need some instructions deleted, do it now.
1959    NumGVNInstr += toErase.size();
1960
1961    // Avoid iterator invalidation.
1962    bool AtStart = BI == BB->begin();
1963    if (!AtStart)
1964      --BI;
1965
1966    for (SmallVector<Instruction*, 8>::iterator I = toErase.begin(),
1967         E = toErase.end(); I != E; ++I) {
1968      DEBUG(errs() << "GVN removed: " << **I << '\n');
1969      if (MD) MD->removeInstruction(*I);
1970      (*I)->eraseFromParent();
1971      DEBUG(verifyRemoved(*I));
1972    }
1973    toErase.clear();
1974
1975    if (AtStart)
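    // Restore the iterator, accounting for the instructions we just erased.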
1976      BI = BB->begin();
1977    else
1978      ++BI;
1979  }
1980
1981  return ChangedFunction;
1982}
1983
1984/// performPRE - Perform a purely local form of PRE that looks for diamond
1985/// control flow patterns and attempts to perform simple PRE at the join point.
1986bool GVN::performPRE(Function &F) {
1987  bool Changed = false;
1988  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
1989  DenseMap<BasicBlock*, Value*> predMap;
1990  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
1991       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
1992    BasicBlock *CurrentBlock = *DI;
1993
1994    // Nothing to PRE in the entry block.
1995    if (CurrentBlock == &F.getEntryBlock()) continue;
1996
1997    for (BasicBlock::iterator BI = CurrentBlock->begin(),
1998         BE = CurrentBlock->end(); BI != BE; ) {
1999      Instruction *CurInst = BI++;
2000
2001      if (isa<AllocaInst>(CurInst) ||
2002          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
2003          CurInst->getType()->isVoidTy() ||
2004          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
2005          isa<DbgInfoIntrinsic>(CurInst))
2006        continue;
2007
2008      uint32_t ValNo = VN.lookup(CurInst);
2009
2010      // Look for the predecessors for PRE opportunities.  We're
2011      // only trying to solve the basic diamond case, where
2012      // a value is computed in the successor and one predecessor,
2013      // but not the other.  We also explicitly disallow cases
2014      // where the successor is its own predecessor, because they're
2015      // more complicated to get right.
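      //
      // NumWith counts predecessors where the value is already available;
      // NumWithout counts those where it is not, and PREPred records the single
      // predecessor that is missing it.  Setting NumWithout to 2 forces the
      // "reject" path below.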
2016      unsigned NumWith = 0;
2017      unsigned NumWithout = 0;
2018      BasicBlock *PREPred = 0;
2019      predMap.clear();
2020
2021      for (pred_iterator PI = pred_begin(CurrentBlock),
2022           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
2023        // We're not interested in PRE where the block is its
2024        // own predecessor, or in blocks with predecessors
2025        // that are not reachable.
2026        if (*PI == CurrentBlock) {
2027          NumWithout = 2;
2028          break;
2029        } else if (!localAvail.count(*PI))  {
2030          NumWithout = 2;
2031          break;
2032        }
2033
2034        DenseMap<uint32_t, Value*>::iterator predV =
2035                                            localAvail[*PI]->table.find(ValNo);
2036        if (predV == localAvail[*PI]->table.end()) {
2037          PREPred = *PI;
2038          NumWithout++;
2039        } else if (predV->second == CurInst) {
2040          NumWithout = 2;
2041        } else {
2042          predMap[*PI] = predV->second;
2043          NumWith++;
2044        }
2045      }
2046
2047      // Don't do PRE when it might increase code size, i.e. when
2048      // we would need to insert instructions in more than one pred.
2049      if (NumWithout != 1 || NumWith == 0)
2050        continue;
2051
2052      // Don't do PRE across an indirect branch.
2053      if (isa<IndirectBrInst>(PREPred->getTerminator()))
2054        continue;
2055
2056      // We can't do PRE safely on a critical edge, so instead we schedule
2057      // the edge to be split and perform the PRE the next time we iterate
2058      // on the function.
2059      unsigned SuccNum = 0;
2060      for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();
2061           i != e; ++i)
2062        if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) {
2063          SuccNum = i;
2064          break;
2065        }
2066
2067      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
2068        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
2069        continue;
2070      }
2071
2072      // Instantiate the expression in the predecessor that lacked it.
2073      // Because we are going top-down through the block, all value numbers
2074      // will be available in the predecessor by the time we need them.  Any
2075      // that weren't originally present will have been instantiated earlier
2076      // in this loop.
2077      Instruction *PREInstr = CurInst->clone();
2078      bool success = true;
2079      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
2080        Value *Op = PREInstr->getOperand(i);
2081        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
2082          continue;
2083
2084        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
2085          PREInstr->setOperand(i, V);
2086        } else {
2087          success = false;
2088          break;
2089        }
2090      }
2091
2092      // Fail out if we encounter an operand that is not available in
2093      // the PRE predecessor.  This is typically because of loads which
2094      // are not value numbered precisely.
2095      if (!success) {
2096        delete PREInstr;
2097        DEBUG(verifyRemoved(PREInstr));
2098        continue;
2099      }
2100
2101      PREInstr->insertBefore(PREPred->getTerminator());
2102      PREInstr->setName(CurInst->getName() + ".pre");
2103      predMap[PREPred] = PREInstr;
2104      VN.add(PREInstr, ValNo);
2105      NumGVNPRE++;
2106
2107      // Update the availability map to include the new instruction.
2108      localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
2109
2110      // Create a PHI to make the value available in this block.
2111      PHINode* Phi = PHINode::Create(CurInst->getType(),
2112                                     CurInst->getName() + ".pre-phi",
2113                                     CurrentBlock->begin());
2114      for (pred_iterator PI = pred_begin(CurrentBlock),
2115           PE = pred_end(CurrentBlock); PI != PE; ++PI)
2116        Phi->addIncoming(predMap[*PI], *PI);
2117
2118      VN.add(Phi, ValNo);
2119      localAvail[CurrentBlock]->table[ValNo] = Phi;
2120
2121      CurInst->replaceAllUsesWith(Phi);
2122      if (MD && isa<PointerType>(Phi->getType()))
2123        MD->invalidateCachedPointerInfo(Phi);
2124      VN.erase(CurInst);
2125
2126      DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n');
2127      if (MD) MD->removeInstruction(CurInst);
2128      CurInst->eraseFromParent();
2129      DEBUG(verifyRemoved(CurInst));
2130      Changed = true;
2131    }
2132  }
2133
2134  for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator
2135       I = toSplit.begin(), E = toSplit.end(); I != E; ++I)
2136    SplitCriticalEdge(I->first, I->second, this);
2137
2138  return Changed || !toSplit.empty();
2139}
2140
2141/// iterateOnFunction - Executes one iteration of GVN.
2142bool GVN::iterateOnFunction(Function &F) {
2143  cleanupGlobalSets();
2144
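  // Create a value-number scope for each block, chained to the scope of its
  // immediate dominator so that lookupNumber can walk up the dominator tree.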
2145  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
2146       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
2147    if (DI->getIDom())
2148      localAvail[DI->getBlock()] =
2149                   new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
2150    else
2151      localAvail[DI->getBlock()] = new ValueNumberScope(0);
2152  }
2153
2154  // Top-down walk of the dominator tree
2155  bool Changed = false;
2156#if 0
2157  // Needed for value numbering with phi construction to work.
2158  ReversePostOrderTraversal<Function*> RPOT(&F);
2159  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
2160       RE = RPOT.end(); RI != RE; ++RI)
2161    Changed |= processBlock(*RI);
2162#else
2163  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
2164       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
2165    Changed |= processBlock(DI->getBlock());
2166#endif
2167
2168  return Changed;
2169}
2170
2171void GVN::cleanupGlobalSets() {
2172  VN.clear();
2173
2174  for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
2175       I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
2176    delete I->second;
2177  localAvail.clear();
2178}
2179
2180/// verifyRemoved - Verify that the specified instruction does not occur in our
2181/// internal data structures.
2182void GVN::verifyRemoved(const Instruction *Inst) const {
2183  VN.verifyRemoved(Inst);
2184
2185  // Walk through the value number scope to make sure the instruction isn't
2186  // ferreted away in it.
2187  for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
2188         I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
2189    const ValueNumberScope *VNS = I->second;
2190
2191    while (VNS) {
2192      for (DenseMap<uint32_t, Value*>::const_iterator
2193             II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
2194        assert(II->second != Inst && "Inst still in value numbering scope!");
2195      }
2196
2197      VNS = VNS->parent;
2198    }
2199  }
2200}
2201