GVN.cpp revision 9674d15120d657c9a9e681a7dc668349fa3a8de0
//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions.  It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Value.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MallocHelper.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <cstdio>
using namespace llvm;

STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");

static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));

//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers.  It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
namespace {
  struct Expression {
    enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,
                            UDIV, SDIV, FDIV, UREM, SREM,
                            FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
                            ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
                            ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
                            FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
                            FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
                            FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
                            SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
                            FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
                            PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT,
                            EMPTY, TOMBSTONE };
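    // EMPTY and TOMBSTONE do not correspond to IR; they are reserved for the
    // special empty/tombstone keys of DenseMap (see the DenseMapInfo<Expression>
    // specialization below).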

    ExpressionOpcode opcode;
    const Type* type;
    uint32_t firstVN;
    uint32_t secondVN;
    uint32_t thirdVN;
    SmallVector<uint32_t, 4> varargs;
    Value *function;

    Expression() { }
    Expression(ExpressionOpcode o) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      else if (opcode == EMPTY || opcode == TOMBSTONE)
        return true;
      else if (type != other.type)
        return false;
      else if (function != other.function)
        return false;
      else if (firstVN != other.firstVN)
        return false;
      else if (secondVN != other.secondVN)
        return false;
      else if (thirdVN != other.thirdVN)
        return false;
      else {
        if (varargs.size() != other.varargs.size())
          return false;

        for (size_t i = 0; i < varargs.size(); ++i)
          if (varargs[i] != other.varargs[i])
            return false;

        return true;
      }
    }

    bool operator!=(const Expression &other) const {
      return !(*this == other);
    }
  };

  class ValueTable {
    private:
      DenseMap<Value*, uint32_t> valueNumbering;
      DenseMap<Expression, uint32_t> expressionNumbering;
      AliasAnalysis* AA;
      MemoryDependenceAnalysis* MD;
      DominatorTree* DT;

      uint32_t nextValueNumber;

      Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);
      Expression::ExpressionOpcode getOpcode(CmpInst* C);
      Expression::ExpressionOpcode getOpcode(CastInst* C);
      Expression create_expression(BinaryOperator* BO);
      Expression create_expression(CmpInst* C);
      Expression create_expression(ShuffleVectorInst* V);
      Expression create_expression(ExtractElementInst* C);
      Expression create_expression(InsertElementInst* V);
      Expression create_expression(SelectInst* V);
      Expression create_expression(CastInst* C);
      Expression create_expression(GetElementPtrInst* G);
      Expression create_expression(CallInst* C);
      Expression create_expression(Constant* C);
    public:
      ValueTable() : nextValueNumber(1) { }
      uint32_t lookup_or_add(Value *V);
      uint32_t lookup(Value *V) const;
      void add(Value *V, uint32_t num);
      void clear();
      void erase(Value *v);
      unsigned size();
      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
      AliasAnalysis *getAliasAnalysis() const { return AA; }
      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
      void setDomTree(DominatorTree* D) { DT = D; }
      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
      void verifyRemoved(const Value *) const;
  };
}

namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return Expression(Expression::EMPTY);
  }

  static inline Expression getTombstoneKey() {
    return Expression(Expression::TOMBSTONE);
  }

  static unsigned getHashValue(const Expression &e) {
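    // Mix every field into the hash with the multiplicative step
    // "hash = field + hash*37"; the two pointer fields are shifted first to
    // discard the low bits that are zero due to alignment.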
    unsigned hash = e.opcode;

    hash = e.firstVN + hash * 37;
    hash = e.secondVN + hash * 37;
    hash = e.thirdVN + hash * 37;

    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9)) +
           hash * 37;

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
            (unsigned)((uintptr_t)e.function >> 9)) +
           hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
  static bool isPod() { return true; }
};
}

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//
Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
  switch(BO->getOpcode()) {
  default: // THIS SHOULD NEVER HAPPEN
    llvm_unreachable("Binary operator with unknown opcode?");
  case Instruction::Add:  return Expression::ADD;
  case Instruction::FAdd: return Expression::FADD;
  case Instruction::Sub:  return Expression::SUB;
  case Instruction::FSub: return Expression::FSUB;
  case Instruction::Mul:  return Expression::MUL;
  case Instruction::FMul: return Expression::FMUL;
  case Instruction::UDiv: return Expression::UDIV;
  case Instruction::SDiv: return Expression::SDIV;
  case Instruction::FDiv: return Expression::FDIV;
  case Instruction::URem: return Expression::UREM;
  case Instruction::SRem: return Expression::SREM;
  case Instruction::FRem: return Expression::FREM;
  case Instruction::Shl:  return Expression::SHL;
  case Instruction::LShr: return Expression::LSHR;
  case Instruction::AShr: return Expression::ASHR;
  case Instruction::And:  return Expression::AND;
  case Instruction::Or:   return Expression::OR;
  case Instruction::Xor:  return Expression::XOR;
  }
}

Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
  if (isa<ICmpInst>(C)) {
    switch (C->getPredicate()) {
    default:  // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
    }
  } else {
    switch (C->getPredicate()) {
    default: // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
    }
  }
}

Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
  switch(C->getOpcode()) {
  default: // THIS SHOULD NEVER HAPPEN
    llvm_unreachable("Cast operator with unknown opcode?");
  case Instruction::Trunc:    return Expression::TRUNC;
  case Instruction::ZExt:     return Expression::ZEXT;
  case Instruction::SExt:     return Expression::SEXT;
  case Instruction::FPToUI:   return Expression::FPTOUI;
  case Instruction::FPToSI:   return Expression::FPTOSI;
  case Instruction::UIToFP:   return Expression::UITOFP;
  case Instruction::SIToFP:   return Expression::SITOFP;
  case Instruction::FPTrunc:  return Expression::FPTRUNC;
  case Instruction::FPExt:    return Expression::FPEXT;
  case Instruction::PtrToInt: return Expression::PTRTOINT;
  case Instruction::IntToPtr: return Expression::INTTOPTR;
  case Instruction::BitCast:  return Expression::BITCAST;
  }
}

Expression ValueTable::create_expression(CallInst* C) {
  Expression e;

  e.type = C->getType();
  e.firstVN = 0;
  e.secondVN = 0;
  e.thirdVN = 0;
  e.function = C->getCalledFunction();
  e.opcode = Expression::CALL;

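  // Operand 0 of a CallInst is the callee, so start at op_begin()+1 to
  // value-number only the actual arguments.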
  for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
       I != E; ++I)
    e.varargs.push_back(lookup_or_add(*I));

  return e;
}

Expression ValueTable::create_expression(BinaryOperator* BO) {
  Expression e;

  e.firstVN = lookup_or_add(BO->getOperand(0));
  e.secondVN = lookup_or_add(BO->getOperand(1));
  e.thirdVN = 0;
  e.function = 0;
  e.type = BO->getType();
  e.opcode = getOpcode(BO);

  return e;
}

Expression ValueTable::create_expression(CmpInst* C) {
  Expression e;

  e.firstVN = lookup_or_add(C->getOperand(0));
  e.secondVN = lookup_or_add(C->getOperand(1));
  e.thirdVN = 0;
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(CastInst* C) {
  Expression e;

  e.firstVN = lookup_or_add(C->getOperand(0));
  e.secondVN = 0;
  e.thirdVN = 0;
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(ShuffleVectorInst* S) {
  Expression e;

  e.firstVN = lookup_or_add(S->getOperand(0));
  e.secondVN = lookup_or_add(S->getOperand(1));
  e.thirdVN = lookup_or_add(S->getOperand(2));
  e.function = 0;
  e.type = S->getType();
  e.opcode = Expression::SHUFFLE;

  return e;
}

Expression ValueTable::create_expression(ExtractElementInst* E) {
  Expression e;

  e.firstVN = lookup_or_add(E->getOperand(0));
  e.secondVN = lookup_or_add(E->getOperand(1));
  e.thirdVN = 0;
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::EXTRACT;

  return e;
}

Expression ValueTable::create_expression(InsertElementInst* I) {
  Expression e;

  e.firstVN = lookup_or_add(I->getOperand(0));
  e.secondVN = lookup_or_add(I->getOperand(1));
  e.thirdVN = lookup_or_add(I->getOperand(2));
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::INSERT;

  return e;
}

Expression ValueTable::create_expression(SelectInst* I) {
  Expression e;

  e.firstVN = lookup_or_add(I->getCondition());
  e.secondVN = lookup_or_add(I->getTrueValue());
  e.thirdVN = lookup_or_add(I->getFalseValue());
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::SELECT;

  return e;
}

Expression ValueTable::create_expression(GetElementPtrInst* G) {
  Expression e;

  e.firstVN = lookup_or_add(G->getPointerOperand());
  e.secondVN = 0;
  e.thirdVN = 0;
  e.function = 0;
  e.type = G->getType();
  e.opcode = Expression::GEP;

  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
       I != E; ++I)
    e.varargs.push_back(lookup_or_add(*I));

  return e;
}

//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}

/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (CallInst* C = dyn_cast<CallInst>(V)) {
    if (AA->doesNotAccessMemory(C)) {
      Expression e = create_expression(C);

      DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
      if (EI != expressionNumbering.end()) {
        valueNumbering.insert(std::make_pair(V, EI->second));
        return EI->second;
      } else {
        expressionNumbering.insert(std::make_pair(e, nextValueNumber));
        valueNumbering.insert(std::make_pair(V, nextValueNumber));

        return nextValueNumber++;
      }
    } else if (AA->onlyReadsMemory(C)) {
      Expression e = create_expression(C);

      if (expressionNumbering.find(e) == expressionNumbering.end()) {
        expressionNumbering.insert(std::make_pair(e, nextValueNumber));
        valueNumbering.insert(std::make_pair(V, nextValueNumber));
        return nextValueNumber++;
      }

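      // We have seen this call expression before.  A readonly call may only
      // reuse a prior call's number if memory dependence shows that the prior
      // call still produces the same value at this point.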
      MemDepResult local_dep = MD->getDependency(C);

      if (!local_dep.isDef() && !local_dep.isNonLocal()) {
        valueNumbering.insert(std::make_pair(V, nextValueNumber));
        return nextValueNumber++;
      }

      if (local_dep.isDef()) {
        CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

        if (local_cdep->getNumOperands() != C->getNumOperands()) {
          valueNumbering.insert(std::make_pair(V, nextValueNumber));
          return nextValueNumber++;
        }

        for (unsigned i = 1; i < C->getNumOperands(); ++i) {
          uint32_t c_vn = lookup_or_add(C->getOperand(i));
          uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
          if (c_vn != cd_vn) {
            valueNumbering.insert(std::make_pair(V, nextValueNumber));
            return nextValueNumber++;
          }
        }

        uint32_t v = lookup_or_add(local_cdep);
        valueNumbering.insert(std::make_pair(V, v));
        return v;
      }

      // Non-local case.
      const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
        MD->getNonLocalCallDependency(CallSite(C));
      // FIXME: call/call dependencies for readonly calls should return def, not
      // clobber!  Move the checking logic to MemDep!
      CallInst* cdep = 0;

      // Check to see if we have a single dominating call instruction that is
      // identical to C.
      for (unsigned i = 0, e = deps.size(); i != e; ++i) {
        const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i];
        // Ignore non-local dependencies.
        if (I->second.isNonLocal())
          continue;

        // We don't handle non-dependencies.  If we already have a call, reject
        // instruction dependencies.
        if (I->second.isClobber() || cdep != 0) {
          cdep = 0;
          break;
        }

        CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst());
        // FIXME: All duplicated with non-local case.
        if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){
          cdep = NonLocalDepCall;
          continue;
        }

        cdep = 0;
        break;
      }

      if (!cdep) {
        valueNumbering.insert(std::make_pair(V, nextValueNumber));
        return nextValueNumber++;
      }

      if (cdep->getNumOperands() != C->getNumOperands()) {
        valueNumbering.insert(std::make_pair(V, nextValueNumber));
        return nextValueNumber++;
      }
      for (unsigned i = 1; i < C->getNumOperands(); ++i) {
        uint32_t c_vn = lookup_or_add(C->getOperand(i));
        uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering.insert(std::make_pair(V, nextValueNumber));
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(cdep);
      valueNumbering.insert(std::make_pair(V, v));
      return v;

    } else {
      valueNumbering.insert(std::make_pair(V, nextValueNumber));
      return nextValueNumber++;
    }
  } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) {
    Expression e = create_expression(BO);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (CmpInst* C = dyn_cast<CmpInst>(V)) {
    Expression e = create_expression(C);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (SelectInst* U = dyn_cast<SelectInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (CastInst* U = dyn_cast<CastInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else {
    valueNumbering.insert(std::make_pair(V, nextValueNumber));
    return nextValueNumber++;
  }
}

/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}

/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}

//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

namespace {
  struct ValueNumberScope {
    ValueNumberScope* parent;
    DenseMap<uint32_t, Value*> table;

    ValueNumberScope(ValueNumberScope* p) : parent(p) { }
  };
}

namespace {

  class GVN : public FunctionPass {
    bool runOnFunction(Function &F);
  public:
    static char ID; // Pass identification, replacement for typeid
    GVN() : FunctionPass(&ID) { }

  private:
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;

    ValueTable VN;
    DenseMap<BasicBlock*, ValueNumberScope*> localAvail;

    // This transformation requires dominator info.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();

      AU.addPreserved<DominatorTree>();
      AU.addPreserved<AliasAnalysis>();
    }

    // Helper functions
    // FIXME: eliminate or document these better
    bool processLoad(LoadInst* L,
                     SmallVectorImpl<Instruction*> &toErase);
    bool processInstruction(Instruction *I,
                            SmallVectorImpl<Instruction*> &toErase);
    bool processNonLocalLoad(LoadInst* L,
                             SmallVectorImpl<Instruction*> &toErase);
    bool processBlock(BasicBlock *BB);
    void dump(DenseMap<uint32_t, Value*>& d);
    bool iterateOnFunction(Function &F);
    Value *CollapsePhi(PHINode* p);
    bool performPRE(Function& F);
    Value *lookupNumber(BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
    void verifyRemoved(const Instruction *I) const;
  };

  char GVN::ID = 0;
}

// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass() { return new GVN(); }

static RegisterPass<GVN> X("gvn",
                           "Global Value Numbering");

void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  printf("{\n");
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
      printf("%u\n", I->first);
      I->second->dump();
  }
  printf("}\n");
}

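/// isSafeReplacement - Conservatively determine whether it is safe to replace
/// PHI node p with instruction inst.  Replacing one PHI with another is
/// rejected when p has a PHI user in inst's block, since PHI nodes in a block
/// take their incoming values simultaneously.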
static bool isSafeReplacement(PHINode* p, Instruction *inst) {
  if (!isa<PHINode>(inst))
    return true;

  for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
       UI != E; ++UI)
    if (PHINode* use_phi = dyn_cast<PHINode>(UI))
      if (use_phi->getParent() == inst->getParent())
        return false;

  return true;
}

Value *GVN::CollapsePhi(PHINode *PN) {
  Value *ConstVal = PN->hasConstantValue(DT);
  if (!ConstVal) return 0;

  Instruction *Inst = dyn_cast<Instruction>(ConstVal);
  if (!Inst)
    return ConstVal;

  if (DT->dominates(Inst, PN))
    if (isSafeReplacement(PN, Inst))
      return Inst;
  return 0;
}

/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block.  As we go, keep
/// track of which blocks we know are fully available in FullyAvailableBlocks.
/// This map is actually a four-state map with the following values:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
///   3) we are speculating for this block and have used that to speculate for
///      other blocks.
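///
/// States 2 and 3 are what let the recursion below terminate on cyclic CFGs:
/// a block is optimistically entered into the map as state 2 before its
/// predecessors are examined, and state 3 records that the optimistic guess
/// was used to mark other blocks, so a failed guess must be undone
/// transitively (see SpeculationFailure).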
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
  // Optimistically assume that the block is fully available and check to see
  // if we already know about this block in one lookup.
  std::pair<DenseMap<BasicBlock*, char>::iterator, bool> IV =
    FullyAvailableBlocks.insert(std::make_pair(BB, 2));

  // If the entry already existed for this block, return the precomputed value.
  if (!IV.second) {
    // If this is a speculative "available" value, mark it as being used for
    // speculation of other blocks.
    if (IV.first->second == 2)
      IV.first->second = 3;
    return IV.first->second != 0;
  }

  // Otherwise, see if it is fully available in all predecessors.
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);

  // If this block has no predecessors, the value isn't live-in here.
  if (PI == PE)
    goto SpeculationFailure;

  for (; PI != PE; ++PI)
    // If the value isn't fully available in one of our predecessors, then it
    // isn't fully available in this block either.  Undo our previous
    // optimistic assumption and bail out.
    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      goto SpeculationFailure;

  return true;

// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block.  We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
  char &BBVal = FullyAvailableBlocks[BB];

  // If we didn't speculate on this, just return with it set to false.
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // If we did speculate on this value, we could have blocks set to 1 that are
  // incorrect.  Walk the (transitive) successors of this block and mark them as
  // 0 if set to 1.
  SmallVector<BasicBlock*, 32> BBWorklist;
  BBWorklist.push_back(BB);

  while (!BBWorklist.empty()) {
    BasicBlock *Entry = BBWorklist.pop_back_val();
    // Note that this sets blocks to 0 (unavailable) if they happen to not
    // already be in FullyAvailableBlocks.  This is safe.
    char &EntryVal = FullyAvailableBlocks[Entry];
    if (EntryVal == 0) continue;  // Already unavailable.

    // Mark as unavailable.
    EntryVal = 0;

    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
      BBWorklist.push_back(*I);
  }

  return false;
}


/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
                                            const Type *LoadTy,
                                            const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy) ||
      isa<StructType>(StoredVal->getType()) ||
      isa<ArrayType>(StoredVal->getType()))
    return false;

  // The store has to be at least as big as the load.
  if (TD.getTypeSizeInBits(StoredVal->getType()) <
        TD.getTypeSizeInBits(LoadTy))
    return false;

  return true;
}


/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value.  LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
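///
/// For example (a sketch of what the code below does): reusing an i64 store
/// to satisfy an i32 load shifts the stored value down by 32 bits on
/// big-endian targets, so that the bytes the load would have read end up in
/// the low bits, and then truncates it to i32.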
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
                                             const Type *LoadedTy,
                                             Instruction *InsertPt,
                                             const TargetData &TD) {
  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
    return 0;

  const Type *StoredValTy = StoredVal->getType();

  uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoreSize == LoadSize) {
    if (isa<PointerType>(StoredValTy) && isa<PointerType>(LoadedTy)) {
      // Pointer to Pointer -> use bitcast.
      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
    }

    // Convert source pointers to integers, which can be bitcast.
    if (isa<PointerType>(StoredValTy)) {
      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
    }

    const Type *TypeToCastTo = LoadedTy;
    if (isa<PointerType>(TypeToCastTo))
      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());

    if (StoredValTy != TypeToCastTo)
      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);

    // Cast to pointer if the load needs a pointer type.
    if (isa<PointerType>(LoadedTy))
      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it.  If the available value is too small, then we
  // can't do anything.
  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (isa<PointerType>(StoredValTy)) {
    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!isa<IntegerType>(StoredValTy)) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (TD.isBigEndian()) {
    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
  }

  // Truncate the integer to the right size now.
  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);

  if (LoadedTy == NewIntTy)
    return StoredVal;

  // If the result is a pointer, inttoptr.
  if (isa<PointerType>(LoadedTy))
    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);

  // Otherwise, bitcast.
  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}

/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
/// be expressed as a base pointer plus a constant offset.  Return the base and
/// offset to the caller.
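///
/// For example, a bitcast of "getelementptr [10 x i32]* %A, i32 0, i32 3"
/// comes back as base %A with Offset 12, assuming TargetData gives i32 a
/// 4-byte allocation size.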
static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
                                        const TargetData &TD) {
  Operator *PtrOp = dyn_cast<Operator>(Ptr);
  if (PtrOp == 0) return Ptr;

  // Just look through bitcasts.
  if (PtrOp->getOpcode() == Instruction::BitCast)
    return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);

  // If this is a GEP with constant indices, we can look through it.
  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;

  gep_type_iterator GTI = gep_type_begin(GEP);
  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
       ++I, ++GTI) {
    ConstantInt *OpC = cast<ConstantInt>(*I);
    if (OpC->isZero()) continue;

    // Handle struct and array indices, which add their offset to the pointer.
    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
    } else {
      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
      Offset += OpC->getSExtValue()*Size;
    }
  }

  // Re-sign extend from the pointer size if needed to get overflow edge cases
  // right.
  unsigned PtrSize = TD.getPointerSizeInBits();
  if (PtrSize < 64)
    Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);

  return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
}


/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.  This means
/// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't must-alias.  Check this case to see if there is
/// anything more we can do before we give up.  This returns -1 if we have to
/// give up, or a byte number in the stored value of the piece that feeds the
/// load.
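///
/// For example, if an i64 is stored at some base pointer and an i32 is loaded
/// from 4 bytes past that same base, the stored bits fully cover the load and
/// this returns 4.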
static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
                                          const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()) ||
      isa<StructType>(DepSI->getOperand(0)->getType()) ||
      isa<ArrayType>(DepSI->getOperand(0)->getType()))
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase =
    GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD);
  Value *LoadBase =
    GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD);
  if (StoreBase != LoadBase)
    return -1;

  // If the load and store are to the exact same address, they should have been
  // a must alias.  AA must have gotten confused.
  // FIXME: Study to see if/when this happens.
  if (LoadOffset == StoreOffset) {
#if 0
    errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
    << "Base       = " << *StoreBase << "\n"
    << "Store Ptr  = " << *DepSI->getPointerOperand() << "\n"
    << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
    << "Load Ptr   = " << *L->getPointerOperand() << "\n"
    << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n";
    errs() << "'" << L->getParent()->getParent()->getName() << "'"
    << *L->getParent();
#endif
    return -1;
  }

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load.  In this case, they really don't alias at all; AA
  // must have gotten confused.
  // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
  // remove this check, as it is duplicated with what we have below.
  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
  uint64_t LoadSize = TD.getTypeSizeInBits(L->getType());

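  // Both sizes must be a whole number of bytes; the overlap arithmetic below
  // works at byte granularity.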
  if ((StoreSize & 7) | (LoadSize & 7))
    return -1;
  StoreSize >>= 3;  // Convert to bytes.
  LoadSize >>= 3;

  bool isAAFailure = false;
  if (StoreOffset < LoadOffset) {
    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
  } else {
    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
  }
  if (isAAFailure) {
#if 0
    errs() << "STORE LOAD DEP WITH COMMON BASE:\n"
    << "Base       = " << *StoreBase << "\n"
    << "Store Ptr  = " << *DepSI->getPointerOperand() << "\n"
    << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
    << "Load Ptr   = " << *L->getPointerOperand() << "\n"
    << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n";
    errs() << "'" << L->getParent()->getParent()->getName() << "'"
    << *L->getParent();
#endif
    return -1;
  }

  // If the load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it.  We could do something crazy in the future
  // (issue a smaller load and then merge the bits in) but this seems unlikely
  // to be valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset+StoreSize < LoadOffset+LoadSize)
    return -1;

  // Okay, we can do this transformation.  Return the number of bytes into the
  // store that the load is.
  return LoadOffset-StoreOffset;
}


/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.  This means
/// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't must-alias.  Check this case to see if there is
/// anything more we can do before we give up.
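///
/// Offset is the byte offset into SrcVal at which the bits needed by the load
/// begin.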
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   const Type *LoadTy,
                                   Instruction *InsertPt, const TargetData &TD){
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;

  // Compute which bits of the stored value are being used by the load.  Convert
  // to an integer type to start with.
  if (isa<PointerType>(SrcVal->getType()))
    SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt);
  if (!isa<IntegerType>(SrcVal->getType()))
    SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8),
                             "tmp", InsertPt);

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (TD.isLittleEndian()) {
    ShiftAmt = Offset*8;
  } else {
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;
  }

  if (ShiftAmt)
    SrcVal = BinaryOperator::CreateLShr(SrcVal,
                ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt);

  if (LoadSize != StoreSize)
    SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8),
                           "tmp", InsertPt);

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}

struct AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB;
  /// V - The value that is live out of the block.
  Value *V;
  /// Offset - The byte offset in V that is interesting for the load query.
  unsigned Offset;

  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.V = V;
    Res.Offset = Offset;
    return Res;
  }
};

/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI.  This returns the value
/// that should be used at LI's definition site.
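///
/// Available values whose type differs from LI's type are first coerced with
/// GetStoreValueForLoad at the end of their block before being handed to the
/// SSAUpdater.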
static Value *ConstructSSAForLoadSet(LoadInst *LI,
                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                     const TargetData *TD,
                                     AliasAnalysis *AA) {
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI);

  const Type *LoadTy = LI->getType();

  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    BasicBlock *BB = ValuesPerBlock[i].BB;
    Value *AvailableVal = ValuesPerBlock[i].V;
    unsigned Offset = ValuesPerBlock[i].Offset;

    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    if (AvailableVal->getType() != LoadTy) {
      assert(TD && "Need target data to handle type mismatch case");
      AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
                                          BB->getTerminator(), *TD);

      DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
            << *ValuesPerBlock[i].V << '\n'
            << *AvailableVal << '\n' << "\n\n\n");
    }

    SSAUpdate.AddAvailableValue(BB, AvailableVal);
  }

  // Perform PHI construction.
  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());

  // If new PHI nodes were created, notify alias analysis.
  if (isa<PointerType>(V->getType()))
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      AA->copyValue(LI, NewPHIs[i]);

  return V;
}

/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
                              SmallVectorImpl<Instruction*> &toErase) {
  // Find the non-local dependencies of the load.
  SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps;
  MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
                                   Deps);
  //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: "
  //             << Deps.size() << *LI << '\n');

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about.  Optimizing
  // it will be too expensive.
  if (Deps.size() > 100)
    return false;

  // If we had a phi translation failure, we'll have a single entry which is a
  // clobber in the current block.  Reject this early.
  if (Deps.size() == 1 && Deps[0].second.isClobber()) {
    DEBUG(
      errs() << "GVN: non-local load ";
      WriteAsOperand(errs(), LI);
      errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n';
    );
    return false;
  }

  // Filter out useless results (non-locals, etc).  Keep track of the blocks
  // where we have an available value in ValuesPerBlock; also keep track of
  // whether we see dependencies that produce an unknown value for the load
  // (such as a call that could potentially clobber the load).
  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
  SmallVector<BasicBlock*, 16> UnavailableBlocks;

  const TargetData *TD = 0;

  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
    BasicBlock *DepBB = Deps[i].first;
    MemDepResult DepInfo = Deps[i].second;

    if (DepInfo.isClobber()) {
      // If the dependence is to a store that writes to a superset of the bits
      // read by the load, we can extract the bits we need for the load from the
      // stored value.
      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();
        if (TD) {
          int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                           DepSI->getOperand(0),
                                                                Offset));
            continue;
          }
        }
      }

      // FIXME: Handle memset/memcpy.
      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    Instruction *DepInst = DepInfo.getInst();

    // Loading the allocation -> undef.
    if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) {
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                             UndefValue::get(LI->getType())));
      continue;
    }

    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
      // Reject loads and stores that are to the same address but are of
      // different types if we have to.
      if (S->getOperand(0)->getType() != LI->getType()) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();

        // If the stored value is larger than or equal to the loaded value, we
        // can reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0),
                                                        LI->getType(), *TD)) {
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }

      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                          S->getOperand(0)));
      continue;
    }

    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
      // If the types mismatch and we can't handle it, reject reuse of the load.
      if (LD->getType() != LI->getType()) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();

        // If the previously loaded value is larger than or equal to the one we
        // want, we can reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
      continue;
    }

    UnavailableBlocks.push_back(DepBB);
    continue;
  }

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty()) return false;

  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value.  Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD,
                                      VN.getAliasAnalysis());
    LI->replaceAllUsesWith(V);

    if (isa<PHINode>(V))
      V->takeName(LI);
    if (isa<PointerType>(V->getType()))
      MD->invalidateCachedPointerInfo(V);
    toErase.push_back(LI);
    NumGVNLoad++;
    return true;
  }

  if (!EnablePRE || !EnableLoadPRE)
    return false;

  // Okay, we have *some* definitions of the value.  This means that the value
  // is available in some of our (transitive) predecessors.  Let's think about
  // doing PRE of this load.  This will involve inserting a new load into the
  // predecessor when it's not available.  We could do this in general, but
  // prefer to not increase code size.  As such, we only do this when we know
  // that we only have to insert *one* load (which means we're basically moving
  // the load, not inserting a new one).
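  //
  // The shape we are looking for, sketched in IR:
  //
  //   BB1: store i32 %v, i32* %P         BB2: (%P not available)
  //          \                            /
  //          LoadBB:  %x = load i32* %P
  //
  // The value is available out of BB1 but not BB2, so we insert
  // "%x.pre = load i32* %P" at the end of BB2 and then merge the two with a
  // PHI in LoadBB, letting the original load be deleted.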
1355
1356  SmallPtrSet<BasicBlock *, 4> Blockers;
1357  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
1358    Blockers.insert(UnavailableBlocks[i]);
1359
1360  // Lets find first basic block with more than one predecessor.  Walk backwards
1361  // through predecessors if needed.
1362  BasicBlock *LoadBB = LI->getParent();
1363  BasicBlock *TmpBB = LoadBB;
1364
1365  bool isSinglePred = false;
1366  bool allSingleSucc = true;
1367  while (TmpBB->getSinglePredecessor()) {
1368    isSinglePred = true;
1369    TmpBB = TmpBB->getSinglePredecessor();
1370    if (!TmpBB) // If haven't found any, bail now.
1371      return false;
1372    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1373      return false;
1374    if (Blockers.count(TmpBB))
1375      return false;
1376    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1377      allSingleSucc = false;
1378  }
1379
1380  assert(TmpBB);
1381  LoadBB = TmpBB;
1382
1383  // If we have a repl set with LI itself in it, this means we have a loop where
1384  // at least one of the values is LI.  Since this means that we won't be able
1385  // to eliminate LI even if we insert uses in the other predecessors, we will
1386  // end up increasing code size.  Reject this by scanning for LI.
1387  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
1388    if (ValuesPerBlock[i].V == LI)
1389      return false;
1390
1391  if (isSinglePred) {
1392    bool isHot = false;
1393    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
1394      if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V))
1395        // "Hot" Instruction is in some loop (because it dominates its dep.
1396        // instruction).
1397        if (DT->dominates(LI, I)) {
1398          isHot = true;
1399          break;
1400        }
1401
1402    // We are interested only in "hot" instructions. We don't want to do any
1403    // mis-optimizations here.
1404    if (!isHot)
1405      return false;
1406  }
1407
1408  // Okay, we have some hope :).  Check to see if the loaded value is fully
1409  // available in all but one predecessor.
1410  // FIXME: If we could restructure the CFG, we could make a common pred with
1411  // all the preds that don't have an available LI and insert a new load into
1412  // that one block.
1413  BasicBlock *UnavailablePred = 0;
1414
1415  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
1416  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
1417    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
1418  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
1419    FullyAvailableBlocks[UnavailableBlocks[i]] = false;
1420
1421  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
1422       PI != E; ++PI) {
1423    if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
1424      continue;
1425
1426    // If this load is not available in multiple predecessors, reject it.
1427    if (UnavailablePred && UnavailablePred != *PI)
1428      return false;
1429    UnavailablePred = *PI;
1430  }
1431
1432  assert(UnavailablePred != 0 &&
1433         "Fully available value should be eliminated above!");
1434
1435  // If the loaded pointer is PHI node defined in this block, do PHI translation
1436  // to get its value in the predecessor.
1437  Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred);

  // Make sure the value is live in the predecessor.  If it was defined by a
  // non-PHI instruction in this block, we don't know how to recompute it above.
  if (Instruction *LPInst = dyn_cast<Instruction>(LoadPtr))
    if (!DT->dominates(LPInst->getParent(), UnavailablePred)) {
      DEBUG(errs() << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: "
                   << *LPInst << '\n' << *LI << "\n");
      return false;
    }

  // We don't currently handle critical edges :(
  if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) {
    DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
                 << UnavailablePred->getName() << "': " << *LI << '\n');
    return false;
  }
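  // (Illustrative) A critical edge here would look like
  //   UnavailablePred:  br i1 %c, label %LoadBB, label %Other
  // Placing the reload at UnavailablePred's terminator would also execute it
  // on the path to %Other, so we bail out rather than split the edge.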

  // Make sure it is valid to move this load here.  We have to watch out for:
  //  %1 = getelementptr i8* %p, ...
  //  test %p and branch if == 0
  //  load %1
  // It is valid to have the getelementptr before the test, even if %p can be
  // 0, as getelementptr only does address arithmetic.
  // If we are not pushing the value through any multiple-successor blocks
  // we do not have this case.  Otherwise, check that the load is safe to
  // put anywhere; this can be improved, but should be conservatively safe.
  if (!allSingleSucc &&
      !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator()))
    return false;

  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors;
  // do it.
  DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');

  Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
                                LI->getAlignment(),
                                UnavailablePred->getTerminator());

  // Add the newly created load.
  ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,NewLoad));

  // Perform PHI construction.
  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD,
                                    VN.getAliasAnalysis());
  LI->replaceAllUsesWith(V);
  if (isa<PHINode>(V))
    V->takeName(LI);
  if (isa<PointerType>(V->getType()))
    MD->invalidateCachedPointerInfo(V);
  toErase.push_back(LI);
  NumPRELoad++;
  return true;
}

/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
  if (L->isVolatile())
    return false;

  // Find this load's local dependency: a pointer that has been stored to or
  // loaded from before.
  MemDepResult Dep = MD->getDependency(L);

  // If the value isn't available, don't do anything!
  if (Dep.isClobber()) {
    // FIXME: We should handle memset/memcpy/memmove as dependent instructions
    // to forward the value if available.
    //if (isa<MemIntrinsic>(Dep.getInst()))
    //errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n";

    // Check to see if we have something like this:
    //   store i32 123, i32* %P
    //   %A = bitcast i32* %P to i8*
    //   %B = gep i8* %A, i32 1
    //   %C = load i8* %B
    //
    // We could handle this by recognizing that the clobber and the load share
    // an obvious common base + constant offset, and checking that the
    // previous store (or memset) completely covers this load.  This sort of
    // thing can happen in bitfield access code.
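    // (Illustrative, assuming a little-endian target) In the example above,
    // AnalyzeLoadFromClobberingStore would report Offset == 1, and
    // GetStoreValueForLoad would forward byte 1 of the stored i32 123
    // (0x0000007B), so %C == 0, with no memory access at all.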
    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
        int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD);
        if (Offset != -1) {
          Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
                                                 L->getType(), L, *TD);
          DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n'
                       << *AvailVal << '\n' << *L << "\n\n\n");

          // Replace the load!
          L->replaceAllUsesWith(AvailVal);
          if (isa<PointerType>(AvailVal->getType()))
            MD->invalidateCachedPointerInfo(AvailVal);
          toErase.push_back(L);
          NumGVNLoad++;
          return true;
        }
      }

    DEBUG(
      // Print the dependency quickly; using operator<< on the instruction
      // would be too slow.
      errs() << "GVN: load ";
      WriteAsOperand(errs(), L);
      Instruction *I = Dep.getInst();
      errs() << " is clobbered by " << *I << '\n';
    );
    return false;
  }

  // If it is defined in another block, try harder.
  if (Dep.isNonLocal())
    return processNonLocalLoad(L, toErase);

  Instruction *DepInst = Dep.getInst();
  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
    Value *StoredVal = DepSI->getOperand(0);

    // The store and load are to a must-aliased pointer, but they may not
    // actually have the same type.  See if we know how to reuse the stored
    // value (depending on its type).
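    // (Illustrative) For instance, "store float %f, float* %p" followed by a
    // load of i32 from a must-aliased pointer can be satisfied by bitcasting
    // %f to i32, since the two types have the same size.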
    const TargetData *TD = 0;
    if (StoredVal->getType() != L->getType() &&
        (TD = getAnalysisIfAvailable<TargetData>())) {
      StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
                                                 L, *TD);
      if (StoredVal == 0)
        return false;

      DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
                   << '\n' << *L << "\n\n\n");
    }

    // Remove it!
    L->replaceAllUsesWith(StoredVal);
    if (isa<PointerType>(StoredVal->getType()))
      MD->invalidateCachedPointerInfo(StoredVal);
    toErase.push_back(L);
    NumGVNLoad++;
    return true;
  }

  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
    Value *AvailableVal = DepLI;

    // The loads are of a must-aliased pointer, but they may not actually have
    // the same type.  See if we know how to reuse the previously loaded value
    // (depending on its type).
    const TargetData *TD = 0;
    if (DepLI->getType() != L->getType() &&
        (TD = getAnalysisIfAvailable<TargetData>())) {
      AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
      if (AvailableVal == 0)
        return false;

      DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
                   << "\n" << *L << "\n\n\n");
    }

    // Remove it!
    L->replaceAllUsesWith(AvailableVal);
    if (isa<PointerType>(DepLI->getType()))
      MD->invalidateCachedPointerInfo(DepLI);
    toErase.push_back(L);
    NumGVNLoad++;
    return true;
  }

  // If this load really doesn't depend on anything, then we must be loading
  // an undef value.  This can happen when loading from a fresh allocation
  // with no intervening stores, for example.
  if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) {
    L->replaceAllUsesWith(UndefValue::get(L->getType()));
    toErase.push_back(L);
    NumGVNLoad++;
    return true;
  }

  return false;
}

Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
  DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
  if (I == localAvail.end())
    return 0;

  ValueNumberScope *Locals = I->second;
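  // Walk up the scope chain.  The chain mirrors the dominator tree (each
  // scope's parent is its immediate dominator's scope), so this finds any
  // value for 'num' made available in a dominating block.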
  while (Locals) {
    DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
    if (I != Locals->table.end())
      return I->second;
    Locals = Locals->parent;
  }

  return 0;
}


/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
bool GVN::processInstruction(Instruction *I,
                             SmallVectorImpl<Instruction*> &toErase) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    bool Changed = processLoad(LI, toErase);

    if (!Changed) {
      unsigned Num = VN.lookup_or_add(LI);
      localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
    }

    return Changed;
  }

  uint32_t NextNum = VN.getNextUnusedValueNumber();
  unsigned Num = VN.lookup_or_add(I);

  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));

    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
      return false;

    Value *BranchCond = BI->getCondition();
    uint32_t CondVN = VN.lookup_or_add(BranchCond);

    BasicBlock *TrueSucc = BI->getSuccessor(0);
    BasicBlock *FalseSucc = BI->getSuccessor(1);
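    // (Illustrative) Given "br i1 %c, label %T, label %F": if %T has no other
    // predecessors, %c must be true on entry to %T, so record the condition's
    // value number as "true" in %T's scope (and likewise "false" in %F).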

    if (TrueSucc->getSinglePredecessor())
      localAvail[TrueSucc]->table[CondVN] =
        ConstantInt::getTrue(TrueSucc->getContext());
    if (FalseSucc->getSinglePredecessor())
      localAvail[FalseSucc]->table[CondVN] =
        ConstantInt::getFalse(FalseSucc->getContext());

    return false;

  // Allocations are always uniquely numbered, so we can save time and memory
  // by fast-failing them.
  } else if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
    return false;
  }

  // Collapse PHI nodes.
  if (PHINode* p = dyn_cast<PHINode>(I)) {
    Value *constVal = CollapsePhi(p);

    if (constVal) {
      p->replaceAllUsesWith(constVal);
      if (isa<PointerType>(constVal->getType()))
        MD->invalidateCachedPointerInfo(constVal);
      VN.erase(p);

      toErase.push_back(p);
    } else {
      localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
    }

  // If the number we were assigned was a brand new VN, then we don't
  // need to do a lookup to see if the number already exists
  // somewhere in the domtree: it can't!
  } else if (Num == NextNum) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));

  // Perform fast-path value-number based elimination of values inherited from
  // dominators.
  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
    // Remove it!
    VN.erase(I);
    I->replaceAllUsesWith(repl);
    if (isa<PointerType>(repl->getType()))
      MD->invalidateCachedPointerInfo(repl);
    toErase.push_back(I);
    return true;

  } else {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
  }

  return false;
}

/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function& F) {
  MD = &getAnalysis<MemoryDependenceAnalysis>();
  DT = &getAnalysis<DominatorTree>();
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
  VN.setMemDep(MD);
  VN.setDomTree(DT);

  bool Changed = false;
  bool ShouldContinue = true;

  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
    BasicBlock *BB = FI;
    ++FI;
    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
    if (removedBlock) NumGVNBlocks++;

    Changed |= removedBlock;
  }

  unsigned Iteration = 0;

  while (ShouldContinue) {
    DEBUG(errs() << "GVN iteration: " << Iteration << "\n");
    ShouldContinue = iterateOnFunction(F);
    Changed |= ShouldContinue;
    ++Iteration;
  }

  if (EnablePRE) {
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
      Changed |= PREChanged;
    }
  }
  // FIXME: Should perform GVN again after PRE does something.  PRE can move
  // computations into blocks where they become fully redundant.  Note that
  // we can't do this until PRE's critical edge splitting updates memdep.
  // Actually, when this happens, we should just fully integrate PRE into GVN.

  cleanupGlobalSets();

  return Changed;
}


bool GVN::processBlock(BasicBlock *BB) {
  // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
  // incrementing BI before processing an instruction).
  SmallVector<Instruction*, 8> toErase;
  bool ChangedFunction = false;

  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
       BI != BE;) {
    ChangedFunction |= processInstruction(BI, toErase);
    if (toErase.empty()) {
      ++BI;
      continue;
    }

    // If we need some instructions deleted, do it now.
    NumGVNInstr += toErase.size();

    // Avoid iterator invalidation.
    bool AtStart = BI == BB->begin();
    if (!AtStart)
      --BI;
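    // (Why this dance: if BI was at BB->begin(), decrementing it would be
    // invalid, so we remember that and reset to begin() after the erases;
    // otherwise backing BI up one instruction keeps it valid across
    // eraseFromParent.)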

    for (SmallVector<Instruction*, 8>::iterator I = toErase.begin(),
         E = toErase.end(); I != E; ++I) {
      DEBUG(errs() << "GVN removed: " << **I << '\n');
      MD->removeInstruction(*I);
      (*I)->eraseFromParent();
      DEBUG(verifyRemoved(*I));
    }
    toErase.clear();

    if (AtStart)
      BI = BB->begin();
    else
      ++BI;
  }

  return ChangedFunction;
}

/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function& F) {
  bool Changed = false;
  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
  DenseMap<BasicBlock*, Value*> predMap;
  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
    BasicBlock *CurrentBlock = *DI;

    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock()) continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
         BE = CurrentBlock->end(); BI != BE; ) {
      Instruction *CurInst = BI++;

      if (isa<AllocationInst>(CurInst) ||
          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
          CurInst->getType()->isVoidTy() ||
          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
          isa<DbgInfoIntrinsic>(CurInst))
        continue;

      uint32_t ValNo = VN.lookup(CurInst);

      // Look for the predecessors for PRE opportunities.  We're
      // only trying to solve the basic diamond case, where
      // a value is computed in the successor and one predecessor,
      // but not the other.  We also explicitly disallow cases
      // where the successor is its own predecessor, because they're
      // more complicated to get right.
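      // (Illustrative diamond, hypothetical IR)
      //   entry: br i1 %c, label %then, label %else
      //   then:  %a = add i32 %x, %y          ; value available here
      //          br label %join
      //   else:  br label %join               ; value missing here
      //   join:  %b = add i32 %x, %y          ; partially redundant
      // PRE clones the add into %else and replaces %b with a phi of %a and
      // the clone.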
      unsigned NumWith = 0;
      unsigned NumWithout = 0;
      BasicBlock *PREPred = 0;
      predMap.clear();

      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        // We're not interested in PRE where the block is its
        // own predecessor, or in blocks with predecessors
        // that are not reachable.
        if (*PI == CurrentBlock) {
          NumWithout = 2;
          break;
        } else if (!localAvail.count(*PI)) {
          NumWithout = 2;
          break;
        }

        DenseMap<uint32_t, Value*>::iterator predV =
                                            localAvail[*PI]->table.find(ValNo);
        if (predV == localAvail[*PI]->table.end()) {
          PREPred = *PI;
          NumWithout++;
        } else if (predV->second == CurInst) {
          NumWithout = 2;
        } else {
          predMap[*PI] = predV->second;
          NumWith++;
        }
      }

      // Don't do PRE when it might increase code size, i.e. when
      // we would need to insert instructions in more than one pred.
      if (NumWithout != 1 || NumWith == 0)
        continue;

      // We can't do PRE safely on a critical edge, so instead we schedule
      // the edge to be split and perform the PRE the next time we iterate
      // on the function.
      unsigned SuccNum = 0;
      for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();
           i != e; ++i)
        if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) {
          SuccNum = i;
          break;
        }

      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
        continue;
      }
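
      // (Illustrative) The edge is critical when PREPred has several
      // successors and CurrentBlock several predecessors, e.g.
      //   PREPred: br i1 %c, label %CurrentBlock, label %Other
      // Inserting the expression in PREPred would also compute it on the
      // %Other path, so SplitCriticalEdge first puts a block on the edge.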

      // Instantiate the expression in the predecessor that lacked it.
      // Because we are going top-down through the block, all value numbers
      // will be available in the predecessor by the time we need them.  Any
      // that weren't originally present will have been instantiated earlier
      // in this loop.
      Instruction *PREInstr = CurInst->clone();
      bool success = true;
      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
        Value *Op = PREInstr->getOperand(i);
        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
          continue;

        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
          PREInstr->setOperand(i, V);
        } else {
          success = false;
          break;
        }
      }

      // Fail out if we encounter an operand that is not available in
      // the PRE predecessor.  This is typically because of loads which
      // are not value numbered precisely.
      if (!success) {
        DEBUG(verifyRemoved(PREInstr));
        delete PREInstr;
        continue;
      }

      PREInstr->insertBefore(PREPred->getTerminator());
      PREInstr->setName(CurInst->getName() + ".pre");
      predMap[PREPred] = PREInstr;
      VN.add(PREInstr, ValNo);
      NumGVNPRE++;

      // Update the availability map to include the new instruction.
      localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));

      // Create a PHI to make the value available in this block.
      PHINode* Phi = PHINode::Create(CurInst->getType(),
                                     CurInst->getName() + ".pre-phi",
                                     CurrentBlock->begin());
      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI)
        Phi->addIncoming(predMap[*PI], *PI);

      VN.add(Phi, ValNo);
      localAvail[CurrentBlock]->table[ValNo] = Phi;

      CurInst->replaceAllUsesWith(Phi);
      if (isa<PointerType>(Phi->getType()))
        MD->invalidateCachedPointerInfo(Phi);
      VN.erase(CurInst);

      DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n');
      MD->removeInstruction(CurInst);
      CurInst->eraseFromParent();
      DEBUG(verifyRemoved(CurInst));
      Changed = true;
    }
  }

  for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator
       I = toSplit.begin(), E = toSplit.end(); I != E; ++I)
    SplitCriticalEdge(I->first, I->second, this);

  return Changed || !toSplit.empty();
}

/// iterateOnFunction - Executes one iteration of GVN.
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
    if (DI->getIDom())
      localAvail[DI->getBlock()] =
                   new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
    else
      localAvail[DI->getBlock()] = new ValueNumberScope(0);
  }
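
  // (Illustrative) For a diamond CFG (entry -> then/else -> join), the scopes
  // mirror the dominator tree: then, else, and join all chain to entry's
  // scope, since entry immediately dominates each of them.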

  // Top-down walk of the dominator tree.
  bool Changed = false;
#if 0
  // Needed for value numbering with phi construction to work.
  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
       RE = RPOT.end(); RI != RE; ++RI)
    Changed |= processBlock(*RI);
#else
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
    Changed |= processBlock(DI->getBlock());
#endif

  return Changed;
}

void GVN::cleanupGlobalSets() {
  VN.clear();

  for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
       I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
    delete I->second;
  localAvail.clear();
}

/// verifyRemoved - Verify that the specified instruction does not occur in our
/// internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
         I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
    const ValueNumberScope *VNS = I->second;

    while (VNS) {
      for (DenseMap<uint32_t, Value*>::const_iterator
             II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
        assert(II->second != Inst && "Inst still in value numbering scope!");
      }

      VNS = VNS->parent;
    }
  }
}