1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass performs global value numbering to eliminate fully redundant
11// instructions.  It also performs simple dead load elimination.
12//
13// Note that this pass does the value numbering itself; it does not use the
14// ValueNumbering analysis passes.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/Transforms/Scalar/GVN.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/DepthFirstIterator.h"
21#include "llvm/ADT/Hashing.h"
22#include "llvm/ADT/MapVector.h"
23#include "llvm/ADT/PostOrderIterator.h"
24#include "llvm/ADT/SetVector.h"
25#include "llvm/ADT/SmallPtrSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/Analysis/AliasAnalysis.h"
28#include "llvm/Analysis/AssumptionCache.h"
29#include "llvm/Analysis/CFG.h"
30#include "llvm/Analysis/ConstantFolding.h"
31#include "llvm/Analysis/GlobalsModRef.h"
32#include "llvm/Analysis/InstructionSimplify.h"
33#include "llvm/Analysis/Loads.h"
34#include "llvm/Analysis/MemoryBuiltins.h"
35#include "llvm/Analysis/MemoryDependenceAnalysis.h"
36#include "llvm/Analysis/PHITransAddr.h"
37#include "llvm/Analysis/TargetLibraryInfo.h"
38#include "llvm/Analysis/ValueTracking.h"
39#include "llvm/IR/DataLayout.h"
40#include "llvm/IR/Dominators.h"
41#include "llvm/IR/GlobalVariable.h"
42#include "llvm/IR/IRBuilder.h"
43#include "llvm/IR/IntrinsicInst.h"
44#include "llvm/IR/LLVMContext.h"
45#include "llvm/IR/Metadata.h"
46#include "llvm/IR/PatternMatch.h"
47#include "llvm/Support/CommandLine.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/raw_ostream.h"
50#include "llvm/Transforms/Utils/BasicBlockUtils.h"
51#include "llvm/Transforms/Utils/Local.h"
52#include "llvm/Transforms/Utils/SSAUpdater.h"
53#include <vector>
54using namespace llvm;
55using namespace llvm::gvn;
56using namespace PatternMatch;
57
58#define DEBUG_TYPE "gvn"
59
60STATISTIC(NumGVNInstr,  "Number of instructions deleted");
61STATISTIC(NumGVNLoad,   "Number of loads deleted");
62STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
63STATISTIC(NumGVNBlocks, "Number of blocks merged");
64STATISTIC(NumGVNSimpl,  "Number of instructions simplified");
65STATISTIC(NumGVNEqProp, "Number of equalities propagated");
66STATISTIC(NumPRELoad,   "Number of loads PRE'd");
67
68static cl::opt<bool> EnablePRE("enable-pre",
69                               cl::init(true), cl::Hidden);
70static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
71
72// Maximum allowed recursion depth.
73static cl::opt<uint32_t>
74MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
75                cl::desc("Max recurse depth (default = 1000)"));
76
77struct llvm::GVN::Expression {
78  uint32_t opcode;
79  Type *type;
80  SmallVector<uint32_t, 4> varargs;
81
82  Expression(uint32_t o = ~2U) : opcode(o) {}
83
84  bool operator==(const Expression &other) const {
85    if (opcode != other.opcode)
86      return false;
87    if (opcode == ~0U || opcode == ~1U)
88      return true;
89    if (type != other.type)
90      return false;
91    if (varargs != other.varargs)
92      return false;
93    return true;
94  }
95
96  friend hash_code hash_value(const Expression &Value) {
97    return hash_combine(
98        Value.opcode, Value.type,
99        hash_combine_range(Value.varargs.begin(), Value.varargs.end()));
100  }
101};
102
103namespace llvm {
104template <> struct DenseMapInfo<GVN::Expression> {
105  static inline GVN::Expression getEmptyKey() { return ~0U; }
106
107  static inline GVN::Expression getTombstoneKey() { return ~1U; }
108
109  static unsigned getHashValue(const GVN::Expression &e) {
110    using llvm::hash_value;
111    return static_cast<unsigned>(hash_value(e));
112  }
113  static bool isEqual(const GVN::Expression &LHS, const GVN::Expression &RHS) {
114    return LHS == RHS;
115  }
116};
117} // End llvm namespace.
118
119/// Represents a particular available value that we know how to materialize.
120/// Materialization of an AvailableValue never fails.  An AvailableValue is
121/// implicitly associated with a rematerialization point which is the
122/// location of the instruction from which it was formed.
123struct llvm::gvn::AvailableValue {
124  enum ValType {
125    SimpleVal, // A simple offsetted value that is accessed.
126    LoadVal,   // A value produced by a load.
127    MemIntrin, // A memory intrinsic which is loaded from.
128    UndefVal   // A UndefValue representing a value from dead block (which
129               // is not yet physically removed from the CFG).
130  };
131
132  /// V - The value that is live out of the block.
133  PointerIntPair<Value *, 2, ValType> Val;
134
135  /// Offset - The byte offset in Val that is interesting for the load query.
136  unsigned Offset;
137
138  static AvailableValue get(Value *V, unsigned Offset = 0) {
139    AvailableValue Res;
140    Res.Val.setPointer(V);
141    Res.Val.setInt(SimpleVal);
142    Res.Offset = Offset;
143    return Res;
144  }
145
146  static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
147    AvailableValue Res;
148    Res.Val.setPointer(MI);
149    Res.Val.setInt(MemIntrin);
150    Res.Offset = Offset;
151    return Res;
152  }
153
154  static AvailableValue getLoad(LoadInst *LI, unsigned Offset = 0) {
155    AvailableValue Res;
156    Res.Val.setPointer(LI);
157    Res.Val.setInt(LoadVal);
158    Res.Offset = Offset;
159    return Res;
160  }
161
162  static AvailableValue getUndef() {
163    AvailableValue Res;
164    Res.Val.setPointer(nullptr);
165    Res.Val.setInt(UndefVal);
166    Res.Offset = 0;
167    return Res;
168  }
169
170  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
171  bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
172  bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
173  bool isUndefValue() const { return Val.getInt() == UndefVal; }
174
175  Value *getSimpleValue() const {
176    assert(isSimpleValue() && "Wrong accessor");
177    return Val.getPointer();
178  }
179
180  LoadInst *getCoercedLoadValue() const {
181    assert(isCoercedLoadValue() && "Wrong accessor");
182    return cast<LoadInst>(Val.getPointer());
183  }
184
185  MemIntrinsic *getMemIntrinValue() const {
186    assert(isMemIntrinValue() && "Wrong accessor");
187    return cast<MemIntrinsic>(Val.getPointer());
188  }
189
190  /// Emit code at the specified insertion point to adjust the value defined
191  /// here to the specified type. This handles various coercion cases.
192  Value *MaterializeAdjustedValue(LoadInst *LI, Instruction *InsertPt,
193                                  GVN &gvn) const;
194};
195
196/// Represents an AvailableValue which can be rematerialized at the end of
197/// the associated BasicBlock.
198struct llvm::gvn::AvailableValueInBlock {
199  /// BB - The basic block in question.
200  BasicBlock *BB;
201
202  /// AV - The actual available value
203  AvailableValue AV;
204
205  static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) {
206    AvailableValueInBlock Res;
207    Res.BB = BB;
208    Res.AV = std::move(AV);
209    return Res;
210  }
211
212  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
213                                   unsigned Offset = 0) {
214    return get(BB, AvailableValue::get(V, Offset));
215  }
216  static AvailableValueInBlock getUndef(BasicBlock *BB) {
217    return get(BB, AvailableValue::getUndef());
218  }
219
220  /// Emit code at the end of this block to adjust the value defined here to
221  /// the specified type. This handles various coercion cases.
222  Value *MaterializeAdjustedValue(LoadInst *LI, GVN &gvn) const {
223    return AV.MaterializeAdjustedValue(LI, BB->getTerminator(), gvn);
224  }
225};
226
227//===----------------------------------------------------------------------===//
228//                     ValueTable Internal Functions
229//===----------------------------------------------------------------------===//
230
231GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
232  Expression e;
233  e.type = I->getType();
234  e.opcode = I->getOpcode();
235  for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
236       OI != OE; ++OI)
237    e.varargs.push_back(lookupOrAdd(*OI));
238  if (I->isCommutative()) {
239    // Ensure that commutative instructions that only differ by a permutation
240    // of their operands get the same value number by sorting the operand value
241    // numbers.  Since all commutative instructions have two operands it is more
242    // efficient to sort by hand rather than using, say, std::sort.
243    assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
244    if (e.varargs[0] > e.varargs[1])
245      std::swap(e.varargs[0], e.varargs[1]);
246  }
247
248  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
249    // Sort the operand value numbers so x<y and y>x get the same value number.
250    CmpInst::Predicate Predicate = C->getPredicate();
251    if (e.varargs[0] > e.varargs[1]) {
252      std::swap(e.varargs[0], e.varargs[1]);
253      Predicate = CmpInst::getSwappedPredicate(Predicate);
254    }
255    e.opcode = (C->getOpcode() << 8) | Predicate;
256  } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
257    for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
258         II != IE; ++II)
259      e.varargs.push_back(*II);
260  }
261
262  return e;
263}
264
265GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
266                                               CmpInst::Predicate Predicate,
267                                               Value *LHS, Value *RHS) {
268  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
269         "Not a comparison!");
270  Expression e;
271  e.type = CmpInst::makeCmpResultType(LHS->getType());
272  e.varargs.push_back(lookupOrAdd(LHS));
273  e.varargs.push_back(lookupOrAdd(RHS));
274
275  // Sort the operand value numbers so x<y and y>x get the same value number.
276  if (e.varargs[0] > e.varargs[1]) {
277    std::swap(e.varargs[0], e.varargs[1]);
278    Predicate = CmpInst::getSwappedPredicate(Predicate);
279  }
280  e.opcode = (Opcode << 8) | Predicate;
281  return e;
282}
283
284GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
285  assert(EI && "Not an ExtractValueInst?");
286  Expression e;
287  e.type = EI->getType();
288  e.opcode = 0;
289
290  IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
291  if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
292    // EI might be an extract from one of our recognised intrinsics. If it
293    // is we'll synthesize a semantically equivalent expression instead on
294    // an extract value expression.
295    switch (I->getIntrinsicID()) {
296      case Intrinsic::sadd_with_overflow:
297      case Intrinsic::uadd_with_overflow:
298        e.opcode = Instruction::Add;
299        break;
300      case Intrinsic::ssub_with_overflow:
301      case Intrinsic::usub_with_overflow:
302        e.opcode = Instruction::Sub;
303        break;
304      case Intrinsic::smul_with_overflow:
305      case Intrinsic::umul_with_overflow:
306        e.opcode = Instruction::Mul;
307        break;
308      default:
309        break;
310    }
311
312    if (e.opcode != 0) {
313      // Intrinsic recognized. Grab its args to finish building the expression.
314      assert(I->getNumArgOperands() == 2 &&
315             "Expect two args for recognised intrinsics.");
316      e.varargs.push_back(lookupOrAdd(I->getArgOperand(0)));
317      e.varargs.push_back(lookupOrAdd(I->getArgOperand(1)));
318      return e;
319    }
320  }
321
322  // Not a recognised intrinsic. Fall back to producing an extract value
323  // expression.
324  e.opcode = EI->getOpcode();
325  for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
326       OI != OE; ++OI)
327    e.varargs.push_back(lookupOrAdd(*OI));
328
329  for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
330         II != IE; ++II)
331    e.varargs.push_back(*II);
332
333  return e;
334}
335
336//===----------------------------------------------------------------------===//
337//                     ValueTable External Functions
338//===----------------------------------------------------------------------===//
339
340GVN::ValueTable::ValueTable() : nextValueNumber(1) {}
341GVN::ValueTable::ValueTable(const ValueTable &Arg)
342    : valueNumbering(Arg.valueNumbering),
343      expressionNumbering(Arg.expressionNumbering), AA(Arg.AA), MD(Arg.MD),
344      DT(Arg.DT), nextValueNumber(Arg.nextValueNumber) {}
345GVN::ValueTable::ValueTable(ValueTable &&Arg)
346    : valueNumbering(std::move(Arg.valueNumbering)),
347      expressionNumbering(std::move(Arg.expressionNumbering)),
348      AA(std::move(Arg.AA)), MD(std::move(Arg.MD)), DT(std::move(Arg.DT)),
349      nextValueNumber(std::move(Arg.nextValueNumber)) {}
350GVN::ValueTable::~ValueTable() {}
351
352/// add - Insert a value into the table with a specified value number.
353void GVN::ValueTable::add(Value *V, uint32_t num) {
354  valueNumbering.insert(std::make_pair(V, num));
355}
356
357uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
358  if (AA->doesNotAccessMemory(C)) {
359    Expression exp = createExpr(C);
360    uint32_t &e = expressionNumbering[exp];
361    if (!e) e = nextValueNumber++;
362    valueNumbering[C] = e;
363    return e;
364  } else if (AA->onlyReadsMemory(C)) {
365    Expression exp = createExpr(C);
366    uint32_t &e = expressionNumbering[exp];
367    if (!e) {
368      e = nextValueNumber++;
369      valueNumbering[C] = e;
370      return e;
371    }
372    if (!MD) {
373      e = nextValueNumber++;
374      valueNumbering[C] = e;
375      return e;
376    }
377
378    MemDepResult local_dep = MD->getDependency(C);
379
380    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
381      valueNumbering[C] =  nextValueNumber;
382      return nextValueNumber++;
383    }
384
385    if (local_dep.isDef()) {
386      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
387
388      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
389        valueNumbering[C] = nextValueNumber;
390        return nextValueNumber++;
391      }
392
393      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
394        uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
395        uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i));
396        if (c_vn != cd_vn) {
397          valueNumbering[C] = nextValueNumber;
398          return nextValueNumber++;
399        }
400      }
401
402      uint32_t v = lookupOrAdd(local_cdep);
403      valueNumbering[C] = v;
404      return v;
405    }
406
407    // Non-local case.
408    const MemoryDependenceResults::NonLocalDepInfo &deps =
409      MD->getNonLocalCallDependency(CallSite(C));
410    // FIXME: Move the checking logic to MemDep!
411    CallInst* cdep = nullptr;
412
413    // Check to see if we have a single dominating call instruction that is
414    // identical to C.
415    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
416      const NonLocalDepEntry *I = &deps[i];
417      if (I->getResult().isNonLocal())
418        continue;
419
420      // We don't handle non-definitions.  If we already have a call, reject
421      // instruction dependencies.
422      if (!I->getResult().isDef() || cdep != nullptr) {
423        cdep = nullptr;
424        break;
425      }
426
427      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
428      // FIXME: All duplicated with non-local case.
429      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
430        cdep = NonLocalDepCall;
431        continue;
432      }
433
434      cdep = nullptr;
435      break;
436    }
437
438    if (!cdep) {
439      valueNumbering[C] = nextValueNumber;
440      return nextValueNumber++;
441    }
442
443    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
444      valueNumbering[C] = nextValueNumber;
445      return nextValueNumber++;
446    }
447    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
448      uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
449      uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i));
450      if (c_vn != cd_vn) {
451        valueNumbering[C] = nextValueNumber;
452        return nextValueNumber++;
453      }
454    }
455
456    uint32_t v = lookupOrAdd(cdep);
457    valueNumbering[C] = v;
458    return v;
459
460  } else {
461    valueNumbering[C] = nextValueNumber;
462    return nextValueNumber++;
463  }
464}
465
466/// Returns true if a value number exists for the specified value.
467bool GVN::ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; }
468
469/// lookup_or_add - Returns the value number for the specified value, assigning
470/// it a new number if it did not have one before.
471uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
472  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
473  if (VI != valueNumbering.end())
474    return VI->second;
475
476  if (!isa<Instruction>(V)) {
477    valueNumbering[V] = nextValueNumber;
478    return nextValueNumber++;
479  }
480
481  Instruction* I = cast<Instruction>(V);
482  Expression exp;
483  switch (I->getOpcode()) {
484    case Instruction::Call:
485      return lookupOrAddCall(cast<CallInst>(I));
486    case Instruction::Add:
487    case Instruction::FAdd:
488    case Instruction::Sub:
489    case Instruction::FSub:
490    case Instruction::Mul:
491    case Instruction::FMul:
492    case Instruction::UDiv:
493    case Instruction::SDiv:
494    case Instruction::FDiv:
495    case Instruction::URem:
496    case Instruction::SRem:
497    case Instruction::FRem:
498    case Instruction::Shl:
499    case Instruction::LShr:
500    case Instruction::AShr:
501    case Instruction::And:
502    case Instruction::Or:
503    case Instruction::Xor:
504    case Instruction::ICmp:
505    case Instruction::FCmp:
506    case Instruction::Trunc:
507    case Instruction::ZExt:
508    case Instruction::SExt:
509    case Instruction::FPToUI:
510    case Instruction::FPToSI:
511    case Instruction::UIToFP:
512    case Instruction::SIToFP:
513    case Instruction::FPTrunc:
514    case Instruction::FPExt:
515    case Instruction::PtrToInt:
516    case Instruction::IntToPtr:
517    case Instruction::BitCast:
518    case Instruction::Select:
519    case Instruction::ExtractElement:
520    case Instruction::InsertElement:
521    case Instruction::ShuffleVector:
522    case Instruction::InsertValue:
523    case Instruction::GetElementPtr:
524      exp = createExpr(I);
525      break;
526    case Instruction::ExtractValue:
527      exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
528      break;
529    default:
530      valueNumbering[V] = nextValueNumber;
531      return nextValueNumber++;
532  }
533
534  uint32_t& e = expressionNumbering[exp];
535  if (!e) e = nextValueNumber++;
536  valueNumbering[V] = e;
537  return e;
538}
539
540/// Returns the value number of the specified value. Fails if
541/// the value has not yet been numbered.
542uint32_t GVN::ValueTable::lookup(Value *V) const {
543  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
544  assert(VI != valueNumbering.end() && "Value not numbered?");
545  return VI->second;
546}
547
548/// Returns the value number of the given comparison,
549/// assigning it a new number if it did not have one before.  Useful when
550/// we deduced the result of a comparison, but don't immediately have an
551/// instruction realizing that comparison to hand.
552uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
553                                         CmpInst::Predicate Predicate,
554                                         Value *LHS, Value *RHS) {
555  Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
556  uint32_t& e = expressionNumbering[exp];
557  if (!e) e = nextValueNumber++;
558  return e;
559}
560
561/// Remove all entries from the ValueTable.
562void GVN::ValueTable::clear() {
563  valueNumbering.clear();
564  expressionNumbering.clear();
565  nextValueNumber = 1;
566}
567
568/// Remove a value from the value numbering.
569void GVN::ValueTable::erase(Value *V) {
570  valueNumbering.erase(V);
571}
572
573/// verifyRemoved - Verify that the value is removed from all internal data
574/// structures.
575void GVN::ValueTable::verifyRemoved(const Value *V) const {
576  for (DenseMap<Value*, uint32_t>::const_iterator
577         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
578    assert(I->first != V && "Inst still occurs in value numbering map!");
579  }
580}
581
582//===----------------------------------------------------------------------===//
583//                                GVN Pass
584//===----------------------------------------------------------------------===//
585
586PreservedAnalyses GVN::run(Function &F, AnalysisManager<Function> &AM) {
587  // FIXME: The order of evaluation of these 'getResult' calls is very
588  // significant! Re-ordering these variables will cause GVN when run alone to
589  // be less effective! We should fix memdep and basic-aa to not exhibit this
590  // behavior, but until then don't change the order here.
591  auto &AC = AM.getResult<AssumptionAnalysis>(F);
592  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
593  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
594  auto &AA = AM.getResult<AAManager>(F);
595  auto &MemDep = AM.getResult<MemoryDependenceAnalysis>(F);
596  bool Changed = runImpl(F, AC, DT, TLI, AA, &MemDep);
597  if (!Changed)
598    return PreservedAnalyses::all();
599  PreservedAnalyses PA;
600  PA.preserve<DominatorTreeAnalysis>();
601  PA.preserve<GlobalsAA>();
602  return PA;
603}
604
605LLVM_DUMP_METHOD
606void GVN::dump(DenseMap<uint32_t, Value*>& d) {
607  errs() << "{\n";
608  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
609       E = d.end(); I != E; ++I) {
610      errs() << I->first << "\n";
611      I->second->dump();
612  }
613  errs() << "}\n";
614}
615
616/// Return true if we can prove that the value
617/// we're analyzing is fully available in the specified block.  As we go, keep
618/// track of which blocks we know are fully alive in FullyAvailableBlocks.  This
619/// map is actually a tri-state map with the following values:
620///   0) we know the block *is not* fully available.
621///   1) we know the block *is* fully available.
622///   2) we do not know whether the block is fully available or not, but we are
623///      currently speculating that it will be.
624///   3) we are speculating for this block and have used that to speculate for
625///      other blocks.
626static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
627                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks,
628                            uint32_t RecurseDepth) {
629  if (RecurseDepth > MaxRecurseDepth)
630    return false;
631
632  // Optimistically assume that the block is fully available and check to see
633  // if we already know about this block in one lookup.
634  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
635    FullyAvailableBlocks.insert(std::make_pair(BB, 2));
636
637  // If the entry already existed for this block, return the precomputed value.
638  if (!IV.second) {
639    // If this is a speculative "available" value, mark it as being used for
640    // speculation of other blocks.
641    if (IV.first->second == 2)
642      IV.first->second = 3;
643    return IV.first->second != 0;
644  }
645
646  // Otherwise, see if it is fully available in all predecessors.
647  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
648
649  // If this block has no predecessors, it isn't live-in here.
650  if (PI == PE)
651    goto SpeculationFailure;
652
653  for (; PI != PE; ++PI)
654    // If the value isn't fully available in one of our predecessors, then it
655    // isn't fully available in this block either.  Undo our previous
656    // optimistic assumption and bail out.
657    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks,RecurseDepth+1))
658      goto SpeculationFailure;
659
660  return true;
661
662// If we get here, we found out that this is not, after
663// all, a fully-available block.  We have a problem if we speculated on this and
664// used the speculation to mark other blocks as available.
665SpeculationFailure:
666  char &BBVal = FullyAvailableBlocks[BB];
667
668  // If we didn't speculate on this, just return with it set to false.
669  if (BBVal == 2) {
670    BBVal = 0;
671    return false;
672  }
673
674  // If we did speculate on this value, we could have blocks set to 1 that are
675  // incorrect.  Walk the (transitive) successors of this block and mark them as
676  // 0 if set to one.
677  SmallVector<BasicBlock*, 32> BBWorklist;
678  BBWorklist.push_back(BB);
679
680  do {
681    BasicBlock *Entry = BBWorklist.pop_back_val();
682    // Note that this sets blocks to 0 (unavailable) if they happen to not
683    // already be in FullyAvailableBlocks.  This is safe.
684    char &EntryVal = FullyAvailableBlocks[Entry];
685    if (EntryVal == 0) continue;  // Already unavailable.
686
687    // Mark as unavailable.
688    EntryVal = 0;
689
690    BBWorklist.append(succ_begin(Entry), succ_end(Entry));
691  } while (!BBWorklist.empty());
692
693  return false;
694}
695
696
697/// Return true if CoerceAvailableValueToLoadType will succeed.
698static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
699                                            Type *LoadTy,
700                                            const DataLayout &DL) {
701  // If the loaded or stored value is an first class array or struct, don't try
702  // to transform them.  We need to be able to bitcast to integer.
703  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
704      StoredVal->getType()->isStructTy() ||
705      StoredVal->getType()->isArrayTy())
706    return false;
707
708  // The store has to be at least as big as the load.
709  if (DL.getTypeSizeInBits(StoredVal->getType()) <
710        DL.getTypeSizeInBits(LoadTy))
711    return false;
712
713  return true;
714}
715
716/// If we saw a store of a value to memory, and
717/// then a load from a must-aliased pointer of a different type, try to coerce
718/// the stored value.  LoadedTy is the type of the load we want to replace.
719/// IRB is IRBuilder used to insert new instructions.
720///
721/// If we can't do it, return null.
722static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
723                                             IRBuilder<> &IRB,
724                                             const DataLayout &DL) {
725  assert(CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
726         "precondition violation - materialization can't fail");
727
728  // If this is already the right type, just return it.
729  Type *StoredValTy = StoredVal->getType();
730
731  uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy);
732  uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
733
734  // If the store and reload are the same size, we can always reuse it.
735  if (StoredValSize == LoadedValSize) {
736    // Pointer to Pointer -> use bitcast.
737    if (StoredValTy->getScalarType()->isPointerTy() &&
738        LoadedTy->getScalarType()->isPointerTy())
739      return IRB.CreateBitCast(StoredVal, LoadedTy);
740
741    // Convert source pointers to integers, which can be bitcast.
742    if (StoredValTy->getScalarType()->isPointerTy()) {
743      StoredValTy = DL.getIntPtrType(StoredValTy);
744      StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy);
745    }
746
747    Type *TypeToCastTo = LoadedTy;
748    if (TypeToCastTo->getScalarType()->isPointerTy())
749      TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
750
751    if (StoredValTy != TypeToCastTo)
752      StoredVal = IRB.CreateBitCast(StoredVal, TypeToCastTo);
753
754    // Cast to pointer if the load needs a pointer type.
755    if (LoadedTy->getScalarType()->isPointerTy())
756      StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy);
757
758    return StoredVal;
759  }
760
761  // If the loaded value is smaller than the available value, then we can
762  // extract out a piece from it.  If the available value is too small, then we
763  // can't do anything.
764  assert(StoredValSize >= LoadedValSize &&
765         "CanCoerceMustAliasedValueToLoad fail");
766
767  // Convert source pointers to integers, which can be manipulated.
768  if (StoredValTy->getScalarType()->isPointerTy()) {
769    StoredValTy = DL.getIntPtrType(StoredValTy);
770    StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy);
771  }
772
773  // Convert vectors and fp to integer, which can be manipulated.
774  if (!StoredValTy->isIntegerTy()) {
775    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
776    StoredVal = IRB.CreateBitCast(StoredVal, StoredValTy);
777  }
778
779  // If this is a big-endian system, we need to shift the value down to the low
780  // bits so that a truncate will work.
781  if (DL.isBigEndian()) {
782    uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
783                        DL.getTypeStoreSizeInBits(LoadedTy);
784    StoredVal = IRB.CreateLShr(StoredVal, ShiftAmt, "tmp");
785  }
786
787  // Truncate the integer to the right size now.
788  Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
789  StoredVal  = IRB.CreateTrunc(StoredVal, NewIntTy, "trunc");
790
791  if (LoadedTy == NewIntTy)
792    return StoredVal;
793
794  // If the result is a pointer, inttoptr.
795  if (LoadedTy->getScalarType()->isPointerTy())
796    return IRB.CreateIntToPtr(StoredVal, LoadedTy, "inttoptr");
797
798  // Otherwise, bitcast.
799  return IRB.CreateBitCast(StoredVal, LoadedTy, "bitcast");
800}
801
802/// This function is called when we have a
803/// memdep query of a load that ends up being a clobbering memory write (store,
804/// memset, memcpy, memmove).  This means that the write *may* provide bits used
805/// by the load but we can't be sure because the pointers don't mustalias.
806///
807/// Check this case to see if there is anything more we can do before we give
808/// up.  This returns -1 if we have to give up, or a byte number in the stored
809/// value of the piece that feeds the load.
810static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
811                                          Value *WritePtr,
812                                          uint64_t WriteSizeInBits,
813                                          const DataLayout &DL) {
814  // If the loaded or stored value is a first class array or struct, don't try
815  // to transform them.  We need to be able to bitcast to integer.
816  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
817    return -1;
818
819  int64_t StoreOffset = 0, LoadOffset = 0;
820  Value *StoreBase =
821      GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
822  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
823  if (StoreBase != LoadBase)
824    return -1;
825
826  // If the load and store are to the exact same address, they should have been
827  // a must alias.  AA must have gotten confused.
828  // FIXME: Study to see if/when this happens.  One case is forwarding a memset
829  // to a load from the base of the memset.
830#if 0
831  if (LoadOffset == StoreOffset) {
832    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
833    << "Base       = " << *StoreBase << "\n"
834    << "Store Ptr  = " << *WritePtr << "\n"
835    << "Store Offs = " << StoreOffset << "\n"
836    << "Load Ptr   = " << *LoadPtr << "\n";
837    abort();
838  }
839#endif
840
841  // If the load and store don't overlap at all, the store doesn't provide
842  // anything to the load.  In this case, they really don't alias at all, AA
843  // must have gotten confused.
844  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);
845
846  if ((WriteSizeInBits & 7) | (LoadSize & 7))
847    return -1;
848  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
849  LoadSize >>= 3;
850
851
852  bool isAAFailure = false;
853  if (StoreOffset < LoadOffset)
854    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
855  else
856    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
857
858  if (isAAFailure) {
859#if 0
860    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
861    << "Base       = " << *StoreBase << "\n"
862    << "Store Ptr  = " << *WritePtr << "\n"
863    << "Store Offs = " << StoreOffset << "\n"
864    << "Load Ptr   = " << *LoadPtr << "\n";
865    abort();
866#endif
867    return -1;
868  }
869
870  // If the Load isn't completely contained within the stored bits, we don't
871  // have all the bits to feed it.  We could do something crazy in the future
872  // (issue a smaller load then merge the bits in) but this seems unlikely to be
873  // valuable.
874  if (StoreOffset > LoadOffset ||
875      StoreOffset+StoreSize < LoadOffset+LoadSize)
876    return -1;
877
878  // Okay, we can do this transformation.  Return the number of bytes into the
879  // store that the load is.
880  return LoadOffset-StoreOffset;
881}
882
883/// This function is called when we have a
884/// memdep query of a load that ends up being a clobbering store.
885static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
886                                          StoreInst *DepSI) {
887  // Cannot handle reading from store of first-class aggregate yet.
888  if (DepSI->getValueOperand()->getType()->isStructTy() ||
889      DepSI->getValueOperand()->getType()->isArrayTy())
890    return -1;
891
892  const DataLayout &DL = DepSI->getModule()->getDataLayout();
893  Value *StorePtr = DepSI->getPointerOperand();
894  uint64_t StoreSize =DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
895  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
896                                        StorePtr, StoreSize, DL);
897}
898
899/// This function is called when we have a
900/// memdep query of a load that ends up being clobbered by another load.  See if
901/// the other load can feed into the second load.
902static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
903                                         LoadInst *DepLI, const DataLayout &DL){
904  // Cannot handle reading from store of first-class aggregate yet.
905  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
906    return -1;
907
908  Value *DepPtr = DepLI->getPointerOperand();
909  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
910  int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
911  if (R != -1) return R;
912
913  // If we have a load/load clobber an DepLI can be widened to cover this load,
914  // then we should widen it!
915  int64_t LoadOffs = 0;
916  const Value *LoadBase =
917      GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
918  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
919
920  unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
921      LoadBase, LoadOffs, LoadSize, DepLI);
922  if (Size == 0) return -1;
923
924  // Check non-obvious conditions enforced by MDA which we rely on for being
925  // able to materialize this potentially available value
926  assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
927  assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
928
929  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL);
930}
931
932
933
934static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
935                                            MemIntrinsic *MI,
936                                            const DataLayout &DL) {
937  // If the mem operation is a non-constant size, we can't handle it.
938  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
939  if (!SizeCst) return -1;
940  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
941
942  // If this is memset, we just need to see if the offset is valid in the size
943  // of the memset..
944  if (MI->getIntrinsicID() == Intrinsic::memset)
945    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
946                                          MemSizeInBits, DL);
947
948  // If we have a memcpy/memmove, the only case we can handle is if this is a
949  // copy from constant memory.  In that case, we can read directly from the
950  // constant memory.
951  MemTransferInst *MTI = cast<MemTransferInst>(MI);
952
953  Constant *Src = dyn_cast<Constant>(MTI->getSource());
954  if (!Src) return -1;
955
956  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
957  if (!GV || !GV->isConstant()) return -1;
958
959  // See if the access is within the bounds of the transfer.
960  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
961                                              MI->getDest(), MemSizeInBits, DL);
962  if (Offset == -1)
963    return Offset;
964
965  unsigned AS = Src->getType()->getPointerAddressSpace();
966  // Otherwise, see if we can constant fold a load from the constant with the
967  // offset applied as appropriate.
968  Src = ConstantExpr::getBitCast(Src,
969                                 Type::getInt8PtrTy(Src->getContext(), AS));
970  Constant *OffsetCst =
971    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
972  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
973                                       OffsetCst);
974  Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
975  if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
976    return Offset;
977  return -1;
978}
979
980
981/// This function is called when we have a
982/// memdep query of a load that ends up being a clobbering store.  This means
983/// that the store provides bits used by the load but we the pointers don't
984/// mustalias.  Check this case to see if there is anything more we can do
985/// before we give up.
986static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
987                                   Type *LoadTy,
988                                   Instruction *InsertPt, const DataLayout &DL){
989  LLVMContext &Ctx = SrcVal->getType()->getContext();
990
991  uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
992  uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
993
994  IRBuilder<> Builder(InsertPt);
995
996  // Compute which bits of the stored value are being used by the load.  Convert
997  // to an integer type to start with.
998  if (SrcVal->getType()->getScalarType()->isPointerTy())
999    SrcVal = Builder.CreatePtrToInt(SrcVal,
1000        DL.getIntPtrType(SrcVal->getType()));
1001  if (!SrcVal->getType()->isIntegerTy())
1002    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
1003
1004  // Shift the bits to the least significant depending on endianness.
1005  unsigned ShiftAmt;
1006  if (DL.isLittleEndian())
1007    ShiftAmt = Offset*8;
1008  else
1009    ShiftAmt = (StoreSize-LoadSize-Offset)*8;
1010
1011  if (ShiftAmt)
1012    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
1013
1014  if (LoadSize != StoreSize)
1015    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
1016
1017  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
1018}
1019
1020/// This function is called when we have a
1021/// memdep query of a load that ends up being a clobbering load.  This means
1022/// that the load *may* provide bits used by the load but we can't be sure
1023/// because the pointers don't mustalias.  Check this case to see if there is
1024/// anything more we can do before we give up.
1025static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
1026                                  Type *LoadTy, Instruction *InsertPt,
1027                                  GVN &gvn) {
1028  const DataLayout &DL = SrcVal->getModule()->getDataLayout();
1029  // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
1030  // widen SrcVal out to a larger load.
1031  unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
1032  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
1033  if (Offset+LoadSize > SrcValStoreSize) {
1034    assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
1035    assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
1036    // If we have a load/load clobber an DepLI can be widened to cover this
1037    // load, then we should widen it to the next power of 2 size big enough!
1038    unsigned NewLoadSize = Offset+LoadSize;
1039    if (!isPowerOf2_32(NewLoadSize))
1040      NewLoadSize = NextPowerOf2(NewLoadSize);
1041
1042    Value *PtrVal = SrcVal->getPointerOperand();
1043
1044    // Insert the new load after the old load.  This ensures that subsequent
1045    // memdep queries will find the new load.  We can't easily remove the old
1046    // load completely because it is already in the value numbering table.
1047    IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
1048    Type *DestPTy =
1049      IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
1050    DestPTy = PointerType::get(DestPTy,
1051                               PtrVal->getType()->getPointerAddressSpace());
1052    Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
1053    PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
1054    LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
1055    NewLoad->takeName(SrcVal);
1056    NewLoad->setAlignment(SrcVal->getAlignment());
1057
1058    DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
1059    DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
1060
1061    // Replace uses of the original load with the wider load.  On a big endian
1062    // system, we need to shift down to get the relevant bits.
1063    Value *RV = NewLoad;
1064    if (DL.isBigEndian())
1065      RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
1066    RV = Builder.CreateTrunc(RV, SrcVal->getType());
1067    SrcVal->replaceAllUsesWith(RV);
1068
1069    // We would like to use gvn.markInstructionForDeletion here, but we can't
1070    // because the load is already memoized into the leader map table that GVN
1071    // tracks.  It is potentially possible to remove the load from the table,
1072    // but then there all of the operations based on it would need to be
1073    // rehashed.  Just leave the dead load around.
1074    gvn.getMemDep().removeInstruction(SrcVal);
1075    SrcVal = NewLoad;
1076  }
1077
1078  return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
1079}
1080
1081
1082/// This function is called when we have a
1083/// memdep query of a load that ends up being a clobbering mem intrinsic.
1084static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
1085                                     Type *LoadTy, Instruction *InsertPt,
1086                                     const DataLayout &DL){
1087  LLVMContext &Ctx = LoadTy->getContext();
1088  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy)/8;
1089
1090  IRBuilder<> Builder(InsertPt);
1091
1092  // We know that this method is only called when the mem transfer fully
1093  // provides the bits for the load.
1094  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
1095    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
1096    // independently of what the offset is.
1097    Value *Val = MSI->getValue();
1098    if (LoadSize != 1)
1099      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
1100
1101    Value *OneElt = Val;
1102
1103    // Splat the value out to the right number of bits.
1104    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
1105      // If we can double the number of bytes set, do it.
1106      if (NumBytesSet*2 <= LoadSize) {
1107        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
1108        Val = Builder.CreateOr(Val, ShVal);
1109        NumBytesSet <<= 1;
1110        continue;
1111      }
1112
1113      // Otherwise insert one byte at a time.
1114      Value *ShVal = Builder.CreateShl(Val, 1*8);
1115      Val = Builder.CreateOr(OneElt, ShVal);
1116      ++NumBytesSet;
1117    }
1118
1119    return CoerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
1120  }
1121
1122  // Otherwise, this is a memcpy/memmove from a constant global.
1123  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
1124  Constant *Src = cast<Constant>(MTI->getSource());
1125  unsigned AS = Src->getType()->getPointerAddressSpace();
1126
1127  // Otherwise, see if we can constant fold a load from the constant with the
1128  // offset applied as appropriate.
1129  Src = ConstantExpr::getBitCast(Src,
1130                                 Type::getInt8PtrTy(Src->getContext(), AS));
1131  Constant *OffsetCst =
1132    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
1133  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
1134                                       OffsetCst);
1135  Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
1136  return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
1137}
1138
1139
1140/// Given a set of loads specified by ValuesPerBlock,
1141/// construct SSA form, allowing us to eliminate LI.  This returns the value
1142/// that should be used at LI's definition site.
1143static Value *ConstructSSAForLoadSet(LoadInst *LI,
1144                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
1145                                     GVN &gvn) {
1146  // Check for the fully redundant, dominating load case.  In this case, we can
1147  // just use the dominating value directly.
1148  if (ValuesPerBlock.size() == 1 &&
1149      gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
1150                                               LI->getParent())) {
1151    assert(!ValuesPerBlock[0].AV.isUndefValue() &&
1152           "Dead BB dominate this block");
1153    return ValuesPerBlock[0].MaterializeAdjustedValue(LI, gvn);
1154  }
1155
1156  // Otherwise, we have to construct SSA form.
1157  SmallVector<PHINode*, 8> NewPHIs;
1158  SSAUpdater SSAUpdate(&NewPHIs);
1159  SSAUpdate.Initialize(LI->getType(), LI->getName());
1160
1161  for (const AvailableValueInBlock &AV : ValuesPerBlock) {
1162    BasicBlock *BB = AV.BB;
1163
1164    if (SSAUpdate.HasValueForBlock(BB))
1165      continue;
1166
1167    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LI, gvn));
1168  }
1169
1170  // Perform PHI construction.
1171  return SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
1172}
1173
1174Value *AvailableValue::MaterializeAdjustedValue(LoadInst *LI,
1175                                                Instruction *InsertPt,
1176                                                GVN &gvn) const {
1177  Value *Res;
1178  Type *LoadTy = LI->getType();
1179  const DataLayout &DL = LI->getModule()->getDataLayout();
1180  if (isSimpleValue()) {
1181    Res = getSimpleValue();
1182    if (Res->getType() != LoadTy) {
1183      Res = GetStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
1184
1185      DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
1186                   << *getSimpleValue() << '\n'
1187                   << *Res << '\n' << "\n\n\n");
1188    }
1189  } else if (isCoercedLoadValue()) {
1190    LoadInst *Load = getCoercedLoadValue();
1191    if (Load->getType() == LoadTy && Offset == 0) {
1192      Res = Load;
1193    } else {
1194      Res = GetLoadValueForLoad(Load, Offset, LoadTy, InsertPt, gvn);
1195
1196      DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << "  "
1197                   << *getCoercedLoadValue() << '\n'
1198                   << *Res << '\n' << "\n\n\n");
1199    }
1200  } else if (isMemIntrinValue()) {
1201    Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
1202                                 InsertPt, DL);
1203    DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1204                 << "  " << *getMemIntrinValue() << '\n'
1205                 << *Res << '\n' << "\n\n\n");
1206  } else {
1207    assert(isUndefValue() && "Should be UndefVal");
1208    DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";);
1209    return UndefValue::get(LoadTy);
1210  }
1211  assert(Res && "failed to materialize?");
1212  return Res;
1213}
1214
1215static bool isLifetimeStart(const Instruction *Inst) {
1216  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
1217    return II->getIntrinsicID() == Intrinsic::lifetime_start;
1218  return false;
1219}
1220
1221bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
1222                                  Value *Address, AvailableValue &Res) {
1223
1224  assert((DepInfo.isDef() || DepInfo.isClobber()) &&
1225         "expected a local dependence");
1226  assert(LI->isUnordered() && "rules below are incorrect for ordered access");
1227
1228  const DataLayout &DL = LI->getModule()->getDataLayout();
1229
1230  if (DepInfo.isClobber()) {
1231    // If the dependence is to a store that writes to a superset of the bits
1232    // read by the load, we can extract the bits we need for the load from the
1233    // stored value.
1234    if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
1235      // Can't forward from non-atomic to atomic without violating memory model.
1236      if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
1237        int Offset =
1238          AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
1239        if (Offset != -1) {
1240          Res = AvailableValue::get(DepSI->getValueOperand(), Offset);
1241          return true;
1242        }
1243      }
1244    }
1245
1246    // Check to see if we have something like this:
1247    //    load i32* P
1248    //    load i8* (P+1)
1249    // if we have this, replace the later with an extraction from the former.
1250    if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
1251      // If this is a clobber and L is the first instruction in its block, then
1252      // we have the first instruction in the entry block.
1253      // Can't forward from non-atomic to atomic without violating memory model.
1254      if (DepLI != LI && Address && LI->isAtomic() <= DepLI->isAtomic()) {
1255        int Offset =
1256          AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
1257
1258        if (Offset != -1) {
1259          Res = AvailableValue::getLoad(DepLI, Offset);
1260          return true;
1261        }
1262      }
1263    }
1264
1265    // If the clobbering value is a memset/memcpy/memmove, see if we can
1266    // forward a value on from it.
1267    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
1268      if (Address && !LI->isAtomic()) {
1269        int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
1270                                                      DepMI, DL);
1271        if (Offset != -1) {
1272          Res = AvailableValue::getMI(DepMI, Offset);
1273          return true;
1274        }
1275      }
1276    }
1277    // Nothing known about this clobber, have to be conservative
1278    DEBUG(
1279      // fast print dep, using operator<< on instruction is too slow.
1280      dbgs() << "GVN: load ";
1281      LI->printAsOperand(dbgs());
1282      Instruction *I = DepInfo.getInst();
1283      dbgs() << " is clobbered by " << *I << '\n';
1284    );
1285    return false;
1286  }
1287  assert(DepInfo.isDef() && "follows from above");
1288
1289  Instruction *DepInst = DepInfo.getInst();
1290
1291  // Loading the allocation -> undef.
1292  if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
1293      // Loading immediately after lifetime begin -> undef.
1294      isLifetimeStart(DepInst)) {
1295    Res = AvailableValue::get(UndefValue::get(LI->getType()));
1296    return true;
1297  }
1298
1299  // Loading from calloc (which zero initializes memory) -> zero
1300  if (isCallocLikeFn(DepInst, TLI)) {
1301    Res = AvailableValue::get(Constant::getNullValue(LI->getType()));
1302    return true;
1303  }
1304
1305  if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
1306    // Reject loads and stores that are to the same address but are of
1307    // different types if we have to. If the stored value is larger or equal to
1308    // the loaded value, we can reuse it.
1309    if (S->getValueOperand()->getType() != LI->getType() &&
1310        !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
1311                                         LI->getType(), DL))
1312      return false;
1313
1314    // Can't forward from non-atomic to atomic without violating memory model.
1315    if (S->isAtomic() < LI->isAtomic())
1316      return false;
1317
1318    Res = AvailableValue::get(S->getValueOperand());
1319    return true;
1320  }
1321
1322  if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
1323    // If the types mismatch and we can't handle it, reject reuse of the load.
1324    // If the stored value is larger or equal to the loaded value, we can reuse
1325    // it.
1326    if (LD->getType() != LI->getType() &&
1327        !CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
1328      return false;
1329
1330    // Can't forward from non-atomic to atomic without violating memory model.
1331    if (LD->isAtomic() < LI->isAtomic())
1332      return false;
1333
1334    Res = AvailableValue::getLoad(LD);
1335    return true;
1336  }
1337
1338  // Unknown def - must be conservative
1339  DEBUG(
1340    // fast print dep, using operator<< on instruction is too slow.
1341    dbgs() << "GVN: load ";
1342    LI->printAsOperand(dbgs());
1343    dbgs() << " has unknown def " << *DepInst << '\n';
1344  );
1345  return false;
1346}
1347
1348void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
1349                                  AvailValInBlkVect &ValuesPerBlock,
1350                                  UnavailBlkVect &UnavailableBlocks) {
1351
1352  // Filter out useless results (non-locals, etc).  Keep track of the blocks
1353  // where we have a value available in repl, also keep track of whether we see
1354  // dependencies that produce an unknown value for the load (such as a call
1355  // that could potentially clobber the load).
1356  unsigned NumDeps = Deps.size();
1357  for (unsigned i = 0, e = NumDeps; i != e; ++i) {
1358    BasicBlock *DepBB = Deps[i].getBB();
1359    MemDepResult DepInfo = Deps[i].getResult();
1360
1361    if (DeadBlocks.count(DepBB)) {
1362      // Dead dependent mem-op disguise as a load evaluating the same value
1363      // as the load in question.
1364      ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
1365      continue;
1366    }
1367
1368    if (!DepInfo.isDef() && !DepInfo.isClobber()) {
1369      UnavailableBlocks.push_back(DepBB);
1370      continue;
1371    }
1372
1373    // The address being loaded in this non-local block may not be the same as
1374    // the pointer operand of the load if PHI translation occurs.  Make sure
1375    // to consider the right address.
1376    Value *Address = Deps[i].getAddress();
1377
1378    AvailableValue AV;
1379    if (AnalyzeLoadAvailability(LI, DepInfo, Address, AV)) {
1380      // subtlety: because we know this was a non-local dependency, we know
1381      // it's safe to materialize anywhere between the instruction within
1382      // DepInfo and the end of it's block.
1383      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
1384                                                          std::move(AV)));
1385    } else {
1386      UnavailableBlocks.push_back(DepBB);
1387    }
1388  }
1389
1390  assert(NumDeps == ValuesPerBlock.size() + UnavailableBlocks.size() &&
1391         "post condition violation");
1392}
1393
1394bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
1395                         UnavailBlkVect &UnavailableBlocks) {
1396  // Okay, we have *some* definitions of the value.  This means that the value
  // is available in some of our (transitive) predecessors.  Let's think about
1398  // doing PRE of this load.  This will involve inserting a new load into the
1399  // predecessor when it's not available.  We could do this in general, but
1400  // prefer to not increase code size.  As such, we only do this when we know
1401  // that we only have to insert *one* load (which means we're basically moving
1402  // the load, not inserting a new one).
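  //
  // As a rough illustration (names are made up, not from a test case):
  //
  //   bb1:
  //     %v1 = load i32, i32* %p
  //     br label %merge
  //   bb2:
  //     br label %merge
  //   merge:
  //     %v2 = load i32, i32* %p
  //
  // Here %v2 is available from bb1 but not from bb2, so PRE inserts a copy of
  // the load at the end of bb2 and replaces %v2 with a phi of %v1 and the new
  // load.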
1403
1404  SmallPtrSet<BasicBlock *, 4> Blockers(UnavailableBlocks.begin(),
1405                                        UnavailableBlocks.end());
1406
1407  // Let's find the first basic block with more than one predecessor.  Walk
1408  // backwards through predecessors if needed.
1409  BasicBlock *LoadBB = LI->getParent();
1410  BasicBlock *TmpBB = LoadBB;
1411
1412  while (TmpBB->getSinglePredecessor()) {
1413    TmpBB = TmpBB->getSinglePredecessor();
1414    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1415      return false;
1416    if (Blockers.count(TmpBB))
1417      return false;
1418
1419    // If any of these blocks has more than one successor (i.e. if the edge we
1420    // just traversed was critical), then there are other paths through this
1421    // block along which the load may not be anticipated.  Hoisting the load
1422    // above this block would be adding the load to execution paths along
1423    // which it was not previously executed.
1424    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1425      return false;
1426  }
1427
1428  assert(TmpBB);
1429  LoadBB = TmpBB;
1430
1431  // Check to see how many predecessors have the loaded value fully
1432  // available.
1433  MapVector<BasicBlock *, Value *> PredLoads;
1434  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
1435  for (const AvailableValueInBlock &AV : ValuesPerBlock)
1436    FullyAvailableBlocks[AV.BB] = true;
1437  for (BasicBlock *UnavailableBB : UnavailableBlocks)
1438    FullyAvailableBlocks[UnavailableBB] = false;
1439
1440  SmallVector<BasicBlock *, 4> CriticalEdgePred;
1441  for (BasicBlock *Pred : predecessors(LoadBB)) {
1442    // If any predecessor block is an EH pad that does not allow non-PHI
1443    // instructions before the terminator, we can't PRE the load.
1444    if (Pred->getTerminator()->isEHPad()) {
1445      DEBUG(dbgs()
1446            << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
1447            << Pred->getName() << "': " << *LI << '\n');
1448      return false;
1449    }
1450
1451    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
1452      continue;
1453    }
1454
1455    if (Pred->getTerminator()->getNumSuccessors() != 1) {
1456      if (isa<IndirectBrInst>(Pred->getTerminator())) {
1457        DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1458              << Pred->getName() << "': " << *LI << '\n');
1459        return false;
1460      }
1461
1462      if (LoadBB->isEHPad()) {
1463        DEBUG(dbgs()
1464              << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
1465              << Pred->getName() << "': " << *LI << '\n');
1466        return false;
1467      }
1468
1469      CriticalEdgePred.push_back(Pred);
1470    } else {
1471      // Only add the predecessors that will not be split for now.
1472      PredLoads[Pred] = nullptr;
1473    }
1474  }
1475
1476  // Decide whether PRE is profitable for this load.
1477  unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size();
1478  assert(NumUnavailablePreds != 0 &&
1479         "Fully available value should already be eliminated!");
1480
1481  // If this load is unavailable in multiple predecessors, reject it.
1482  // FIXME: If we could restructure the CFG, we could make a common pred with
1483  // all the preds that don't have an available LI and insert a new load into
1484  // that one block.
1485  if (NumUnavailablePreds != 1)
1486      return false;
1487
1488  // Split critical edges, and update the unavailable predecessors accordingly.
1489  for (BasicBlock *OrigPred : CriticalEdgePred) {
1490    BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
1491    assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
1492    PredLoads[NewPred] = nullptr;
1493    DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
1494                 << LoadBB->getName() << '\n');
1495  }
1496
1497  // Check if the load can safely be moved to all the unavailable predecessors.
1498  bool CanDoPRE = true;
1499  const DataLayout &DL = LI->getModule()->getDataLayout();
1500  SmallVector<Instruction*, 8> NewInsts;
1501  for (auto &PredLoad : PredLoads) {
1502    BasicBlock *UnavailablePred = PredLoad.first;
1503
1504    // Do PHI translation to get its value in the predecessor if necessary.  The
1505    // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1506
1507    // If all preds have a single successor, then we know it is safe to insert
1508    // the load on the pred (?!?), so we can insert code to materialize the
1509    // pointer if it is not available.
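    //
    // As an illustration (not an exhaustive description): if the load's
    // pointer is a GEP whose index is a PHI in LoadBB, PHI translation
    // rewrites the address in terms of that PHI's incoming value for
    // UnavailablePred, inserting the translated GEP in the predecessor if an
    // equivalent one does not already exist.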
1510    PHITransAddr Address(LI->getPointerOperand(), DL, AC);
1511    Value *LoadPtr = nullptr;
1512    LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
1513                                                *DT, NewInsts);
1514
1515    // If we couldn't find or insert a computation of this phi translated value,
1516    // we fail PRE.
1517    if (!LoadPtr) {
1518      DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1519            << *LI->getPointerOperand() << "\n");
1520      CanDoPRE = false;
1521      break;
1522    }
1523
1524    PredLoad.second = LoadPtr;
1525  }
1526
1527  if (!CanDoPRE) {
1528    while (!NewInsts.empty()) {
1529      Instruction *I = NewInsts.pop_back_val();
1530      if (MD) MD->removeInstruction(I);
1531      I->eraseFromParent();
1532    }
    // HINT: Don't revert the edge-splitting, as a following transformation may
    // also need to split these critical edges.
1535    return !CriticalEdgePred.empty();
1536  }
1537
1538  // Okay, we can eliminate this load by inserting a reload in the predecessor
1539  // and using PHI construction to get the value in the other predecessors, do
1540  // it.
1541  DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
1542  DEBUG(if (!NewInsts.empty())
1543          dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
1544                 << *NewInsts.back() << '\n');
1545
1546  // Assign value numbers to the new instructions.
1547  for (Instruction *I : NewInsts) {
1548    // FIXME: We really _ought_ to insert these value numbers into their
1549    // parent's availability map.  However, in doing so, we risk getting into
1550    // ordering issues.  If a block hasn't been processed yet, we would be
1551    // marking a value as AVAIL-IN, which isn't what we intend.
1552    VN.lookupOrAdd(I);
1553  }
1554
1555  for (const auto &PredLoad : PredLoads) {
1556    BasicBlock *UnavailablePred = PredLoad.first;
1557    Value *LoadPtr = PredLoad.second;
1558
1559    auto *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre",
1560                                 LI->isVolatile(), LI->getAlignment(),
1561                                 LI->getOrdering(), LI->getSynchScope(),
1562                                 UnavailablePred->getTerminator());
1563
1564    // Transfer the old load's AA tags to the new load.
1565    AAMDNodes Tags;
1566    LI->getAAMetadata(Tags);
1567    if (Tags)
1568      NewLoad->setAAMetadata(Tags);
1569
1570    if (auto *MD = LI->getMetadata(LLVMContext::MD_invariant_load))
1571      NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD);
1572    if (auto *InvGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group))
1573      NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
1574    if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range))
1575      NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
1576
1577    // Transfer DebugLoc.
1578    NewLoad->setDebugLoc(LI->getDebugLoc());
1579
1580    // Add the newly created load.
1581    ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
1582                                                        NewLoad));
1583    MD->invalidateCachedPointerInfo(LoadPtr);
1584    DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1585  }
1586
1587  // Perform PHI construction.
1588  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
1589  LI->replaceAllUsesWith(V);
1590  if (isa<PHINode>(V))
1591    V->takeName(LI);
1592  if (Instruction *I = dyn_cast<Instruction>(V))
1593    I->setDebugLoc(LI->getDebugLoc());
1594  if (V->getType()->getScalarType()->isPointerTy())
1595    MD->invalidateCachedPointerInfo(V);
1596  markInstructionForDeletion(LI);
1597  ++NumPRELoad;
1598  return true;
1599}
1600
1601/// Attempt to eliminate a load whose dependencies are
1602/// non-local by performing PHI construction.
1603bool GVN::processNonLocalLoad(LoadInst *LI) {
  // Non-local speculations are not allowed under ASan.
1605  if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress))
1606    return false;
1607
1608  // Step 1: Find the non-local dependencies of the load.
1609  LoadDepVect Deps;
1610  MD->getNonLocalPointerDependency(LI, Deps);
1611
1612  // If we had to process more than one hundred blocks to find the
1613  // dependencies, this load isn't worth worrying about.  Optimizing
1614  // it will be too expensive.
1615  unsigned NumDeps = Deps.size();
1616  if (NumDeps > 100)
1617    return false;
1618
1619  // If we had a phi translation failure, we'll have a single entry which is a
1620  // clobber in the current block.  Reject this early.
1621  if (NumDeps == 1 &&
1622      !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
1623    DEBUG(
1624      dbgs() << "GVN: non-local load ";
1625      LI->printAsOperand(dbgs());
1626      dbgs() << " has unknown dependencies\n";
1627    );
1628    return false;
1629  }
1630
1631  // If this load follows a GEP, see if we can PRE the indices before analyzing.
1632  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0))) {
1633    for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(),
1634                                        OE = GEP->idx_end();
1635         OI != OE; ++OI)
1636      if (Instruction *I = dyn_cast<Instruction>(OI->get()))
1637        performScalarPRE(I);
1638  }
1639
1640  // Step 2: Analyze the availability of the load
1641  AvailValInBlkVect ValuesPerBlock;
1642  UnavailBlkVect UnavailableBlocks;
1643  AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks);
1644
1645  // If we have no predecessors that produce a known value for this load, exit
1646  // early.
1647  if (ValuesPerBlock.empty())
1648    return false;
1649
  // Step 3: Eliminate full redundancy.
1651  //
1652  // If all of the instructions we depend on produce a known value for this
1653  // load, then it is fully redundant and we can use PHI insertion to compute
1654  // its value.  Insert PHIs and remove the fully redundant value now.
1655  if (UnavailableBlocks.empty()) {
1656    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
1657
1658    // Perform PHI construction.
1659    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
1660    LI->replaceAllUsesWith(V);
1661
1662    if (isa<PHINode>(V))
1663      V->takeName(LI);
1664    if (Instruction *I = dyn_cast<Instruction>(V))
1665      if (LI->getDebugLoc())
1666        I->setDebugLoc(LI->getDebugLoc());
1667    if (V->getType()->getScalarType()->isPointerTy())
1668      MD->invalidateCachedPointerInfo(V);
1669    markInstructionForDeletion(LI);
1670    ++NumGVNLoad;
1671    return true;
1672  }
1673
1674  // Step 4: Eliminate partial redundancy.
1675  if (!EnablePRE || !EnableLoadPRE)
1676    return false;
1677
1678  return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
1679}
1680
1681bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
1682  assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume &&
1683         "This function can only be called with llvm.assume intrinsic");
1684  Value *V = IntrinsicI->getArgOperand(0);
1685
1686  if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
1687    if (Cond->isZero()) {
1688      Type *Int8Ty = Type::getInt8Ty(V->getContext());
      // Insert a new store to null before the assume intrinsic to indicate
      // that this code is not reachable.  FIXME: We could insert an
      // unreachable instruction directly because we can modify the CFG.
1692      new StoreInst(UndefValue::get(Int8Ty),
1693                    Constant::getNullValue(Int8Ty->getPointerTo()),
1694                    IntrinsicI);
1695    }
1696    markInstructionForDeletion(IntrinsicI);
1697    return false;
1698  }
1699
1700  Constant *True = ConstantInt::getTrue(V->getContext());
1701  bool Changed = false;
1702
1703  for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
1704    BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);
1705
    // This property is only true in dominated successors; propagateEquality
    // will check dominance for us.
1708    Changed |= propagateEquality(V, True, Edge, false);
1709  }
1710
  // We can replace the assume's argument with true, which covers cases like
  // this:
1712  // call void @llvm.assume(i1 %cmp)
1713  // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
1714  ReplaceWithConstMap[V] = True;
1715
  // If one operand of an equality compare is a constant, adding it to the map
  // will cover cases like this:
1717  // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
1718  // call void @llvm.assume(i1 %cmp)
1719  // ret float %0 ; will change it to ret float 3.000000e+00
1720  if (auto *CmpI = dyn_cast<CmpInst>(V)) {
1721    if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ ||
1722        CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
1723        (CmpI->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
1724         CmpI->getFastMathFlags().noNaNs())) {
1725      Value *CmpLHS = CmpI->getOperand(0);
1726      Value *CmpRHS = CmpI->getOperand(1);
1727      if (isa<Constant>(CmpLHS))
1728        std::swap(CmpLHS, CmpRHS);
1729      auto *RHSConst = dyn_cast<Constant>(CmpRHS);
1730
1731      // If only one operand is constant.
1732      if (RHSConst != nullptr && !isa<Constant>(CmpLHS))
1733        ReplaceWithConstMap[CmpLHS] = RHSConst;
1734    }
1735  }
1736  return Changed;
1737}
1738
1739static void patchReplacementInstruction(Instruction *I, Value *Repl) {
1740  auto *ReplInst = dyn_cast<Instruction>(Repl);
1741  if (!ReplInst)
1742    return;
1743
1744  // Patch the replacement so that it is not more restrictive than the value
1745  // being replaced.
1746  ReplInst->andIRFlags(I);
1747
1748  // FIXME: If both the original and replacement value are part of the
1749  // same control-flow region (meaning that the execution of one
1750  // guarantees the execution of the other), then we can combine the
1751  // noalias scopes here and do better than the general conservative
1752  // answer used in combineMetadata().
1753
1754  // In general, GVN unifies expressions over different control-flow
1755  // regions, and so we need a conservative combination of the noalias
1756  // scopes.
1757  static const unsigned KnownIDs[] = {
1758      LLVMContext::MD_tbaa,           LLVMContext::MD_alias_scope,
1759      LLVMContext::MD_noalias,        LLVMContext::MD_range,
1760      LLVMContext::MD_fpmath,         LLVMContext::MD_invariant_load,
1761      LLVMContext::MD_invariant_group};
1762  combineMetadata(ReplInst, I, KnownIDs);
1763}
1764
1765static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
1766  patchReplacementInstruction(I, Repl);
1767  I->replaceAllUsesWith(Repl);
1768}
1769
1770/// Attempt to eliminate a load, first by eliminating it
1771/// locally, and then attempting non-local elimination if that fails.
1772bool GVN::processLoad(LoadInst *L) {
1773  if (!MD)
1774    return false;
1775
1776  // This code hasn't been audited for ordered or volatile memory access
1777  if (!L->isUnordered())
1778    return false;
1779
1780  if (L->use_empty()) {
1781    markInstructionForDeletion(L);
1782    return true;
1783  }
1784
1785  // ... to a pointer that has been loaded from before...
1786  MemDepResult Dep = MD->getDependency(L);
1787
1788  // If it is defined in another block, try harder.
1789  if (Dep.isNonLocal())
1790    return processNonLocalLoad(L);
1791
1792  // Only handle the local case below
1793  if (!Dep.isDef() && !Dep.isClobber()) {
1794    // This might be a NonFuncLocal or an Unknown
1795    DEBUG(
1796      // fast print dep, using operator<< on instruction is too slow.
1797      dbgs() << "GVN: load ";
1798      L->printAsOperand(dbgs());
1799      dbgs() << " has unknown dependence\n";
1800    );
1801    return false;
1802  }
1803
1804  AvailableValue AV;
1805  if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) {
1806    Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this);
1807
1808    // Replace the load!
1809    patchAndReplaceAllUsesWith(L, AvailableValue);
1810    markInstructionForDeletion(L);
1811    ++NumGVNLoad;
    // Tell MDA to re-examine the reused pointer since we might have more
    // information after forwarding it.
1814    if (MD && AvailableValue->getType()->getScalarType()->isPointerTy())
1815      MD->invalidateCachedPointerInfo(AvailableValue);
1816    return true;
1817  }
1818
1819  return false;
1820}
1821
1822// In order to find a leader for a given value number at a
1823// specific basic block, we first obtain the list of all Values for that number,
1824// and then scan the list to find one whose block dominates the block in
1825// question.  This is fast because dominator tree queries consist of only
1826// a few comparisons of DFS numbers.
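// For example (illustrative): if %a defined in BB1 and %b defined in BB2 both
// carry value number N, then a findLeader query for N from a block dominated
// only by BB1 returns %a, and from a block dominated by neither returns
// nullptr.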
1827Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
1828  LeaderTableEntry Vals = LeaderTable[num];
1829  if (!Vals.Val) return nullptr;
1830
1831  Value *Val = nullptr;
1832  if (DT->dominates(Vals.BB, BB)) {
1833    Val = Vals.Val;
1834    if (isa<Constant>(Val)) return Val;
1835  }
1836
1837  LeaderTableEntry* Next = Vals.Next;
1838  while (Next) {
1839    if (DT->dominates(Next->BB, BB)) {
1840      if (isa<Constant>(Next->Val)) return Next->Val;
1841      if (!Val) Val = Next->Val;
1842    }
1843
1844    Next = Next->Next;
1845  }
1846
1847  return Val;
1848}
1849
1850/// There is an edge from 'Src' to 'Dst'.  Return
1851/// true if every path from the entry block to 'Dst' passes via this edge.  In
1852/// particular 'Dst' must not be reachable via another edge from 'Src'.
1853static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
1854                                       DominatorTree *DT) {
1855  // While in theory it is interesting to consider the case in which Dst has
1856  // more than one predecessor, because Dst might be part of a loop which is
1857  // only reachable from Src, in practice it is pointless since at the time
1858  // GVN runs all such loops have preheaders, which means that Dst will have
1859  // been changed to have only one predecessor, namely Src.
1860  const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
1861  assert((!Pred || Pred == E.getStart()) &&
1862         "No edge between these basic blocks!");
1863  return Pred != nullptr;
1864}
1865
// Tries to replace the operands of the instruction with constants, using
// information from ReplaceWithConstMap.
1868bool GVN::replaceOperandsWithConsts(Instruction *Instr) const {
1869  bool Changed = false;
1870  for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
1871    Value *Operand = Instr->getOperand(OpNum);
1872    auto it = ReplaceWithConstMap.find(Operand);
1873    if (it != ReplaceWithConstMap.end()) {
1874      assert(!isa<Constant>(Operand) &&
1875             "Replacing constants with constants is invalid");
1876      DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " << *it->second
1877                   << " in instruction " << *Instr << '\n');
1878      Instr->setOperand(OpNum, it->second);
1879      Changed = true;
1880    }
1881  }
1882  return Changed;
1883}
1884
1885/// The given values are known to be equal in every block
1886/// dominated by 'Root'.  Exploit this, for example by replacing 'LHS' with
1887/// 'RHS' everywhere in the scope.  Returns whether a change was made.
/// If DominatesByEdge is false, then we will propagate the RHS value starting
/// from the end of Root.Start.
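///
/// As an illustrative example: if a conditional branch on "%c = icmp eq i32
/// %x, 0" makes %c known true along its taken edge, then within the scope
/// dominated by that edge, uses of %c can be replaced with true and uses of
/// %x with 0.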
1890bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
1891                            bool DominatesByEdge) {
1892  SmallVector<std::pair<Value*, Value*>, 4> Worklist;
1893  Worklist.push_back(std::make_pair(LHS, RHS));
1894  bool Changed = false;
1895  // For speed, compute a conservative fast approximation to
1896  // DT->dominates(Root, Root.getEnd());
1897  const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
1898
1899  while (!Worklist.empty()) {
1900    std::pair<Value*, Value*> Item = Worklist.pop_back_val();
1901    LHS = Item.first; RHS = Item.second;
1902
1903    if (LHS == RHS)
1904      continue;
1905    assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
1906
1907    // Don't try to propagate equalities between constants.
1908    if (isa<Constant>(LHS) && isa<Constant>(RHS))
1909      continue;
1910
1911    // Prefer a constant on the right-hand side, or an Argument if no constants.
1912    if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
1913      std::swap(LHS, RHS);
1914    assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
1915
1916    // If there is no obvious reason to prefer the left-hand side over the
1917    // right-hand side, ensure the longest lived term is on the right-hand side,
1918    // so the shortest lived term will be replaced by the longest lived.
1919    // This tends to expose more simplifications.
1920    uint32_t LVN = VN.lookupOrAdd(LHS);
1921    if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
1922        (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
1923      // Move the 'oldest' value to the right-hand side, using the value number
1924      // as a proxy for age.
1925      uint32_t RVN = VN.lookupOrAdd(RHS);
1926      if (LVN < RVN) {
1927        std::swap(LHS, RHS);
1928        LVN = RVN;
1929      }
1930    }
1931
1932    // If value numbering later sees that an instruction in the scope is equal
1933    // to 'LHS' then ensure it will be turned into 'RHS'.  In order to preserve
1934    // the invariant that instructions only occur in the leader table for their
1935    // own value number (this is used by removeFromLeaderTable), do not do this
1936    // if RHS is an instruction (if an instruction in the scope is morphed into
1937    // LHS then it will be turned into RHS by the next GVN iteration anyway, so
1938    // using the leader table is about compiling faster, not optimizing better).
    // The leader table only tracks basic blocks, not edges. Only add to it if
    // we have the simple case where the edge dominates the end.
1941    if (RootDominatesEnd && !isa<Instruction>(RHS))
1942      addToLeaderTable(LVN, RHS, Root.getEnd());
1943
1944    // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope.  As
1945    // LHS always has at least one use that is not dominated by Root, this will
1946    // never do anything if LHS has only one use.
1947    if (!LHS->hasOneUse()) {
1948      unsigned NumReplacements =
1949          DominatesByEdge
1950              ? replaceDominatedUsesWith(LHS, RHS, *DT, Root)
1951              : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getStart());
1952
1953      Changed |= NumReplacements > 0;
1954      NumGVNEqProp += NumReplacements;
1955    }
1956
1957    // Now try to deduce additional equalities from this one. For example, if
1958    // the known equality was "(A != B)" == "false" then it follows that A and B
1959    // are equal in the scope. Only boolean equalities with an explicit true or
1960    // false RHS are currently supported.
1961    if (!RHS->getType()->isIntegerTy(1))
1962      // Not a boolean equality - bail out.
1963      continue;
1964    ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
1965    if (!CI)
1966      // RHS neither 'true' nor 'false' - bail out.
1967      continue;
1968    // Whether RHS equals 'true'.  Otherwise it equals 'false'.
1969    bool isKnownTrue = CI->isAllOnesValue();
1970    bool isKnownFalse = !isKnownTrue;
1971
1972    // If "A && B" is known true then both A and B are known true.  If "A || B"
1973    // is known false then both A and B are known false.
1974    Value *A, *B;
1975    if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
1976        (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
1977      Worklist.push_back(std::make_pair(A, RHS));
1978      Worklist.push_back(std::make_pair(B, RHS));
1979      continue;
1980    }
1981
1982    // If we are propagating an equality like "(A == B)" == "true" then also
1983    // propagate the equality A == B.  When propagating a comparison such as
1984    // "(A >= B)" == "true", replace all instances of "A < B" with "false".
1985    if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) {
1986      Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
1987
1988      // If "A == B" is known true, or "A != B" is known false, then replace
1989      // A with B everywhere in the scope.
1990      if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
1991          (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE))
1992        Worklist.push_back(std::make_pair(Op0, Op1));
1993
1994      // Handle the floating point versions of equality comparisons too.
1995      if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) ||
1996          (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) {
1997
1998        // Floating point -0.0 and 0.0 compare equal, so we can only
1999        // propagate values if we know that we have a constant and that
2000        // its value is non-zero.
2001
2002        // FIXME: We should do this optimization if 'no signed zeros' is
2003        // applicable via an instruction-level fast-math-flag or some other
2004        // indicator that relaxed FP semantics are being used.
2005
2006        if (isa<ConstantFP>(Op1) && !cast<ConstantFP>(Op1)->isZero())
2007          Worklist.push_back(std::make_pair(Op0, Op1));
2008      }
2009
2010      // If "A >= B" is known true, replace "A < B" with false everywhere.
2011      CmpInst::Predicate NotPred = Cmp->getInversePredicate();
2012      Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
2013      // Since we don't have the instruction "A < B" immediately to hand, work
2014      // out the value number that it would have and use that to find an
2015      // appropriate instruction (if any).
2016      uint32_t NextNum = VN.getNextUnusedValueNumber();
2017      uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1);
2018      // If the number we were assigned was brand new then there is no point in
2019      // looking for an instruction realizing it: there cannot be one!
2020      if (Num < NextNum) {
2021        Value *NotCmp = findLeader(Root.getEnd(), Num);
2022        if (NotCmp && isa<Instruction>(NotCmp)) {
2023          unsigned NumReplacements =
2024              DominatesByEdge
2025                  ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
2026                  : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
2027                                             Root.getStart());
2028          Changed |= NumReplacements > 0;
2029          NumGVNEqProp += NumReplacements;
2030        }
2031      }
2032      // Ensure that any instruction in scope that gets the "A < B" value number
2033      // is replaced with false.
      // The leader table only tracks basic blocks, not edges. Only add to it
      // if we have the simple case where the edge dominates the end.
2036      if (RootDominatesEnd)
2037        addToLeaderTable(Num, NotVal, Root.getEnd());
2038
2039      continue;
2040    }
2041  }
2042
2043  return Changed;
2044}
2045
2046/// When calculating availability, handle an instruction
2047/// by inserting it into the appropriate sets
2048bool GVN::processInstruction(Instruction *I) {
2049  // Ignore dbg info intrinsics.
2050  if (isa<DbgInfoIntrinsic>(I))
2051    return false;
2052
2053  // If the instruction can be easily simplified then do so now in preference
2054  // to value numbering it.  Value numbering often exposes redundancies, for
2055  // example if it determines that %y is equal to %x then the instruction
2056  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
2057  const DataLayout &DL = I->getModule()->getDataLayout();
2058  if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
2059    bool Changed = false;
2060    if (!I->use_empty()) {
2061      I->replaceAllUsesWith(V);
2062      Changed = true;
2063    }
2064    if (isInstructionTriviallyDead(I, TLI)) {
2065      markInstructionForDeletion(I);
2066      Changed = true;
2067    }
2068    if (Changed) {
2069      if (MD && V->getType()->getScalarType()->isPointerTy())
2070        MD->invalidateCachedPointerInfo(V);
2071      ++NumGVNSimpl;
2072      return true;
2073    }
2074  }
2075
2076  if (IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(I))
2077    if (IntrinsicI->getIntrinsicID() == Intrinsic::assume)
2078      return processAssumeIntrinsic(IntrinsicI);
2079
2080  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
2081    if (processLoad(LI))
2082      return true;
2083
2084    unsigned Num = VN.lookupOrAdd(LI);
2085    addToLeaderTable(Num, LI, LI->getParent());
2086    return false;
2087  }
2088
2089  // For conditional branches, we can perform simple conditional propagation on
2090  // the condition value itself.
2091  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
2092    if (!BI->isConditional())
2093      return false;
2094
2095    if (isa<Constant>(BI->getCondition()))
2096      return processFoldableCondBr(BI);
2097
2098    Value *BranchCond = BI->getCondition();
2099    BasicBlock *TrueSucc = BI->getSuccessor(0);
2100    BasicBlock *FalseSucc = BI->getSuccessor(1);
2101    // Avoid multiple edges early.
2102    if (TrueSucc == FalseSucc)
2103      return false;
2104
2105    BasicBlock *Parent = BI->getParent();
2106    bool Changed = false;
2107
2108    Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
2109    BasicBlockEdge TrueE(Parent, TrueSucc);
2110    Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true);
2111
2112    Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
2113    BasicBlockEdge FalseE(Parent, FalseSucc);
2114    Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true);
2115
2116    return Changed;
2117  }
2118
2119  // For switches, propagate the case values into the case destinations.
2120  if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
2121    Value *SwitchCond = SI->getCondition();
2122    BasicBlock *Parent = SI->getParent();
2123    bool Changed = false;
2124
2125    // Remember how many outgoing edges there are to every successor.
2126    SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
2127    for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i)
2128      ++SwitchEdges[SI->getSuccessor(i)];
2129
2130    for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
2131         i != e; ++i) {
2132      BasicBlock *Dst = i.getCaseSuccessor();
2133      // If there is only a single edge, propagate the case value into it.
2134      if (SwitchEdges.lookup(Dst) == 1) {
2135        BasicBlockEdge E(Parent, Dst);
2136        Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E, true);
2137      }
2138    }
2139    return Changed;
2140  }
2141
2142  // Instructions with void type don't return a value, so there's
2143  // no point in trying to find redundancies in them.
2144  if (I->getType()->isVoidTy())
2145    return false;
2146
2147  uint32_t NextNum = VN.getNextUnusedValueNumber();
2148  unsigned Num = VN.lookupOrAdd(I);
2149
  // Allocations, terminators, and PHI nodes are always uniquely numbered, so
  // we can save time and memory by fast-failing them.
2152  if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
2153    addToLeaderTable(Num, I, I->getParent());
2154    return false;
2155  }
2156
2157  // If the number we were assigned was a brand new VN, then we don't
2158  // need to do a lookup to see if the number already exists
2159  // somewhere in the domtree: it can't!
2160  if (Num >= NextNum) {
2161    addToLeaderTable(Num, I, I->getParent());
2162    return false;
2163  }
2164
2165  // Perform fast-path value-number based elimination of values inherited from
2166  // dominators.
2167  Value *Repl = findLeader(I->getParent(), Num);
2168  if (!Repl) {
2169    // Failure, just remember this instance for future use.
2170    addToLeaderTable(Num, I, I->getParent());
2171    return false;
2172  } else if (Repl == I) {
2173    // If I was the result of a shortcut PRE, it might already be in the table
2174    // and the best replacement for itself. Nothing to do.
2175    return false;
2176  }
2177
2178  // Remove it!
2179  patchAndReplaceAllUsesWith(I, Repl);
2180  if (MD && Repl->getType()->getScalarType()->isPointerTy())
2181    MD->invalidateCachedPointerInfo(Repl);
2182  markInstructionForDeletion(I);
2183  return true;
2184}
2185
/// runImpl - This is the main transformation entry point for a function.
2187bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
2188                  const TargetLibraryInfo &RunTLI, AAResults &RunAA,
2189                  MemoryDependenceResults *RunMD) {
2190  AC = &RunAC;
2191  DT = &RunDT;
2192  VN.setDomTree(DT);
2193  TLI = &RunTLI;
2194  VN.setAliasAnalysis(&RunAA);
2195  MD = RunMD;
2196  VN.setMemDep(MD);
2197
2198  bool Changed = false;
2199  bool ShouldContinue = true;
2200
2201  // Merge unconditional branches, allowing PRE to catch more
2202  // optimization opportunities.
2203  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
2204    BasicBlock *BB = &*FI++;
2205
2206    bool removedBlock =
2207        MergeBlockIntoPredecessor(BB, DT, /* LoopInfo */ nullptr, MD);
2208    if (removedBlock) ++NumGVNBlocks;
2209
2210    Changed |= removedBlock;
2211  }
2212
2213  unsigned Iteration = 0;
2214  while (ShouldContinue) {
2215    DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
2216    ShouldContinue = iterateOnFunction(F);
2217    Changed |= ShouldContinue;
2218    ++Iteration;
2219  }
2220
2221  if (EnablePRE) {
2222    // Fabricate val-num for dead-code in order to suppress assertion in
2223    // performPRE().
2224    assignValNumForDeadCode();
2225    bool PREChanged = true;
2226    while (PREChanged) {
2227      PREChanged = performPRE(F);
2228      Changed |= PREChanged;
2229    }
2230  }
2231
2232  // FIXME: Should perform GVN again after PRE does something.  PRE can move
2233  // computations into blocks where they become fully redundant.  Note that
2234  // we can't do this until PRE's critical edge splitting updates memdep.
2235  // Actually, when this happens, we should just fully integrate PRE into GVN.
2236
2237  cleanupGlobalSets();
  // Do not clean up DeadBlocks in cleanupGlobalSets() as it's called for each
  // iteration.
2240  DeadBlocks.clear();
2241
2242  return Changed;
2243}
2244
2245bool GVN::processBlock(BasicBlock *BB) {
2246  // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
2247  // (and incrementing BI before processing an instruction).
2248  assert(InstrsToErase.empty() &&
2249         "We expect InstrsToErase to be empty across iterations");
2250  if (DeadBlocks.count(BB))
2251    return false;
2252
  // Clear the map before every BB because it can only be used for a single BB.
2254  ReplaceWithConstMap.clear();
2255  bool ChangedFunction = false;
2256
2257  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
2258       BI != BE;) {
2259    if (!ReplaceWithConstMap.empty())
2260      ChangedFunction |= replaceOperandsWithConsts(&*BI);
2261    ChangedFunction |= processInstruction(&*BI);
2262
2263    if (InstrsToErase.empty()) {
2264      ++BI;
2265      continue;
2266    }
2267
2268    // If we need some instructions deleted, do it now.
2269    NumGVNInstr += InstrsToErase.size();
2270
2271    // Avoid iterator invalidation.
2272    bool AtStart = BI == BB->begin();
2273    if (!AtStart)
2274      --BI;
2275
2276    for (SmallVectorImpl<Instruction *>::iterator I = InstrsToErase.begin(),
2277         E = InstrsToErase.end(); I != E; ++I) {
2278      DEBUG(dbgs() << "GVN removed: " << **I << '\n');
2279      if (MD) MD->removeInstruction(*I);
2280      DEBUG(verifyRemoved(*I));
2281      (*I)->eraseFromParent();
2282    }
2283    InstrsToErase.clear();
2284
2285    if (AtStart)
2286      BI = BB->begin();
2287    else
2288      ++BI;
2289  }
2290
2291  return ChangedFunction;
2292}
2293
2294// Instantiate an expression in a predecessor that lacked it.
2295bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
2296                                    unsigned int ValNo) {
2297  // Because we are going top-down through the block, all value numbers
2298  // will be available in the predecessor by the time we need them.  Any
2299  // that weren't originally present will have been instantiated earlier
2300  // in this loop.
2301  bool success = true;
2302  for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) {
2303    Value *Op = Instr->getOperand(i);
2304    if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
2305      continue;
2306    // This could be a newly inserted instruction, in which case, we won't
2307    // find a value number, and should give up before we hurt ourselves.
    // FIXME: Rewrite the infrastructure to make it easier to value number
    // and process newly inserted instructions.
2310    if (!VN.exists(Op)) {
2311      success = false;
2312      break;
2313    }
2314    if (Value *V = findLeader(Pred, VN.lookup(Op))) {
2315      Instr->setOperand(i, V);
2316    } else {
2317      success = false;
2318      break;
2319    }
2320  }
2321
2322  // Fail out if we encounter an operand that is not available in
2323  // the PRE predecessor.  This is typically because of loads which
2324  // are not value numbered precisely.
2325  if (!success)
2326    return false;
2327
2328  Instr->insertBefore(Pred->getTerminator());
2329  Instr->setName(Instr->getName() + ".pre");
2330  Instr->setDebugLoc(Instr->getDebugLoc());
2331  VN.add(Instr, ValNo);
2332
2333  // Update the availability map to include the new instruction.
2334  addToLeaderTable(ValNo, Instr, Pred);
2335  return true;
2336}
2337
2338bool GVN::performScalarPRE(Instruction *CurInst) {
2339  if (isa<AllocaInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
2340      isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
2341      CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
2342      isa<DbgInfoIntrinsic>(CurInst))
2343    return false;
2344
2345  // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
2346  // sinking the compare again, and it would force the code generator to
2347  // move the i1 from processor flags or predicate registers into a general
2348  // purpose register.
2349  if (isa<CmpInst>(CurInst))
2350    return false;
2351
2352  // We don't currently value number ANY inline asm calls.
2353  if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
2354    if (CallI->isInlineAsm())
2355      return false;
2356
2357  uint32_t ValNo = VN.lookup(CurInst);
2358
2359  // Look for the predecessors for PRE opportunities.  We're
2360  // only trying to solve the basic diamond case, where
2361  // a value is computed in the successor and one predecessor,
2362  // but not the other.  We also explicitly disallow cases
2363  // where the successor is its own predecessor, because they're
2364  // more complicated to get right.
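  //
  // As a rough sketch (names are made up): predecessors %then and %else both
  // branch to %merge; %then computes "%t = add i32 %a, %b" and %merge
  // recomputes the same add.  Scalar PRE clones the add into %else and
  // replaces the computation in %merge with a phi of the two copies.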
2365  unsigned NumWith = 0;
2366  unsigned NumWithout = 0;
2367  BasicBlock *PREPred = nullptr;
2368  BasicBlock *CurrentBlock = CurInst->getParent();
2369
2370  SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
2371  for (BasicBlock *P : predecessors(CurrentBlock)) {
2372    // We're not interested in PRE where the block is its
2373    // own predecessor, or in blocks with predecessors
2374    // that are not reachable.
2375    if (P == CurrentBlock) {
2376      NumWithout = 2;
2377      break;
2378    } else if (!DT->isReachableFromEntry(P)) {
2379      NumWithout = 2;
2380      break;
2381    }
2382
2383    Value *predV = findLeader(P, ValNo);
2384    if (!predV) {
2385      predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
2386      PREPred = P;
2387      ++NumWithout;
2388    } else if (predV == CurInst) {
2389      /* CurInst dominates this predecessor. */
2390      NumWithout = 2;
2391      break;
2392    } else {
2393      predMap.push_back(std::make_pair(predV, P));
2394      ++NumWith;
2395    }
2396  }
2397
2398  // Don't do PRE when it might increase code size, i.e. when
2399  // we would need to insert instructions in more than one pred.
2400  if (NumWithout > 1 || NumWith == 0)
2401    return false;
2402
2403  // We may have a case where all predecessors have the instruction,
2404  // and we just need to insert a phi node. Otherwise, perform
2405  // insertion.
2406  Instruction *PREInstr = nullptr;
2407
2408  if (NumWithout != 0) {
2409    // Don't do PRE across indirect branch.
2410    if (isa<IndirectBrInst>(PREPred->getTerminator()))
2411      return false;
2412
2413    // We can't do PRE safely on a critical edge, so instead we schedule
2414    // the edge to be split and perform the PRE the next time we iterate
2415    // on the function.
2416    unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
2417    if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
2418      toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
2419      return false;
2420    }
2421    // We need to insert somewhere, so let's give it a shot
2422    PREInstr = CurInst->clone();
2423    if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) {
2424      // If we failed insertion, make sure we remove the instruction.
2425      DEBUG(verifyRemoved(PREInstr));
2426      delete PREInstr;
2427      return false;
2428    }
2429  }
2430
2431  // Either we should have filled in the PRE instruction, or we should
2432  // not have needed insertions.
  assert(PREInstr != nullptr || NumWithout == 0);
2434
2435  ++NumGVNPRE;
2436
2437  // Create a PHI to make the value available in this block.
2438  PHINode *Phi =
2439      PHINode::Create(CurInst->getType(), predMap.size(),
2440                      CurInst->getName() + ".pre-phi", &CurrentBlock->front());
2441  for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
2442    if (Value *V = predMap[i].first)
2443      Phi->addIncoming(V, predMap[i].second);
2444    else
2445      Phi->addIncoming(PREInstr, PREPred);
2446  }
2447
2448  VN.add(Phi, ValNo);
2449  addToLeaderTable(ValNo, Phi, CurrentBlock);
2450  Phi->setDebugLoc(CurInst->getDebugLoc());
2451  CurInst->replaceAllUsesWith(Phi);
2452  if (MD && Phi->getType()->getScalarType()->isPointerTy())
2453    MD->invalidateCachedPointerInfo(Phi);
2454  VN.erase(CurInst);
2455  removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
2456
2457  DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
2458  if (MD)
2459    MD->removeInstruction(CurInst);
2460  DEBUG(verifyRemoved(CurInst));
2461  CurInst->eraseFromParent();
2462  ++NumGVNInstr;
2463
2464  return true;
2465}
2466
2467/// Perform a purely local form of PRE that looks for diamond
2468/// control flow patterns and attempts to perform simple PRE at the join point.
2469bool GVN::performPRE(Function &F) {
2470  bool Changed = false;
2471  for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
2472    // Nothing to PRE in the entry block.
2473    if (CurrentBlock == &F.getEntryBlock())
2474      continue;
2475
2476    // Don't perform PRE on an EH pad.
2477    if (CurrentBlock->isEHPad())
2478      continue;
2479
2480    for (BasicBlock::iterator BI = CurrentBlock->begin(),
2481                              BE = CurrentBlock->end();
2482         BI != BE;) {
2483      Instruction *CurInst = &*BI++;
2484      Changed |= performScalarPRE(CurInst);
2485    }
2486  }
2487
2488  if (splitCriticalEdges())
2489    Changed = true;
2490
2491  return Changed;
2492}
2493
/// Split the critical edge connecting the given two blocks, and return
/// the block inserted on the critical edge.
2496BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
2497  BasicBlock *BB =
2498      SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT));
2499  if (MD)
2500    MD->invalidateCachedPredecessors();
2501  return BB;
2502}
2503
2504/// Split critical edges found during the previous
2505/// iteration that may enable further optimization.
2506bool GVN::splitCriticalEdges() {
2507  if (toSplit.empty())
2508    return false;
2509  do {
2510    std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
2511    SplitCriticalEdge(Edge.first, Edge.second,
2512                      CriticalEdgeSplittingOptions(DT));
2513  } while (!toSplit.empty());
2514  if (MD) MD->invalidateCachedPredecessors();
2515  return true;
2516}
2517
2518/// Executes one iteration of GVN
2519bool GVN::iterateOnFunction(Function &F) {
2520  cleanupGlobalSets();
2521
2522  // Top-down walk of the dominator tree
2523  bool Changed = false;
  // Save the blocks this function has before the transformation begins. GVN
  // may split critical edges, and hence may invalidate the RPO/DT iterator.
  //
2527  std::vector<BasicBlock *> BBVect;
2528  BBVect.reserve(256);
2529  // Needed for value numbering with phi construction to work.
2530  ReversePostOrderTraversal<Function *> RPOT(&F);
2531  for (ReversePostOrderTraversal<Function *>::rpo_iterator RI = RPOT.begin(),
2532                                                           RE = RPOT.end();
2533       RI != RE; ++RI)
2534    BBVect.push_back(*RI);
2535
2536  for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
2537       I != E; I++)
2538    Changed |= processBlock(*I);
2539
2540  return Changed;
2541}
2542
2543void GVN::cleanupGlobalSets() {
2544  VN.clear();
2545  LeaderTable.clear();
2546  TableAllocator.Reset();
2547}
2548
2549/// Verify that the specified instruction does not occur in our
2550/// internal data structures.
2551void GVN::verifyRemoved(const Instruction *Inst) const {
2552  VN.verifyRemoved(Inst);
2553
2554  // Walk through the value number scope to make sure the instruction isn't
2555  // ferreted away in it.
2556  for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
2557       I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
2558    const LeaderTableEntry *Node = &I->second;
2559    assert(Node->Val != Inst && "Inst still in value numbering scope!");
2560
2561    while (Node->Next) {
2562      Node = Node->Next;
2563      assert(Node->Val != Inst && "Inst still in value numbering scope!");
2564    }
2565  }
2566}
2567
/// BB is declared dead, which implies other blocks become dead as well. This
2569/// function is to add all these blocks to "DeadBlocks". For the dead blocks'
2570/// live successors, update their phi nodes by replacing the operands
2571/// corresponding to dead blocks with UndefVal.
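///
/// For instance (illustrative): if a successor S of BB also has a live
/// predecessor, S itself stays live, but the PHI operands in S that flow in
/// from BB (or from any other newly dead predecessor) are rewritten to undef.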
2572void GVN::addDeadBlock(BasicBlock *BB) {
2573  SmallVector<BasicBlock *, 4> NewDead;
2574  SmallSetVector<BasicBlock *, 4> DF;
2575
2576  NewDead.push_back(BB);
2577  while (!NewDead.empty()) {
2578    BasicBlock *D = NewDead.pop_back_val();
2579    if (DeadBlocks.count(D))
2580      continue;
2581
2582    // All blocks dominated by D are dead.
2583    SmallVector<BasicBlock *, 8> Dom;
2584    DT->getDescendants(D, Dom);
2585    DeadBlocks.insert(Dom.begin(), Dom.end());
2586
2587    // Figure out the dominance-frontier(D).
2588    for (BasicBlock *B : Dom) {
2589      for (BasicBlock *S : successors(B)) {
2590        if (DeadBlocks.count(S))
2591          continue;
2592
2593        bool AllPredDead = true;
2594        for (BasicBlock *P : predecessors(S))
2595          if (!DeadBlocks.count(P)) {
2596            AllPredDead = false;
2597            break;
2598          }
2599
2600        if (!AllPredDead) {
2601          // S could be proved dead later on. That is why we don't update phi
2602          // operands at this moment.
2603          DF.insert(S);
2604        } else {
          // While S is not dominated by D, it is dead by now. This could
          // happen if S already had a dead predecessor before D was declared
          // dead.
2608          NewDead.push_back(S);
2609        }
2610      }
2611    }
2612  }
2613
2614  // For the dead blocks' live successors, update their phi nodes by replacing
2615  // the operands corresponding to dead blocks with UndefVal.
2616  for(SmallSetVector<BasicBlock *, 4>::iterator I = DF.begin(), E = DF.end();
2617        I != E; I++) {
2618    BasicBlock *B = *I;
2619    if (DeadBlocks.count(B))
2620      continue;
2621
2622    SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
2623    for (BasicBlock *P : Preds) {
2624      if (!DeadBlocks.count(P))
2625        continue;
2626
2627      if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) {
2628        if (BasicBlock *S = splitCriticalEdges(P, B))
2629          DeadBlocks.insert(P = S);
2630      }
2631
2632      for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) {
2633        PHINode &Phi = cast<PHINode>(*II);
2634        Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
2635                             UndefValue::get(Phi.getType()));
2636      }
2637    }
2638  }
2639}
2640
// If the given branch is recognized as a foldable branch (i.e. a conditional
// branch with a constant condition), perform the following analyses and
// transformations (a small example follows the list):
//  1) If the dead outgoing edge is a critical edge, split it. Let R be the
//     target of the dead outgoing edge.
//  2) Identify the set of dead blocks implied by the branch's dead outgoing
//     edge. The result of this step will be {X | X is dominated by R}.
//  3) Identify those blocks which have at least one dead predecessor. The
//     result of this step will be dominance-frontier(R).
//  4) Update the PHIs in DF(R) by replacing the operands corresponding to
//     dead blocks with "UndefVal" in the hope that these PHIs will be
//     optimized away.
//
// Return true iff *NEW* dead code is found.
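//
// For example (illustrative IR): given "br i1 true, label %live, label %dead",
// the edge to %dead is dead, so %dead and every block it dominates are added
// to DeadBlocks, and PHIs in the surviving blocks of dominance-frontier(%dead)
// have their incoming values from the dead side replaced with undef.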
2654bool GVN::processFoldableCondBr(BranchInst *BI) {
2655  if (!BI || BI->isUnconditional())
2656    return false;
2657
2658  // If a branch has two identical successors, we cannot declare either dead.
2659  if (BI->getSuccessor(0) == BI->getSuccessor(1))
2660    return false;
2661
2662  ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
2663  if (!Cond)
2664    return false;
2665
2666  BasicBlock *DeadRoot =
2667      Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0);
2668  if (DeadBlocks.count(DeadRoot))
2669    return false;
2670
2671  if (!DeadRoot->getSinglePredecessor())
2672    DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);
2673
2674  addDeadBlock(DeadRoot);
2675  return true;
2676}
2677
// performPRE() will trigger an assert if it comes across an instruction
// without an associated val-num. As a function normally has far more live
// instructions than dead instructions, it makes more sense just to
// "fabricate" a val-number for the dead code than to check whether each
// instruction involved is dead or not.
2682void GVN::assignValNumForDeadCode() {
2683  for (BasicBlock *BB : DeadBlocks) {
2684    for (Instruction &Inst : *BB) {
2685      unsigned ValNum = VN.lookupOrAdd(&Inst);
2686      addToLeaderTable(ValNum, &Inst, BB);
2687    }
2688  }
2689}
2690
2691class llvm::gvn::GVNLegacyPass : public FunctionPass {
2692public:
2693  static char ID; // Pass identification, replacement for typeid
2694  explicit GVNLegacyPass(bool NoLoads = false)
2695      : FunctionPass(ID), NoLoads(NoLoads) {
2696    initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
2697  }
2698
2699  bool runOnFunction(Function &F) override {
2700    if (skipFunction(F))
2701      return false;
2702
2703    return Impl.runImpl(
2704        F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
2705        getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
2706        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
2707        getAnalysis<AAResultsWrapperPass>().getAAResults(),
2708        NoLoads ? nullptr
2709                : &getAnalysis<MemoryDependenceWrapperPass>().getMemDep());
2710  }
2711
2712  void getAnalysisUsage(AnalysisUsage &AU) const override {
2713    AU.addRequired<AssumptionCacheTracker>();
2714    AU.addRequired<DominatorTreeWrapperPass>();
2715    AU.addRequired<TargetLibraryInfoWrapperPass>();
2716    if (!NoLoads)
2717      AU.addRequired<MemoryDependenceWrapperPass>();
2718    AU.addRequired<AAResultsWrapperPass>();
2719
2720    AU.addPreserved<DominatorTreeWrapperPass>();
2721    AU.addPreserved<GlobalsAAWrapperPass>();
2722  }
2723
2724private:
2725  bool NoLoads;
2726  GVN Impl;
2727};
2728
2729char GVNLegacyPass::ID = 0;
2730
2731// The public interface to this file...
2732FunctionPass *llvm::createGVNPass(bool NoLoads) {
2733  return new GVNLegacyPass(NoLoads);
2734}
2735
2736INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
2737INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
2738INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
2739INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2740INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2741INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2742INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
2743INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
2744