GVN.cpp revision 051a950000e21935165db56695e35bade668193b
//===- GVN.cpp - Eliminate redundant values and loads ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions.  It also performs simple dead load elimination.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
#include "llvm/ParameterAttributes.h"
#include "llvm/Value.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include <list>
using namespace llvm;

STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");

namespace {
  cl::opt<bool>
  FormMemSet("form-memset-from-stores",
             cl::desc("Transform straight-line stores to memsets"),
             cl::init(true), cl::Hidden);
}

//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers.  It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
namespace {
  struct VISIBILITY_HIDDEN Expression {
    enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM,
                            FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
                            ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
                            ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
                            FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
                            FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
                            FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
                            SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
                            FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
                            PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, EMPTY,
                            TOMBSTONE };

    ExpressionOpcode opcode;
    const Type* type;
    uint32_t firstVN;
    uint32_t secondVN;
    uint32_t thirdVN;
    SmallVector<uint32_t, 4> varargs;
    Value* function;

    Expression() { }
    Expression(ExpressionOpcode o) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      else if (opcode == EMPTY || opcode == TOMBSTONE)
        return true;
      else if (type != other.type)
        return false;
      else if (function != other.function)
        return false;
      else if (firstVN != other.firstVN)
        return false;
      else if (secondVN != other.secondVN)
        return false;
      else if (thirdVN != other.thirdVN)
        return false;
      else {
        if (varargs.size() != other.varargs.size())
          return false;

        for (size_t i = 0; i < varargs.size(); ++i)
          if (varargs[i] != other.varargs[i])
            return false;

        return true;
      }
    }

    bool operator!=(const Expression &other) const {
      return !(*this == other);
    }
  };

  class VISIBILITY_HIDDEN ValueTable {
    private:
      DenseMap<Value*, uint32_t> valueNumbering;
      DenseMap<Expression, uint32_t> expressionNumbering;
      AliasAnalysis* AA;

      uint32_t nextValueNumber;

      Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);
      Expression::ExpressionOpcode getOpcode(CmpInst* C);
      Expression::ExpressionOpcode getOpcode(CastInst* C);
      Expression create_expression(BinaryOperator* BO);
      Expression create_expression(CmpInst* C);
      Expression create_expression(ShuffleVectorInst* V);
      Expression create_expression(ExtractElementInst* C);
      Expression create_expression(InsertElementInst* V);
      Expression create_expression(SelectInst* V);
      Expression create_expression(CastInst* C);
      Expression create_expression(GetElementPtrInst* G);
      Expression create_expression(CallInst* C);
    public:
      ValueTable() : nextValueNumber(1) { }
      uint32_t lookup_or_add(Value* V);
      uint32_t lookup(Value* V) const;
      void add(Value* V, uint32_t num);
      void clear();
      void erase(Value* v);
      unsigned size();
      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
      uint32_t hash_operand(Value* v);
  };
}

namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return Expression(Expression::EMPTY);
  }

  static inline Expression getTombstoneKey() {
    return Expression(Expression::TOMBSTONE);
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    hash = e.firstVN + hash * 37;
    hash = e.secondVN + hash * 37;
    hash = e.thirdVN + hash * 37;

    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9)) +
           hash * 37;

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
            (unsigned)((uintptr_t)e.function >> 9)) +
           hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
  static bool isPod() { return true; }
};
}
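
// Illustrative sketch (not part of the pass): with the traits above,
// structurally identical expressions collide in a DenseMap, which is what
// gives value numbering its sharing.  Assuming an Expression e with all
// fields initialized, as create_expression guarantees:
//
//   DenseMap<Expression, uint32_t> numbering;
//   numbering.insert(std::make_pair(e, 42));
//   // A second, separately-built but structurally identical expression
//   // hashes and compares equal, so it finds the same entry:
//   assert(numbering.find(e)->second == 42);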

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//
Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
  switch(BO->getOpcode()) {
  default: // THIS SHOULD NEVER HAPPEN
    assert(0 && "Binary operator with unknown opcode?");
  case Instruction::Add:  return Expression::ADD;
  case Instruction::Sub:  return Expression::SUB;
  case Instruction::Mul:  return Expression::MUL;
  case Instruction::UDiv: return Expression::UDIV;
  case Instruction::SDiv: return Expression::SDIV;
  case Instruction::FDiv: return Expression::FDIV;
  case Instruction::URem: return Expression::UREM;
  case Instruction::SRem: return Expression::SREM;
  case Instruction::FRem: return Expression::FREM;
  case Instruction::Shl:  return Expression::SHL;
  case Instruction::LShr: return Expression::LSHR;
  case Instruction::AShr: return Expression::ASHR;
  case Instruction::And:  return Expression::AND;
  case Instruction::Or:   return Expression::OR;
  case Instruction::Xor:  return Expression::XOR;
  }
}

Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
  if (isa<ICmpInst>(C)) {
    switch (C->getPredicate()) {
    default:  // THIS SHOULD NEVER HAPPEN
      assert(0 && "Comparison with unknown predicate?");
    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
    }
  }
  assert(isa<FCmpInst>(C) && "Unknown compare");
  switch (C->getPredicate()) {
  default: // THIS SHOULD NEVER HAPPEN
    assert(0 && "Comparison with unknown predicate?");
  case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
  case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
  case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
  case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
  case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
  case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
  case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
  case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
  case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
  case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
  case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
  case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
  case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
  case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
  }
}

Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
  switch(C->getOpcode()) {
  default: // THIS SHOULD NEVER HAPPEN
    assert(0 && "Cast operator with unknown opcode?");
  case Instruction::Trunc:    return Expression::TRUNC;
  case Instruction::ZExt:     return Expression::ZEXT;
  case Instruction::SExt:     return Expression::SEXT;
  case Instruction::FPToUI:   return Expression::FPTOUI;
  case Instruction::FPToSI:   return Expression::FPTOSI;
  case Instruction::UIToFP:   return Expression::UITOFP;
  case Instruction::SIToFP:   return Expression::SITOFP;
  case Instruction::FPTrunc:  return Expression::FPTRUNC;
  case Instruction::FPExt:    return Expression::FPEXT;
  case Instruction::PtrToInt: return Expression::PTRTOINT;
  case Instruction::IntToPtr: return Expression::INTTOPTR;
  case Instruction::BitCast:  return Expression::BITCAST;
  }
}

uint32_t ValueTable::hash_operand(Value* v) {
  if (CallInst* CI = dyn_cast<CallInst>(v))
    if (!AA->doesNotAccessMemory(CI))
      return nextValueNumber++;

  return lookup_or_add(v);
}

Expression ValueTable::create_expression(CallInst* C) {
  Expression e;

  e.type = C->getType();
  e.firstVN = 0;
  e.secondVN = 0;
  e.thirdVN = 0;
  e.function = C->getCalledFunction();
  e.opcode = Expression::CALL;

  for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
       I != E; ++I)
    e.varargs.push_back(hash_operand(*I));

  return e;
}

Expression ValueTable::create_expression(BinaryOperator* BO) {
  Expression e;

  e.firstVN = hash_operand(BO->getOperand(0));
  e.secondVN = hash_operand(BO->getOperand(1));
  e.thirdVN = 0;
  e.function = 0;
  e.type = BO->getType();
  e.opcode = getOpcode(BO);

  return e;
}

Expression ValueTable::create_expression(CmpInst* C) {
  Expression e;

  e.firstVN = hash_operand(C->getOperand(0));
  e.secondVN = hash_operand(C->getOperand(1));
  e.thirdVN = 0;
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(CastInst* C) {
  Expression e;

  e.firstVN = hash_operand(C->getOperand(0));
  e.secondVN = 0;
  e.thirdVN = 0;
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(ShuffleVectorInst* S) {
  Expression e;

  e.firstVN = hash_operand(S->getOperand(0));
  e.secondVN = hash_operand(S->getOperand(1));
  e.thirdVN = hash_operand(S->getOperand(2));
  e.function = 0;
  e.type = S->getType();
  e.opcode = Expression::SHUFFLE;

  return e;
}

Expression ValueTable::create_expression(ExtractElementInst* E) {
  Expression e;

  e.firstVN = hash_operand(E->getOperand(0));
  e.secondVN = hash_operand(E->getOperand(1));
  e.thirdVN = 0;
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::EXTRACT;

  return e;
}

Expression ValueTable::create_expression(InsertElementInst* I) {
  Expression e;

  e.firstVN = hash_operand(I->getOperand(0));
  e.secondVN = hash_operand(I->getOperand(1));
  e.thirdVN = hash_operand(I->getOperand(2));
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::INSERT;

  return e;
}

Expression ValueTable::create_expression(SelectInst* I) {
  Expression e;

  e.firstVN = hash_operand(I->getCondition());
  e.secondVN = hash_operand(I->getTrueValue());
  e.thirdVN = hash_operand(I->getFalseValue());
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::SELECT;

  return e;
}

Expression ValueTable::create_expression(GetElementPtrInst* G) {
  Expression e;

  e.firstVN = hash_operand(G->getPointerOperand());
  e.secondVN = 0;
  e.thirdVN = 0;
  e.function = 0;
  e.type = G->getType();
  e.opcode = Expression::GEP;

  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
       I != E; ++I)
    e.varargs.push_back(hash_operand(*I));

  return e;
}

//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value* V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (CallInst* C = dyn_cast<CallInst>(V)) {
    if (AA->onlyReadsMemory(C)) { // includes doesNotAccessMemory
      Expression e = create_expression(C);

      DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
      if (EI != expressionNumbering.end()) {
        valueNumbering.insert(std::make_pair(V, EI->second));
        return EI->second;
      } else {
        expressionNumbering.insert(std::make_pair(e, nextValueNumber));
        valueNumbering.insert(std::make_pair(V, nextValueNumber));

        return nextValueNumber++;
      }
    } else {
      valueNumbering.insert(std::make_pair(V, nextValueNumber));
      return nextValueNumber++;
    }
  } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) {
    Expression e = create_expression(BO);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (CmpInst* C = dyn_cast<CmpInst>(V)) {
    Expression e = create_expression(C);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (SelectInst* U = dyn_cast<SelectInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (CastInst* U = dyn_cast<CastInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else {
    valueNumbering.insert(std::make_pair(V, nextValueNumber));
    return nextValueNumber++;
  }
}
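
// Illustrative example (hypothetical IR, not from this file): both adds
// below build the Expression {ADD, i32, VN(%a), VN(%b)}, so lookup_or_add
// returns the same number for %x and %y, which is what lets the pass treat
// %y as redundant with %x:
//
//   %x = add i32 %a, %b
//   %y = add i32 %a, %b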

/// lookup - Returns the value number of the specified value.  Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value* V) const {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value* V) {
  valueNumbering.erase(V);
}

//===----------------------------------------------------------------------===//
//                       ValueNumberedSet Class
//===----------------------------------------------------------------------===//
namespace {
class VISIBILITY_HIDDEN ValueNumberedSet {
  private:
    SmallPtrSet<Value*, 8> contents;
    BitVector numbers;
  public:
    ValueNumberedSet() { numbers.resize(1); }
    ValueNumberedSet(const ValueNumberedSet& other) {
      numbers = other.numbers;
      contents = other.contents;
    }

    typedef SmallPtrSet<Value*, 8>::iterator iterator;

    iterator begin() { return contents.begin(); }
    iterator end() { return contents.end(); }

    bool insert(Value* v) { return contents.insert(v); }
    void insert(iterator I, iterator E) { contents.insert(I, E); }
    void erase(Value* v) { contents.erase(v); }
    unsigned count(Value* v) { return contents.count(v); }
    size_t size() { return contents.size(); }

    void set(unsigned i) {
      if (i >= numbers.size())
        numbers.resize(i+1);

      numbers.set(i);
    }

    void operator=(const ValueNumberedSet& other) {
      contents = other.contents;
      numbers = other.numbers;
    }

    void reset(unsigned i) {
      if (i < numbers.size())
        numbers.reset(i);
    }

    bool test(unsigned i) {
      if (i >= numbers.size())
        return false;

      return numbers.test(i);
    }

    void clear() {
      contents.clear();
      numbers.clear();
    }
};
}

//===----------------------------------------------------------------------===//
//                         GVN Pass
//===----------------------------------------------------------------------===//

namespace {

  class VISIBILITY_HIDDEN GVN : public FunctionPass {
    bool runOnFunction(Function &F);
  public:
    static char ID; // Pass identification, replacement for typeid
    GVN() : FunctionPass((intptr_t)&ID) { }

  private:
    ValueTable VN;

    DenseMap<BasicBlock*, ValueNumberedSet> availableOut;

    typedef DenseMap<Value*, SmallPtrSet<Instruction*, 4> > PhiMapType;
    PhiMapType phiMap;

    // This transformation requires dominator info.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      AU.addRequired<DominatorTree>();
      AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();
      AU.addRequired<TargetData>();
      AU.addPreserved<AliasAnalysis>();
      AU.addPreserved<MemoryDependenceAnalysis>();
      AU.addPreserved<TargetData>();
    }

    // Helper functions
    // FIXME: eliminate or document these better
    Value* find_leader(ValueNumberedSet& vals, uint32_t v);
    void val_insert(ValueNumberedSet& s, Value* v);
    bool processLoad(LoadInst* L,
                     DenseMap<Value*, LoadInst*> &lastLoad,
                     SmallVectorImpl<Instruction*> &toErase);
    bool processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase);
    bool processInstruction(Instruction* I,
                            ValueNumberedSet& currAvail,
                            DenseMap<Value*, LoadInst*>& lastSeenLoad,
                            SmallVectorImpl<Instruction*> &toErase);
    bool processNonLocalLoad(LoadInst* L,
                             SmallVectorImpl<Instruction*> &toErase);
    bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
                       SmallVectorImpl<Instruction*> &toErase);
    bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C,
                              SmallVectorImpl<Instruction*> &toErase);
    Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
                            DenseMap<BasicBlock*, Value*> &Phis,
                            bool top_level = false);
    void dump(DenseMap<BasicBlock*, Value*>& d);
    bool iterateOnFunction(Function &F);
    Value* CollapsePhi(PHINode* p);
    bool isSafeReplacement(PHINode* p, Instruction* inst);
  };

  char GVN::ID = 0;
}

// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass() { return new GVN(); }

static RegisterPass<GVN> X("gvn",
                           "Global Value Numbering");

/// find_leader - Given a set and a value number, return the first
/// element of the set with that value number, or 0 if no such element
/// is present.
Value* GVN::find_leader(ValueNumberedSet& vals, uint32_t v) {
  if (!vals.test(v))
    return 0;

  for (ValueNumberedSet::iterator I = vals.begin(), E = vals.end();
       I != E; ++I)
    if (v == VN.lookup(*I))
      return *I;

  assert(0 && "No leader found, but present bit is set?");
  return 0;
}

/// val_insert - Insert a value into a set only if there is not a value
/// with the same value number already in the set.
void GVN::val_insert(ValueNumberedSet& s, Value* v) {
  uint32_t num = VN.lookup(v);
  if (!s.test(num))
    s.insert(v);
}

void GVN::dump(DenseMap<BasicBlock*, Value*>& d) {
  printf("{\n");
  for (DenseMap<BasicBlock*, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
    if (I->second == MemoryDependenceAnalysis::None)
      printf("None\n");
    else
      I->second->dump();
  }
  printf("}\n");
}

Value* GVN::CollapsePhi(PHINode* p) {
  DominatorTree &DT = getAnalysis<DominatorTree>();
  Value* constVal = p->hasConstantValue();

  if (!constVal) return 0;

  Instruction* inst = dyn_cast<Instruction>(constVal);
  if (!inst)
    return constVal;

  if (DT.dominates(inst, p))
    if (isSafeReplacement(p, inst))
      return inst;
  return 0;
}

bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) {
  if (!isa<PHINode>(inst))
    return true;

  for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
       UI != E; ++UI)
    if (PHINode* use_phi = dyn_cast<PHINode>(UI))
      if (use_phi->getParent() == inst->getParent())
        return false;

  return true;
}

/// GetValueForBlock - Get the value to use within the specified basic block.
/// Available values are in Phis.
Value *GVN::GetValueForBlock(BasicBlock *BB, LoadInst* orig,
                             DenseMap<BasicBlock*, Value*> &Phis,
                             bool top_level) {

  // If we have already computed this value, return the previously computed val.
  DenseMap<BasicBlock*, Value*>::iterator V = Phis.find(BB);
  if (V != Phis.end() && !top_level) return V->second;

  BasicBlock* singlePred = BB->getSinglePredecessor();
  if (singlePred) {
    Value *ret = GetValueForBlock(singlePred, orig, Phis);
    Phis[BB] = ret;
    return ret;
  }

  // Otherwise, the block has multiple predecessors, so we need to insert a
  // PHI node.  Do so now, then get values to fill in the incoming values for
  // the PHI.
  PHINode *PN = PHINode::Create(orig->getType(), orig->getName()+".rle",
                                BB->begin());
  PN->reserveOperandSpace(std::distance(pred_begin(BB), pred_end(BB)));

  if (Phis.count(BB) == 0)
    Phis.insert(std::make_pair(BB, PN));

  // Fill in the incoming values for the block.
  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
    Value* val = GetValueForBlock(*PI, orig, Phis);
    PN->addIncoming(val, *PI);
  }

  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
  AA.copyValue(orig, PN);

  // Attempt to collapse PHI nodes that are trivially redundant.
  Value* v = CollapsePhi(PN);
  if (!v) {
    // Cache our phi construction results.
    phiMap[orig->getPointerOperand()].insert(PN);
    return PN;
  }

  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();

  MD.removeInstruction(PN);
  PN->replaceAllUsesWith(v);

  for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(),
       E = Phis.end(); I != E; ++I)
    if (I->second == PN)
      I->second = v;

  PN->eraseFromParent();

  Phis[BB] = v;
  return v;
}
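
// Illustrative example (hypothetical IR): for a load in a join block whose
// two predecessors each store to the same pointer, GetValueForBlock builds
//
//   %v.rle = phi i32 [ %val1, %pred1 ], [ %val2, %pred2 ]
//
// and processNonLocalLoad then replaces the load with %v.rle.  If the PHI
// turns out to be trivially redundant (all incoming values identical),
// CollapsePhi folds it away again.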

/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst* L,
                              SmallVectorImpl<Instruction*> &toErase) {
  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();

  // Find the non-local dependencies of the load.
  DenseMap<BasicBlock*, Value*> deps;
  MD.getNonLocalDependency(L, deps);

  DenseMap<BasicBlock*, Value*> repl;

  // Filter out useless results (non-locals, etc).
  for (DenseMap<BasicBlock*, Value*>::iterator I = deps.begin(), E = deps.end();
       I != E; ++I) {
    if (I->second == MemoryDependenceAnalysis::None)
      return false;

    if (I->second == MemoryDependenceAnalysis::NonLocal)
      continue;

    if (StoreInst* S = dyn_cast<StoreInst>(I->second)) {
      if (S->getPointerOperand() != L->getPointerOperand())
        return false;
      repl[I->first] = S->getOperand(0);
    } else if (LoadInst* LD = dyn_cast<LoadInst>(I->second)) {
      if (LD->getPointerOperand() != L->getPointerOperand())
        return false;
      repl[I->first] = LD;
    } else {
      return false;
    }
  }

  // Use cached PHI construction information from previous runs.
  SmallPtrSet<Instruction*, 4>& p = phiMap[L->getPointerOperand()];
  for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
       I != E; ++I) {
    if ((*I)->getParent() == L->getParent()) {
      MD.removeInstruction(L);
      L->replaceAllUsesWith(*I);
      toErase.push_back(L);
      NumGVNLoad++;
      return true;
    }

    repl.insert(std::make_pair((*I)->getParent(), *I));
  }

  // Perform PHI construction.
  SmallPtrSet<BasicBlock*, 4> visited;
  Value* v = GetValueForBlock(L->getParent(), L, repl, true);

  MD.removeInstruction(L);
  L->replaceAllUsesWith(v);
  toErase.push_back(L);
  NumGVNLoad++;

  return true;
}

/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, DenseMap<Value*, LoadInst*> &lastLoad,
                      SmallVectorImpl<Instruction*> &toErase) {
  if (L->isVolatile()) {
    lastLoad[L->getPointerOperand()] = L;
    return false;
  }

  Value* pointer = L->getPointerOperand();
  LoadInst*& last = lastLoad[pointer];

  // ... to a pointer that has been loaded from before...
  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
  bool removedNonLocal = false;
  Instruction* dep = MD.getDependency(L);
  if (dep == MemoryDependenceAnalysis::NonLocal &&
      L->getParent() != &L->getParent()->getParent()->getEntryBlock()) {
    removedNonLocal = processNonLocalLoad(L, toErase);

    if (!removedNonLocal)
      last = L;

    return removedNonLocal;
  }

  bool deletedLoad = false;

  // Walk up the dependency chain until we either find
  // a dependency we can use, or we can't walk any further.
  while (dep != MemoryDependenceAnalysis::None &&
         dep != MemoryDependenceAnalysis::NonLocal &&
         (isa<LoadInst>(dep) || isa<StoreInst>(dep))) {
    // ... that depends on a store ...
    if (StoreInst* S = dyn_cast<StoreInst>(dep)) {
      if (S->getPointerOperand() == pointer) {
        // Remove it!
        MD.removeInstruction(L);

        L->replaceAllUsesWith(S->getOperand(0));
        toErase.push_back(L);
        deletedLoad = true;
        NumGVNLoad++;
      }

      // Whether we removed it or not, we can't
      // go any further.
      break;
    } else if (!last) {
      // If we don't depend on a store, and we haven't
      // been loaded before, bail.
      break;
    } else if (dep == last) {
      // Remove it!
      MD.removeInstruction(L);

      L->replaceAllUsesWith(last);
      toErase.push_back(L);
      deletedLoad = true;
      NumGVNLoad++;

      break;
    } else {
      dep = MD.getDependency(L, dep);
    }
  }

  if (dep != MemoryDependenceAnalysis::None &&
      dep != MemoryDependenceAnalysis::NonLocal &&
      isa<AllocationInst>(dep)) {
    // Check that this load is actually from the
    // allocation we found.
    Value* v = L->getOperand(0);
    while (true) {
      if (BitCastInst *BC = dyn_cast<BitCastInst>(v))
        v = BC->getOperand(0);
      else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(v))
        v = GEP->getOperand(0);
      else
        break;
    }
    if (v == dep) {
      // If this load depends directly on an allocation, there isn't
      // anything stored there; therefore, we can optimize this load
      // to undef.
      MD.removeInstruction(L);

      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      toErase.push_back(L);
      deletedLoad = true;
      NumGVNLoad++;
    }
  }

  if (!deletedLoad)
    last = L;

  return deletedLoad;
}
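
// Illustrative example (hypothetical IR): the local case handled above is
//
//   store i32 %v, i32* %p
//   %x = load i32* %p      ; nearest dependency is the store to %p
//
// where %x is replaced by %v and the load is deleted.  A load whose nearest
// dependency is the allocation itself is replaced by undef instead.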

/// isBytewiseValue - If the specified value can be set by repeating the same
/// byte in memory, return the i8 value that it is represented with.  This is
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
/// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated
/// byte store (e.g. i16 0x1234), return null.
static Value *isBytewiseValue(Value *V) {
  // All byte-wide stores are splatable, even of arbitrary variables.
  if (V->getType() == Type::Int8Ty) return V;

  // Constant float and double values can be handled as integer values if the
  // corresponding integer value is "byteable".  An important case is 0.0.
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
    if (CFP->getType() == Type::FloatTy)
      V = ConstantExpr::getBitCast(CFP, Type::Int32Ty);
    if (CFP->getType() == Type::DoubleTy)
      V = ConstantExpr::getBitCast(CFP, Type::Int64Ty);
    // Don't handle long double formats, which have strange constraints.
  }

  // We can handle constant integers that are a power of two in size and a
  // multiple of 8 bits.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
    unsigned Width = CI->getBitWidth();
    if (isPowerOf2_32(Width) && Width > 8) {
      // We can handle this value if the recursive binary decomposition is the
      // same at all levels.
      APInt Val = CI->getValue();
      APInt Val2;
      while (Val.getBitWidth() != 8) {
        unsigned NextWidth = Val.getBitWidth()/2;
        Val2  = Val.lshr(NextWidth);
        Val2.trunc(Val.getBitWidth()/2);
        Val.trunc(Val.getBitWidth()/2);

        // If the top/bottom halves aren't the same, reject it.
        if (Val != Val2)
          return 0;
      }
      return ConstantInt::get(Val);
    }
  }

  // Conceptually, we could handle things like:
  //   %a = zext i8 %X to i16
  //   %b = shl i16 %a, 8
  //   %c = or i16 %a, %b
  // but until there is an example that actually needs this, it doesn't seem
  // worth worrying about.
  return 0;
}
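
// Worked example for the loop above: for i32 0xA0A0A0A0, the halves
// 0xA0A0/0xA0A0 and then 0xA0/0xA0 compare equal, so i8 0xA0 is returned.
// For i16 0x1234 the first split yields 0x12 vs 0x34, so null is returned.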

static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
                                  bool &VariableIdxFound, TargetData &TD) {
  // Skip over the first indices.
  gep_type_iterator GTI = gep_type_begin(GEP);
  for (unsigned i = 1; i != Idx; ++i, ++GTI)
    /*skip along*/;

  // Compute the offset implied by the rest of the indices.
  int64_t Offset = 0;
  for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
    if (OpC == 0)
      return VariableIdxFound = true;
    if (OpC->isZero()) continue;  // No offset.

    // Handle struct indices, which add their field offset to the pointer.
    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
      continue;
    }

    // Otherwise, we have a sequential type like an array or vector.  Multiply
    // the index by the ElementSize.
    uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
    Offset += Size*OpC->getSExtValue();
  }

  return Offset;
}

/// IsPointerOffset - Return true if Ptr2 is provably equal to Ptr1 plus a
/// constant offset, and return that constant offset.  For example, Ptr1 might
/// be &A[42], and Ptr2 might be &A[40].  In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
                            TargetData &TD) {
  // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
  // base.  After that base, they may have some number of common (and
  // potentially variable) indices.  After those, each may have some trailing
  // constant indices, which determine their offset from each other.  At this
  // point, we handle no other case.
  GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
  GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
  if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
    return false;

  // Skip any common indices and track the GEP types.
  unsigned Idx = 1;
  for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
    if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
      break;

  bool VariableIdxFound = false;
  int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
  int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
  if (VariableIdxFound) return false;

  Offset = Offset2-Offset1;
  return true;
}
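
// Illustrative example (hypothetical IR): with
//
//   %p1 = getelementptr [100 x i32]* %A, i32 0, i32 42
//   %p2 = getelementptr [100 x i32]* %A, i32 0, i32 40
//
// the bases and leading indices match, the trailing constant indices give
// byte offsets 168 and 160, and IsPointerOffset(%p1, %p2, ...) sets Offset
// to -8, i.e. %p2 == %p1 + (-8).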

/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
///   store 0 -> P+1
///   store 0 -> P+0
///   store 0 -> P+3
///   store 0 -> P+2
/// which sometimes happens with stores to arrays of structs etc.  When we see
/// the first store, we make a range [1, 2).  The second store extends the range
/// to [0, 2).  The third makes a new range [3, 4).  The fourth store joins the
/// two ranges into [0, 4) which is memset'able.
namespace {
struct MemsetRange {
  // Start/End - A half-open range that describes the span this range covers.
  // The range is closed at the start and open at the end: [Start, End).
  int64_t Start, End;

  /// StartPtr - The getelementptr instruction that points to the start of the
  /// range.
  Value *StartPtr;

  /// Alignment - The known alignment of the first store.
  unsigned Alignment;

  /// TheStores - The actual stores that make up this range.
  SmallVector<StoreInst*, 16> TheStores;

  bool isProfitableToUseMemset(const TargetData &TD) const;
};
} // end anon namespace

bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
  // If we found 8 or more stores to merge, or the range covers 64 or more
  // bytes, use memset.
  if (TheStores.size() >= 8 || End-Start >= 64) return true;

  // Assume that the code generator is capable of merging pairs of stores
  // together if it wants to.
  if (TheStores.size() <= 2) return false;

  // If we have fewer than 8 stores, it can still be worthwhile to do this.
  // For example, merging 4 i8 stores into an i32 store is useful almost always.
  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
  // memset will be split into 2 32-bit stores anyway) and doing so can
  // pessimize the llvm optimizer.
  //
  // Since we don't have perfect knowledge here, make some assumptions: assume
  // the maximum GPR width is the same size as the pointer size and assume that
  // this width can be stored.  If so, check to see whether we will end up
  // actually reducing the number of stores used.
  unsigned Bytes = unsigned(End-Start);
  unsigned NumPointerStores = Bytes/TD.getPointerSize();

  // Assume the remaining bytes, if any, are stored a byte at a time.
  unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();

  // If we will reduce the # stores (according to this heuristic), do the
  // transformation.  This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
  // etc.
  return TheStores.size() > NumPointerStores+NumByteStores;
}
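
// Worked example of the heuristic, assuming a 32-bit target where
// getPointerSize() == 4: four adjacent i8 stores give Bytes = 4,
// NumPointerStores = 1, NumByteStores = 0, and 4 > 1, so the range is
// lowered.  Two adjacent i32 stores are already rejected by the pair check
// above; three adjacent i16 stores give Bytes = 6, NumPointerStores = 1,
// NumByteStores = 2, and 3 > 3 fails, so they are also left alone.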

namespace {
class MemsetRanges {
  /// Ranges - A sorted list of the memset ranges.  We use std::list here
  /// because each element is relatively large and expensive to copy.
  std::list<MemsetRange> Ranges;
  typedef std::list<MemsetRange>::iterator range_iterator;
  TargetData &TD;
public:
  MemsetRanges(TargetData &td) : TD(td) {}

  typedef std::list<MemsetRange>::const_iterator const_iterator;
  const_iterator begin() const { return Ranges.begin(); }
  const_iterator end() const { return Ranges.end(); }
  bool empty() const { return Ranges.empty(); }

  void addStore(int64_t OffsetFromFirst, StoreInst *SI);
};

} // end anon namespace

/// addStore - Add a new store to the MemsetRanges data structure.  This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
  int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());

  // Do a linear search of the ranges to see if this can be joined and/or to
  // find the insertion point in the list.  We keep the ranges sorted for
  // simplicity here.  This is a linear search of a linked list, which is ugly,
  // however the number of ranges is limited, so this won't get crazy slow.
  range_iterator I = Ranges.begin(), E = Ranges.end();

  while (I != E && Start > I->End)
    ++I;

  // We now know that either I == E, in which case we didn't find anything to
  // merge with, or that Start <= I->End.  If End < I->Start or I == E, then
  // we need to insert a new range.  Handle this now.
  if (I == E || End < I->Start) {
    MemsetRange &R = *Ranges.insert(I, MemsetRange());
    R.Start        = Start;
    R.End          = End;
    R.StartPtr     = SI->getPointerOperand();
    R.Alignment    = SI->getAlignment();
    R.TheStores.push_back(SI);
    return;
  }

  // This store overlaps with I, add it.
  I->TheStores.push_back(SI);

  // At this point, we may have an interval that completely contains our store.
  // If so, just add it to the interval and return.
  if (I->Start <= Start && I->End >= End)
    return;

  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
  // but is not entirely contained within the range.

  // See if this store extends the start of the range.  Extending the start
  // can't cause us to join the prior range, because otherwise we would have
  // stopped on *it*.
  if (Start < I->Start) {
    I->Start = Start;
    I->StartPtr = SI->getPointerOperand();
  }

  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
  // is in or right at the end of I), and that End >= I->Start.  Extend I out to
  // End.
  if (End > I->End) {
    I->End = End;
    range_iterator NextI = I;
    while (++NextI != E && End >= NextI->Start) {
      // Merge the range in.
      I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
      if (NextI->End > I->End)
        I->End = NextI->End;
      Ranges.erase(NextI);
      NextI = I;
    }
  }
}
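
// Worked example of the merging logic above: with existing ranges [0,4)
// and [8,12), addStore for a 4-byte store at offset 4 overlaps [0,4) at
// its end, extends it to [0,8), and the forward-merge loop then absorbs
// [8,12), leaving the single range [0,12).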

/// processStore - When GVN is scanning forward over instructions, we look for
/// some other patterns to fold away.  In particular, this looks for stores to
/// neighboring locations of memory.  If it sees enough consecutive ones
/// (currently 4) it attempts to merge them together into a memcpy/memset.
bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
  if (!FormMemSet) return false;
  if (SI->isVolatile()) return false;

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.

  // Ensure that the value being stored is something that can be memset'd a
  // byte at a time, like "0" or "-1" of any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
  if (!ByteVal)
    return false;

  TargetData &TD = getAnalysis<TargetData>();
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

  // Okay, so we now have a single store with a splattable value.  Scan to
  // find all subsequent stores of the same value at offsets from the same
  // pointer.  Join these together into ranges, so we can decide whether
  // contiguous blocks are stored.
  MemsetRanges Ranges(TD);

  Value *StartPtr = SI->getPointerOperand();

  BasicBlock::iterator BI = SI;
  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
    if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
      // If the call is readnone, ignore it, otherwise bail out.  We don't even
      // allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (AA.getModRefBehavior(CallSite::get(BI)) ==
            AliasAnalysis::DoesNotAccessMemory)
        continue;

      // TODO: If this is a memset, try to join it in.

      break;
    } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
      break;

    // If this is a non-store instruction it is fine, ignore it.
    StoreInst *NextStore = dyn_cast<StoreInst>(BI);
    if (NextStore == 0) continue;

    // If this is a store, see if we can merge it in.
    if (NextStore->isVolatile()) break;

    // Check to see if this stored value is of the same byte-splattable value.
    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
      break;

    // Check to see if this store is to a constant offset from the start ptr.
    int64_t Offset;
    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, TD))
      break;

    Ranges.addStore(Offset, NextStore);
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (Ranges.empty())
    return false;

  // If we had at least one store that could be merged in, add the starting
  // store as well.  We try to avoid this unless there is at least something
  // interesting as a small compile-time optimization.
  Ranges.addStore(0, SI);

  Function *MemSetF = 0;

  // Now that we have full information about ranges, loop over the ranges and
  // emit memset's for anything big enough to be worthwhile.
  bool MadeChange = false;
  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
       I != E; ++I) {
    const MemsetRange &Range = *I;

    if (Range.TheStores.size() == 1) continue;

    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(TD))
      continue;

    // Otherwise, we do want to transform this!  Create a new memset.  We put
    // the memset right before the first instruction that isn't part of this
    // memset block.  This ensures that the memset is dominated by any
    // addressing instruction needed by the start of the block.
    BasicBlock::iterator InsertPt = BI;

    if (MemSetF == 0)
      MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent()
                                          ->getParent(), Intrinsic::memset_i64);

    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    // Cast the start ptr to be i8* as memset requires.
    const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty);
    if (StartPtr->getType() != i8Ptr)
      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
                                 InsertPt);

    Value *Ops[] = {
      StartPtr, ByteVal,   // Start, value
      ConstantInt::get(Type::Int64Ty, Range.End-Range.Start),  // size
      ConstantInt::get(Type::Int32Ty, Range.Alignment)   // align
    };
    Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt);
    DEBUG(cerr << "Replace stores:\n";
          for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
            cerr << *Range.TheStores[i];
          cerr << "With: " << *C); C=C;

    // Zap all the stores.
    toErase.append(Range.TheStores.begin(), Range.TheStores.end());
    ++NumMemSetInfer;
    MadeChange = true;
  }

  return MadeChange;
}
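
// Illustrative example (hypothetical IR): on a 32-bit target, four adjacent
// byte stores of the same splattable value,
//
//   store i8 0, i8* %p0
//   store i8 0, i8* %p1   ; %p1 = getelementptr i8* %p0, i32 1
//   store i8 0, i8* %p2   ; %p2 = getelementptr i8* %p0, i32 2
//   store i8 0, i8* %p3   ; %p3 = getelementptr i8* %p0, i32 3
//
// form the single range [0,4) and are replaced by
//
//   call void @llvm.memset.i64(i8* %p0, i8 0, i64 4, i32 1)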

/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool GVN::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C,
                               SmallVectorImpl<Instruction*> &toErase) {
  // The general transformation to keep in mind is
  //
  //   call @func(..., src, ...)
  //   memcpy(dest, src, ...)
  //
  // ->
  //
  //   memcpy(dest, src, ...)
  //   call @func(..., dest, ...)
  //
  // Since moving the memcpy is technically awkward, we additionally check that
  // src only holds uninitialized values at the moment of the call, meaning that
  // the memcpy can be discarded rather than moved.

  // Deliberately get the source and destination with bitcasts stripped away,
  // because we'll need to do type comparisons based on the underlying type.
  Value* cpyDest = cpy->getDest();
  Value* cpySrc = cpy->getSource();
  CallSite CS = CallSite::get(C);

  // We need to be able to reason about the size of the memcpy, so we require
  // that it be a constant.
  ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
  if (!cpyLength)
    return false;

  // Require that src be an alloca.  This simplifies the reasoning considerably.
  AllocaInst* srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  // Check that all of src is copied to dest.
  TargetData& TD = getAnalysis<TargetData>();

  ConstantInt* srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  uint64_t srcSize = TD.getABITypeSize(srcAlloca->getAllocatedType()) *
    srcArraySize->getZExtValue();

  if (cpyLength->getZExtValue() < srcSize)
    return false;

  // Check that accessing the first srcSize bytes of dest will not cause a
  // trap.  Otherwise the transform is invalid since it might cause a trap
  // to occur earlier than it otherwise would.
  if (AllocaInst* A = dyn_cast<AllocaInst>(cpyDest)) {
    // The destination is an alloca.  Check it is larger than srcSize.
    ConstantInt* destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
    if (!destArraySize)
      return false;

    uint64_t destSize = TD.getABITypeSize(A->getAllocatedType()) *
      destArraySize->getZExtValue();

    if (destSize < srcSize)
      return false;
  } else if (Argument* A = dyn_cast<Argument>(cpyDest)) {
    // If the destination is an sret parameter then only accesses that are
    // outside of the returned struct type can trap.
    if (!A->hasStructRetAttr())
      return false;

    const Type* StructTy = cast<PointerType>(A->getType())->getElementType();
    uint64_t destSize = TD.getABITypeSize(StructTy);

    if (destSize < srcSize)
      return false;
  } else {
    return false;
  }

  // Check that src is not accessed except via the call and the memcpy.  This
  // guarantees that it holds only undefined values when passed in (so the final
  // memcpy can be dropped), that it is not read or written between the call and
  // the memcpy, and that writing beyond the end of it is undefined.
  SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
                                   srcAlloca->use_end());
  while (!srcUseList.empty()) {
    User* UI = srcUseList.back();
    srcUseList.pop_back();

    if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
      for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
           I != E; ++I)
        srcUseList.push_back(*I);
    } else if (UI != C && UI != cpy) {
      return false;
    }
  }

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  DominatorTree& DT = getAnalysis<DominatorTree>();
  if (Instruction* cpyDestInst = dyn_cast<Instruction>(cpyDest))
    if (!DT.dominates(cpyDestInst, C))
      return false;

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest.  We rely on AA to figure this out for us.
  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
  if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
      AliasAnalysis::NoModRef)
    return false;

  // All the checks have passed, so do the transformation.
  for (unsigned i = 0; i < CS.arg_size(); ++i)
    if (CS.getArgument(i) == cpySrc) {
      if (cpySrc->getType() != cpyDest->getType())
        cpyDest = CastInst::createPointerCast(cpyDest, cpySrc->getType(),
                                              cpyDest->getName(), C);
      CS.setArgument(i, cpyDest);
    }

  // Drop any cached information about the call, because we may have changed
  // its dependence information by changing its parameter.
  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
  MD.dropInstruction(C);

  // Remove the memcpy.
  MD.removeInstruction(cpy);
  toErase.push_back(cpy);

  return true;
}
1522
/// processMemCpy - perform simplification of memcpys.  If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances).  This allows later passes to remove the first memcpy
/// altogether.
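/// For example (illustrative IR; the sizes and alignments are arbitrary):
///   call void @llvm.memcpy.i64(i8* %Y, i8* %X, i64 32, i32 8)  ; memcpy A
///   call void @llvm.memcpy.i64(i8* %Z, i8* %Y, i64 32, i32 8)  ; memcpy B
/// can become, when the checks below succeed:
///   call void @llvm.memcpy.i64(i8* %Y, i8* %X, i64 32, i32 8)
///   call void @llvm.memcpy.i64(i8* %Z, i8* %X, i64 32, i32 8)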
bool GVN::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
                        SmallVectorImpl<Instruction*> &toErase) {
  // We can only transform memcpys where the dest of one is the source of
  // the other.
  if (M->getSource() != MDep->getDest())
    return false;

  // Second, the lengths of the memcpys must be the same, or the preceding
  // one must be larger than the following one.
  ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength());
  ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength());
  if (!C1 || !C2)
    return false;

  uint64_t DepSize = C1->getValue().getZExtValue();
  uint64_t CpySize = C2->getValue().getZExtValue();

  if (DepSize < CpySize)
    return false;

  // Finally, we have to make sure that the dest of the second does not
  // alias the source of the first.
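  // (Roughly: the rewritten memcpy's operands, M's dest and MDep's source,
  // must not overlap, since memcpy requires disjoint operands; the latter
  // two queries conservatively require the same of each original memcpy.)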
  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
  if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
      AliasAnalysis::NoAlias)
    return false;
  else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
           AliasAnalysis::NoAlias)
    return false;
  else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
           != AliasAnalysis::NoAlias)
    return false;

  // If all checks passed, then we can transform these memcpys.
  Function* MemCpyFun = Intrinsic::getDeclaration(
                                 M->getParent()->getParent()->getParent(),
                                 M->getIntrinsicID());

  std::vector<Value*> args;
  args.push_back(M->getRawDest());
  args.push_back(MDep->getRawSource());
  args.push_back(M->getLength());
  args.push_back(M->getAlignment());

  CallInst* C = CallInst::Create(MemCpyFun, args.begin(), args.end(), "", M);

  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
  if (MD.getDependency(C) == MDep) {
    MD.dropInstruction(M);
    toErase.push_back(M);
    return true;
  }

  MD.removeInstruction(C);
  toErase.push_back(C);
  return false;
}

/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
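/// Loads, stores, and memcpys are dispatched to dedicated handlers; any other
/// instruction is value numbered, and if a value with the same number is
/// already available, the instruction is replaced with that leader.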
bool GVN::processInstruction(Instruction *I, ValueNumberedSet &currAvail,
                             DenseMap<Value*, LoadInst*> &lastSeenLoad,
                             SmallVectorImpl<Instruction*> &toErase) {
  if (LoadInst* L = dyn_cast<LoadInst>(I))
    return processLoad(L, lastSeenLoad, toErase);

  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return processStore(SI, toErase);

  if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();

    // There are two possible optimizations we can do for memcpy:
    //   a) memcpy-memcpy xform which exposes redundancy for DSE
    //   b) call-memcpy xform for return slot optimization
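    // As an illustration of (b), in C-like terms:
    //   struct S tmp; f(&tmp); memcpy(dest, &tmp, sizeof(tmp));
    // may be rewritten to pass dest directly to f and drop the memcpy,
    // provided the safety checks in performCallSlotOptzn succeed.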
    Instruction* dep = MD.getDependency(M);
    if (dep == MemoryDependenceAnalysis::None ||
        dep == MemoryDependenceAnalysis::NonLocal)
      return false;
    if (MemCpyInst *MemCpy = dyn_cast<MemCpyInst>(dep))
      return processMemCpy(M, MemCpy, toErase);
    if (CallInst* C = dyn_cast<CallInst>(dep))
      return performCallSlotOptzn(M, C, toErase);
    return false;
  }

  unsigned num = VN.lookup_or_add(I);

  // Collapse PHI nodes
  if (PHINode* p = dyn_cast<PHINode>(I)) {
    Value* constVal = CollapsePhi(p);

    if (constVal) {
      for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end();
           PI != PE; ++PI)
        if (PI->second.count(p))
          PI->second.erase(p);

      p->replaceAllUsesWith(constVal);
      toErase.push_back(p);
    }
  // Perform value-number based elimination
  } else if (currAvail.test(num)) {
    Value* repl = find_leader(currAvail, num);

    if (CallInst* CI = dyn_cast<CallInst>(I)) {
      AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
      if (!AA.doesNotAccessMemory(CI)) {
        MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
        if (cast<Instruction>(repl)->getParent() != CI->getParent() ||
            MD.getDependency(CI) != MD.getDependency(cast<CallInst>(repl))) {
          // There must be an intervening may-alias store, so nothing from
          // this point on can be replaced with the preceding call.
          currAvail.erase(repl);
          currAvail.insert(I);

          return false;
        }
      }
    }

    // Remove it!
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
    MD.removeInstruction(I);

    VN.erase(I);
    I->replaceAllUsesWith(repl);
    toErase.push_back(I);
    return true;
  } else if (!I->isTerminator()) {
    currAvail.set(num);
    currAvail.insert(I);
  }

  return false;
}

// GVN::runOnFunction - This is the main transformation entry point for a
// function.
//
bool GVN::runOnFunction(Function& F) {
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());

  bool changed = false;
  bool shouldContinue = true;

  while (shouldContinue) {
    shouldContinue = iterateOnFunction(F);
    changed |= shouldContinue;
  }

  return changed;
}

// GVN::iterateOnFunction - Executes one iteration of GVN
bool GVN::iterateOnFunction(Function &F) {
  // Clean out global sets from any previous functions
  VN.clear();
  availableOut.clear();
  phiMap.clear();

  bool changed_function = false;

  DominatorTree &DT = getAnalysis<DominatorTree>();

  SmallVector<Instruction*, 8> toErase;
  DenseMap<Value*, LoadInst*> lastSeenLoad;

  // Top-down walk of the dominator tree
  for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()),
         E = df_end(DT.getRootNode()); DI != E; ++DI) {

    // Get the set to update for this block
    ValueNumberedSet& currAvail = availableOut[DI->getBlock()];
    lastSeenLoad.clear();

    BasicBlock* BB = DI->getBlock();

    // A block inherits AVAIL_OUT from its dominator
    if (DI->getIDom() != 0)
      currAvail = availableOut[DI->getIDom()->getBlock()];

    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
         BI != BE;) {
      changed_function |= processInstruction(BI, currAvail,
                                             lastSeenLoad, toErase);
      if (toErase.empty()) {
        ++BI;
        continue;
      }

      // If we need some instructions deleted, do it now.
      NumGVNInstr += toErase.size();

      // Avoid iterator invalidation.
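      // processInstruction may have queued the current instruction for
      // deletion, so step BI off of it before erasing, then advance it
      // again afterwards.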
      bool AtStart = BI == BB->begin();
      if (!AtStart)
        --BI;

      for (SmallVectorImpl<Instruction*>::iterator I = toErase.begin(),
           E = toErase.end(); I != E; ++I)
        (*I)->eraseFromParent();

      if (AtStart)
        BI = BB->begin();
      else
        ++BI;

      toErase.clear();
    }
  }

  return changed_function;
}
