GVN.cpp revision d22fe2b51f553f7eca200cd22b9e2247f9aea2ff
//===- GVN.cpp - Eliminate redundant values and loads ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions.  It also performs simple dead load elimination.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
#include "llvm/ParameterAttributes.h"
#include "llvm/Value.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include <list>
using namespace llvm;

STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");

namespace {
  cl::opt<bool>
  FormMemSet("form-memset-from-stores",
             cl::desc("Transform straight-line stores to memsets"),
             cl::init(true), cl::Hidden);
}

//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers.  It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
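/// For example (an illustrative sketch): the two instructions
///   %x = add i32 %a, %b
///   %y = add i32 %a, %b
/// build identical Expressions, so lookup_or_add assigns them the same value
/// number, which is what lets GVN treat %y as redundant with %x.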
namespace {
  struct VISIBILITY_HIDDEN Expression {
    enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM,
                            FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
                            ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
                            ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
                            FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
                            FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
                            FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
                            SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
                            FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
                            PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, EMPTY,
                            TOMBSTONE };

    ExpressionOpcode opcode;
    const Type* type;
    uint32_t firstVN;
    uint32_t secondVN;
    uint32_t thirdVN;
    SmallVector<uint32_t, 4> varargs;
    Value* function;

    Expression() { }
    Expression(ExpressionOpcode o) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      else if (opcode == EMPTY || opcode == TOMBSTONE)
        return true;
      else if (type != other.type)
        return false;
      else if (function != other.function)
        return false;
      else if (firstVN != other.firstVN)
        return false;
      else if (secondVN != other.secondVN)
        return false;
      else if (thirdVN != other.thirdVN)
        return false;
      else {
        if (varargs.size() != other.varargs.size())
          return false;

        for (size_t i = 0; i < varargs.size(); ++i)
          if (varargs[i] != other.varargs[i])
            return false;

        return true;
      }
    }

    bool operator!=(const Expression &other) const {
      return !(*this == other);
    }
  };

  class VISIBILITY_HIDDEN ValueTable {
    private:
      DenseMap<Value*, uint32_t> valueNumbering;
      DenseMap<Expression, uint32_t> expressionNumbering;
      AliasAnalysis* AA;

      uint32_t nextValueNumber;

      Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);
      Expression::ExpressionOpcode getOpcode(CmpInst* C);
      Expression::ExpressionOpcode getOpcode(CastInst* C);
      Expression create_expression(BinaryOperator* BO);
      Expression create_expression(CmpInst* C);
      Expression create_expression(ShuffleVectorInst* V);
      Expression create_expression(ExtractElementInst* C);
      Expression create_expression(InsertElementInst* V);
      Expression create_expression(SelectInst* V);
      Expression create_expression(CastInst* C);
      Expression create_expression(GetElementPtrInst* G);
      Expression create_expression(CallInst* C);
    public:
      ValueTable() : nextValueNumber(1) { }
      uint32_t lookup_or_add(Value* V);
      uint32_t lookup(Value* V) const;
      void add(Value* V, uint32_t num);
      void clear();
      void erase(Value* v);
      unsigned size();
      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
      uint32_t hash_operand(Value* v);
  };
}

namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return Expression(Expression::EMPTY);
  }

  static inline Expression getTombstoneKey() {
    return Expression(Expression::TOMBSTONE);
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    hash = e.firstVN + hash * 37;
    hash = e.secondVN + hash * 37;
    hash = e.thirdVN + hash * 37;

    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9)) +
           hash * 37;

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
            (unsigned)((uintptr_t)e.function >> 9)) +
           hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
  static bool isPod() { return true; }
};
}

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//
Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
  switch(BO->getOpcode()) {
  default: // THIS SHOULD NEVER HAPPEN
    assert(0 && "Binary operator with unknown opcode?");
  case Instruction::Add:  return Expression::ADD;
  case Instruction::Sub:  return Expression::SUB;
  case Instruction::Mul:  return Expression::MUL;
  case Instruction::UDiv: return Expression::UDIV;
  case Instruction::SDiv: return Expression::SDIV;
  case Instruction::FDiv: return Expression::FDIV;
  case Instruction::URem: return Expression::UREM;
  case Instruction::SRem: return Expression::SREM;
  case Instruction::FRem: return Expression::FREM;
  case Instruction::Shl:  return Expression::SHL;
  case Instruction::LShr: return Expression::LSHR;
  case Instruction::AShr: return Expression::ASHR;
  case Instruction::And:  return Expression::AND;
  case Instruction::Or:   return Expression::OR;
  case Instruction::Xor:  return Expression::XOR;
  }
}

Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
  if (isa<ICmpInst>(C)) {
    switch (C->getPredicate()) {
    default:  // THIS SHOULD NEVER HAPPEN
      assert(0 && "Comparison with unknown predicate?");
    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
    }
  }
  assert(isa<FCmpInst>(C) && "Unknown compare");
  switch (C->getPredicate()) {
  default: // THIS SHOULD NEVER HAPPEN
    assert(0 && "Comparison with unknown predicate?");
  case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
  case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
  case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
  case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
  case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
  case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
  case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
  case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
  case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
  case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
  case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
  case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
  case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
  case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
  }
}

Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
  switch(C->getOpcode()) {
  default: // THIS SHOULD NEVER HAPPEN
    assert(0 && "Cast operator with unknown opcode?");
  case Instruction::Trunc:    return Expression::TRUNC;
  case Instruction::ZExt:     return Expression::ZEXT;
  case Instruction::SExt:     return Expression::SEXT;
  case Instruction::FPToUI:   return Expression::FPTOUI;
  case Instruction::FPToSI:   return Expression::FPTOSI;
  case Instruction::UIToFP:   return Expression::UITOFP;
  case Instruction::SIToFP:   return Expression::SITOFP;
  case Instruction::FPTrunc:  return Expression::FPTRUNC;
  case Instruction::FPExt:    return Expression::FPEXT;
  case Instruction::PtrToInt: return Expression::PTRTOINT;
  case Instruction::IntToPtr: return Expression::INTTOPTR;
  case Instruction::BitCast:  return Expression::BITCAST;
  }
}

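/// hash_operand - Return the value number to use for an operand when building
/// an Expression.  Calls that may access memory cannot be value-numbered as
/// pure expressions, so each such call operand gets a fresh, unique number.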
uint32_t ValueTable::hash_operand(Value* v) {
  if (CallInst* CI = dyn_cast<CallInst>(v))
    if (!AA->doesNotAccessMemory(CI))
      return nextValueNumber++;

  return lookup_or_add(v);
}

Expression ValueTable::create_expression(CallInst* C) {
  Expression e;

  e.type = C->getType();
  e.firstVN = 0;
  e.secondVN = 0;
  e.thirdVN = 0;
  e.function = C->getCalledFunction();
  e.opcode = Expression::CALL;

  for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
       I != E; ++I)
    e.varargs.push_back(hash_operand(*I));

  return e;
}

Expression ValueTable::create_expression(BinaryOperator* BO) {
  Expression e;

  e.firstVN = hash_operand(BO->getOperand(0));
  e.secondVN = hash_operand(BO->getOperand(1));
  e.thirdVN = 0;
  e.function = 0;
  e.type = BO->getType();
  e.opcode = getOpcode(BO);

  return e;
}

Expression ValueTable::create_expression(CmpInst* C) {
  Expression e;

  e.firstVN = hash_operand(C->getOperand(0));
  e.secondVN = hash_operand(C->getOperand(1));
  e.thirdVN = 0;
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(CastInst* C) {
  Expression e;

  e.firstVN = hash_operand(C->getOperand(0));
  e.secondVN = 0;
  e.thirdVN = 0;
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(ShuffleVectorInst* S) {
  Expression e;

  e.firstVN = hash_operand(S->getOperand(0));
  e.secondVN = hash_operand(S->getOperand(1));
  e.thirdVN = hash_operand(S->getOperand(2));
  e.function = 0;
  e.type = S->getType();
  e.opcode = Expression::SHUFFLE;

  return e;
}

Expression ValueTable::create_expression(ExtractElementInst* E) {
  Expression e;

  e.firstVN = hash_operand(E->getOperand(0));
  e.secondVN = hash_operand(E->getOperand(1));
  e.thirdVN = 0;
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::EXTRACT;

  return e;
}

Expression ValueTable::create_expression(InsertElementInst* I) {
  Expression e;

  e.firstVN = hash_operand(I->getOperand(0));
  e.secondVN = hash_operand(I->getOperand(1));
  e.thirdVN = hash_operand(I->getOperand(2));
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::INSERT;

  return e;
}

Expression ValueTable::create_expression(SelectInst* I) {
  Expression e;

  e.firstVN = hash_operand(I->getCondition());
  e.secondVN = hash_operand(I->getTrueValue());
  e.thirdVN = hash_operand(I->getFalseValue());
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::SELECT;

  return e;
}

Expression ValueTable::create_expression(GetElementPtrInst* G) {
  Expression e;

  e.firstVN = hash_operand(G->getPointerOperand());
  e.secondVN = 0;
  e.thirdVN = 0;
  e.function = 0;
  e.type = G->getType();
  e.opcode = Expression::GEP;

  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
       I != E; ++I)
    e.varargs.push_back(hash_operand(*I));

  return e;
}

//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value* V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (CallInst* C = dyn_cast<CallInst>(V)) {
    if (AA->onlyReadsMemory(C)) { // includes doesNotAccessMemory
      Expression e = create_expression(C);

      DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
      if (EI != expressionNumbering.end()) {
        valueNumbering.insert(std::make_pair(V, EI->second));
        return EI->second;
      } else {
        expressionNumbering.insert(std::make_pair(e, nextValueNumber));
        valueNumbering.insert(std::make_pair(V, nextValueNumber));

        return nextValueNumber++;
      }
    } else {
      valueNumbering.insert(std::make_pair(V, nextValueNumber));
      return nextValueNumber++;
    }
  } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) {
    Expression e = create_expression(BO);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (CmpInst* C = dyn_cast<CmpInst>(V)) {
    Expression e = create_expression(C);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (SelectInst* U = dyn_cast<SelectInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (CastInst* U = dyn_cast<CastInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) {
    Expression e = create_expression(U);

    DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
    if (EI != expressionNumbering.end()) {
      valueNumbering.insert(std::make_pair(V, EI->second));
      return EI->second;
    } else {
      expressionNumbering.insert(std::make_pair(e, nextValueNumber));
      valueNumbering.insert(std::make_pair(V, nextValueNumber));

      return nextValueNumber++;
    }
  } else {
    valueNumbering.insert(std::make_pair(V, nextValueNumber));
    return nextValueNumber++;
  }
}

/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value* V) const {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering
void ValueTable::erase(Value* V) {
  valueNumbering.erase(V);
}

//===----------------------------------------------------------------------===//
//                       ValueNumberedSet Class
//===----------------------------------------------------------------------===//
namespace {
class VISIBILITY_HIDDEN ValueNumberedSet {
  private:
    SmallPtrSet<Value*, 8> contents;
    BitVector numbers;
  public:
    ValueNumberedSet() { numbers.resize(1); }
    ValueNumberedSet(const ValueNumberedSet& other) {
      numbers = other.numbers;
      contents = other.contents;
    }

    typedef SmallPtrSet<Value*, 8>::iterator iterator;

    iterator begin() { return contents.begin(); }
    iterator end() { return contents.end(); }

    bool insert(Value* v) { return contents.insert(v); }
    void insert(iterator I, iterator E) { contents.insert(I, E); }
    void erase(Value* v) { contents.erase(v); }
    unsigned count(Value* v) { return contents.count(v); }
    size_t size() { return contents.size(); }

    void set(unsigned i) {
      if (i >= numbers.size())
        numbers.resize(i+1);

      numbers.set(i);
    }

    void operator=(const ValueNumberedSet& other) {
      contents = other.contents;
      numbers = other.numbers;
    }

    void reset(unsigned i) {
      if (i < numbers.size())
        numbers.reset(i);
    }

    bool test(unsigned i) {
      if (i >= numbers.size())
        return false;

      return numbers.test(i);
    }

    void clear() {
      contents.clear();
      numbers.clear();
    }
};
}

//===----------------------------------------------------------------------===//
//                         GVN Pass
//===----------------------------------------------------------------------===//

namespace {

  class VISIBILITY_HIDDEN GVN : public FunctionPass {
    bool runOnFunction(Function &F);
  public:
    static char ID; // Pass identification, replacement for typeid
    GVN() : FunctionPass((intptr_t)&ID) { }

  private:
    ValueTable VN;

    DenseMap<BasicBlock*, ValueNumberedSet> availableOut;

    typedef DenseMap<Value*, SmallPtrSet<Instruction*, 4> > PhiMapType;
    PhiMapType phiMap;

    // This transformation requires dominator info.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      AU.addRequired<DominatorTree>();
      AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();
      AU.addRequired<TargetData>();
      AU.addPreserved<AliasAnalysis>();
      AU.addPreserved<MemoryDependenceAnalysis>();
      AU.addPreserved<TargetData>();
    }

    // Helper functions
    // FIXME: eliminate or document these better
    Value* find_leader(ValueNumberedSet& vals, uint32_t v);
    void val_insert(ValueNumberedSet& s, Value* v);
    bool processLoad(LoadInst* L,
                     DenseMap<Value*, LoadInst*> &lastLoad,
                     SmallVectorImpl<Instruction*> &toErase);
    bool processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase);
    bool processInstruction(Instruction* I,
                            ValueNumberedSet& currAvail,
                            DenseMap<Value*, LoadInst*>& lastSeenLoad,
                            SmallVectorImpl<Instruction*> &toErase);
    bool processNonLocalLoad(LoadInst* L,
                             SmallVectorImpl<Instruction*> &toErase);
    bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
                       SmallVectorImpl<Instruction*> &toErase);
    bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C,
                              SmallVectorImpl<Instruction*> &toErase);
    Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
                            DenseMap<BasicBlock*, Value*> &Phis,
                            bool top_level = false);
    void dump(DenseMap<BasicBlock*, Value*>& d);
    bool iterateOnFunction(Function &F);
    Value* CollapsePhi(PHINode* p);
    bool isSafeReplacement(PHINode* p, Instruction* inst);
  };

  char GVN::ID = 0;
}

// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass() { return new GVN(); }

static RegisterPass<GVN> X("gvn",
                           "Global Value Numbering");

/// find_leader - Given a set and a value number, return the first
/// element of the set with that value number, or 0 if no such element
/// is present
Value* GVN::find_leader(ValueNumberedSet& vals, uint32_t v) {
  if (!vals.test(v))
    return 0;

  for (ValueNumberedSet::iterator I = vals.begin(), E = vals.end();
       I != E; ++I)
    if (v == VN.lookup(*I))
      return *I;

  assert(0 && "No leader found, but present bit is set?");
  return 0;
}

/// val_insert - Insert a value into a set only if there is not a value
/// with the same value number already in the set
void GVN::val_insert(ValueNumberedSet& s, Value* v) {
  uint32_t num = VN.lookup(v);
  if (!s.test(num))
    s.insert(v);
}

void GVN::dump(DenseMap<BasicBlock*, Value*>& d) {
  printf("{\n");
  for (DenseMap<BasicBlock*, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
    if (I->second == MemoryDependenceAnalysis::None)
      printf("None\n");
    else
      I->second->dump();
  }
  printf("}\n");
}

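/// CollapsePhi - If the given PHI node always takes a single value (all of its
/// incoming values are the same), and that value can safely replace the PHI,
/// return it; otherwise return 0.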
Value* GVN::CollapsePhi(PHINode* p) {
  DominatorTree &DT = getAnalysis<DominatorTree>();
  Value* constVal = p->hasConstantValue();

  if (!constVal) return 0;

  Instruction* inst = dyn_cast<Instruction>(constVal);
  if (!inst)
    return constVal;

  if (DT.dominates(inst, p))
    if (isSafeReplacement(p, inst))
      return inst;
  return 0;
}

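/// isSafeReplacement - Return true if it is safe to replace all uses of the
/// PHI node p with inst.  When inst is itself a PHI, the replacement is
/// conservatively rejected if any user of p is a PHI in inst's block.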
bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) {
  if (!isa<PHINode>(inst))
    return true;

  for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
       UI != E; ++UI)
    if (PHINode* use_phi = dyn_cast<PHINode>(UI))
      if (use_phi->getParent() == inst->getParent())
        return false;

  return true;
}

/// GetValueForBlock - Get the value to use within the specified basic block.
/// Available values are in Phis.
Value *GVN::GetValueForBlock(BasicBlock *BB, LoadInst* orig,
                             DenseMap<BasicBlock*, Value*> &Phis,
                             bool top_level) {

  // If we have already computed this value, return the previously computed val.
  DenseMap<BasicBlock*, Value*>::iterator V = Phis.find(BB);
  if (V != Phis.end() && !top_level) return V->second;

  BasicBlock* singlePred = BB->getSinglePredecessor();
  if (singlePred) {
    Value *ret = GetValueForBlock(singlePred, orig, Phis);
    Phis[BB] = ret;
    return ret;
  }

  // Otherwise, this block has multiple predecessors, so we need to insert a
  // PHI node.  Do so now, then get values to fill in the incoming values for
  // the PHI.
  PHINode *PN = new PHINode(orig->getType(), orig->getName()+".rle",
                            BB->begin());
  PN->reserveOperandSpace(std::distance(pred_begin(BB), pred_end(BB)));

  if (Phis.count(BB) == 0)
    Phis.insert(std::make_pair(BB, PN));

  // Fill in the incoming values for the block.
  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
    Value* val = GetValueForBlock(*PI, orig, Phis);
    PN->addIncoming(val, *PI);
  }

  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
  AA.copyValue(orig, PN);

  // Attempt to collapse PHI nodes that are trivially redundant
  Value* v = CollapsePhi(PN);
  if (!v) {
    // Cache our phi construction results
    phiMap[orig->getPointerOperand()].insert(PN);
    return PN;
  }

  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();

  MD.removeInstruction(PN);
  PN->replaceAllUsesWith(v);

  for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(),
       E = Phis.end(); I != E; ++I)
    if (I->second == PN)
      I->second = v;

  PN->eraseFromParent();

  Phis[BB] = v;
  return v;
}

/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
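/// For example (an illustrative sketch): given a load %L from %P in a block
/// whose two predecessors each store to %P,
///   pred1: store i32 %v1, i32* %P
///   pred2: store i32 %v2, i32* %P
/// the load is replaced by a constructed PHI such as
///   %L.rle = phi i32 [ %v1, %pred1 ], [ %v2, %pred2 ]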
bool GVN::processNonLocalLoad(LoadInst* L,
                              SmallVectorImpl<Instruction*> &toErase) {
  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();

  // Find the non-local dependencies of the load
  DenseMap<BasicBlock*, Value*> deps;
  MD.getNonLocalDependency(L, deps);

  DenseMap<BasicBlock*, Value*> repl;

  // Filter out useless results (non-locals, etc)
  for (DenseMap<BasicBlock*, Value*>::iterator I = deps.begin(), E = deps.end();
       I != E; ++I) {
    if (I->second == MemoryDependenceAnalysis::None)
      return false;

    if (I->second == MemoryDependenceAnalysis::NonLocal)
      continue;

    if (StoreInst* S = dyn_cast<StoreInst>(I->second)) {
      if (S->getPointerOperand() != L->getPointerOperand())
        return false;
      repl[I->first] = S->getOperand(0);
    } else if (LoadInst* LD = dyn_cast<LoadInst>(I->second)) {
      if (LD->getPointerOperand() != L->getPointerOperand())
        return false;
      repl[I->first] = LD;
    } else {
      return false;
    }
  }

  // Use cached PHI construction information from previous runs
  SmallPtrSet<Instruction*, 4>& p = phiMap[L->getPointerOperand()];
  for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
       I != E; ++I) {
    if ((*I)->getParent() == L->getParent()) {
      MD.removeInstruction(L);
      L->replaceAllUsesWith(*I);
      toErase.push_back(L);
      NumGVNLoad++;
      return true;
    }

    repl.insert(std::make_pair((*I)->getParent(), *I));
  }

  // Perform PHI construction
  SmallPtrSet<BasicBlock*, 4> visited;
  Value* v = GetValueForBlock(L->getParent(), L, repl, true);

  MD.removeInstruction(L);
  L->replaceAllUsesWith(v);
  toErase.push_back(L);
  NumGVNLoad++;

  return true;
}

/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
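/// For example (an illustrative sketch of the local case):
///   store i32 %v, i32* %P
///   %x = load i32* %P
/// Here the load's only dependency is a store through the same pointer, so all
/// uses of %x are replaced with %v and the load is queued for deletion.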
bool GVN::processLoad(LoadInst *L, DenseMap<Value*, LoadInst*> &lastLoad,
                      SmallVectorImpl<Instruction*> &toErase) {
  if (L->isVolatile()) {
    lastLoad[L->getPointerOperand()] = L;
    return false;
  }

  Value* pointer = L->getPointerOperand();
  LoadInst*& last = lastLoad[pointer];

  // ... to a pointer that has been loaded from before...
  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
  bool removedNonLocal = false;
  Instruction* dep = MD.getDependency(L);
  if (dep == MemoryDependenceAnalysis::NonLocal &&
      L->getParent() != &L->getParent()->getParent()->getEntryBlock()) {
    removedNonLocal = processNonLocalLoad(L, toErase);

    if (!removedNonLocal)
      last = L;

    return removedNonLocal;
  }

  bool deletedLoad = false;

  // Walk up the dependency chain until we either find
  // a dependency we can use, or we can't walk any further
  while (dep != MemoryDependenceAnalysis::None &&
         dep != MemoryDependenceAnalysis::NonLocal &&
         (isa<LoadInst>(dep) || isa<StoreInst>(dep))) {
    // ... that depends on a store ...
    if (StoreInst* S = dyn_cast<StoreInst>(dep)) {
      if (S->getPointerOperand() == pointer) {
        // Remove it!
        MD.removeInstruction(L);

        L->replaceAllUsesWith(S->getOperand(0));
        toErase.push_back(L);
        deletedLoad = true;
        NumGVNLoad++;
      }

      // Whether we removed it or not, we can't
      // go any further
      break;
    } else if (!last) {
      // If we don't depend on a store, and we haven't
      // been loaded before, bail.
      break;
    } else if (dep == last) {
      // Remove it!
      MD.removeInstruction(L);

      L->replaceAllUsesWith(last);
      toErase.push_back(L);
      deletedLoad = true;
      NumGVNLoad++;

      break;
    } else {
      dep = MD.getDependency(L, dep);
    }
  }

  if (dep != MemoryDependenceAnalysis::None &&
      dep != MemoryDependenceAnalysis::NonLocal &&
      isa<AllocationInst>(dep)) {
    // Check that this load is actually from the
    // allocation we found
    Value* v = L->getOperand(0);
    while (true) {
      if (BitCastInst *BC = dyn_cast<BitCastInst>(v))
        v = BC->getOperand(0);
      else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(v))
        v = GEP->getOperand(0);
      else
        break;
    }
    if (v == dep) {
      // If this load depends directly on an allocation, there isn't
      // anything stored there; therefore, we can optimize this load
      // to undef.
      MD.removeInstruction(L);

      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      toErase.push_back(L);
      deletedLoad = true;
      NumGVNLoad++;
    }
  }

  if (!deletedLoad)
    last = L;

  return deletedLoad;
}

/// isBytewiseValue - If the specified value can be set by repeating the same
/// byte in memory, return the i8 value that it is represented with.  This is
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
/// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated
/// byte store (e.g. i16 0x1234), return null.
static Value *isBytewiseValue(Value *V) {
  // All byte-wide stores are splatable, even of arbitrary variables.
  if (V->getType() == Type::Int8Ty) return V;

  // Constant float and double values can be handled as integer values if the
  // corresponding integer value is "byteable".  An important case is 0.0.
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
    if (CFP->getType() == Type::FloatTy)
      V = ConstantExpr::getBitCast(CFP, Type::Int32Ty);
    if (CFP->getType() == Type::DoubleTy)
      V = ConstantExpr::getBitCast(CFP, Type::Int64Ty);
    // Don't handle long double formats, which have strange constraints.
  }

  // We can handle constant integers that are power of two in size and a
  // multiple of 8 bits.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
    unsigned Width = CI->getBitWidth();
    if (isPowerOf2_32(Width) && Width > 8) {
      // We can handle this value if the recursive binary decomposition is the
      // same at all levels.
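      // For example, i32 0xF0F0F0F0 splits into halves 0xF0F0 == 0xF0F0, then
      // 0xF0 == 0xF0, so it is the splat of the byte 0xF0, while i16 0x1234
      // is rejected at the first split (0x12 != 0x34).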
      APInt Val = CI->getValue();
      APInt Val2;
      while (Val.getBitWidth() != 8) {
        unsigned NextWidth = Val.getBitWidth()/2;
        Val2  = Val.lshr(NextWidth);
        Val2.trunc(Val.getBitWidth()/2);
        Val.trunc(Val.getBitWidth()/2);

        // If the top/bottom halves aren't the same, reject it.
        if (Val != Val2)
          return 0;
      }
      return ConstantInt::get(Val);
    }
  }

  // Conceptually, we could handle things like:
  //   %a = zext i8 %X to i16
  //   %b = shl i16 %a, 8
  //   %c = or i16 %a, %b
  // but until there is an example that actually needs this, it doesn't seem
  // worth worrying about.
  return 0;
}

static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
                                  bool &VariableIdxFound, TargetData &TD) {
  // Skip over the first indices.
  gep_type_iterator GTI = gep_type_begin(GEP);
  for (unsigned i = 1; i != Idx; ++i, ++GTI)
    /*skip along*/;

  // Compute the offset implied by the rest of the indices.
  int64_t Offset = 0;
  for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
    if (OpC == 0)
      return VariableIdxFound = true;
    if (OpC->isZero()) continue;  // No offset.

    // Handle struct indices, which add their field offset to the pointer.
    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
      continue;
    }

    // Otherwise, we have a sequential type like an array or vector.  Multiply
    // the index by the ElementSize.
    uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
    Offset += Size*OpC->getSExtValue();
  }

  return Offset;
}

/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
/// constant offset, and return that constant offset.  For example, Ptr1 might
/// be &A[42], and Ptr2 might be &A[40].  In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
                            TargetData &TD) {
  // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
  // base.  After that base, they may have some number of common (and
  // potentially variable) indices.  After that they handle some constant
  // offset, which determines their offset from each other.  At this point, we
  // handle no other case.
  GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
  GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
  if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
    return false;

  // Skip any common indices and track the GEP types.
  unsigned Idx = 1;
  for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
    if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
      break;

  bool VariableIdxFound = false;
  int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
  int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
  if (VariableIdxFound) return false;

  Offset = Offset2-Offset1;
  return true;
}

/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
///   store 0 -> P+1
///   store 0 -> P+0
///   store 0 -> P+3
///   store 0 -> P+2
/// which sometimes happens with stores to arrays of structs etc.  When we see
/// the first store, we make a range [1, 2).  The second store extends the range
/// to [0, 2).  The third makes a new range [2, 3).  The fourth store joins the
/// two ranges into [0, 3) which is memset'able.
namespace {
struct MemsetRange {
  // Start/End - A half-open range describing the span that this range covers.
  // The range is closed at the start and open at the end: [Start, End).
  int64_t Start, End;

  /// StartPtr - The getelementptr instruction that points to the start of the
  /// range.
  Value *StartPtr;

  /// Alignment - The known alignment of the first store.
  unsigned Alignment;

  /// TheStores - The actual stores that make up this range.
  SmallVector<StoreInst*, 16> TheStores;

  bool isProfitableToUseMemset(const TargetData &TD) const;
};
} // end anon namespace

bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
  // If we found 8 or more stores to merge, or 64 or more bytes, use memset.
  if (TheStores.size() >= 8 || End-Start >= 64) return true;

  // Assume that the code generator is capable of merging pairs of stores
  // together if it wants to.
  if (TheStores.size() <= 2) return false;

  // If we have fewer than 8 stores, it can still be worthwhile to do this.
  // For example, merging 4 i8 stores into an i32 store is useful almost always.
  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
  // memset will be split into 2 32-bit stores anyway) and doing so can
  // pessimize the llvm optimizer.
  //
  // Since we don't have perfect knowledge here, make some assumptions: assume
  // the maximum GPR width is the same size as the pointer size and assume that
  // this width can be stored.  If so, check to see whether we will end up
  // actually reducing the number of stores used.
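  // As a worked example (assuming 4-byte pointers): four i8 stores covering 4
  // bytes give NumPointerStores == 1 and NumByteStores == 0, and 4 > 1, so we
  // form a memset; three i8 stores covering 3 bytes give 3 > 0+3 == false, so
  // they are left alone.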
  unsigned Bytes = unsigned(End-Start);
  unsigned NumPointerStores = Bytes/TD.getPointerSize();

  // Assume the remaining bytes if any are done a byte at a time.
  unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();

  // If we will reduce the # stores (according to this heuristic), do the
  // transformation.  This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
  // etc.
  return TheStores.size() > NumPointerStores+NumByteStores;
}

namespace {
class MemsetRanges {
  /// Ranges - A sorted list of the memset ranges.  We use std::list here
  /// because each element is relatively large and expensive to copy.
  std::list<MemsetRange> Ranges;
  typedef std::list<MemsetRange>::iterator range_iterator;
  TargetData &TD;
public:
  MemsetRanges(TargetData &td) : TD(td) {}

  typedef std::list<MemsetRange>::const_iterator const_iterator;
  const_iterator begin() const { return Ranges.begin(); }
  const_iterator end() const { return Ranges.end(); }
  bool empty() const { return Ranges.empty(); }

  void addStore(int64_t OffsetFromFirst, StoreInst *SI);
};

} // end anon namespace

/// addStore - Add a new store to the MemsetRanges data structure.  This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
  int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());

  // Do a linear search of the ranges to see if this can be joined and/or to
  // find the insertion point in the list.  We keep the ranges sorted for
  // simplicity here.  This is a linear search of a linked list, which is ugly,
  // however the number of ranges is limited, so this won't get crazy slow.
  range_iterator I = Ranges.begin(), E = Ranges.end();

  while (I != E && Start > I->End)
    ++I;

  // We now know that I == E, in which case we didn't find anything to merge
  // with, or that Start <= I->End.  If End < I->Start or I == E, then we need
  // to insert a new range.  Handle this now.
  if (I == E || End < I->Start) {
    MemsetRange &R = *Ranges.insert(I, MemsetRange());
    R.Start        = Start;
    R.End          = End;
    R.StartPtr     = SI->getPointerOperand();
    R.Alignment    = SI->getAlignment();
    R.TheStores.push_back(SI);
    return;
  }

  // This store overlaps with I, add it.
  I->TheStores.push_back(SI);

  // At this point, we may have an interval that completely contains our store.
  // If so, just add it to the interval and return.
  if (I->Start <= Start && I->End >= End)
    return;

  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
  // but is not entirely contained within the range.

  // See if this store extends the start of the range.  In this case, it
  // couldn't possibly cause the range to join the prior range, because
  // otherwise we would have stopped on *it*.
  if (Start < I->Start)
    I->Start = Start;

  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
  // is in or right at the end of I), and that End >= I->Start.  Extend I out to
  // End.
  if (End > I->End) {
    I->End = End;
    range_iterator NextI = I;
    while (++NextI != E && End >= NextI->Start) {
      // Merge the range in.
      I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
      if (NextI->End > I->End)
        I->End = NextI->End;
      Ranges.erase(NextI);
      NextI = I;
    }
  }
}

/// processStore - When GVN is scanning forward over instructions, we look for
/// some other patterns to fold away.  In particular, this looks for stores to
/// neighboring locations of memory.  If it sees enough consecutive ones
/// (currently 4) it attempts to merge them together into a memcpy/memset.
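/// For example (an illustrative sketch), four adjacent byte stores:
///   store i8 0, i8* %P
///   store i8 0, i8* %P1     ; %P1 = getelementptr %P, 1
///   store i8 0, i8* %P2     ; %P2 = getelementptr %P, 2
///   store i8 0, i8* %P3     ; %P3 = getelementptr %P, 3
/// are collected into one range and emitted as a single llvm.memset.i64 call.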
bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
  if (!FormMemSet) return false;
  if (SI->isVolatile()) return false;

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.

  // Ensure that the value being stored is something that is memset'able a byte
  // at a time, like "0" or "-1" of any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
  if (!ByteVal)
    return false;

  TargetData &TD = getAnalysis<TargetData>();
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

  // Okay, so we now have a single store with a splatable value.  Scan to find
  // all subsequent stores of the same value to offsets from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous blocks
  // are stored.
  MemsetRanges Ranges(TD);

  Value *StartPtr = SI->getPointerOperand();

  BasicBlock::iterator BI = SI;
  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
    if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
      // If the call is readnone, ignore it, otherwise bail out.  We don't even
      // allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (AA.getModRefBehavior(CallSite::get(BI)) ==
            AliasAnalysis::DoesNotAccessMemory)
        continue;

      // TODO: If this is a memset, try to join it in.

      break;
    } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
      break;

    // If this is a non-store instruction it is fine, ignore it.
    StoreInst *NextStore = dyn_cast<StoreInst>(BI);
    if (NextStore == 0) continue;

    // If this is a store, see if we can merge it in.
    if (NextStore->isVolatile()) break;

    // Check to see if this stored value is of the same byte-splattable value.
    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
      break;

    // Check to see if this store is to a constant offset from the start ptr.
    int64_t Offset;
    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, TD))
      break;

    Ranges.addStore(Offset, NextStore);
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (Ranges.empty())
    return false;

  // If we had at least one store that could be merged in, add the starting
  // store as well.  We try to avoid this unless there is at least something
  // interesting as a small compile-time optimization.
  Ranges.addStore(0, SI);

  Function *MemSetF = 0;

  // Now that we have full information about ranges, loop over the ranges and
  // emit memset's for anything big enough to be worthwhile.
  bool MadeChange = false;
  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
       I != E; ++I) {
    const MemsetRange &Range = *I;

    if (Range.TheStores.size() == 1) continue;

    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(TD))
      continue;

    // Otherwise, we do want to transform this!  Create a new memset.  We put
    // the memset right after the first store that we found in this block.  This
    // ensures that the caller will increment the iterator to the memset before
    // it deletes all the stores.
    BasicBlock::iterator InsertPt = SI; ++InsertPt;

    if (MemSetF == 0)
      MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent()
                                          ->getParent(), Intrinsic::memset_i64);

    // StartPtr may not dominate the starting point.  Instead of using it, base
    // the destination pointer off the input to the first store in the block.
    StartPtr = SI->getPointerOperand();

    // Cast the start ptr to be i8* as memset requires.
    const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty);
    if (StartPtr->getType() != i8Ptr)
      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
                                 InsertPt);

    // Offset the pointer if needed.
    if (Range.Start)
      StartPtr = new GetElementPtrInst(StartPtr, ConstantInt::get(Type::Int64Ty,
                                                                  Range.Start),
                                       "ptroffset", InsertPt);

    Value *Ops[] = {
      StartPtr, ByteVal,   // Start, value
      ConstantInt::get(Type::Int64Ty, Range.End-Range.Start),  // size
      ConstantInt::get(Type::Int32Ty, Range.Alignment)   // align
    };
    Value *C = new CallInst(MemSetF, Ops, Ops+4, "", InsertPt);
    DEBUG(cerr << "Replace stores:\n";
          for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
            cerr << *Range.TheStores[i];
          cerr << "With: " << *C); C=C;

    // Zap all the stores.
    toErase.append(Range.TheStores.begin(), Range.TheStores.end());
    ++NumMemSetInfer;
    MadeChange = true;
  }

  return MadeChange;
}

1394/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
1395/// and checks for the possibility of a call slot optimization by having
1396/// the call write its result directly into the destination of the memcpy.
1397bool GVN::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C,
1398                               SmallVectorImpl<Instruction*> &toErase) {
1399  // The general transformation to keep in mind is
1400  //
1401  //   call @func(..., src, ...)
1402  //   memcpy(dest, src, ...)
1403  //
1404  // ->
1405  //
1406  //   memcpy(dest, src, ...)
1407  //   call @func(..., dest, ...)
1408  //
1409  // Since moving the memcpy is technically awkward, we additionally check that
1410  // src only holds uninitialized values at the moment of the call, meaning that
1411  // the memcpy can be discarded rather than moved.
1412
1413  // Deliberately get the source and destination with bitcasts stripped away,
1414  // because we'll need to do type comparisons based on the underlying type.
1415  Value* cpyDest = cpy->getDest();
1416  Value* cpySrc = cpy->getSource();
1417  CallSite CS = CallSite::get(C);
1418
1419  // We need to be able to reason about the size of the memcpy, so we require
1420  // that it be a constant.
1421  ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
1422  if (!cpyLength)
1423    return false;
1424
1425  // Require that src be an alloca.  This simplifies the reasoning considerably.
1426  AllocaInst* srcAlloca = dyn_cast<AllocaInst>(cpySrc);
1427  if (!srcAlloca)
1428    return false;
1429
1430  // Check that all of src is copied to dest.
1431  TargetData& TD = getAnalysis<TargetData>();
1432
1433  ConstantInt* srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
1434  if (!srcArraySize)
1435    return false;
1436
1437  uint64_t srcSize = TD.getABITypeSize(srcAlloca->getAllocatedType()) *
1438    srcArraySize->getZExtValue();
1439
1440  if (cpyLength->getZExtValue() < srcSize)
1441    return false;
1442
1443  // Check that accessing the first srcSize bytes of dest will not cause a
1444  // trap.  Otherwise the transform is invalid since it might cause a trap
1445  // to occur earlier than it otherwise would.
1446  if (AllocaInst* A = dyn_cast<AllocaInst>(cpyDest)) {
1447    // The destination is an alloca.  Check it is larger than srcSize.
1448    ConstantInt* destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
1449    if (!destArraySize)
1450      return false;
1451
1452    uint64_t destSize = TD.getABITypeSize(A->getAllocatedType()) *
1453      destArraySize->getZExtValue();
1454
1455    if (destSize < srcSize)
1456      return false;
1457  } else if (Argument* A = dyn_cast<Argument>(cpyDest)) {
1458    // If the destination is an sret parameter then only accesses that are
1459    // outside of the returned struct type can trap.
1460    if (!A->hasStructRetAttr())
1461      return false;
1462
1463    const Type* StructTy = cast<PointerType>(A->getType())->getElementType();
1464    uint64_t destSize = TD.getABITypeSize(StructTy);
1465
1466    if (destSize < srcSize)
1467      return false;
1468  } else {
1469    return false;
1470  }
1471
1472  // Check that src is not accessed except via the call and the memcpy.  This
1473  // guarantees that it holds only undefined values when passed in (so the final
1474  // memcpy can be dropped), that it is not read or written between the call and
1475  // the memcpy, and that writing beyond the end of it is undefined.
  SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
                                   srcAlloca->use_end());
  while (!srcUseList.empty()) {
    User* UI = srcUseList.back();
    srcUseList.pop_back();

    if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
      for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
           I != E; ++I)
        srcUseList.push_back(*I);
    } else if (UI != C && UI != cpy) {
      return false;
    }
  }

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  DominatorTree& DT = getAnalysis<DominatorTree>();
  if (Instruction* cpyDestInst = dyn_cast<Instruction>(cpyDest))
    if (!DT.dominates(cpyDestInst, C))
      return false;

  // The use analysis above guarantees that the call does not access src in
  // some unexpected manner, e.g. via a global.  In addition, we need to know
  // that it does not sneakily access dest; we rely on AA to figure this out
  // for us.
  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
  if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
      AliasAnalysis::NoModRef)
    return false;

  // All the checks have passed, so do the transformation.
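  // For example (illustrative IR only, with bitcasts omitted):
  //   %t = alloca %T
  //   call void @f(%T* %t)
  //   call void @llvm.memcpy.i32(i8* %d, i8* %t, i32 N, i32 4)
  // becomes
  //   call void @f(%T* %d)
  // after which the now-dead memcpy is queued for erasure below.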
  for (unsigned i = 0; i < CS.arg_size(); ++i)
    if (CS.getArgument(i) == cpySrc) {
      if (cpySrc->getType() != cpyDest->getType())
        cpyDest = CastInst::createPointerCast(cpyDest, cpySrc->getType(),
                                              cpyDest->getName(), C);
      CS.setArgument(i, cpyDest);
    }

  // Drop any cached information about the call, because we may have changed
  // its dependence information by changing its parameter.
  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
  MD.dropInstruction(C);

  // Remove the memcpy.
  MD.removeInstruction(cpy);
  toErase.push_back(cpy);

  return true;
}

/// processMemCpy - perform simplification of memcpy's.  If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances).  This allows later passes to remove the first memcpy
/// altogether.
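/// For example (illustrative only):
///   call void @llvm.memcpy.i32(i8* %Y, i8* %X, i32 16, i32 4)   ; A
///   call void @llvm.memcpy.i32(i8* %Z, i8* %Y, i32 16, i32 4)   ; B
/// becomes, once the checks below succeed:
///   call void @llvm.memcpy.i32(i8* %Y, i8* %X, i32 16, i32 4)   ; A (now dead)
///   call void @llvm.memcpy.i32(i8* %Z, i8* %X, i32 16, i32 4)   ; B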
bool GVN::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
                        SmallVectorImpl<Instruction*> &toErase) {
  // We can only transform memcpy's where the dest of one is the source of the
  // other.
  if (M->getSource() != MDep->getDest())
    return false;

  // Second, the lengths of the memcpy's must be the same, or the preceding one
  // must be larger than the following one.
  ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength());
  ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength());
  if (!C1 || !C2)
    return false;

  uint64_t DepSize = C1->getValue().getZExtValue();
  uint64_t CpySize = C2->getValue().getZExtValue();

  if (DepSize < CpySize)
    return false;

  // Finally, we have to make sure that the dest of the second does not
  // alias the source of the first, and that neither memcpy has overlapping
  // source and dest.
  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
  if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
      AliasAnalysis::NoAlias)
    return false;
  else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
           AliasAnalysis::NoAlias)
    return false;
  else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
           != AliasAnalysis::NoAlias)
    return false;

  // If all checks passed, then we can transform these memcpy's.
  Function* MemCpyFun = Intrinsic::getDeclaration(
                                 M->getParent()->getParent()->getParent(),
                                 M->getIntrinsicID());

  std::vector<Value*> args;
  args.push_back(M->getRawDest());
  args.push_back(MDep->getRawSource());
  args.push_back(M->getLength());
  args.push_back(M->getAlignment());

  CallInst* C = new CallInst(MemCpyFun, args.begin(), args.end(), "", M);

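  // Verify that the forwarded copy was safe to make: the new memcpy must
  // still depend directly on the first memcpy.  If it does not, something may
  // write between them, so conservatively undo the change.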
  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
  if (MD.getDependency(C) == MDep) {
    MD.dropInstruction(M);
    toErase.push_back(M);
    return true;
  }

  MD.removeInstruction(C);
  toErase.push_back(C);
  return false;
}

/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
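/// Returns true if a change was made to the function.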
bool GVN::processInstruction(Instruction *I, ValueNumberedSet &currAvail,
                             DenseMap<Value*, LoadInst*> &lastSeenLoad,
                             SmallVectorImpl<Instruction*> &toErase) {
  if (LoadInst* L = dyn_cast<LoadInst>(I))
    return processLoad(L, lastSeenLoad, toErase);

  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return processStore(SI, toErase);

  if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();

    // There are two possible optimizations we can do for memcpy:
    //   a) memcpy-memcpy xform which exposes redundancy for DSE
    //   b) call-memcpy xform for return slot optimization
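    // Both require a local instruction that the memcpy depends on.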
    Instruction* dep = MD.getDependency(M);
    if (dep == MemoryDependenceAnalysis::None ||
        dep == MemoryDependenceAnalysis::NonLocal)
      return false;
    if (MemCpyInst *MemCpy = dyn_cast<MemCpyInst>(dep))
      return processMemCpy(M, MemCpy, toErase);
    if (CallInst* C = dyn_cast<CallInst>(dep))
      return performCallSlotOptzn(M, C, toErase);
    return false;
  }

  unsigned num = VN.lookup_or_add(I);

  // Collapse PHI nodes.
  if (PHINode* p = dyn_cast<PHINode>(I)) {
    Value* constVal = CollapsePhi(p);

    if (constVal) {
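      // Scrub the dying PHI out of the PHI map so that no stale reference to
      // the erased instruction remains.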
      for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end();
           PI != PE; ++PI)
        if (PI->second.count(p))
          PI->second.erase(p);

      p->replaceAllUsesWith(constVal);
      toErase.push_back(p);
    }
  // Perform value-number based elimination.
  } else if (currAvail.test(num)) {
    Value* repl = find_leader(currAvail, num);

    if (CallInst* CI = dyn_cast<CallInst>(I)) {
      AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
      if (!AA.doesNotAccessMemory(CI)) {
        MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
        if (cast<Instruction>(repl)->getParent() != CI->getParent() ||
            MD.getDependency(CI) != MD.getDependency(cast<CallInst>(repl))) {
          // There must be an intervening may-alias store, so nothing from
          // this point on can be replaced with the preceding call.
          currAvail.erase(repl);
          currAvail.insert(I);

          return false;
        }
      }
    }

    // Remove it!
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
    MD.removeInstruction(I);

    VN.erase(I);
    I->replaceAllUsesWith(repl);
    toErase.push_back(I);
    return true;
  } else if (!I->isTerminator()) {
    currAvail.set(num);
    currAvail.insert(I);
  }

  return false;
}

// GVN::runOnFunction - This is the main transformation entry point for a
// function.
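// GVN is rerun until it reaches a fixed point, since eliminating one
// instruction can expose further redundancies.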
//
bool GVN::runOnFunction(Function& F) {
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());

  bool changed = false;
  bool shouldContinue = true;

  while (shouldContinue) {
    shouldContinue = iterateOnFunction(F);
    changed |= shouldContinue;
  }

  return changed;
}


// GVN::iterateOnFunction - Executes one iteration of GVN.
bool GVN::iterateOnFunction(Function &F) {
  // Clean out global sets left over from any previous function or iteration.
  VN.clear();
  availableOut.clear();
  phiMap.clear();

  bool changed_function = false;

  DominatorTree &DT = getAnalysis<DominatorTree>();

  SmallVector<Instruction*, 4> toErase;
  DenseMap<Value*, LoadInst*> lastSeenLoad;

  // Top-down walk of the dominator tree.
  for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()),
         E = df_end(DT.getRootNode()); DI != E; ++DI) {

    // Get the set to update for this block.
    ValueNumberedSet& currAvail = availableOut[DI->getBlock()];
    lastSeenLoad.clear();

    BasicBlock* BB = DI->getBlock();

    // A block inherits AVAIL_OUT from its immediate dominator.
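    // The entry block has no immediate dominator, so it starts out empty.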
    if (DI->getIDom() != 0)
      currAvail = availableOut[DI->getIDom()->getBlock()];

    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
         BI != BE; ) {
      changed_function |= processInstruction(BI, currAvail,
                                             lastSeenLoad, toErase);

      NumGVNInstr += toErase.size();

      // Advance the iterator before erasing, to avoid iterator invalidation.
      ++BI;

      for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
           E = toErase.end(); I != E; ++I)
        (*I)->eraseFromParent();

      toErase.clear();
    }
  }

  return changed_function;
}