EarlyCSE.cpp revision e508dd4c75705f325764e1197854c0e83266a7ea
1//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass performs a simple dominator tree walk that eliminates trivially
11// redundant instructions.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "early-cse"
16#include "llvm/Transforms/Scalar.h"
17#include "llvm/Instructions.h"
18#include "llvm/Pass.h"
19#include "llvm/Analysis/Dominators.h"
20#include "llvm/Analysis/InstructionSimplify.h"
21#include "llvm/Target/TargetData.h"
22#include "llvm/Transforms/Utils/Local.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/RecyclingAllocator.h"
25#include "llvm/ADT/ScopedHashTable.h"
26#include "llvm/ADT/Statistic.h"
27using namespace llvm;
28
29STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
30STATISTIC(NumCSE,      "Number of instructions CSE'd");
31STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
32STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
33STATISTIC(NumDSE,      "Number of trivial dead stores removed");
34
35static unsigned getHash(const void *V) {
36  return DenseMapInfo<const void*>::getHashValue(V);
37}
38
39//===----------------------------------------------------------------------===//
40// SimpleValue
41//===----------------------------------------------------------------------===//
42
43namespace {
44  /// SimpleValue - Instances of this struct represent available values in the
45  /// scoped hash table.
46  struct SimpleValue {
47    Instruction *Inst;
48
49    SimpleValue(Instruction *I) : Inst(I) {
50      assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
51    }
52
53    bool isSentinel() const {
54      return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
55             Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
56    }
57
58    static bool canHandle(Instruction *Inst) {
59      // This can only handle non-void readnone functions.
60      if (CallInst *CI = dyn_cast<CallInst>(Inst))
61        return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
62      return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
63             isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
64             isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
65             isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
66             isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
67    }
68  };
69}
70
71namespace llvm {
72// SimpleValue is POD.
73template<> struct isPodLike<SimpleValue> {
74  static const bool value = true;
75};
76
77template<> struct DenseMapInfo<SimpleValue> {
78  static inline SimpleValue getEmptyKey() {
79    return DenseMapInfo<Instruction*>::getEmptyKey();
80  }
81  static inline SimpleValue getTombstoneKey() {
82    return DenseMapInfo<Instruction*>::getTombstoneKey();
83  }
84  static unsigned getHashValue(SimpleValue Val);
85  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
86};
87}
88
89unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
90  Instruction *Inst = Val.Inst;
91
92  // Hash in all of the operands as pointers.
93  unsigned Res = 0;
94  for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
95    Res ^= getHash(Inst->getOperand(i)) << i;
96
97  if (CastInst *CI = dyn_cast<CastInst>(Inst))
98    Res ^= getHash(CI->getType());
99  else if (CmpInst *CI = dyn_cast<CmpInst>(Inst))
100    Res ^= CI->getPredicate();
101  else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst)) {
102    for (ExtractValueInst::idx_iterator I = EVI->idx_begin(),
103         E = EVI->idx_end(); I != E; ++I)
104      Res ^= *I;
105  } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst)) {
106    for (InsertValueInst::idx_iterator I = IVI->idx_begin(),
107         E = IVI->idx_end(); I != E; ++I)
108      Res ^= *I;
109  } else {
110    // nothing extra to hash in.
111    assert((isa<CallInst>(Inst) ||
112            isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
113            isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
114            isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst)) &&
115           "Invalid/unknown instruction");
116  }
117
118  // Mix in the opcode.
119  return (Res << 1) ^ Inst->getOpcode();
120}
121
122bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
123  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
124
125  if (LHS.isSentinel() || RHS.isSentinel())
126    return LHSI == RHSI;
127
128  if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
129  return LHSI->isIdenticalTo(RHSI);
130}
131
132//===----------------------------------------------------------------------===//
133// CallValue
134//===----------------------------------------------------------------------===//
135
136namespace {
137  /// CallValue - Instances of this struct represent available call values in
138  /// the scoped hash table.
139  struct CallValue {
140    Instruction *Inst;
141
142    CallValue(Instruction *I) : Inst(I) {
143      assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
144    }
145
146    bool isSentinel() const {
147      return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
148             Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
149    }
150
151    static bool canHandle(Instruction *Inst) {
152      // Don't value number anything that returns void.
153      if (Inst->getType()->isVoidTy())
154        return false;
155
156      CallInst *CI = dyn_cast<CallInst>(Inst);
157      if (CI == 0 || !CI->onlyReadsMemory())
158        return false;
159      return true;
160    }
161  };
162}
163
164namespace llvm {
165  // CallValue is POD.
166  template<> struct isPodLike<CallValue> {
167    static const bool value = true;
168  };
169
170  template<> struct DenseMapInfo<CallValue> {
171    static inline CallValue getEmptyKey() {
172      return DenseMapInfo<Instruction*>::getEmptyKey();
173    }
174    static inline CallValue getTombstoneKey() {
175      return DenseMapInfo<Instruction*>::getTombstoneKey();
176    }
177    static unsigned getHashValue(CallValue Val);
178    static bool isEqual(CallValue LHS, CallValue RHS);
179  };
180}
181unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
182  Instruction *Inst = Val.Inst;
183  // Hash in all of the operands as pointers.
184  unsigned Res = 0;
185  for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
186    assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
187           "Cannot value number calls with metadata operands");
188    Res ^= getHash(Inst->getOperand(i)) << i;
189  }
190
191  // Mix in the opcode.
192  return (Res << 1) ^ Inst->getOpcode();
193}
194
195bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
196  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
197  if (LHS.isSentinel() || RHS.isSentinel())
198    return LHSI == RHSI;
199  return LHSI->isIdenticalTo(RHSI);
200}
201
202
203//===----------------------------------------------------------------------===//
204// EarlyCSE pass.
205//===----------------------------------------------------------------------===//
206
207namespace {
208
209/// EarlyCSE - This pass does a simple depth-first walk over the dominator
210/// tree, eliminating trivially redundant instructions and using instsimplify
211/// to canonicalize things as it goes.  It is intended to be fast and catch
212/// obvious cases so that instcombine and other passes are more effective.  It
213/// is expected that a later pass of GVN will catch the interesting/hard
214/// cases.
215class EarlyCSE : public FunctionPass {
216public:
217  const TargetData *TD;
218  DominatorTree *DT;
219  typedef RecyclingAllocator<BumpPtrAllocator,
220                      ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
221  typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
222                          AllocatorTy> ScopedHTType;
223
224  /// AvailableValues - This scoped hash table contains the current values of
225  /// all of our simple scalar expressions.  As we walk down the domtree, we
226  /// look to see if instructions are in this: if so, we replace them with what
227  /// we find, otherwise we insert them so that dominated values can succeed in
228  /// their lookup.
229  ScopedHTType *AvailableValues;
230
231  /// AvailableLoads - This scoped hash table contains the current values
232  /// of loads.  This allows us to get efficient access to dominating loads when
233  /// we have a fully redundant load.  In addition to the most recent load, we
234  /// keep track of a generation count of the read, which is compared against
235  /// the current generation count.  The current generation count is
236  /// incremented after every possibly writing memory operation, which ensures
237  /// that we only CSE loads with other loads that have no intervening store.
238  typedef RecyclingAllocator<BumpPtrAllocator,
239    ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator;
240  typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>,
241                          DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType;
242  LoadHTType *AvailableLoads;
243
244  /// AvailableCalls - This scoped hash table contains the current values
245  /// of read-only call values.  It uses the same generation count as loads.
246  typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType;
247  CallHTType *AvailableCalls;
248
249  /// CurrentGeneration - This is the current generation of the memory value.
250  unsigned CurrentGeneration;
251
252  static char ID;
253  explicit EarlyCSE() : FunctionPass(ID) {
254    initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
255  }
256
257  bool runOnFunction(Function &F);
258
259private:
260
261  bool processNode(DomTreeNode *Node);
262
263  // This transformation requires dominator postdominator info
264  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
265    AU.addRequired<DominatorTree>();
266    AU.setPreservesCFG();
267  }
268};
269}
270
271char EarlyCSE::ID = 0;
272
273// createEarlyCSEPass - The public interface to this file.
274FunctionPass *llvm::createEarlyCSEPass() {
275  return new EarlyCSE();
276}
277
278INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
279INITIALIZE_PASS_DEPENDENCY(DominatorTree)
280INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
281
282bool EarlyCSE::processNode(DomTreeNode *Node) {
283  // Define a scope in the scoped hash table.  When we are done processing this
284  // domtree node and recurse back up to our parent domtree node, this will pop
285  // off all the values we install.
286  ScopedHTType::ScopeTy Scope(*AvailableValues);
287
288  // Define a scope for the load values so that anything we add will get
289  // popped when we recurse back up to our parent domtree node.
290  LoadHTType::ScopeTy LoadScope(*AvailableLoads);
291
292  // Define a scope for the call values so that anything we add will get
293  // popped when we recurse back up to our parent domtree node.
294  CallHTType::ScopeTy CallScope(*AvailableCalls);
295
296  BasicBlock *BB = Node->getBlock();
297
298  // If this block has a single predecessor, then the predecessor is the parent
299  // of the domtree node and all of the live out memory values are still current
300  // in this block.  If this block has multiple predecessors, then they could
301  // have invalidated the live-out memory values of our parent value.  For now,
302  // just be conservative and invalidate memory if this block has multiple
303  // predecessors.
304  if (BB->getSinglePredecessor() == 0)
305    ++CurrentGeneration;
306
307  /// LastStore - Keep track of the last non-volatile store that we saw... for
308  /// as long as there in no instruction that reads memory.  If we see a store
309  /// to the same location, we delete the dead store.  This zaps trivial dead
310  /// stores which can occur in bitfield code among other things.
311  StoreInst *LastStore = 0;
312
313  bool Changed = false;
314
315  // See if any instructions in the block can be eliminated.  If so, do it.  If
316  // not, add them to AvailableValues.
317  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
318    Instruction *Inst = I++;
319
320    // Dead instructions should just be removed.
321    if (isInstructionTriviallyDead(Inst)) {
322      DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
323      Inst->eraseFromParent();
324      Changed = true;
325      ++NumSimplify;
326      continue;
327    }
328
329    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
330    // its simpler value.
331    if (Value *V = SimplifyInstruction(Inst, TD, DT)) {
332      DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V << '\n');
333      Inst->replaceAllUsesWith(V);
334      Inst->eraseFromParent();
335      Changed = true;
336      ++NumSimplify;
337      continue;
338    }
339
340    // If this is a simple instruction that we can value number, process it.
341    if (SimpleValue::canHandle(Inst)) {
342      // See if the instruction has an available value.  If so, use it.
343      if (Value *V = AvailableValues->lookup(Inst)) {
344        DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << "  to: " << *V << '\n');
345        Inst->replaceAllUsesWith(V);
346        Inst->eraseFromParent();
347        Changed = true;
348        ++NumCSE;
349        continue;
350      }
351
352      // Otherwise, just remember that this value is available.
353      AvailableValues->insert(Inst, Inst);
354      continue;
355    }
356
357    // If this is a non-volatile load, process it.
358    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
359      // Ignore volatile loads.
360      if (LI->isVolatile()) {
361        LastStore = 0;
362        continue;
363      }
364
365      // If we have an available version of this load, and if it is the right
366      // generation, replace this instruction.
367      std::pair<Value*, unsigned> InVal =
368        AvailableLoads->lookup(Inst->getOperand(0));
369      if (InVal.first != 0 && InVal.second == CurrentGeneration) {
370        DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << "  to: "
371              << *InVal.first << '\n');
372        if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
373        Inst->eraseFromParent();
374        Changed = true;
375        ++NumCSELoad;
376        continue;
377      }
378
379      // Otherwise, remember that we have this instruction.
380      AvailableLoads->insert(Inst->getOperand(0),
381                          std::pair<Value*, unsigned>(Inst, CurrentGeneration));
382      LastStore = 0;
383      continue;
384    }
385
386    // If this instruction may read from memory, forget LastStore.
387    if (Inst->mayReadFromMemory())
388      LastStore = 0;
389
390    // If this is a read-only call, process it.
391    if (CallValue::canHandle(Inst)) {
392      // If we have an available version of this call, and if it is the right
393      // generation, replace this instruction.
394      std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
395      if (InVal.first != 0 && InVal.second == CurrentGeneration) {
396        DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << "  to: "
397                     << *InVal.first << '\n');
398        if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
399        Inst->eraseFromParent();
400        Changed = true;
401        ++NumCSECall;
402        continue;
403      }
404
405      // Otherwise, remember that we have this instruction.
406      AvailableCalls->insert(Inst,
407                         std::pair<Value*, unsigned>(Inst, CurrentGeneration));
408      continue;
409    }
410
411    // Okay, this isn't something we can CSE at all.  Check to see if it is
412    // something that could modify memory.  If so, our available memory values
413    // cannot be used so bump the generation count.
414    if (Inst->mayWriteToMemory()) {
415      ++CurrentGeneration;
416
417      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
418        // We do a trivial form of DSE if there are two stores to the same
419        // location with no intervening loads.  Delete the earlier store.
420        if (LastStore &&
421            LastStore->getPointerOperand() == SI->getPointerOperand()) {
422          DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << "  due to: "
423                       << *Inst << '\n');
424          LastStore->eraseFromParent();
425          Changed = true;
426          ++NumDSE;
427          LastStore = 0;
428          continue;
429        }
430
431        // Okay, we just invalidated anything we knew about loaded values.  Try
432        // to salvage *something* by remembering that the stored value is a live
433        // version of the pointer.  It is safe to forward from volatile stores
434        // to non-volatile loads, so we don't have to check for volatility of
435        // the store.
436        AvailableLoads->insert(SI->getPointerOperand(),
437         std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
438
439        // Remember that this was the last store we saw for DSE.
440        if (!SI->isVolatile())
441          LastStore = SI;
442      }
443    }
444  }
445
446  unsigned LiveOutGeneration = CurrentGeneration;
447  for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) {
448    Changed |= processNode(*I);
449    // Pop any generation changes off the stack from the recursive walk.
450    CurrentGeneration = LiveOutGeneration;
451  }
452  return Changed;
453}
454
455
456bool EarlyCSE::runOnFunction(Function &F) {
457  TD = getAnalysisIfAvailable<TargetData>();
458  DT = &getAnalysis<DominatorTree>();
459
460  // Tables that the pass uses when walking the domtree.
461  ScopedHTType AVTable;
462  AvailableValues = &AVTable;
463  LoadHTType LoadTable;
464  AvailableLoads = &LoadTable;
465  CallHTType CallTable;
466  AvailableCalls = &CallTable;
467
468  CurrentGeneration = 0;
469  return processNode(DT->getRootNode());
470}
471