MachineCSE.cpp revision 49cec1d818d0c7d801e786c458896a60eb424524
1//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass performs global common subexpression elimination on machine
11// instructions using a scoped hash table based value numbering scheme. It
12// must be run while the machine function is still in SSA form.
13//
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "machine-cse"
17#include "llvm/CodeGen/Passes.h"
18#include "llvm/CodeGen/MachineDominators.h"
19#include "llvm/CodeGen/MachineInstr.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/Analysis/AliasAnalysis.h"
22#include "llvm/Target/TargetInstrInfo.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/ScopedHashTable.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/Support/Debug.h"
28#include "llvm/Support/RecyclingAllocator.h"
29
30using namespace llvm;
31
32STATISTIC(NumCoalesces, "Number of copies coalesced");
33STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
34STATISTIC(NumPhysCSEs,
35          "Number of physreg referencing common subexpr eliminated");
36STATISTIC(NumCommutes,  "Number of copies coalesced after commuting");
37
38namespace {
39  class MachineCSE : public MachineFunctionPass {
40    const TargetInstrInfo *TII;
41    const TargetRegisterInfo *TRI;
42    AliasAnalysis *AA;
43    MachineDominatorTree *DT;
44    MachineRegisterInfo *MRI;
45  public:
46    static char ID; // Pass identification
47    MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {
48      initializeMachineCSEPass(*PassRegistry::getPassRegistry());
49    }
50
51    virtual bool runOnMachineFunction(MachineFunction &MF);
52
53    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
54      AU.setPreservesCFG();
55      MachineFunctionPass::getAnalysisUsage(AU);
56      AU.addRequired<AliasAnalysis>();
57      AU.addPreservedID(MachineLoopInfoID);
58      AU.addRequired<MachineDominatorTree>();
59      AU.addPreserved<MachineDominatorTree>();
60    }
61
62    virtual void releaseMemory() {
63      ScopeMap.clear();
64      Exps.clear();
65    }
66
67  private:
68    const unsigned LookAheadLimit;
69    typedef RecyclingAllocator<BumpPtrAllocator,
70        ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
71    typedef ScopedHashTable<MachineInstr*, unsigned,
72        MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
73    typedef ScopedHTType::ScopeTy ScopeType;
74    DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
75    ScopedHTType VNT;
76    SmallVector<MachineInstr*, 64> Exps;
77    unsigned CurrVN;
78
79    bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
80    bool isPhysDefTriviallyDead(unsigned Reg,
81                                MachineBasicBlock::const_iterator I,
82                                MachineBasicBlock::const_iterator E) const ;
83    bool hasLivePhysRegDefUses(const MachineInstr *MI,
84                               const MachineBasicBlock *MBB,
85                               SmallSet<unsigned,8> &PhysRefs,
86                               SmallVector<unsigned,8> &PhysDefs) const;
87    bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
88                          SmallSet<unsigned,8> &PhysRefs) const;
89    bool isCSECandidate(MachineInstr *MI);
90    bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
91                           MachineInstr *CSMI, MachineInstr *MI);
92    void EnterScope(MachineBasicBlock *MBB);
93    void ExitScope(MachineBasicBlock *MBB);
94    bool ProcessBlock(MachineBasicBlock *MBB);
95    void ExitScopeIfDone(MachineDomTreeNode *Node,
96                 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
97                 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
98    bool PerformCSE(MachineDomTreeNode *Node);
99  };
100} // end anonymous namespace
101
102char MachineCSE::ID = 0;
103INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
104                "Machine Common Subexpression Elimination", false, false)
105INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
106INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
107INITIALIZE_PASS_END(MachineCSE, "machine-cse",
108                "Machine Common Subexpression Elimination", false, false)
109
110FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
111
112bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
113                                          MachineBasicBlock *MBB) {
114  bool Changed = false;
115  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
116    MachineOperand &MO = MI->getOperand(i);
117    if (!MO.isReg() || !MO.isUse())
118      continue;
119    unsigned Reg = MO.getReg();
120    if (!TargetRegisterInfo::isVirtualRegister(Reg))
121      continue;
122    if (!MRI->hasOneNonDBGUse(Reg))
123      // Only coalesce single use copies. This ensure the copy will be
124      // deleted.
125      continue;
126    MachineInstr *DefMI = MRI->getVRegDef(Reg);
127    if (DefMI->getParent() != MBB)
128      continue;
129    if (!DefMI->isCopy())
130      continue;
131    unsigned SrcReg = DefMI->getOperand(1).getReg();
132    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
133      continue;
134    if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
135      continue;
136    if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
137      continue;
138    DEBUG(dbgs() << "Coalescing: " << *DefMI);
139    DEBUG(dbgs() << "***     to: " << *MI);
140    MO.setReg(SrcReg);
141    MRI->clearKillFlags(SrcReg);
142    DefMI->eraseFromParent();
143    ++NumCoalesces;
144    Changed = true;
145  }
146
147  return Changed;
148}
149
150bool
151MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
152                                   MachineBasicBlock::const_iterator I,
153                                   MachineBasicBlock::const_iterator E) const {
154  unsigned LookAheadLeft = LookAheadLimit;
155  while (LookAheadLeft) {
156    // Skip over dbg_value's.
157    while (I != E && I->isDebugValue())
158      ++I;
159
160    if (I == E)
161      // Reached end of block, register is obviously dead.
162      return true;
163
164    bool SeenDef = false;
165    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
166      const MachineOperand &MO = I->getOperand(i);
167      if (!MO.isReg() || !MO.getReg())
168        continue;
169      if (!TRI->regsOverlap(MO.getReg(), Reg))
170        continue;
171      if (MO.isUse())
172        // Found a use!
173        return false;
174      SeenDef = true;
175    }
176    if (SeenDef)
177      // See a def of Reg (or an alias) before encountering any use, it's
178      // trivially dead.
179      return true;
180
181    --LookAheadLeft;
182    ++I;
183  }
184  return false;
185}
186
187/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
188/// physical registers (except for dead defs of physical registers). It also
189/// returns the physical register def by reference if it's the only one and the
190/// instruction does not uses a physical register.
191bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
192                                       const MachineBasicBlock *MBB,
193                                       SmallSet<unsigned,8> &PhysRefs,
194                                       SmallVector<unsigned,8> &PhysDefs) const{
195  MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
196  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
197    const MachineOperand &MO = MI->getOperand(i);
198    if (!MO.isReg())
199      continue;
200    unsigned Reg = MO.getReg();
201    if (!Reg)
202      continue;
203    if (TargetRegisterInfo::isVirtualRegister(Reg))
204      continue;
205    // If the def is dead, it's ok. But the def may not marked "dead". That's
206    // common since this pass is run before livevariables. We can scan
207    // forward a few instructions and check if it is obviously dead.
208    if (MO.isDef() &&
209        (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
210      continue;
211    PhysDefs.push_back(Reg);
212    PhysRefs.insert(Reg);
213    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
214      PhysRefs.insert(*Alias);
215  }
216
217  return !PhysRefs.empty();
218}
219
220bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
221                                  SmallSet<unsigned,8> &PhysRefs) const {
222  // Look backward from MI to find CSMI.
223  unsigned LookAheadLeft = LookAheadLimit;
224  MachineBasicBlock::const_reverse_iterator I(MI);
225  MachineBasicBlock::const_reverse_iterator E(MI->getParent()->rend());
226  while (LookAheadLeft) {
227    while (LookAheadLeft && I != E) {
228      // Skip over dbg_value's.
229      while (I != E && I->isDebugValue())
230        ++I;
231
232      if (&*I == CSMI)
233        return true;
234
235      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
236        const MachineOperand &MO = I->getOperand(i);
237        if (!MO.isReg() || !MO.isDef())
238          continue;
239        unsigned MOReg = MO.getReg();
240        if (TargetRegisterInfo::isVirtualRegister(MOReg))
241          continue;
242        if (PhysRefs.count(MOReg))
243          return false;
244      }
245
246      --LookAheadLeft;
247      ++I;
248    }
249    // Go back another BB; for now, only go back at most one BB.
250    MachineBasicBlock *CSBB = CSMI->getParent();
251    MachineBasicBlock *BB = MI->getParent();
252    if (!CSBB->isSuccessor(BB) || BB->pred_size() != 1)
253      return false;
254    I = CSBB->rbegin();
255    E = CSBB->rend();
256  }
257
258  return false;
259}
260
261bool MachineCSE::isCSECandidate(MachineInstr *MI) {
262  if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
263      MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
264    return false;
265
266  // Ignore copies.
267  if (MI->isCopyLike())
268    return false;
269
270  // Ignore stuff that we obviously can't move.
271  const TargetInstrDesc &TID = MI->getDesc();
272  if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
273      MI->hasUnmodeledSideEffects())
274    return false;
275
276  if (TID.mayLoad()) {
277    // Okay, this instruction does a load. As a refinement, we allow the target
278    // to decide whether the loaded value is actually a constant. If so, we can
279    // actually use it as a load.
280    if (!MI->isInvariantLoad(AA))
281      // FIXME: we should be able to hoist loads with no other side effects if
282      // there are no other instructions which can change memory in this loop.
283      // This is a trivial form of alias analysis.
284      return false;
285  }
286  return true;
287}
288
289/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
290/// common expression that defines Reg.
291bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
292                                   MachineInstr *CSMI, MachineInstr *MI) {
293  // FIXME: Heuristics that works around the lack the live range splitting.
294
295  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
296  // an immediate predecessor. We don't want to increase register pressure and
297  // end up causing other computation to be spilled.
298  if (MI->getDesc().isAsCheapAsAMove()) {
299    MachineBasicBlock *CSBB = CSMI->getParent();
300    MachineBasicBlock *BB = MI->getParent();
301    if (CSBB != BB && !CSBB->isSuccessor(BB))
302      return false;
303  }
304
305  // Heuristics #2: If the expression doesn't not use a vr and the only use
306  // of the redundant computation are copies, do not cse.
307  bool HasVRegUse = false;
308  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
309    const MachineOperand &MO = MI->getOperand(i);
310    if (MO.isReg() && MO.isUse() &&
311        TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
312      HasVRegUse = true;
313      break;
314    }
315  }
316  if (!HasVRegUse) {
317    bool HasNonCopyUse = false;
318    for (MachineRegisterInfo::use_nodbg_iterator I =  MRI->use_nodbg_begin(Reg),
319           E = MRI->use_nodbg_end(); I != E; ++I) {
320      MachineInstr *Use = &*I;
321      // Ignore copies.
322      if (!Use->isCopyLike()) {
323        HasNonCopyUse = true;
324        break;
325      }
326    }
327    if (!HasNonCopyUse)
328      return false;
329  }
330
331  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
332  // it unless the defined value is already used in the BB of the new use.
333  bool HasPHI = false;
334  SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
335  for (MachineRegisterInfo::use_nodbg_iterator I =  MRI->use_nodbg_begin(CSReg),
336       E = MRI->use_nodbg_end(); I != E; ++I) {
337    MachineInstr *Use = &*I;
338    HasPHI |= Use->isPHI();
339    CSBBs.insert(Use->getParent());
340  }
341
342  if (!HasPHI)
343    return true;
344  return CSBBs.count(MI->getParent());
345}
346
347void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
348  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
349  ScopeType *Scope = new ScopeType(VNT);
350  ScopeMap[MBB] = Scope;
351}
352
353void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
354  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
355  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
356  assert(SI != ScopeMap.end());
357  ScopeMap.erase(SI);
358  delete SI->second;
359}
360
361bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
362  bool Changed = false;
363
364  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
365  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
366    MachineInstr *MI = &*I;
367    ++I;
368
369    if (!isCSECandidate(MI))
370      continue;
371
372    bool FoundCSE = VNT.count(MI);
373    if (!FoundCSE) {
374      // Look for trivial copy coalescing opportunities.
375      if (PerformTrivialCoalescing(MI, MBB)) {
376        Changed = true;
377
378        // After coalescing MI itself may become a copy.
379        if (MI->isCopyLike())
380          continue;
381        FoundCSE = VNT.count(MI);
382      }
383    }
384
385    // Commute commutable instructions.
386    bool Commuted = false;
387    if (!FoundCSE && MI->getDesc().isCommutable()) {
388      MachineInstr *NewMI = TII->commuteInstruction(MI);
389      if (NewMI) {
390        Commuted = true;
391        FoundCSE = VNT.count(NewMI);
392        if (NewMI != MI) {
393          // New instruction. It doesn't need to be kept.
394          NewMI->eraseFromParent();
395          Changed = true;
396        } else if (!FoundCSE)
397          // MI was changed but it didn't help, commute it back!
398          (void)TII->commuteInstruction(MI);
399      }
400    }
401
402    // If the instruction defines physical registers and the values *may* be
403    // used, then it's not safe to replace it with a common subexpression.
404    // It's also not safe if the instruction uses physical registers.
405    SmallSet<unsigned,8> PhysRefs;
406    SmallVector<unsigned,8> DirectPhysRefs;
407    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, DirectPhysRefs)) {
408      FoundCSE = false;
409
410      // ... Unless the CS is local and it also defines the physical register
411      // which is not clobbered in between and the physical register uses
412      // were not clobbered.
413      unsigned CSVN = VNT.lookup(MI);
414      MachineInstr *CSMI = Exps[CSVN];
415      if (PhysRegDefsReach(CSMI, MI, PhysRefs))
416        FoundCSE = true;
417    }
418
419    if (!FoundCSE) {
420      VNT.insert(MI, CurrVN++);
421      Exps.push_back(MI);
422      continue;
423    }
424
425    // Found a common subexpression, eliminate it.
426    unsigned CSVN = VNT.lookup(MI);
427    MachineInstr *CSMI = Exps[CSVN];
428    DEBUG(dbgs() << "Examining: " << *MI);
429    DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
430
431    // Check if it's profitable to perform this CSE.
432    bool DoCSE = true;
433    unsigned NumDefs = MI->getDesc().getNumDefs();
434    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
435      MachineOperand &MO = MI->getOperand(i);
436      if (!MO.isReg() || !MO.isDef())
437        continue;
438      unsigned OldReg = MO.getReg();
439      unsigned NewReg = CSMI->getOperand(i).getReg();
440      if (OldReg == NewReg)
441        continue;
442      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
443             TargetRegisterInfo::isVirtualRegister(NewReg) &&
444             "Do not CSE physical register defs!");
445      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
446        DoCSE = false;
447        break;
448      }
449      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
450      --NumDefs;
451    }
452
453    // Actually perform the elimination.
454    if (DoCSE) {
455      for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
456        MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
457        MRI->clearKillFlags(CSEPairs[i].second);
458      }
459      MI->eraseFromParent();
460      if (!DirectPhysRefs.empty() && CSMI->getParent() != MBB) {
461        assert(CSMI->getParent()->isSuccessor(MBB));
462        SmallVector<unsigned,8>::iterator PI = DirectPhysRefs.begin(),
463                                          PE = DirectPhysRefs.end();
464        for (; PI != PE; ++PI)
465          MBB->addLiveIn(*PI);
466      }
467      ++NumCSEs;
468      if (!PhysRefs.empty())
469        ++NumPhysCSEs;
470      if (Commuted)
471        ++NumCommutes;
472      Changed = true;
473    } else {
474      DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
475      VNT.insert(MI, CurrVN++);
476      Exps.push_back(MI);
477    }
478    CSEPairs.clear();
479  }
480
481  return Changed;
482}
483
484/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
485/// dominator tree node if its a leaf or all of its children are done. Walk
486/// up the dominator tree to destroy ancestors which are now done.
487void
488MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
489                DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
490                DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
491  if (OpenChildren[Node])
492    return;
493
494  // Pop scope.
495  ExitScope(Node->getBlock());
496
497  // Now traverse upwards to pop ancestors whose offsprings are all done.
498  while (MachineDomTreeNode *Parent = ParentMap[Node]) {
499    unsigned Left = --OpenChildren[Parent];
500    if (Left != 0)
501      break;
502    ExitScope(Parent->getBlock());
503    Node = Parent;
504  }
505}
506
507bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
508  SmallVector<MachineDomTreeNode*, 32> Scopes;
509  SmallVector<MachineDomTreeNode*, 8> WorkList;
510  DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
511  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
512
513  CurrVN = 0;
514
515  // Perform a DFS walk to determine the order of visit.
516  WorkList.push_back(Node);
517  do {
518    Node = WorkList.pop_back_val();
519    Scopes.push_back(Node);
520    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
521    unsigned NumChildren = Children.size();
522    OpenChildren[Node] = NumChildren;
523    for (unsigned i = 0; i != NumChildren; ++i) {
524      MachineDomTreeNode *Child = Children[i];
525      ParentMap[Child] = Node;
526      WorkList.push_back(Child);
527    }
528  } while (!WorkList.empty());
529
530  // Now perform CSE.
531  bool Changed = false;
532  for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
533    MachineDomTreeNode *Node = Scopes[i];
534    MachineBasicBlock *MBB = Node->getBlock();
535    EnterScope(MBB);
536    Changed |= ProcessBlock(MBB);
537    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
538    ExitScopeIfDone(Node, OpenChildren, ParentMap);
539  }
540
541  return Changed;
542}
543
544bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
545  TII = MF.getTarget().getInstrInfo();
546  TRI = MF.getTarget().getRegisterInfo();
547  MRI = &MF.getRegInfo();
548  AA = &getAnalysis<AliasAnalysis>();
549  DT = &getAnalysis<MachineDominatorTree>();
550  return PerformCSE(DT->getRootNode());
551}
552