MachineCSE.cpp revision 3844173f6e5c2d3e309d71d8980e25cca1b9305d
1//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass performs global common subexpression elimination on machine
11// instructions using a scoped hash table based value numbering scheme. It
12// must be run while the machine function is still in SSA form.
13//
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "machine-cse"
17#include "llvm/CodeGen/Passes.h"
18#include "llvm/CodeGen/MachineDominators.h"
19#include "llvm/CodeGen/MachineInstr.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/Analysis/AliasAnalysis.h"
22#include "llvm/Target/TargetInstrInfo.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/ScopedHashTable.h"
25#include "llvm/ADT/Statistic.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
// Counters bumped by this pass: NumCoalesces once per copy folded away by
// PerformTrivialCoalescing, NumCSEs once per instruction erased by
// ProcessBlock.
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs,      "Number of common subexpression eliminated");

// Hidden command line flag, off by default. When set, ProcessBlock may still
// CSE an instruction that has a live physical register def, provided
// PhysRegDefReaches() accepts the earlier instruction's def.
static cl::opt<bool> CSEPhysDef("machine-cse-phys-defs",
                                cl::init(false), cl::Hidden);
36
namespace {
  /// MachineCSE - Machine function pass that eliminates redundant machine
  /// instructions. Blocks are visited in dominator tree order and candidate
  /// instructions are value numbered in a scoped hash table, one scope per
  /// basic block.
  class MachineCSE : public MachineFunctionPass {
    // Target hooks and analyses; all are (re)assigned at the top of
    // runOnMachineFunction.
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    AliasAnalysis *AA;
    MachineDominatorTree *DT;
    MachineRegisterInfo *MRI;
  public:
    static char ID; // Pass identification
    MachineCSE() : MachineFunctionPass(&ID), LookAheadLimit(5), CurrVN(0) {}

    /// Entry point: runs CSE over MF and returns the result of PerformCSE.
    virtual bool runOnMachineFunction(MachineFunction &MF);

    /// Declares the analyses this pass needs (AliasAnalysis and
    /// MachineDominatorTree) and what it preserves (the CFG and the machine
    /// dominator tree).
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
      AU.addRequired<AliasAnalysis>();
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
    }

  private:
    // Maximum number of non-debug instructions the forward scans in
    // isPhysDefTriviallyDead and PhysRegDefReaches will examine.
    const unsigned LookAheadLimit;
    typedef ScopedHashTableScope<MachineInstr*, unsigned,
                                 MachineInstrExpressionTrait> ScopeType;
    // Heap allocated hash table scope currently open for each basic block;
    // entries are added by EnterScope and removed by ExitScope.
    DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
    // Maps an instruction (hashed / compared via MachineInstrExpressionTrait)
    // to its value number.
    ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT;
    // Exps[VN] is the instruction that was assigned value number VN.
    SmallVector<MachineInstr*, 64> Exps;
    // Next value number to hand out; kept in step with Exps.size().
    unsigned CurrVN;

    bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
    bool isPhysDefTriviallyDead(unsigned Reg,
                                MachineBasicBlock::const_iterator I,
                                MachineBasicBlock::const_iterator E) const ;
    bool hasLivePhysRegDefUse(const MachineInstr *MI,
                              const MachineBasicBlock *MBB,
                              unsigned &PhysDef) const;
    bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
                           unsigned PhysDef) const;
    bool isCSECandidate(MachineInstr *MI);
    bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
                           MachineInstr *CSMI, MachineInstr *MI);
    void EnterScope(MachineBasicBlock *MBB);
    void ExitScope(MachineBasicBlock *MBB);
    bool ProcessBlock(MachineBasicBlock *MBB);
    void ExitScopeIfDone(MachineDomTreeNode *Node,
                 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
                 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
    bool PerformCSE(MachineDomTreeNode *Node);
  };
} // end anonymous namespace
88
// Storage for the pass identifier; its address is what the constructor hands
// to MachineFunctionPass.
char MachineCSE::ID = 0;
// Register the pass under the command line name "machine-cse".
static RegisterPass<MachineCSE>
X("machine-cse", "Machine Common Subexpression Elimination");

/// createMachineCSEPass - Return a newly allocated MachineCSE pass.
FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
94
95bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
96                                          MachineBasicBlock *MBB) {
97  bool Changed = false;
98  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
99    MachineOperand &MO = MI->getOperand(i);
100    if (!MO.isReg() || !MO.isUse())
101      continue;
102    unsigned Reg = MO.getReg();
103    if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
104      continue;
105    if (!MRI->hasOneUse(Reg))
106      // Only coalesce single use copies. This ensure the copy will be
107      // deleted.
108      continue;
109    MachineInstr *DefMI = MRI->getVRegDef(Reg);
110    if (DefMI->getParent() != MBB)
111      continue;
112    unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
113    if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
114        TargetRegisterInfo::isVirtualRegister(SrcReg) &&
115        !SrcSubIdx && !DstSubIdx) {
116      const TargetRegisterClass *SRC   = MRI->getRegClass(SrcReg);
117      const TargetRegisterClass *RC    = MRI->getRegClass(Reg);
118      const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC);
119      if (!NewRC)
120        continue;
121      DEBUG(dbgs() << "Coalescing: " << *DefMI);
122      DEBUG(dbgs() << "*** to: " << *MI);
123      MO.setReg(SrcReg);
124      MRI->clearKillFlags(SrcReg);
125      if (NewRC != SRC)
126        MRI->setRegClass(SrcReg, NewRC);
127      DefMI->eraseFromParent();
128      ++NumCoalesces;
129      Changed = true;
130    }
131  }
132
133  return Changed;
134}
135
136bool
137MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
138                                   MachineBasicBlock::const_iterator I,
139                                   MachineBasicBlock::const_iterator E) const {
140  unsigned LookAheadLeft = LookAheadLimit;
141  while (LookAheadLeft) {
142    // Skip over dbg_value's.
143    while (I != E && I->isDebugValue())
144      ++I;
145
146    if (I == E)
147      // Reached end of block, register is obviously dead.
148      return true;
149
150    bool SeenDef = false;
151    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
152      const MachineOperand &MO = I->getOperand(i);
153      if (!MO.isReg() || !MO.getReg())
154        continue;
155      if (!TRI->regsOverlap(MO.getReg(), Reg))
156        continue;
157      if (MO.isUse())
158        // Found a use!
159        return false;
160      SeenDef = true;
161    }
162    if (SeenDef)
163      // See a def of Reg (or an alias) before encountering any use, it's
164      // trivially dead.
165      return true;
166
167    --LookAheadLeft;
168    ++I;
169  }
170  return false;
171}
172
173/// hasLivePhysRegDefUse - Return true if the specified instruction read / write
174/// physical registers (except for dead defs of physical registers). It also
175/// returns the physical register def by reference if it's the only one.
176bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
177                                      const MachineBasicBlock *MBB,
178                                      unsigned &PhysDef) const {
179  PhysDef = 0;
180  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
181    const MachineOperand &MO = MI->getOperand(i);
182    if (!MO.isReg())
183      continue;
184    unsigned Reg = MO.getReg();
185    if (!Reg)
186      continue;
187    if (TargetRegisterInfo::isVirtualRegister(Reg))
188      continue;
189    if (MO.isUse())
190      // Can't touch anything to read a physical register.
191      return true;
192    if (MO.isDead())
193      // If the def is dead, it's ok.
194      continue;
195    // Ok, this is a physical register def that's not marked "dead". That's
196    // common since this pass is run before livevariables. We can scan
197    // forward a few instructions and check if it is obviously dead.
198    if (PhysDef) {
199      // Multiple physical register defs. These are rare, forget about it.
200      PhysDef = 0;
201      return true;
202    }
203    PhysDef = Reg;
204  }
205
206  if (PhysDef) {
207    MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
208    if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end()))
209      return true;
210  }
211  return false;
212}
213
214bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
215                                  unsigned PhysDef) const {
216  // For now conservatively returns false if the common subexpression is
217  // not in the same basic block as the given instruction.
218  MachineBasicBlock *MBB = MI->getParent();
219  if (CSMI->getParent() != MBB)
220    return false;
221  MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
222  MachineBasicBlock::const_iterator E = MI;
223  unsigned LookAheadLeft = LookAheadLimit;
224  while (LookAheadLeft) {
225    // Skip over dbg_value's.
226    while (I != E && I->isDebugValue())
227      ++I;
228
229    if (I == E)
230      return true;
231    if (I->modifiesRegister(PhysDef, TRI))
232      return false;
233
234    --LookAheadLeft;
235    ++I;
236  }
237
238  return false;
239}
240
241static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) {
242  unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
243  return TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) ||
244    MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg();
245}
246
247bool MachineCSE::isCSECandidate(MachineInstr *MI) {
248  if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
249      MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
250    return false;
251
252  // Ignore copies.
253  if (isCopy(MI, TII))
254    return false;
255
256  // Ignore stuff that we obviously can't move.
257  const TargetInstrDesc &TID = MI->getDesc();
258  if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
259      TID.hasUnmodeledSideEffects())
260    return false;
261
262  if (TID.mayLoad()) {
263    // Okay, this instruction does a load. As a refinement, we allow the target
264    // to decide whether the loaded value is actually a constant. If so, we can
265    // actually use it as a load.
266    if (!MI->isInvariantLoad(AA))
267      // FIXME: we should be able to hoist loads with no other side effects if
268      // there are no other instructions which can change memory in this loop.
269      // This is a trivial form of alias analysis.
270      return false;
271  }
272  return true;
273}
274
275/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
276/// common expression that defines Reg.
277bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
278                                   MachineInstr *CSMI, MachineInstr *MI) {
279  // FIXME: Heuristics that works around the lack the live range splitting.
280
281  // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an
282  // immediate predecessor. We don't want to increase register pressure and end up
283  // causing other computation to be spilled.
284  if (MI->getDesc().isAsCheapAsAMove()) {
285    MachineBasicBlock *CSBB = CSMI->getParent();
286    MachineBasicBlock *BB = MI->getParent();
287    if (CSBB != BB &&
288        find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end())
289      return false;
290  }
291
292  // Heuristics #2: If the expression doesn't not use a vr and the only use
293  // of the redundant computation are copies, do not cse.
294  bool HasVRegUse = false;
295  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
296    const MachineOperand &MO = MI->getOperand(i);
297    if (MO.isReg() && MO.isUse() && MO.getReg() &&
298        TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
299      HasVRegUse = true;
300      break;
301    }
302  }
303  if (!HasVRegUse) {
304    bool HasNonCopyUse = false;
305    for (MachineRegisterInfo::use_nodbg_iterator I =  MRI->use_nodbg_begin(Reg),
306           E = MRI->use_nodbg_end(); I != E; ++I) {
307      MachineInstr *Use = &*I;
308      // Ignore copies.
309      if (!isCopy(Use, TII)) {
310        HasNonCopyUse = true;
311        break;
312      }
313    }
314    if (!HasNonCopyUse)
315      return false;
316  }
317
318  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
319  // it unless the defined value is already used in the BB of the new use.
320  bool HasPHI = false;
321  SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
322  for (MachineRegisterInfo::use_nodbg_iterator I =  MRI->use_nodbg_begin(CSReg),
323       E = MRI->use_nodbg_end(); I != E; ++I) {
324    MachineInstr *Use = &*I;
325    HasPHI |= Use->isPHI();
326    CSBBs.insert(Use->getParent());
327  }
328
329  if (!HasPHI)
330    return true;
331  return CSBBs.count(MI->getParent());
332}
333
334void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
335  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
336  ScopeType *Scope = new ScopeType(VNT);
337  ScopeMap[MBB] = Scope;
338}
339
340void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
341  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
342  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
343  assert(SI != ScopeMap.end());
344  ScopeMap.erase(SI);
345  delete SI->second;
346}
347
348bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
349  bool Changed = false;
350
351  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
352  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
353    MachineInstr *MI = &*I;
354    ++I;
355
356    if (!isCSECandidate(MI))
357      continue;
358
359    bool FoundCSE = VNT.count(MI);
360    if (!FoundCSE) {
361      // Look for trivial copy coalescing opportunities.
362      if (PerformTrivialCoalescing(MI, MBB)) {
363        // After coalescing MI itself may become a copy.
364        if (isCopy(MI, TII))
365          continue;
366        FoundCSE = VNT.count(MI);
367      }
368    }
369    // FIXME: commute commutable instructions?
370
371    // If the instruction defines a physical register and the value *may* be
372    // used, then it's not safe to replace it with a common subexpression.
373    unsigned PhysDef = 0;
374    if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) {
375      FoundCSE = false;
376
377      // ... Unless the CS is local and it also defines the physical register
378      // which is not clobbered in between.
379      if (PhysDef && CSEPhysDef) {
380        unsigned CSVN = VNT.lookup(MI);
381        MachineInstr *CSMI = Exps[CSVN];
382        if (PhysRegDefReaches(CSMI, MI, PhysDef))
383          FoundCSE = true;
384      }
385    }
386
387    if (!FoundCSE) {
388      VNT.insert(MI, CurrVN++);
389      Exps.push_back(MI);
390      continue;
391    }
392
393    // Found a common subexpression, eliminate it.
394    unsigned CSVN = VNT.lookup(MI);
395    MachineInstr *CSMI = Exps[CSVN];
396    DEBUG(dbgs() << "Examining: " << *MI);
397    DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
398
399    // Check if it's profitable to perform this CSE.
400    bool DoCSE = true;
401    unsigned NumDefs = MI->getDesc().getNumDefs();
402    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
403      MachineOperand &MO = MI->getOperand(i);
404      if (!MO.isReg() || !MO.isDef())
405        continue;
406      unsigned OldReg = MO.getReg();
407      unsigned NewReg = CSMI->getOperand(i).getReg();
408      if (OldReg == NewReg)
409        continue;
410      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
411             TargetRegisterInfo::isVirtualRegister(NewReg) &&
412             "Do not CSE physical register defs!");
413      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
414        DoCSE = false;
415        break;
416      }
417      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
418      --NumDefs;
419    }
420
421    // Actually perform the elimination.
422    if (DoCSE) {
423      for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
424        MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
425        MRI->clearKillFlags(CSEPairs[i].second);
426      }
427      MI->eraseFromParent();
428      ++NumCSEs;
429    } else {
430      DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
431      VNT.insert(MI, CurrVN++);
432      Exps.push_back(MI);
433    }
434    CSEPairs.clear();
435  }
436
437  return Changed;
438}
439
440/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
441/// dominator tree node if its a leaf or all of its children are done. Walk
442/// up the dominator tree to destroy ancestors which are now done.
443void
444MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
445                DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
446                DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
447  if (OpenChildren[Node])
448    return;
449
450  // Pop scope.
451  ExitScope(Node->getBlock());
452
453  // Now traverse upwards to pop ancestors whose offsprings are all done.
454  while (MachineDomTreeNode *Parent = ParentMap[Node]) {
455    unsigned Left = --OpenChildren[Parent];
456    if (Left != 0)
457      break;
458    ExitScope(Parent->getBlock());
459    Node = Parent;
460  }
461}
462
463bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
464  SmallVector<MachineDomTreeNode*, 32> Scopes;
465  SmallVector<MachineDomTreeNode*, 8> WorkList;
466  DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
467  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
468
469  // Perform a DFS walk to determine the order of visit.
470  WorkList.push_back(Node);
471  do {
472    Node = WorkList.pop_back_val();
473    Scopes.push_back(Node);
474    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
475    unsigned NumChildren = Children.size();
476    OpenChildren[Node] = NumChildren;
477    for (unsigned i = 0; i != NumChildren; ++i) {
478      MachineDomTreeNode *Child = Children[i];
479      ParentMap[Child] = Node;
480      WorkList.push_back(Child);
481    }
482  } while (!WorkList.empty());
483
484  // Now perform CSE.
485  bool Changed = false;
486  for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
487    MachineDomTreeNode *Node = Scopes[i];
488    MachineBasicBlock *MBB = Node->getBlock();
489    EnterScope(MBB);
490    Changed |= ProcessBlock(MBB);
491    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
492    ExitScopeIfDone(Node, OpenChildren, ParentMap);
493  }
494
495  return Changed;
496}
497
498bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
499  TII = MF.getTarget().getInstrInfo();
500  TRI = MF.getTarget().getRegisterInfo();
501  MRI = &MF.getRegInfo();
502  AA = &getAnalysis<AliasAnalysis>();
503  DT = &getAnalysis<MachineDominatorTree>();
504  return PerformCSE(DT->getRootNode());
505}
506