MachineCSE.cpp revision 9d709a8edb696e89b33cd3c2886c8db735b02318
1//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs global common subexpression elimination on machine 11// instructions using a scoped hash table based value numbering scheme. It 12// must be run while the machine function is still in SSA form. 13// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "machine-cse" 17#include "llvm/CodeGen/Passes.h" 18#include "llvm/CodeGen/MachineDominators.h" 19#include "llvm/CodeGen/MachineInstr.h" 20#include "llvm/CodeGen/MachineRegisterInfo.h" 21#include "llvm/Analysis/AliasAnalysis.h" 22#include "llvm/Target/TargetInstrInfo.h" 23#include "llvm/ADT/DenseMap.h" 24#include "llvm/ADT/ScopedHashTable.h" 25#include "llvm/ADT/Statistic.h" 26#include "llvm/Support/CommandLine.h" 27#include "llvm/Support/Debug.h" 28 29using namespace llvm; 30 31STATISTIC(NumCoalesces, "Number of copies coalesced"); 32STATISTIC(NumCSEs, "Number of common subexpression eliminated"); 33 34namespace { 35 class MachineCSE : public MachineFunctionPass { 36 const TargetInstrInfo *TII; 37 const TargetRegisterInfo *TRI; 38 AliasAnalysis *AA; 39 MachineDominatorTree *DT; 40 MachineRegisterInfo *MRI; 41 public: 42 static char ID; // Pass identification 43 MachineCSE() : MachineFunctionPass(&ID), LookAheadLimit(5), CurrVN(0) {} 44 45 virtual bool runOnMachineFunction(MachineFunction &MF); 46 47 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 48 AU.setPreservesCFG(); 49 MachineFunctionPass::getAnalysisUsage(AU); 50 AU.addRequired<AliasAnalysis>(); 51 AU.addRequired<MachineDominatorTree>(); 52 AU.addPreserved<MachineDominatorTree>(); 53 } 54 55 private: 56 const unsigned LookAheadLimit; 57 typedef ScopedHashTableScope<MachineInstr*, unsigned, 58 MachineInstrExpressionTrait> ScopeType; 59 DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; 60 ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; 61 SmallVector<MachineInstr*, 64> Exps; 62 unsigned CurrVN; 63 64 bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); 65 bool isPhysDefTriviallyDead(unsigned Reg, 66 MachineBasicBlock::const_iterator I, 67 MachineBasicBlock::const_iterator E) const ; 68 bool hasLivePhysRegDefUse(const MachineInstr *MI, 69 const MachineBasicBlock *MBB, 70 unsigned &PhysDef) const; 71 bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, 72 unsigned PhysDef) const; 73 bool isCSECandidate(MachineInstr *MI); 74 bool isProfitableToCSE(unsigned CSReg, unsigned Reg, 75 MachineInstr *CSMI, MachineInstr *MI); 76 void EnterScope(MachineBasicBlock *MBB); 77 void ExitScope(MachineBasicBlock *MBB); 78 bool ProcessBlock(MachineBasicBlock *MBB); 79 void ExitScopeIfDone(MachineDomTreeNode *Node, 80 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 81 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); 82 bool PerformCSE(MachineDomTreeNode *Node); 83 }; 84} // end anonymous namespace 85 86char MachineCSE::ID = 0; 87static RegisterPass<MachineCSE> 88X("machine-cse", "Machine Common Subexpression Elimination"); 89 90FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } 91 92bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, 93 MachineBasicBlock *MBB) { 94 bool Changed = false; 95 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 96 MachineOperand &MO = MI->getOperand(i); 97 if (!MO.isReg() || !MO.isUse()) 98 continue; 99 unsigned Reg = MO.getReg(); 100 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 101 continue; 102 if (!MRI->hasOneUse(Reg)) 103 // Only coalesce single use copies. This ensure the copy will be 104 // deleted. 105 continue; 106 MachineInstr *DefMI = MRI->getVRegDef(Reg); 107 if (DefMI->getParent() != MBB) 108 continue; 109 unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; 110 if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && 111 TargetRegisterInfo::isVirtualRegister(SrcReg) && 112 !SrcSubIdx && !DstSubIdx) { 113 const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); 114 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 115 const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC); 116 if (!NewRC) 117 continue; 118 DEBUG(dbgs() << "Coalescing: " << *DefMI); 119 DEBUG(dbgs() << "*** to: " << *MI); 120 MO.setReg(SrcReg); 121 MRI->clearKillFlags(SrcReg); 122 if (NewRC != SRC) 123 MRI->setRegClass(SrcReg, NewRC); 124 DefMI->eraseFromParent(); 125 ++NumCoalesces; 126 Changed = true; 127 } 128 } 129 130 return Changed; 131} 132 133bool 134MachineCSE::isPhysDefTriviallyDead(unsigned Reg, 135 MachineBasicBlock::const_iterator I, 136 MachineBasicBlock::const_iterator E) const { 137 unsigned LookAheadLeft = LookAheadLimit; 138 while (LookAheadLeft) { 139 // Skip over dbg_value's. 140 while (I != E && I->isDebugValue()) 141 ++I; 142 143 if (I == E) 144 // Reached end of block, register is obviously dead. 145 return true; 146 147 bool SeenDef = false; 148 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { 149 const MachineOperand &MO = I->getOperand(i); 150 if (!MO.isReg() || !MO.getReg()) 151 continue; 152 if (!TRI->regsOverlap(MO.getReg(), Reg)) 153 continue; 154 if (MO.isUse()) 155 // Found a use! 156 return false; 157 SeenDef = true; 158 } 159 if (SeenDef) 160 // See a def of Reg (or an alias) before encountering any use, it's 161 // trivially dead. 162 return true; 163 164 --LookAheadLeft; 165 ++I; 166 } 167 return false; 168} 169 170/// hasLivePhysRegDefUse - Return true if the specified instruction read / write 171/// physical registers (except for dead defs of physical registers). It also 172/// returns the physical register def by reference if it's the only one. 173bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI, 174 const MachineBasicBlock *MBB, 175 unsigned &PhysDef) const { 176 PhysDef = 0; 177 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 178 const MachineOperand &MO = MI->getOperand(i); 179 if (!MO.isReg()) 180 continue; 181 unsigned Reg = MO.getReg(); 182 if (!Reg) 183 continue; 184 if (TargetRegisterInfo::isVirtualRegister(Reg)) 185 continue; 186 if (MO.isUse()) 187 // Can't touch anything to read a physical register. 188 return true; 189 if (MO.isDead()) 190 // If the def is dead, it's ok. 191 continue; 192 // Ok, this is a physical register def that's not marked "dead". That's 193 // common since this pass is run before livevariables. We can scan 194 // forward a few instructions and check if it is obviously dead. 195 if (PhysDef) { 196 // Multiple physical register defs. These are rare, forget about it. 197 PhysDef = 0; 198 return true; 199 } 200 PhysDef = Reg; 201 } 202 203 if (PhysDef) { 204 MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); 205 if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end())) 206 return true; 207 } 208 return false; 209} 210 211bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, 212 unsigned PhysDef) const { 213 // For now conservatively returns false if the common subexpression is 214 // not in the same basic block as the given instruction. 215 MachineBasicBlock *MBB = MI->getParent(); 216 if (CSMI->getParent() != MBB) 217 return false; 218 MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); 219 MachineBasicBlock::const_iterator E = MI; 220 unsigned LookAheadLeft = LookAheadLimit; 221 while (LookAheadLeft) { 222 // Skip over dbg_value's. 223 while (I != E && I->isDebugValue()) 224 ++I; 225 226 if (I == E) 227 return true; 228 if (I->modifiesRegister(PhysDef, TRI)) 229 return false; 230 231 --LookAheadLeft; 232 ++I; 233 } 234 235 return false; 236} 237 238static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) { 239 unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; 240 return TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) || 241 MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg(); 242} 243 244bool MachineCSE::isCSECandidate(MachineInstr *MI) { 245 if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || 246 MI->isKill() || MI->isInlineAsm() || MI->isDebugValue()) 247 return false; 248 249 // Ignore copies. 250 if (isCopy(MI, TII)) 251 return false; 252 253 // Ignore stuff that we obviously can't move. 254 const TargetInstrDesc &TID = MI->getDesc(); 255 if (TID.mayStore() || TID.isCall() || TID.isTerminator() || 256 TID.hasUnmodeledSideEffects()) 257 return false; 258 259 if (TID.mayLoad()) { 260 // Okay, this instruction does a load. As a refinement, we allow the target 261 // to decide whether the loaded value is actually a constant. If so, we can 262 // actually use it as a load. 263 if (!MI->isInvariantLoad(AA)) 264 // FIXME: we should be able to hoist loads with no other side effects if 265 // there are no other instructions which can change memory in this loop. 266 // This is a trivial form of alias analysis. 267 return false; 268 } 269 return true; 270} 271 272/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a 273/// common expression that defines Reg. 274bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, 275 MachineInstr *CSMI, MachineInstr *MI) { 276 // FIXME: Heuristics that works around the lack the live range splitting. 277 278 // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an 279 // immediate predecessor. We don't want to increase register pressure and end up 280 // causing other computation to be spilled. 281 if (MI->getDesc().isAsCheapAsAMove()) { 282 MachineBasicBlock *CSBB = CSMI->getParent(); 283 MachineBasicBlock *BB = MI->getParent(); 284 if (CSBB != BB && 285 find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end()) 286 return false; 287 } 288 289 // Heuristics #2: If the expression doesn't not use a vr and the only use 290 // of the redundant computation are copies, do not cse. 291 bool HasVRegUse = false; 292 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 293 const MachineOperand &MO = MI->getOperand(i); 294 if (MO.isReg() && MO.isUse() && MO.getReg() && 295 TargetRegisterInfo::isVirtualRegister(MO.getReg())) { 296 HasVRegUse = true; 297 break; 298 } 299 } 300 if (!HasVRegUse) { 301 bool HasNonCopyUse = false; 302 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), 303 E = MRI->use_nodbg_end(); I != E; ++I) { 304 MachineInstr *Use = &*I; 305 // Ignore copies. 306 if (!isCopy(Use, TII)) { 307 HasNonCopyUse = true; 308 break; 309 } 310 } 311 if (!HasNonCopyUse) 312 return false; 313 } 314 315 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse 316 // it unless the defined value is already used in the BB of the new use. 317 bool HasPHI = false; 318 SmallPtrSet<MachineBasicBlock*, 4> CSBBs; 319 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg), 320 E = MRI->use_nodbg_end(); I != E; ++I) { 321 MachineInstr *Use = &*I; 322 HasPHI |= Use->isPHI(); 323 CSBBs.insert(Use->getParent()); 324 } 325 326 if (!HasPHI) 327 return true; 328 return CSBBs.count(MI->getParent()); 329} 330 331void MachineCSE::EnterScope(MachineBasicBlock *MBB) { 332 DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 333 ScopeType *Scope = new ScopeType(VNT); 334 ScopeMap[MBB] = Scope; 335} 336 337void MachineCSE::ExitScope(MachineBasicBlock *MBB) { 338 DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 339 DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); 340 assert(SI != ScopeMap.end()); 341 ScopeMap.erase(SI); 342 delete SI->second; 343} 344 345bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { 346 bool Changed = false; 347 348 SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; 349 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { 350 MachineInstr *MI = &*I; 351 ++I; 352 353 if (!isCSECandidate(MI)) 354 continue; 355 356 bool FoundCSE = VNT.count(MI); 357 if (!FoundCSE) { 358 // Look for trivial copy coalescing opportunities. 359 if (PerformTrivialCoalescing(MI, MBB)) { 360 // After coalescing MI itself may become a copy. 361 if (isCopy(MI, TII)) 362 continue; 363 FoundCSE = VNT.count(MI); 364 } 365 } 366 // FIXME: commute commutable instructions? 367 368 // If the instruction defines a physical register and the value *may* be 369 // used, then it's not safe to replace it with a common subexpression. 370 unsigned PhysDef = 0; 371 if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) { 372 FoundCSE = false; 373 374 // ... Unless the CS is local and it also defines the physical register 375 // which is not clobbered in between. 376 if (PhysDef) { 377 unsigned CSVN = VNT.lookup(MI); 378 MachineInstr *CSMI = Exps[CSVN]; 379 if (PhysRegDefReaches(CSMI, MI, PhysDef)) 380 FoundCSE = true; 381 } 382 } 383 384 if (!FoundCSE) { 385 VNT.insert(MI, CurrVN++); 386 Exps.push_back(MI); 387 continue; 388 } 389 390 // Found a common subexpression, eliminate it. 391 unsigned CSVN = VNT.lookup(MI); 392 MachineInstr *CSMI = Exps[CSVN]; 393 DEBUG(dbgs() << "Examining: " << *MI); 394 DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); 395 396 // Check if it's profitable to perform this CSE. 397 bool DoCSE = true; 398 unsigned NumDefs = MI->getDesc().getNumDefs(); 399 for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { 400 MachineOperand &MO = MI->getOperand(i); 401 if (!MO.isReg() || !MO.isDef()) 402 continue; 403 unsigned OldReg = MO.getReg(); 404 unsigned NewReg = CSMI->getOperand(i).getReg(); 405 if (OldReg == NewReg) 406 continue; 407 assert(TargetRegisterInfo::isVirtualRegister(OldReg) && 408 TargetRegisterInfo::isVirtualRegister(NewReg) && 409 "Do not CSE physical register defs!"); 410 if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { 411 DoCSE = false; 412 break; 413 } 414 CSEPairs.push_back(std::make_pair(OldReg, NewReg)); 415 --NumDefs; 416 } 417 418 // Actually perform the elimination. 419 if (DoCSE) { 420 for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) { 421 MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); 422 MRI->clearKillFlags(CSEPairs[i].second); 423 } 424 MI->eraseFromParent(); 425 ++NumCSEs; 426 } else { 427 DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); 428 VNT.insert(MI, CurrVN++); 429 Exps.push_back(MI); 430 } 431 CSEPairs.clear(); 432 } 433 434 return Changed; 435} 436 437/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 438/// dominator tree node if its a leaf or all of its children are done. Walk 439/// up the dominator tree to destroy ancestors which are now done. 440void 441MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, 442 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 443 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 444 if (OpenChildren[Node]) 445 return; 446 447 // Pop scope. 448 ExitScope(Node->getBlock()); 449 450 // Now traverse upwards to pop ancestors whose offsprings are all done. 451 while (MachineDomTreeNode *Parent = ParentMap[Node]) { 452 unsigned Left = --OpenChildren[Parent]; 453 if (Left != 0) 454 break; 455 ExitScope(Parent->getBlock()); 456 Node = Parent; 457 } 458} 459 460bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { 461 SmallVector<MachineDomTreeNode*, 32> Scopes; 462 SmallVector<MachineDomTreeNode*, 8> WorkList; 463 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 464 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 465 466 // Perform a DFS walk to determine the order of visit. 467 WorkList.push_back(Node); 468 do { 469 Node = WorkList.pop_back_val(); 470 Scopes.push_back(Node); 471 const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 472 unsigned NumChildren = Children.size(); 473 OpenChildren[Node] = NumChildren; 474 for (unsigned i = 0; i != NumChildren; ++i) { 475 MachineDomTreeNode *Child = Children[i]; 476 ParentMap[Child] = Node; 477 WorkList.push_back(Child); 478 } 479 } while (!WorkList.empty()); 480 481 // Now perform CSE. 482 bool Changed = false; 483 for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { 484 MachineDomTreeNode *Node = Scopes[i]; 485 MachineBasicBlock *MBB = Node->getBlock(); 486 EnterScope(MBB); 487 Changed |= ProcessBlock(MBB); 488 // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 489 ExitScopeIfDone(Node, OpenChildren, ParentMap); 490 } 491 492 return Changed; 493} 494 495bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { 496 TII = MF.getTarget().getInstrInfo(); 497 TRI = MF.getTarget().getRegisterInfo(); 498 MRI = &MF.getRegInfo(); 499 AA = &getAnalysis<AliasAnalysis>(); 500 DT = &getAnalysis<MachineDominatorTree>(); 501 return PerformCSE(DT->getRootNode()); 502} 503