MachineCSE.cpp revision a65d6a686e6ad865c61aec70c5bdfb30bf6f5b22
1//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs global common subexpression elimination on machine 11// instructions using a scoped hash table based value numbering scheme. It 12// must be run while the machine function is still in SSA form. 13// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "machine-cse" 17#include "llvm/CodeGen/Passes.h" 18#include "llvm/CodeGen/MachineDominators.h" 19#include "llvm/CodeGen/MachineInstr.h" 20#include "llvm/CodeGen/MachineRegisterInfo.h" 21#include "llvm/Analysis/AliasAnalysis.h" 22#include "llvm/Target/TargetInstrInfo.h" 23#include "llvm/ADT/DenseMap.h" 24#include "llvm/ADT/ScopedHashTable.h" 25#include "llvm/ADT/Statistic.h" 26#include "llvm/Support/CommandLine.h" 27#include "llvm/Support/Debug.h" 28 29using namespace llvm; 30 31STATISTIC(NumCoalesces, "Number of copies coalesced"); 32STATISTIC(NumCSEs, "Number of common subexpression eliminated"); 33STATISTIC(NumPhysCSEs, "Number of phyreg defining common subexpr eliminated"); 34 35namespace { 36 class MachineCSE : public MachineFunctionPass { 37 const TargetInstrInfo *TII; 38 const TargetRegisterInfo *TRI; 39 AliasAnalysis *AA; 40 MachineDominatorTree *DT; 41 MachineRegisterInfo *MRI; 42 public: 43 static char ID; // Pass identification 44 MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) { 45 initializeMachineCSEPass(*PassRegistry::getPassRegistry()); 46 } 47 48 virtual bool runOnMachineFunction(MachineFunction &MF); 49 50 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 51 AU.setPreservesCFG(); 52 MachineFunctionPass::getAnalysisUsage(AU); 53 AU.addRequired<AliasAnalysis>(); 54 AU.addPreservedID(MachineLoopInfoID); 55 AU.addRequired<MachineDominatorTree>(); 56 AU.addPreserved<MachineDominatorTree>(); 57 } 58 59 virtual void releaseMemory() { 60 ScopeMap.clear(); 61 Exps.clear(); 62 } 63 64 private: 65 const unsigned LookAheadLimit; 66 typedef ScopedHashTableScope<MachineInstr*, unsigned, 67 MachineInstrExpressionTrait> ScopeType; 68 DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; 69 ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; 70 SmallVector<MachineInstr*, 64> Exps; 71 unsigned CurrVN; 72 73 bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); 74 bool isPhysDefTriviallyDead(unsigned Reg, 75 MachineBasicBlock::const_iterator I, 76 MachineBasicBlock::const_iterator E) const ; 77 bool hasLivePhysRegDefUse(const MachineInstr *MI, 78 const MachineBasicBlock *MBB, 79 unsigned &PhysDef) const; 80 bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, 81 unsigned PhysDef) const; 82 bool isCSECandidate(MachineInstr *MI); 83 bool isProfitableToCSE(unsigned CSReg, unsigned Reg, 84 MachineInstr *CSMI, MachineInstr *MI); 85 void EnterScope(MachineBasicBlock *MBB); 86 void ExitScope(MachineBasicBlock *MBB); 87 bool ProcessBlock(MachineBasicBlock *MBB); 88 void ExitScopeIfDone(MachineDomTreeNode *Node, 89 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 90 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); 91 bool PerformCSE(MachineDomTreeNode *Node); 92 }; 93} // end anonymous namespace 94 95char MachineCSE::ID = 0; 96INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", 97 "Machine Common Subexpression Elimination", false, false) 98INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 99INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 100INITIALIZE_PASS_END(MachineCSE, "machine-cse", 101 "Machine Common Subexpression Elimination", false, false) 102 103FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } 104 105bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, 106 MachineBasicBlock *MBB) { 107 bool Changed = false; 108 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 109 MachineOperand &MO = MI->getOperand(i); 110 if (!MO.isReg() || !MO.isUse()) 111 continue; 112 unsigned Reg = MO.getReg(); 113 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 114 continue; 115 if (!MRI->hasOneNonDBGUse(Reg)) 116 // Only coalesce single use copies. This ensure the copy will be 117 // deleted. 118 continue; 119 MachineInstr *DefMI = MRI->getVRegDef(Reg); 120 if (DefMI->getParent() != MBB) 121 continue; 122 if (!DefMI->isCopy()) 123 continue; 124 unsigned SrcReg = DefMI->getOperand(1).getReg(); 125 if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) 126 continue; 127 if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) 128 continue; 129 if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg))) 130 continue; 131 DEBUG(dbgs() << "Coalescing: " << *DefMI); 132 DEBUG(dbgs() << "*** to: " << *MI); 133 MO.setReg(SrcReg); 134 MRI->clearKillFlags(SrcReg); 135 DefMI->eraseFromParent(); 136 ++NumCoalesces; 137 Changed = true; 138 } 139 140 return Changed; 141} 142 143bool 144MachineCSE::isPhysDefTriviallyDead(unsigned Reg, 145 MachineBasicBlock::const_iterator I, 146 MachineBasicBlock::const_iterator E) const { 147 unsigned LookAheadLeft = LookAheadLimit; 148 while (LookAheadLeft) { 149 // Skip over dbg_value's. 150 while (I != E && I->isDebugValue()) 151 ++I; 152 153 if (I == E) 154 // Reached end of block, register is obviously dead. 155 return true; 156 157 bool SeenDef = false; 158 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { 159 const MachineOperand &MO = I->getOperand(i); 160 if (!MO.isReg() || !MO.getReg()) 161 continue; 162 if (!TRI->regsOverlap(MO.getReg(), Reg)) 163 continue; 164 if (MO.isUse()) 165 // Found a use! 166 return false; 167 SeenDef = true; 168 } 169 if (SeenDef) 170 // See a def of Reg (or an alias) before encountering any use, it's 171 // trivially dead. 172 return true; 173 174 --LookAheadLeft; 175 ++I; 176 } 177 return false; 178} 179 180/// hasLivePhysRegDefUse - Return true if the specified instruction read / write 181/// physical registers (except for dead defs of physical registers). It also 182/// returns the physical register def by reference if it's the only one and the 183/// instruction does not uses a physical register. 184bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI, 185 const MachineBasicBlock *MBB, 186 unsigned &PhysDef) const { 187 PhysDef = 0; 188 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 189 const MachineOperand &MO = MI->getOperand(i); 190 if (!MO.isReg()) 191 continue; 192 unsigned Reg = MO.getReg(); 193 if (!Reg) 194 continue; 195 if (TargetRegisterInfo::isVirtualRegister(Reg)) 196 continue; 197 if (MO.isUse()) { 198 // Can't touch anything to read a physical register. 199 PhysDef = 0; 200 return true; 201 } 202 if (MO.isDead()) 203 // If the def is dead, it's ok. 204 continue; 205 // Ok, this is a physical register def that's not marked "dead". That's 206 // common since this pass is run before livevariables. We can scan 207 // forward a few instructions and check if it is obviously dead. 208 if (PhysDef) { 209 // Multiple physical register defs. These are rare, forget about it. 210 PhysDef = 0; 211 return true; 212 } 213 PhysDef = Reg; 214 } 215 216 if (PhysDef) { 217 MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); 218 if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end())) 219 return true; 220 } 221 return false; 222} 223 224bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, 225 unsigned PhysDef) const { 226 // For now conservatively returns false if the common subexpression is 227 // not in the same basic block as the given instruction. 228 MachineBasicBlock *MBB = MI->getParent(); 229 if (CSMI->getParent() != MBB) 230 return false; 231 MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); 232 MachineBasicBlock::const_iterator E = MI; 233 unsigned LookAheadLeft = LookAheadLimit; 234 while (LookAheadLeft) { 235 // Skip over dbg_value's. 236 while (I != E && I->isDebugValue()) 237 ++I; 238 239 if (I == E) 240 return true; 241 if (I->modifiesRegister(PhysDef, TRI)) 242 return false; 243 244 --LookAheadLeft; 245 ++I; 246 } 247 248 return false; 249} 250 251bool MachineCSE::isCSECandidate(MachineInstr *MI) { 252 if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || 253 MI->isKill() || MI->isInlineAsm() || MI->isDebugValue()) 254 return false; 255 256 // Ignore copies. 257 if (MI->isCopyLike()) 258 return false; 259 260 // Ignore stuff that we obviously can't move. 261 const TargetInstrDesc &TID = MI->getDesc(); 262 if (TID.mayStore() || TID.isCall() || TID.isTerminator() || 263 TID.hasUnmodeledSideEffects()) 264 return false; 265 266 if (TID.mayLoad()) { 267 // Okay, this instruction does a load. As a refinement, we allow the target 268 // to decide whether the loaded value is actually a constant. If so, we can 269 // actually use it as a load. 270 if (!MI->isInvariantLoad(AA)) 271 // FIXME: we should be able to hoist loads with no other side effects if 272 // there are no other instructions which can change memory in this loop. 273 // This is a trivial form of alias analysis. 274 return false; 275 } 276 return true; 277} 278 279/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a 280/// common expression that defines Reg. 281bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, 282 MachineInstr *CSMI, MachineInstr *MI) { 283 // FIXME: Heuristics that works around the lack the live range splitting. 284 285 // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an 286 // immediate predecessor. We don't want to increase register pressure and end up 287 // causing other computation to be spilled. 288 if (MI->getDesc().isAsCheapAsAMove()) { 289 MachineBasicBlock *CSBB = CSMI->getParent(); 290 MachineBasicBlock *BB = MI->getParent(); 291 if (CSBB != BB && 292 find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end()) 293 return false; 294 } 295 296 // Heuristics #2: If the expression doesn't not use a vr and the only use 297 // of the redundant computation are copies, do not cse. 298 bool HasVRegUse = false; 299 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 300 const MachineOperand &MO = MI->getOperand(i); 301 if (MO.isReg() && MO.isUse() && MO.getReg() && 302 TargetRegisterInfo::isVirtualRegister(MO.getReg())) { 303 HasVRegUse = true; 304 break; 305 } 306 } 307 if (!HasVRegUse) { 308 bool HasNonCopyUse = false; 309 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), 310 E = MRI->use_nodbg_end(); I != E; ++I) { 311 MachineInstr *Use = &*I; 312 // Ignore copies. 313 if (!Use->isCopyLike()) { 314 HasNonCopyUse = true; 315 break; 316 } 317 } 318 if (!HasNonCopyUse) 319 return false; 320 } 321 322 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse 323 // it unless the defined value is already used in the BB of the new use. 324 bool HasPHI = false; 325 SmallPtrSet<MachineBasicBlock*, 4> CSBBs; 326 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg), 327 E = MRI->use_nodbg_end(); I != E; ++I) { 328 MachineInstr *Use = &*I; 329 HasPHI |= Use->isPHI(); 330 CSBBs.insert(Use->getParent()); 331 } 332 333 if (!HasPHI) 334 return true; 335 return CSBBs.count(MI->getParent()); 336} 337 338void MachineCSE::EnterScope(MachineBasicBlock *MBB) { 339 DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 340 ScopeType *Scope = new ScopeType(VNT); 341 ScopeMap[MBB] = Scope; 342} 343 344void MachineCSE::ExitScope(MachineBasicBlock *MBB) { 345 DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 346 DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); 347 assert(SI != ScopeMap.end()); 348 ScopeMap.erase(SI); 349 delete SI->second; 350} 351 352bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { 353 bool Changed = false; 354 355 SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; 356 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { 357 MachineInstr *MI = &*I; 358 ++I; 359 360 if (!isCSECandidate(MI)) 361 continue; 362 363 bool DefPhys = false; 364 bool FoundCSE = VNT.count(MI); 365 if (!FoundCSE) { 366 // Look for trivial copy coalescing opportunities. 367 if (PerformTrivialCoalescing(MI, MBB)) { 368 // After coalescing MI itself may become a copy. 369 if (MI->isCopyLike()) 370 continue; 371 FoundCSE = VNT.count(MI); 372 } 373 } 374 // FIXME: commute commutable instructions? 375 376 // If the instruction defines a physical register and the value *may* be 377 // used, then it's not safe to replace it with a common subexpression. 378 unsigned PhysDef = 0; 379 if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) { 380 FoundCSE = false; 381 382 // ... Unless the CS is local and it also defines the physical register 383 // which is not clobbered in between. 384 if (PhysDef) { 385 unsigned CSVN = VNT.lookup(MI); 386 MachineInstr *CSMI = Exps[CSVN]; 387 if (PhysRegDefReaches(CSMI, MI, PhysDef)) { 388 FoundCSE = true; 389 DefPhys = true; 390 } 391 } 392 } 393 394 if (!FoundCSE) { 395 VNT.insert(MI, CurrVN++); 396 Exps.push_back(MI); 397 continue; 398 } 399 400 // Found a common subexpression, eliminate it. 401 unsigned CSVN = VNT.lookup(MI); 402 MachineInstr *CSMI = Exps[CSVN]; 403 DEBUG(dbgs() << "Examining: " << *MI); 404 DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); 405 406 // Check if it's profitable to perform this CSE. 407 bool DoCSE = true; 408 unsigned NumDefs = MI->getDesc().getNumDefs(); 409 for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { 410 MachineOperand &MO = MI->getOperand(i); 411 if (!MO.isReg() || !MO.isDef()) 412 continue; 413 unsigned OldReg = MO.getReg(); 414 unsigned NewReg = CSMI->getOperand(i).getReg(); 415 if (OldReg == NewReg) 416 continue; 417 assert(TargetRegisterInfo::isVirtualRegister(OldReg) && 418 TargetRegisterInfo::isVirtualRegister(NewReg) && 419 "Do not CSE physical register defs!"); 420 if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { 421 DoCSE = false; 422 break; 423 } 424 CSEPairs.push_back(std::make_pair(OldReg, NewReg)); 425 --NumDefs; 426 } 427 428 // Actually perform the elimination. 429 if (DoCSE) { 430 for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) { 431 MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); 432 MRI->clearKillFlags(CSEPairs[i].second); 433 } 434 MI->eraseFromParent(); 435 ++NumCSEs; 436 if (DefPhys) 437 ++NumPhysCSEs; 438 } else { 439 DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); 440 VNT.insert(MI, CurrVN++); 441 Exps.push_back(MI); 442 } 443 CSEPairs.clear(); 444 } 445 446 return Changed; 447} 448 449/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 450/// dominator tree node if its a leaf or all of its children are done. Walk 451/// up the dominator tree to destroy ancestors which are now done. 452void 453MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, 454 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 455 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 456 if (OpenChildren[Node]) 457 return; 458 459 // Pop scope. 460 ExitScope(Node->getBlock()); 461 462 // Now traverse upwards to pop ancestors whose offsprings are all done. 463 while (MachineDomTreeNode *Parent = ParentMap[Node]) { 464 unsigned Left = --OpenChildren[Parent]; 465 if (Left != 0) 466 break; 467 ExitScope(Parent->getBlock()); 468 Node = Parent; 469 } 470} 471 472bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { 473 SmallVector<MachineDomTreeNode*, 32> Scopes; 474 SmallVector<MachineDomTreeNode*, 8> WorkList; 475 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 476 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 477 478 CurrVN = 0; 479 480 // Perform a DFS walk to determine the order of visit. 481 WorkList.push_back(Node); 482 do { 483 Node = WorkList.pop_back_val(); 484 Scopes.push_back(Node); 485 const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 486 unsigned NumChildren = Children.size(); 487 OpenChildren[Node] = NumChildren; 488 for (unsigned i = 0; i != NumChildren; ++i) { 489 MachineDomTreeNode *Child = Children[i]; 490 ParentMap[Child] = Node; 491 WorkList.push_back(Child); 492 } 493 } while (!WorkList.empty()); 494 495 // Now perform CSE. 496 bool Changed = false; 497 for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { 498 MachineDomTreeNode *Node = Scopes[i]; 499 MachineBasicBlock *MBB = Node->getBlock(); 500 EnterScope(MBB); 501 Changed |= ProcessBlock(MBB); 502 // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 503 ExitScopeIfDone(Node, OpenChildren, ParentMap); 504 } 505 506 return Changed; 507} 508 509bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { 510 TII = MF.getTarget().getInstrInfo(); 511 TRI = MF.getTarget().getRegisterInfo(); 512 MRI = &MF.getRegInfo(); 513 AA = &getAnalysis<AliasAnalysis>(); 514 DT = &getAnalysis<MachineDominatorTree>(); 515 return PerformCSE(DT->getRootNode()); 516} 517