MachineCSE.cpp revision 1cc1bc86986faee8a7f52ff338671b5eb7371e58
1//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs global common subexpression elimination on machine 11// instructions using a scoped hash table based value numbering scheme. It 12// must be run while the machine function is still in SSA form. 13// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "machine-cse" 17#include "llvm/CodeGen/Passes.h" 18#include "llvm/CodeGen/MachineDominators.h" 19#include "llvm/CodeGen/MachineInstr.h" 20#include "llvm/CodeGen/MachineRegisterInfo.h" 21#include "llvm/Analysis/AliasAnalysis.h" 22#include "llvm/Target/TargetInstrInfo.h" 23#include "llvm/ADT/DenseMap.h" 24#include "llvm/ADT/ScopedHashTable.h" 25#include "llvm/ADT/Statistic.h" 26#include "llvm/Support/CommandLine.h" 27#include "llvm/Support/Debug.h" 28 29using namespace llvm; 30 31STATISTIC(NumCoalesces, "Number of copies coalesced"); 32STATISTIC(NumCSEs, "Number of common subexpression eliminated"); 33STATISTIC(NumPhysCSEs, "Number of phyreg defining common subexpr eliminated"); 34 35namespace { 36 class MachineCSE : public MachineFunctionPass { 37 const TargetInstrInfo *TII; 38 const TargetRegisterInfo *TRI; 39 AliasAnalysis *AA; 40 MachineDominatorTree *DT; 41 MachineRegisterInfo *MRI; 42 public: 43 static char ID; // Pass identification 44 MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {} 45 46 virtual bool runOnMachineFunction(MachineFunction &MF); 47 48 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 49 AU.setPreservesCFG(); 50 MachineFunctionPass::getAnalysisUsage(AU); 51 AU.addRequired<AliasAnalysis>(); 52 AU.addPreservedID(MachineLoopInfoID); 53 AU.addRequired<MachineDominatorTree>(); 54 AU.addPreserved<MachineDominatorTree>(); 55 } 56 57 private: 58 const unsigned LookAheadLimit; 59 typedef ScopedHashTableScope<MachineInstr*, unsigned, 60 MachineInstrExpressionTrait> ScopeType; 61 DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; 62 ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; 63 SmallVector<MachineInstr*, 64> Exps; 64 unsigned CurrVN; 65 66 bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); 67 bool isPhysDefTriviallyDead(unsigned Reg, 68 MachineBasicBlock::const_iterator I, 69 MachineBasicBlock::const_iterator E) const ; 70 bool hasLivePhysRegDefUse(const MachineInstr *MI, 71 const MachineBasicBlock *MBB, 72 unsigned &PhysDef) const; 73 bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, 74 unsigned PhysDef) const; 75 bool isCSECandidate(MachineInstr *MI); 76 bool isProfitableToCSE(unsigned CSReg, unsigned Reg, 77 MachineInstr *CSMI, MachineInstr *MI); 78 void EnterScope(MachineBasicBlock *MBB); 79 void ExitScope(MachineBasicBlock *MBB); 80 bool ProcessBlock(MachineBasicBlock *MBB); 81 void ExitScopeIfDone(MachineDomTreeNode *Node, 82 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 83 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); 84 bool PerformCSE(MachineDomTreeNode *Node); 85 }; 86} // end anonymous namespace 87 88char MachineCSE::ID = 0; 89INITIALIZE_PASS(MachineCSE, "machine-cse", 90 "Machine Common Subexpression Elimination", false, false); 91 92FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } 93 94bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, 95 MachineBasicBlock *MBB) { 96 bool Changed = false; 97 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 98 MachineOperand &MO = MI->getOperand(i); 99 if (!MO.isReg() || !MO.isUse()) 100 continue; 101 unsigned Reg = MO.getReg(); 102 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 103 continue; 104 if (!MRI->hasOneNonDBGUse(Reg)) 105 // Only coalesce single use copies. This ensure the copy will be 106 // deleted. 107 continue; 108 MachineInstr *DefMI = MRI->getVRegDef(Reg); 109 if (DefMI->getParent() != MBB) 110 continue; 111 if (!DefMI->isCopy()) 112 continue; 113 unsigned SrcReg = DefMI->getOperand(1).getReg(); 114 if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) 115 continue; 116 if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) 117 continue; 118 const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); 119 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 120 const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC); 121 if (!NewRC) 122 continue; 123 DEBUG(dbgs() << "Coalescing: " << *DefMI); 124 DEBUG(dbgs() << "*** to: " << *MI); 125 MO.setReg(SrcReg); 126 MRI->clearKillFlags(SrcReg); 127 if (NewRC != SRC) 128 MRI->setRegClass(SrcReg, NewRC); 129 DefMI->eraseFromParent(); 130 ++NumCoalesces; 131 Changed = true; 132 } 133 134 return Changed; 135} 136 137bool 138MachineCSE::isPhysDefTriviallyDead(unsigned Reg, 139 MachineBasicBlock::const_iterator I, 140 MachineBasicBlock::const_iterator E) const { 141 unsigned LookAheadLeft = LookAheadLimit; 142 while (LookAheadLeft) { 143 // Skip over dbg_value's. 144 while (I != E && I->isDebugValue()) 145 ++I; 146 147 if (I == E) 148 // Reached end of block, register is obviously dead. 149 return true; 150 151 bool SeenDef = false; 152 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { 153 const MachineOperand &MO = I->getOperand(i); 154 if (!MO.isReg() || !MO.getReg()) 155 continue; 156 if (!TRI->regsOverlap(MO.getReg(), Reg)) 157 continue; 158 if (MO.isUse()) 159 // Found a use! 160 return false; 161 SeenDef = true; 162 } 163 if (SeenDef) 164 // See a def of Reg (or an alias) before encountering any use, it's 165 // trivially dead. 166 return true; 167 168 --LookAheadLeft; 169 ++I; 170 } 171 return false; 172} 173 174/// hasLivePhysRegDefUse - Return true if the specified instruction read / write 175/// physical registers (except for dead defs of physical registers). It also 176/// returns the physical register def by reference if it's the only one and the 177/// instruction does not uses a physical register. 178bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI, 179 const MachineBasicBlock *MBB, 180 unsigned &PhysDef) const { 181 PhysDef = 0; 182 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 183 const MachineOperand &MO = MI->getOperand(i); 184 if (!MO.isReg()) 185 continue; 186 unsigned Reg = MO.getReg(); 187 if (!Reg) 188 continue; 189 if (TargetRegisterInfo::isVirtualRegister(Reg)) 190 continue; 191 if (MO.isUse()) { 192 // Can't touch anything to read a physical register. 193 PhysDef = 0; 194 return true; 195 } 196 if (MO.isDead()) 197 // If the def is dead, it's ok. 198 continue; 199 // Ok, this is a physical register def that's not marked "dead". That's 200 // common since this pass is run before livevariables. We can scan 201 // forward a few instructions and check if it is obviously dead. 202 if (PhysDef) { 203 // Multiple physical register defs. These are rare, forget about it. 204 PhysDef = 0; 205 return true; 206 } 207 PhysDef = Reg; 208 } 209 210 if (PhysDef) { 211 MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); 212 if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end())) 213 return true; 214 } 215 return false; 216} 217 218bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, 219 unsigned PhysDef) const { 220 // For now conservatively returns false if the common subexpression is 221 // not in the same basic block as the given instruction. 222 MachineBasicBlock *MBB = MI->getParent(); 223 if (CSMI->getParent() != MBB) 224 return false; 225 MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); 226 MachineBasicBlock::const_iterator E = MI; 227 unsigned LookAheadLeft = LookAheadLimit; 228 while (LookAheadLeft) { 229 // Skip over dbg_value's. 230 while (I != E && I->isDebugValue()) 231 ++I; 232 233 if (I == E) 234 return true; 235 if (I->modifiesRegister(PhysDef, TRI)) 236 return false; 237 238 --LookAheadLeft; 239 ++I; 240 } 241 242 return false; 243} 244 245bool MachineCSE::isCSECandidate(MachineInstr *MI) { 246 if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || 247 MI->isKill() || MI->isInlineAsm() || MI->isDebugValue()) 248 return false; 249 250 // Ignore copies. 251 if (MI->isCopyLike()) 252 return false; 253 254 // Ignore stuff that we obviously can't move. 255 const TargetInstrDesc &TID = MI->getDesc(); 256 if (TID.mayStore() || TID.isCall() || TID.isTerminator() || 257 TID.hasUnmodeledSideEffects()) 258 return false; 259 260 if (TID.mayLoad()) { 261 // Okay, this instruction does a load. As a refinement, we allow the target 262 // to decide whether the loaded value is actually a constant. If so, we can 263 // actually use it as a load. 264 if (!MI->isInvariantLoad(AA)) 265 // FIXME: we should be able to hoist loads with no other side effects if 266 // there are no other instructions which can change memory in this loop. 267 // This is a trivial form of alias analysis. 268 return false; 269 } 270 return true; 271} 272 273/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a 274/// common expression that defines Reg. 275bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, 276 MachineInstr *CSMI, MachineInstr *MI) { 277 // FIXME: Heuristics that works around the lack the live range splitting. 278 279 // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an 280 // immediate predecessor. We don't want to increase register pressure and end up 281 // causing other computation to be spilled. 282 if (MI->getDesc().isAsCheapAsAMove()) { 283 MachineBasicBlock *CSBB = CSMI->getParent(); 284 MachineBasicBlock *BB = MI->getParent(); 285 if (CSBB != BB && 286 find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end()) 287 return false; 288 } 289 290 // Heuristics #2: If the expression doesn't not use a vr and the only use 291 // of the redundant computation are copies, do not cse. 292 bool HasVRegUse = false; 293 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 294 const MachineOperand &MO = MI->getOperand(i); 295 if (MO.isReg() && MO.isUse() && MO.getReg() && 296 TargetRegisterInfo::isVirtualRegister(MO.getReg())) { 297 HasVRegUse = true; 298 break; 299 } 300 } 301 if (!HasVRegUse) { 302 bool HasNonCopyUse = false; 303 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), 304 E = MRI->use_nodbg_end(); I != E; ++I) { 305 MachineInstr *Use = &*I; 306 // Ignore copies. 307 if (!Use->isCopyLike()) { 308 HasNonCopyUse = true; 309 break; 310 } 311 } 312 if (!HasNonCopyUse) 313 return false; 314 } 315 316 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse 317 // it unless the defined value is already used in the BB of the new use. 318 bool HasPHI = false; 319 SmallPtrSet<MachineBasicBlock*, 4> CSBBs; 320 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg), 321 E = MRI->use_nodbg_end(); I != E; ++I) { 322 MachineInstr *Use = &*I; 323 HasPHI |= Use->isPHI(); 324 CSBBs.insert(Use->getParent()); 325 } 326 327 if (!HasPHI) 328 return true; 329 return CSBBs.count(MI->getParent()); 330} 331 332void MachineCSE::EnterScope(MachineBasicBlock *MBB) { 333 DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 334 ScopeType *Scope = new ScopeType(VNT); 335 ScopeMap[MBB] = Scope; 336} 337 338void MachineCSE::ExitScope(MachineBasicBlock *MBB) { 339 DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 340 DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); 341 assert(SI != ScopeMap.end()); 342 ScopeMap.erase(SI); 343 delete SI->second; 344} 345 346bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { 347 bool Changed = false; 348 349 SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; 350 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { 351 MachineInstr *MI = &*I; 352 ++I; 353 354 if (!isCSECandidate(MI)) 355 continue; 356 357 bool DefPhys = false; 358 bool FoundCSE = VNT.count(MI); 359 if (!FoundCSE) { 360 // Look for trivial copy coalescing opportunities. 361 if (PerformTrivialCoalescing(MI, MBB)) { 362 // After coalescing MI itself may become a copy. 363 if (MI->isCopyLike()) 364 continue; 365 FoundCSE = VNT.count(MI); 366 } 367 } 368 // FIXME: commute commutable instructions? 369 370 // If the instruction defines a physical register and the value *may* be 371 // used, then it's not safe to replace it with a common subexpression. 372 unsigned PhysDef = 0; 373 if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) { 374 FoundCSE = false; 375 376 // ... Unless the CS is local and it also defines the physical register 377 // which is not clobbered in between. 378 if (PhysDef) { 379 unsigned CSVN = VNT.lookup(MI); 380 MachineInstr *CSMI = Exps[CSVN]; 381 if (PhysRegDefReaches(CSMI, MI, PhysDef)) { 382 FoundCSE = true; 383 DefPhys = true; 384 } 385 } 386 } 387 388 if (!FoundCSE) { 389 VNT.insert(MI, CurrVN++); 390 Exps.push_back(MI); 391 continue; 392 } 393 394 // Found a common subexpression, eliminate it. 395 unsigned CSVN = VNT.lookup(MI); 396 MachineInstr *CSMI = Exps[CSVN]; 397 DEBUG(dbgs() << "Examining: " << *MI); 398 DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); 399 400 // Check if it's profitable to perform this CSE. 401 bool DoCSE = true; 402 unsigned NumDefs = MI->getDesc().getNumDefs(); 403 for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { 404 MachineOperand &MO = MI->getOperand(i); 405 if (!MO.isReg() || !MO.isDef()) 406 continue; 407 unsigned OldReg = MO.getReg(); 408 unsigned NewReg = CSMI->getOperand(i).getReg(); 409 if (OldReg == NewReg) 410 continue; 411 assert(TargetRegisterInfo::isVirtualRegister(OldReg) && 412 TargetRegisterInfo::isVirtualRegister(NewReg) && 413 "Do not CSE physical register defs!"); 414 if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { 415 DoCSE = false; 416 break; 417 } 418 CSEPairs.push_back(std::make_pair(OldReg, NewReg)); 419 --NumDefs; 420 } 421 422 // Actually perform the elimination. 423 if (DoCSE) { 424 for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) { 425 MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); 426 MRI->clearKillFlags(CSEPairs[i].second); 427 } 428 MI->eraseFromParent(); 429 ++NumCSEs; 430 if (DefPhys) 431 ++NumPhysCSEs; 432 } else { 433 DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); 434 VNT.insert(MI, CurrVN++); 435 Exps.push_back(MI); 436 } 437 CSEPairs.clear(); 438 } 439 440 return Changed; 441} 442 443/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 444/// dominator tree node if its a leaf or all of its children are done. Walk 445/// up the dominator tree to destroy ancestors which are now done. 446void 447MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, 448 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 449 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 450 if (OpenChildren[Node]) 451 return; 452 453 // Pop scope. 454 ExitScope(Node->getBlock()); 455 456 // Now traverse upwards to pop ancestors whose offsprings are all done. 457 while (MachineDomTreeNode *Parent = ParentMap[Node]) { 458 unsigned Left = --OpenChildren[Parent]; 459 if (Left != 0) 460 break; 461 ExitScope(Parent->getBlock()); 462 Node = Parent; 463 } 464} 465 466bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { 467 SmallVector<MachineDomTreeNode*, 32> Scopes; 468 SmallVector<MachineDomTreeNode*, 8> WorkList; 469 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 470 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 471 472 // Perform a DFS walk to determine the order of visit. 473 WorkList.push_back(Node); 474 do { 475 Node = WorkList.pop_back_val(); 476 Scopes.push_back(Node); 477 const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 478 unsigned NumChildren = Children.size(); 479 OpenChildren[Node] = NumChildren; 480 for (unsigned i = 0; i != NumChildren; ++i) { 481 MachineDomTreeNode *Child = Children[i]; 482 ParentMap[Child] = Node; 483 WorkList.push_back(Child); 484 } 485 } while (!WorkList.empty()); 486 487 // Now perform CSE. 488 bool Changed = false; 489 for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { 490 MachineDomTreeNode *Node = Scopes[i]; 491 MachineBasicBlock *MBB = Node->getBlock(); 492 EnterScope(MBB); 493 Changed |= ProcessBlock(MBB); 494 // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 495 ExitScopeIfDone(Node, OpenChildren, ParentMap); 496 } 497 498 return Changed; 499} 500 501bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { 502 TII = MF.getTarget().getInstrInfo(); 503 TRI = MF.getTarget().getRegisterInfo(); 504 MRI = &MF.getRegInfo(); 505 AA = &getAnalysis<AliasAnalysis>(); 506 DT = &getAnalysis<MachineDominatorTree>(); 507 return PerformCSE(DT->getRootNode()); 508} 509