MachineCSE.cpp revision 189c1ec4c162ca3d36d9bca803b032eb19de434a
1//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs global common subexpression elimination on machine 11// instructions using a scoped hash table based value numbering scheme. It 12// must be run while the machine function is still in SSA form. 13// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "machine-cse" 17#include "llvm/CodeGen/Passes.h" 18#include "llvm/CodeGen/MachineDominators.h" 19#include "llvm/CodeGen/MachineInstr.h" 20#include "llvm/CodeGen/MachineRegisterInfo.h" 21#include "llvm/Analysis/AliasAnalysis.h" 22#include "llvm/Target/TargetInstrInfo.h" 23#include "llvm/ADT/DenseMap.h" 24#include "llvm/ADT/ScopedHashTable.h" 25#include "llvm/ADT/SmallSet.h" 26#include "llvm/ADT/Statistic.h" 27#include "llvm/Support/CommandLine.h" 28#include "llvm/Support/Debug.h" 29 30using namespace llvm; 31 32STATISTIC(NumCoalesces, "Number of copies coalesced"); 33STATISTIC(NumCSEs, "Number of common subexpression eliminated"); 34STATISTIC(NumPhysCSEs, 35 "Number of physreg referencing common subexpr eliminated"); 36 37namespace { 38 class MachineCSE : public MachineFunctionPass { 39 const TargetInstrInfo *TII; 40 const TargetRegisterInfo *TRI; 41 AliasAnalysis *AA; 42 MachineDominatorTree *DT; 43 MachineRegisterInfo *MRI; 44 public: 45 static char ID; // Pass identification 46 MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) { 47 initializeMachineCSEPass(*PassRegistry::getPassRegistry()); 48 } 49 50 virtual bool runOnMachineFunction(MachineFunction &MF); 51 52 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 53 AU.setPreservesCFG(); 54 MachineFunctionPass::getAnalysisUsage(AU); 55 AU.addRequired<AliasAnalysis>(); 56 AU.addPreservedID(MachineLoopInfoID); 57 AU.addRequired<MachineDominatorTree>(); 58 AU.addPreserved<MachineDominatorTree>(); 59 } 60 61 virtual void releaseMemory() { 62 ScopeMap.clear(); 63 Exps.clear(); 64 } 65 66 private: 67 const unsigned LookAheadLimit; 68 typedef ScopedHashTableScope<MachineInstr*, unsigned, 69 MachineInstrExpressionTrait> ScopeType; 70 DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; 71 ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; 72 SmallVector<MachineInstr*, 64> Exps; 73 unsigned CurrVN; 74 75 bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); 76 bool isPhysDefTriviallyDead(unsigned Reg, 77 MachineBasicBlock::const_iterator I, 78 MachineBasicBlock::const_iterator E) const ; 79 bool hasLivePhysRegDefUses(const MachineInstr *MI, 80 const MachineBasicBlock *MBB, 81 SmallSet<unsigned,8> &PhysRefs) const; 82 bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, 83 SmallSet<unsigned,8> &PhysRefs) const; 84 bool isCSECandidate(MachineInstr *MI); 85 bool isProfitableToCSE(unsigned CSReg, unsigned Reg, 86 MachineInstr *CSMI, MachineInstr *MI); 87 void EnterScope(MachineBasicBlock *MBB); 88 void ExitScope(MachineBasicBlock *MBB); 89 bool ProcessBlock(MachineBasicBlock *MBB); 90 void ExitScopeIfDone(MachineDomTreeNode *Node, 91 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 92 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); 93 bool PerformCSE(MachineDomTreeNode *Node); 94 }; 95} // end anonymous namespace 96 97char MachineCSE::ID = 0; 98INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", 99 "Machine Common Subexpression Elimination", false, false) 100INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 101INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 102INITIALIZE_PASS_END(MachineCSE, "machine-cse", 103 "Machine Common Subexpression Elimination", false, false) 104 105FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } 106 107bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, 108 MachineBasicBlock *MBB) { 109 bool Changed = false; 110 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 111 MachineOperand &MO = MI->getOperand(i); 112 if (!MO.isReg() || !MO.isUse()) 113 continue; 114 unsigned Reg = MO.getReg(); 115 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 116 continue; 117 if (!MRI->hasOneNonDBGUse(Reg)) 118 // Only coalesce single use copies. This ensure the copy will be 119 // deleted. 120 continue; 121 MachineInstr *DefMI = MRI->getVRegDef(Reg); 122 if (DefMI->getParent() != MBB) 123 continue; 124 if (!DefMI->isCopy()) 125 continue; 126 unsigned SrcReg = DefMI->getOperand(1).getReg(); 127 if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) 128 continue; 129 if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) 130 continue; 131 if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg))) 132 continue; 133 DEBUG(dbgs() << "Coalescing: " << *DefMI); 134 DEBUG(dbgs() << "*** to: " << *MI); 135 MO.setReg(SrcReg); 136 MRI->clearKillFlags(SrcReg); 137 DefMI->eraseFromParent(); 138 ++NumCoalesces; 139 Changed = true; 140 } 141 142 return Changed; 143} 144 145bool 146MachineCSE::isPhysDefTriviallyDead(unsigned Reg, 147 MachineBasicBlock::const_iterator I, 148 MachineBasicBlock::const_iterator E) const { 149 unsigned LookAheadLeft = LookAheadLimit; 150 while (LookAheadLeft) { 151 // Skip over dbg_value's. 152 while (I != E && I->isDebugValue()) 153 ++I; 154 155 if (I == E) 156 // Reached end of block, register is obviously dead. 157 return true; 158 159 bool SeenDef = false; 160 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { 161 const MachineOperand &MO = I->getOperand(i); 162 if (!MO.isReg() || !MO.getReg()) 163 continue; 164 if (!TRI->regsOverlap(MO.getReg(), Reg)) 165 continue; 166 if (MO.isUse()) 167 // Found a use! 168 return false; 169 SeenDef = true; 170 } 171 if (SeenDef) 172 // See a def of Reg (or an alias) before encountering any use, it's 173 // trivially dead. 174 return true; 175 176 --LookAheadLeft; 177 ++I; 178 } 179 return false; 180} 181 182/// hasLivePhysRegDefUses - Return true if the specified instruction read/write 183/// physical registers (except for dead defs of physical registers). It also 184/// returns the physical register def by reference if it's the only one and the 185/// instruction does not uses a physical register. 186bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, 187 const MachineBasicBlock *MBB, 188 SmallSet<unsigned,8> &PhysRefs) const { 189 MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); 190 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 191 const MachineOperand &MO = MI->getOperand(i); 192 if (!MO.isReg()) 193 continue; 194 unsigned Reg = MO.getReg(); 195 if (!Reg) 196 continue; 197 if (TargetRegisterInfo::isVirtualRegister(Reg)) 198 continue; 199 // If the def is dead, it's ok. But the def may not marked "dead". That's 200 // common since this pass is run before livevariables. We can scan 201 // forward a few instructions and check if it is obviously dead. 202 if (MO.isDef() && 203 (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) 204 continue; 205 PhysRefs.insert(Reg); 206 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) 207 PhysRefs.insert(*Alias); 208 } 209 210 return !PhysRefs.empty(); 211} 212 213bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, 214 SmallSet<unsigned,8> &PhysRefs) const { 215 // For now conservatively returns false if the common subexpression is 216 // not in the same basic block as the given instruction. 217 MachineBasicBlock *MBB = MI->getParent(); 218 if (CSMI->getParent() != MBB) 219 return false; 220 MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); 221 MachineBasicBlock::const_iterator E = MI; 222 unsigned LookAheadLeft = LookAheadLimit; 223 while (LookAheadLeft) { 224 // Skip over dbg_value's. 225 while (I != E && I->isDebugValue()) 226 ++I; 227 228 if (I == E) 229 return true; 230 231 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { 232 const MachineOperand &MO = I->getOperand(i); 233 if (!MO.isReg() || !MO.isDef()) 234 continue; 235 unsigned MOReg = MO.getReg(); 236 if (TargetRegisterInfo::isVirtualRegister(MOReg)) 237 continue; 238 if (PhysRefs.count(MOReg)) 239 return false; 240 } 241 242 --LookAheadLeft; 243 ++I; 244 } 245 246 return false; 247} 248 249bool MachineCSE::isCSECandidate(MachineInstr *MI) { 250 if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || 251 MI->isKill() || MI->isInlineAsm() || MI->isDebugValue()) 252 return false; 253 254 // Ignore copies. 255 if (MI->isCopyLike()) 256 return false; 257 258 // Ignore stuff that we obviously can't move. 259 const TargetInstrDesc &TID = MI->getDesc(); 260 if (TID.mayStore() || TID.isCall() || TID.isTerminator() || 261 TID.hasUnmodeledSideEffects()) 262 return false; 263 264 if (TID.mayLoad()) { 265 // Okay, this instruction does a load. As a refinement, we allow the target 266 // to decide whether the loaded value is actually a constant. If so, we can 267 // actually use it as a load. 268 if (!MI->isInvariantLoad(AA)) 269 // FIXME: we should be able to hoist loads with no other side effects if 270 // there are no other instructions which can change memory in this loop. 271 // This is a trivial form of alias analysis. 272 return false; 273 } 274 return true; 275} 276 277/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a 278/// common expression that defines Reg. 279bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, 280 MachineInstr *CSMI, MachineInstr *MI) { 281 // FIXME: Heuristics that works around the lack the live range splitting. 282 283 // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an 284 // immediate predecessor. We don't want to increase register pressure and end up 285 // causing other computation to be spilled. 286 if (MI->getDesc().isAsCheapAsAMove()) { 287 MachineBasicBlock *CSBB = CSMI->getParent(); 288 MachineBasicBlock *BB = MI->getParent(); 289 if (CSBB != BB && 290 find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end()) 291 return false; 292 } 293 294 // Heuristics #2: If the expression doesn't not use a vr and the only use 295 // of the redundant computation are copies, do not cse. 296 bool HasVRegUse = false; 297 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 298 const MachineOperand &MO = MI->getOperand(i); 299 if (MO.isReg() && MO.isUse() && MO.getReg() && 300 TargetRegisterInfo::isVirtualRegister(MO.getReg())) { 301 HasVRegUse = true; 302 break; 303 } 304 } 305 if (!HasVRegUse) { 306 bool HasNonCopyUse = false; 307 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), 308 E = MRI->use_nodbg_end(); I != E; ++I) { 309 MachineInstr *Use = &*I; 310 // Ignore copies. 311 if (!Use->isCopyLike()) { 312 HasNonCopyUse = true; 313 break; 314 } 315 } 316 if (!HasNonCopyUse) 317 return false; 318 } 319 320 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse 321 // it unless the defined value is already used in the BB of the new use. 322 bool HasPHI = false; 323 SmallPtrSet<MachineBasicBlock*, 4> CSBBs; 324 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg), 325 E = MRI->use_nodbg_end(); I != E; ++I) { 326 MachineInstr *Use = &*I; 327 HasPHI |= Use->isPHI(); 328 CSBBs.insert(Use->getParent()); 329 } 330 331 if (!HasPHI) 332 return true; 333 return CSBBs.count(MI->getParent()); 334} 335 336void MachineCSE::EnterScope(MachineBasicBlock *MBB) { 337 DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 338 ScopeType *Scope = new ScopeType(VNT); 339 ScopeMap[MBB] = Scope; 340} 341 342void MachineCSE::ExitScope(MachineBasicBlock *MBB) { 343 DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 344 DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); 345 assert(SI != ScopeMap.end()); 346 ScopeMap.erase(SI); 347 delete SI->second; 348} 349 350bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { 351 bool Changed = false; 352 353 SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; 354 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { 355 MachineInstr *MI = &*I; 356 ++I; 357 358 if (!isCSECandidate(MI)) 359 continue; 360 361 bool FoundCSE = VNT.count(MI); 362 if (!FoundCSE) { 363 // Look for trivial copy coalescing opportunities. 364 if (PerformTrivialCoalescing(MI, MBB)) { 365 // After coalescing MI itself may become a copy. 366 if (MI->isCopyLike()) 367 continue; 368 FoundCSE = VNT.count(MI); 369 } 370 } 371 // FIXME: commute commutable instructions? 372 373 // If the instruction defines physical registers and the values *may* be 374 // used, then it's not safe to replace it with a common subexpression. 375 // It's also not safe if the instruction uses physical registers. 376 SmallSet<unsigned,8> PhysRefs; 377 if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) { 378 FoundCSE = false; 379 380 // ... Unless the CS is local and it also defines the physical register 381 // which is not clobbered in between and the physical register uses 382 // were not clobbered. 383 unsigned CSVN = VNT.lookup(MI); 384 MachineInstr *CSMI = Exps[CSVN]; 385 if (PhysRegDefsReach(CSMI, MI, PhysRefs)) 386 FoundCSE = true; 387 } 388 389 if (!FoundCSE) { 390 VNT.insert(MI, CurrVN++); 391 Exps.push_back(MI); 392 continue; 393 } 394 395 // Found a common subexpression, eliminate it. 396 unsigned CSVN = VNT.lookup(MI); 397 MachineInstr *CSMI = Exps[CSVN]; 398 DEBUG(dbgs() << "Examining: " << *MI); 399 DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); 400 401 // Check if it's profitable to perform this CSE. 402 bool DoCSE = true; 403 unsigned NumDefs = MI->getDesc().getNumDefs(); 404 for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { 405 MachineOperand &MO = MI->getOperand(i); 406 if (!MO.isReg() || !MO.isDef()) 407 continue; 408 unsigned OldReg = MO.getReg(); 409 unsigned NewReg = CSMI->getOperand(i).getReg(); 410 if (OldReg == NewReg) 411 continue; 412 assert(TargetRegisterInfo::isVirtualRegister(OldReg) && 413 TargetRegisterInfo::isVirtualRegister(NewReg) && 414 "Do not CSE physical register defs!"); 415 if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { 416 DoCSE = false; 417 break; 418 } 419 CSEPairs.push_back(std::make_pair(OldReg, NewReg)); 420 --NumDefs; 421 } 422 423 // Actually perform the elimination. 424 if (DoCSE) { 425 for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) { 426 MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); 427 MRI->clearKillFlags(CSEPairs[i].second); 428 } 429 MI->eraseFromParent(); 430 ++NumCSEs; 431 if (!PhysRefs.empty()) 432 ++NumPhysCSEs; 433 } else { 434 DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); 435 VNT.insert(MI, CurrVN++); 436 Exps.push_back(MI); 437 } 438 CSEPairs.clear(); 439 } 440 441 return Changed; 442} 443 444/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 445/// dominator tree node if its a leaf or all of its children are done. Walk 446/// up the dominator tree to destroy ancestors which are now done. 447void 448MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, 449 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 450 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 451 if (OpenChildren[Node]) 452 return; 453 454 // Pop scope. 455 ExitScope(Node->getBlock()); 456 457 // Now traverse upwards to pop ancestors whose offsprings are all done. 458 while (MachineDomTreeNode *Parent = ParentMap[Node]) { 459 unsigned Left = --OpenChildren[Parent]; 460 if (Left != 0) 461 break; 462 ExitScope(Parent->getBlock()); 463 Node = Parent; 464 } 465} 466 467bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { 468 SmallVector<MachineDomTreeNode*, 32> Scopes; 469 SmallVector<MachineDomTreeNode*, 8> WorkList; 470 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 471 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 472 473 CurrVN = 0; 474 475 // Perform a DFS walk to determine the order of visit. 476 WorkList.push_back(Node); 477 do { 478 Node = WorkList.pop_back_val(); 479 Scopes.push_back(Node); 480 const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 481 unsigned NumChildren = Children.size(); 482 OpenChildren[Node] = NumChildren; 483 for (unsigned i = 0; i != NumChildren; ++i) { 484 MachineDomTreeNode *Child = Children[i]; 485 ParentMap[Child] = Node; 486 WorkList.push_back(Child); 487 } 488 } while (!WorkList.empty()); 489 490 // Now perform CSE. 491 bool Changed = false; 492 for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { 493 MachineDomTreeNode *Node = Scopes[i]; 494 MachineBasicBlock *MBB = Node->getBlock(); 495 EnterScope(MBB); 496 Changed |= ProcessBlock(MBB); 497 // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 498 ExitScopeIfDone(Node, OpenChildren, ParentMap); 499 } 500 501 return Changed; 502} 503 504bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { 505 TII = MF.getTarget().getInstrInfo(); 506 TRI = MF.getTarget().getRegisterInfo(); 507 MRI = &MF.getRegInfo(); 508 AA = &getAnalysis<AliasAnalysis>(); 509 DT = &getAnalysis<MachineDominatorTree>(); 510 return PerformCSE(DT->getRootNode()); 511} 512