MachineLICM.cpp revision 7e116d468e600c91a625df282bb9ad162915abbc
1//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs loop invariant code motion on machine instructions. We 11// attempt to remove as much code from the body of a loop as possible. 12// 13// This pass does not attempt to throttle itself to limit register pressure. 14// The register allocation phases are expected to perform rematerialization 15// to recover when register pressure is high. 16// 17// This pass is not intended to be a replacement or a complete alternative 18// for the LLVM-IR-level LICM pass. It is only designed to hoist simple 19// constructs that are not exposed before lowering and instruction selection. 20// 21//===----------------------------------------------------------------------===// 22 23#define DEBUG_TYPE "machine-licm" 24#include "llvm/CodeGen/Passes.h" 25#include "llvm/CodeGen/MachineDominators.h" 26#include "llvm/CodeGen/MachineFrameInfo.h" 27#include "llvm/CodeGen/MachineLoopInfo.h" 28#include "llvm/CodeGen/MachineMemOperand.h" 29#include "llvm/CodeGen/MachineRegisterInfo.h" 30#include "llvm/CodeGen/PseudoSourceValue.h" 31#include "llvm/Target/TargetRegisterInfo.h" 32#include "llvm/Target/TargetInstrInfo.h" 33#include "llvm/Target/TargetMachine.h" 34#include "llvm/Analysis/AliasAnalysis.h" 35#include "llvm/ADT/DenseMap.h" 36#include "llvm/ADT/SmallSet.h" 37#include "llvm/ADT/Statistic.h" 38#include "llvm/Support/Debug.h" 39#include "llvm/Support/raw_ostream.h" 40 41using namespace llvm; 42 43STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); 44STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed"); 45STATISTIC(NumPostRAHoisted, 46 "Number of machine instructions hoisted out of loops post regalloc"); 47 48namespace { 49 class MachineLICM : public MachineFunctionPass { 50 bool PreRegAlloc; 51 52 const TargetMachine *TM; 53 const TargetInstrInfo *TII; 54 const TargetRegisterInfo *TRI; 55 const MachineFrameInfo *MFI; 56 MachineRegisterInfo *RegInfo; 57 58 // Various analyses that we use... 59 AliasAnalysis *AA; // Alias analysis info. 60 MachineLoopInfo *MLI; // Current MachineLoopInfo 61 MachineDominatorTree *DT; // Machine dominator tree for the cur loop 62 63 // State that is updated as we process loops 64 bool Changed; // True if a loop is changed. 65 MachineLoop *CurLoop; // The current loop we are working on. 66 MachineBasicBlock *CurPreheader; // The preheader for CurLoop. 67 68 BitVector AllocatableSet; 69 70 // For each opcode, keep a list of potentail CSE instructions. 71 DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; 72 73 public: 74 static char ID; // Pass identification, replacement for typeid 75 MachineLICM() : 76 MachineFunctionPass(&ID), PreRegAlloc(true) {} 77 78 explicit MachineLICM(bool PreRA) : 79 MachineFunctionPass(&ID), PreRegAlloc(PreRA) {} 80 81 virtual bool runOnMachineFunction(MachineFunction &MF); 82 83 const char *getPassName() const { return "Machine Instruction LICM"; } 84 85 // FIXME: Loop preheaders? 86 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 87 AU.setPreservesCFG(); 88 AU.addRequired<MachineLoopInfo>(); 89 AU.addRequired<MachineDominatorTree>(); 90 AU.addRequired<AliasAnalysis>(); 91 AU.addPreserved<MachineLoopInfo>(); 92 AU.addPreserved<MachineDominatorTree>(); 93 MachineFunctionPass::getAnalysisUsage(AU); 94 } 95 96 virtual void releaseMemory() { 97 CSEMap.clear(); 98 } 99 100 private: 101 /// CandidateInfo - Keep track of information about hoisting candidates. 102 struct CandidateInfo { 103 MachineInstr *MI; 104 unsigned Def; 105 int FI; 106 CandidateInfo(MachineInstr *mi, unsigned def, int fi) 107 : MI(mi), Def(def), FI(fi) {} 108 }; 109 110 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 111 /// invariants out to the preheader. 112 void HoistRegionPostRA(); 113 114 /// HoistPostRA - When an instruction is found to only use loop invariant 115 /// operands that is safe to hoist, this instruction is called to do the 116 /// dirty work. 117 void HoistPostRA(MachineInstr *MI, unsigned Def); 118 119 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 120 /// gather register def and frame object update information. 121 void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs, 122 SmallSet<int, 32> &StoredFIs, 123 SmallVector<CandidateInfo, 32> &Candidates); 124 125 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the 126 /// current loop. 127 void AddToLiveIns(unsigned Reg); 128 129 /// IsLICMCandidate - Returns true if the instruction may be a suitable 130 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously 131 /// not safe to hoist it. 132 bool IsLICMCandidate(MachineInstr &I); 133 134 /// IsLoopInvariantInst - Returns true if the instruction is loop 135 /// invariant. I.e., all virtual register operands are defined outside of 136 /// the loop, physical registers aren't accessed (explicitly or implicitly), 137 /// and the instruction is hoistable. 138 /// 139 bool IsLoopInvariantInst(MachineInstr &I); 140 141 /// IsProfitableToHoist - Return true if it is potentially profitable to 142 /// hoist the given loop invariant. 143 bool IsProfitableToHoist(MachineInstr &MI); 144 145 /// HoistRegion - Walk the specified region of the CFG (defined by all 146 /// blocks dominated by the specified block, and that are in the current 147 /// loop) in depth first order w.r.t the DominatorTree. This allows us to 148 /// visit definitions before uses, allowing us to hoist a loop body in one 149 /// pass without iteration. 150 /// 151 void HoistRegion(MachineDomTreeNode *N); 152 153 /// isLoadFromConstantMemory - Return true if the given instruction is a 154 /// load from constant memory. 155 bool isLoadFromConstantMemory(MachineInstr *MI); 156 157 /// ExtractHoistableLoad - Unfold a load from the given machineinstr if 158 /// the load itself could be hoisted. Return the unfolded and hoistable 159 /// load, or null if the load couldn't be unfolded or if it wouldn't 160 /// be hoistable. 161 MachineInstr *ExtractHoistableLoad(MachineInstr *MI); 162 163 /// LookForDuplicate - Find an instruction amount PrevMIs that is a 164 /// duplicate of MI. Return this instruction if it's found. 165 const MachineInstr *LookForDuplicate(const MachineInstr *MI, 166 std::vector<const MachineInstr*> &PrevMIs); 167 168 /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on 169 /// the preheader that compute the same value. If it's found, do a RAU on 170 /// with the definition of the existing instruction rather than hoisting 171 /// the instruction to the preheader. 172 bool EliminateCSE(MachineInstr *MI, 173 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI); 174 175 /// Hoist - When an instruction is found to only use loop invariant operands 176 /// that is safe to hoist, this instruction is called to do the dirty work. 177 /// 178 void Hoist(MachineInstr *MI); 179 180 /// InitCSEMap - Initialize the CSE map with instructions that are in the 181 /// current loop preheader that may become duplicates of instructions that 182 /// are hoisted out of the loop. 183 void InitCSEMap(MachineBasicBlock *BB); 184 }; 185} // end anonymous namespace 186 187char MachineLICM::ID = 0; 188static RegisterPass<MachineLICM> 189X("machinelicm", "Machine Loop Invariant Code Motion"); 190 191FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { 192 return new MachineLICM(PreRegAlloc); 193} 194 195/// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most 196/// loop that has a preheader. 197static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { 198 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) 199 if (L->getLoopPreheader()) 200 return false; 201 return true; 202} 203 204bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { 205 if (PreRegAlloc) 206 DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n"); 207 else 208 DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n"); 209 210 Changed = false; 211 TM = &MF.getTarget(); 212 TII = TM->getInstrInfo(); 213 TRI = TM->getRegisterInfo(); 214 MFI = MF.getFrameInfo(); 215 RegInfo = &MF.getRegInfo(); 216 AllocatableSet = TRI->getAllocatableSet(MF); 217 218 // Get our Loop information... 219 MLI = &getAnalysis<MachineLoopInfo>(); 220 DT = &getAnalysis<MachineDominatorTree>(); 221 AA = &getAnalysis<AliasAnalysis>(); 222 223 for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I){ 224 CurLoop = *I; 225 226 // If this is done before regalloc, only visit outer-most preheader-sporting 227 // loops. 228 if (PreRegAlloc && !LoopIsOuterMostWithPreheader(CurLoop)) 229 continue; 230 231 // Determine the block to which to hoist instructions. If we can't find a 232 // suitable loop preheader, we can't do any hoisting. 233 // 234 // FIXME: We are only hoisting if the basic block coming into this loop 235 // has only one successor. This isn't the case in general because we haven't 236 // broken critical edges or added preheaders. 237 CurPreheader = CurLoop->getLoopPreheader(); 238 if (!CurPreheader) 239 continue; 240 241 if (!PreRegAlloc) 242 HoistRegionPostRA(); 243 else { 244 // CSEMap is initialized for loop header when the first instruction is 245 // being hoisted. 246 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); 247 HoistRegion(N); 248 CSEMap.clear(); 249 } 250 } 251 252 return Changed; 253} 254 255/// InstructionStoresToFI - Return true if instruction stores to the 256/// specified frame. 257static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { 258 for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 259 oe = MI->memoperands_end(); o != oe; ++o) { 260 if (!(*o)->isStore() || !(*o)->getValue()) 261 continue; 262 if (const FixedStackPseudoSourceValue *Value = 263 dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 264 if (Value->getFrameIndex() == FI) 265 return true; 266 } 267 } 268 return false; 269} 270 271/// ProcessMI - Examine the instruction for potentai LICM candidate. Also 272/// gather register def and frame object update information. 273void MachineLICM::ProcessMI(MachineInstr *MI, 274 unsigned *PhysRegDefs, 275 SmallSet<int, 32> &StoredFIs, 276 SmallVector<CandidateInfo, 32> &Candidates) { 277 bool RuledOut = false; 278 bool HasNonInvariantUse = false; 279 unsigned Def = 0; 280 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 281 const MachineOperand &MO = MI->getOperand(i); 282 if (MO.isFI()) { 283 // Remember if the instruction stores to the frame index. 284 int FI = MO.getIndex(); 285 if (!StoredFIs.count(FI) && 286 MFI->isSpillSlotObjectIndex(FI) && 287 InstructionStoresToFI(MI, FI)) 288 StoredFIs.insert(FI); 289 HasNonInvariantUse = true; 290 continue; 291 } 292 293 if (!MO.isReg()) 294 continue; 295 unsigned Reg = MO.getReg(); 296 if (!Reg) 297 continue; 298 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 299 "Not expecting virtual register!"); 300 301 if (!MO.isDef()) { 302 if (Reg && PhysRegDefs[Reg]) 303 // If it's using a non-loop-invariant register, then it's obviously not 304 // safe to hoist. 305 HasNonInvariantUse = true; 306 continue; 307 } 308 309 if (MO.isImplicit()) { 310 ++PhysRegDefs[Reg]; 311 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 312 ++PhysRegDefs[*AS]; 313 if (!MO.isDead()) 314 // Non-dead implicit def? This cannot be hoisted. 315 RuledOut = true; 316 // No need to check if a dead implicit def is also defined by 317 // another instruction. 318 continue; 319 } 320 321 // FIXME: For now, avoid instructions with multiple defs, unless 322 // it's a dead implicit def. 323 if (Def) 324 RuledOut = true; 325 else 326 Def = Reg; 327 328 // If we have already seen another instruction that defines the same 329 // register, then this is not safe. 330 if (++PhysRegDefs[Reg] > 1) 331 // MI defined register is seen defined by another instruction in 332 // the loop, it cannot be a LICM candidate. 333 RuledOut = true; 334 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 335 if (++PhysRegDefs[*AS] > 1) 336 RuledOut = true; 337 } 338 339 // Only consider reloads for now and remats which do not have register 340 // operands. FIXME: Consider unfold load folding instructions. 341 if (Def && !RuledOut) { 342 int FI = INT_MIN; 343 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || 344 (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI))) 345 Candidates.push_back(CandidateInfo(MI, Def, FI)); 346 } 347} 348 349/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 350/// invariants out to the preheader. 351void MachineLICM::HoistRegionPostRA() { 352 unsigned NumRegs = TRI->getNumRegs(); 353 unsigned *PhysRegDefs = new unsigned[NumRegs]; 354 std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0); 355 356 SmallVector<CandidateInfo, 32> Candidates; 357 SmallSet<int, 32> StoredFIs; 358 359 // Walk the entire region, count number of defs for each register, and 360 // collect potential LICM candidates. 361 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 362 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 363 MachineBasicBlock *BB = Blocks[i]; 364 // Conservatively treat live-in's as an external def. 365 // FIXME: That means a reload that're reused in successor block(s) will not 366 // be LICM'ed. 367 for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), 368 E = BB->livein_end(); I != E; ++I) { 369 unsigned Reg = *I; 370 ++PhysRegDefs[Reg]; 371 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 372 ++PhysRegDefs[*AS]; 373 } 374 375 for (MachineBasicBlock::iterator 376 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 377 MachineInstr *MI = &*MII; 378 ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates); 379 } 380 } 381 382 // Now evaluate whether the potential candidates qualify. 383 // 1. Check if the candidate defined register is defined by another 384 // instruction in the loop. 385 // 2. If the candidate is a load from stack slot (always true for now), 386 // check if the slot is stored anywhere in the loop. 387 for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { 388 if (Candidates[i].FI != INT_MIN && 389 StoredFIs.count(Candidates[i].FI)) 390 continue; 391 392 if (PhysRegDefs[Candidates[i].Def] == 1) { 393 bool Safe = true; 394 MachineInstr *MI = Candidates[i].MI; 395 for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { 396 const MachineOperand &MO = MI->getOperand(j); 397 if (!MO.isReg() || MO.isDef() || !MO.getReg()) 398 continue; 399 if (PhysRegDefs[MO.getReg()]) { 400 // If it's using a non-loop-invariant register, then it's obviously 401 // not safe to hoist. 402 Safe = false; 403 break; 404 } 405 } 406 if (Safe) 407 HoistPostRA(MI, Candidates[i].Def); 408 } 409 } 410 411 delete[] PhysRegDefs; 412} 413 414/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the 415/// current loop. 416void MachineLICM::AddToLiveIns(unsigned Reg) { 417 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 418 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) 419 Blocks[i]->addLiveIn(Reg); 420} 421 422/// HoistPostRA - When an instruction is found to only use loop invariant 423/// operands that is safe to hoist, this instruction is called to do the 424/// dirty work. 425void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { 426 // Now move the instructions to the predecessor, inserting it before any 427 // terminator instructions. 428 DEBUG({ 429 dbgs() << "Hoisting " << *MI; 430 if (CurPreheader->getBasicBlock()) 431 dbgs() << " to MachineBasicBlock " 432 << CurPreheader->getName(); 433 if (MI->getParent()->getBasicBlock()) 434 dbgs() << " from MachineBasicBlock " 435 << MI->getParent()->getName(); 436 dbgs() << "\n"; 437 }); 438 439 // Splice the instruction to the preheader. 440 MachineBasicBlock *MBB = MI->getParent(); 441 CurPreheader->splice(CurPreheader->getFirstTerminator(), MBB, MI); 442 443 // Add register to livein list to all the BBs in the current loop since a 444 // loop invariant must be kept live throughout the whole loop. This is 445 // important to ensure later passes do not scavenge the def register. 446 AddToLiveIns(Def); 447 448 ++NumPostRAHoisted; 449 Changed = true; 450} 451 452/// HoistRegion - Walk the specified region of the CFG (defined by all blocks 453/// dominated by the specified block, and that are in the current loop) in depth 454/// first order w.r.t the DominatorTree. This allows us to visit definitions 455/// before uses, allowing us to hoist a loop body in one pass without iteration. 456/// 457void MachineLICM::HoistRegion(MachineDomTreeNode *N) { 458 assert(N != 0 && "Null dominator tree node?"); 459 MachineBasicBlock *BB = N->getBlock(); 460 461 // If this subregion is not in the top level loop at all, exit. 462 if (!CurLoop->contains(BB)) return; 463 464 for (MachineBasicBlock::iterator 465 MII = BB->begin(), E = BB->end(); MII != E; ) { 466 MachineBasicBlock::iterator NextMII = MII; ++NextMII; 467 Hoist(&*MII); 468 MII = NextMII; 469 } 470 471 const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); 472 for (unsigned I = 0, E = Children.size(); I != E; ++I) 473 HoistRegion(Children[I]); 474} 475 476/// IsLICMCandidate - Returns true if the instruction may be a suitable 477/// candidate for LICM. e.g. If the instruction is a call, then it's obviously 478/// not safe to hoist it. 479bool MachineLICM::IsLICMCandidate(MachineInstr &I) { 480 if (I.isImplicitDef()) 481 return false; 482 483 const TargetInstrDesc &TID = I.getDesc(); 484 485 // Ignore stuff that we obviously can't hoist. 486 if (TID.mayStore() || TID.isCall() || TID.isTerminator() || 487 TID.hasUnmodeledSideEffects()) 488 return false; 489 490 if (TID.mayLoad()) { 491 // Okay, this instruction does a load. As a refinement, we allow the target 492 // to decide whether the loaded value is actually a constant. If so, we can 493 // actually use it as a load. 494 if (!I.isInvariantLoad(AA)) 495 // FIXME: we should be able to hoist loads with no other side effects if 496 // there are no other instructions which can change memory in this loop. 497 // This is a trivial form of alias analysis. 498 return false; 499 } 500 return true; 501} 502 503/// IsLoopInvariantInst - Returns true if the instruction is loop 504/// invariant. I.e., all virtual register operands are defined outside of the 505/// loop, physical registers aren't accessed explicitly, and there are no side 506/// effects that aren't captured by the operands or other flags. 507/// 508bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { 509 if (!IsLICMCandidate(I)) 510 return false; 511 512 // The instruction is loop invariant if all of its operands are. 513 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { 514 const MachineOperand &MO = I.getOperand(i); 515 516 if (!MO.isReg()) 517 continue; 518 519 unsigned Reg = MO.getReg(); 520 if (Reg == 0) continue; 521 522 // Don't hoist an instruction that uses or defines a physical register. 523 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 524 if (MO.isUse()) { 525 // If the physreg has no defs anywhere, it's just an ambient register 526 // and we can freely move its uses. Alternatively, if it's allocatable, 527 // it could get allocated to something with a def during allocation. 528 if (!RegInfo->def_empty(Reg)) 529 return false; 530 if (AllocatableSet.test(Reg)) 531 return false; 532 // Check for a def among the register's aliases too. 533 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { 534 unsigned AliasReg = *Alias; 535 if (!RegInfo->def_empty(AliasReg)) 536 return false; 537 if (AllocatableSet.test(AliasReg)) 538 return false; 539 } 540 // Otherwise it's safe to move. 541 continue; 542 } else if (!MO.isDead()) { 543 // A def that isn't dead. We can't move it. 544 return false; 545 } else if (CurLoop->getHeader()->isLiveIn(Reg)) { 546 // If the reg is live into the loop, we can't hoist an instruction 547 // which would clobber it. 548 return false; 549 } 550 } 551 552 if (!MO.isUse()) 553 continue; 554 555 assert(RegInfo->getVRegDef(Reg) && 556 "Machine instr not mapped for this vreg?!"); 557 558 // If the loop contains the definition of an operand, then the instruction 559 // isn't loop invariant. 560 if (CurLoop->contains(RegInfo->getVRegDef(Reg))) 561 return false; 562 } 563 564 // If we got this far, the instruction is loop invariant! 565 return true; 566} 567 568 569/// HasPHIUses - Return true if the specified register has any PHI use. 570static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) { 571 for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg), 572 UE = RegInfo->use_end(); UI != UE; ++UI) { 573 MachineInstr *UseMI = &*UI; 574 if (UseMI->isPHI()) 575 return true; 576 } 577 return false; 578} 579 580/// isLoadFromConstantMemory - Return true if the given instruction is a 581/// load from constant memory. Machine LICM will hoist these even if they are 582/// not re-materializable. 583bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { 584 if (!MI->getDesc().mayLoad()) return false; 585 if (!MI->hasOneMemOperand()) return false; 586 MachineMemOperand *MMO = *MI->memoperands_begin(); 587 if (MMO->isVolatile()) return false; 588 if (!MMO->getValue()) return false; 589 const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue()); 590 if (PSV) { 591 MachineFunction &MF = *MI->getParent()->getParent(); 592 return PSV->isConstant(MF.getFrameInfo()); 593 } else { 594 return AA->pointsToConstantMemory(MMO->getValue()); 595 } 596} 597 598/// IsProfitableToHoist - Return true if it is potentially profitable to hoist 599/// the given loop invariant. 600bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { 601 // FIXME: For now, only hoist re-materilizable instructions. LICM will 602 // increase register pressure. We want to make sure it doesn't increase 603 // spilling. 604 // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting 605 // these tend to help performance in low register pressure situation. The 606 // trade off is it may cause spill in high pressure situation. It will end up 607 // adding a store in the loop preheader. But the reload is no more expensive. 608 // The side benefit is these loads are frequently CSE'ed. 609 if (!TII->isTriviallyReMaterializable(&MI, AA)) { 610 if (!isLoadFromConstantMemory(&MI)) 611 return false; 612 } 613 614 // If result(s) of this instruction is used by PHIs, then don't hoist it. 615 // The presence of joins makes it difficult for current register allocator 616 // implementation to perform remat. 617 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 618 const MachineOperand &MO = MI.getOperand(i); 619 if (!MO.isReg() || !MO.isDef()) 620 continue; 621 if (HasPHIUses(MO.getReg(), RegInfo)) 622 return false; 623 } 624 625 return true; 626} 627 628MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { 629 // If not, we may be able to unfold a load and hoist that. 630 // First test whether the instruction is loading from an amenable 631 // memory location. 632 if (!isLoadFromConstantMemory(MI)) 633 return 0; 634 635 // Next determine the register class for a temporary register. 636 unsigned LoadRegIndex; 637 unsigned NewOpc = 638 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), 639 /*UnfoldLoad=*/true, 640 /*UnfoldStore=*/false, 641 &LoadRegIndex); 642 if (NewOpc == 0) return 0; 643 const TargetInstrDesc &TID = TII->get(NewOpc); 644 if (TID.getNumDefs() != 1) return 0; 645 const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI); 646 // Ok, we're unfolding. Create a temporary register and do the unfold. 647 unsigned Reg = RegInfo->createVirtualRegister(RC); 648 649 MachineFunction &MF = *MI->getParent()->getParent(); 650 SmallVector<MachineInstr *, 2> NewMIs; 651 bool Success = 652 TII->unfoldMemoryOperand(MF, MI, Reg, 653 /*UnfoldLoad=*/true, /*UnfoldStore=*/false, 654 NewMIs); 655 (void)Success; 656 assert(Success && 657 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " 658 "succeeded!"); 659 assert(NewMIs.size() == 2 && 660 "Unfolded a load into multiple instructions!"); 661 MachineBasicBlock *MBB = MI->getParent(); 662 MBB->insert(MI, NewMIs[0]); 663 MBB->insert(MI, NewMIs[1]); 664 // If unfolding produced a load that wasn't loop-invariant or profitable to 665 // hoist, discard the new instructions and bail. 666 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { 667 NewMIs[0]->eraseFromParent(); 668 NewMIs[1]->eraseFromParent(); 669 return 0; 670 } 671 // Otherwise we successfully unfolded a load that we can hoist. 672 MI->eraseFromParent(); 673 return NewMIs[0]; 674} 675 676void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { 677 for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { 678 const MachineInstr *MI = &*I; 679 // FIXME: For now, only hoist re-materilizable instructions. LICM will 680 // increase register pressure. We want to make sure it doesn't increase 681 // spilling. 682 if (TII->isTriviallyReMaterializable(MI, AA)) { 683 unsigned Opcode = MI->getOpcode(); 684 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 685 CI = CSEMap.find(Opcode); 686 if (CI != CSEMap.end()) 687 CI->second.push_back(MI); 688 else { 689 std::vector<const MachineInstr*> CSEMIs; 690 CSEMIs.push_back(MI); 691 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 692 } 693 } 694 } 695} 696 697const MachineInstr* 698MachineLICM::LookForDuplicate(const MachineInstr *MI, 699 std::vector<const MachineInstr*> &PrevMIs) { 700 for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { 701 const MachineInstr *PrevMI = PrevMIs[i]; 702 if (TII->produceSameValue(MI, PrevMI)) 703 return PrevMI; 704 } 705 return 0; 706} 707 708bool MachineLICM::EliminateCSE(MachineInstr *MI, 709 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { 710 if (CI == CSEMap.end()) 711 return false; 712 713 if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { 714 DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); 715 716 // Replace virtual registers defined by MI by their counterparts defined 717 // by Dup. 718 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 719 const MachineOperand &MO = MI->getOperand(i); 720 721 // Physical registers may not differ here. 722 assert((!MO.isReg() || MO.getReg() == 0 || 723 !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || 724 MO.getReg() == Dup->getOperand(i).getReg()) && 725 "Instructions with different phys regs are not identical!"); 726 727 if (MO.isReg() && MO.isDef() && 728 !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) 729 RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); 730 } 731 MI->eraseFromParent(); 732 ++NumCSEed; 733 return true; 734 } 735 return false; 736} 737 738/// Hoist - When an instruction is found to use only loop invariant operands 739/// that are safe to hoist, this instruction is called to do the dirty work. 740/// 741void MachineLICM::Hoist(MachineInstr *MI) { 742 // First check whether we should hoist this instruction. 743 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { 744 // If not, try unfolding a hoistable load. 745 MI = ExtractHoistableLoad(MI); 746 if (!MI) return; 747 } 748 749 // Now move the instructions to the predecessor, inserting it before any 750 // terminator instructions. 751 DEBUG({ 752 dbgs() << "Hoisting " << *MI; 753 if (CurPreheader->getBasicBlock()) 754 dbgs() << " to MachineBasicBlock " 755 << CurPreheader->getName(); 756 if (MI->getParent()->getBasicBlock()) 757 dbgs() << " from MachineBasicBlock " 758 << MI->getParent()->getName(); 759 dbgs() << "\n"; 760 }); 761 762 // If this is the first instruction being hoisted to the preheader, 763 // initialize the CSE map with potential common expressions. 764 InitCSEMap(CurPreheader); 765 766 // Look for opportunity to CSE the hoisted instruction. 767 unsigned Opcode = MI->getOpcode(); 768 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 769 CI = CSEMap.find(Opcode); 770 if (!EliminateCSE(MI, CI)) { 771 // Otherwise, splice the instruction to the preheader. 772 CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI); 773 774 // Add to the CSE map. 775 if (CI != CSEMap.end()) 776 CI->second.push_back(MI); 777 else { 778 std::vector<const MachineInstr*> CSEMIs; 779 CSEMIs.push_back(MI); 780 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 781 } 782 } 783 784 ++NumHoisted; 785 Changed = true; 786} 787