MachineLICM.cpp revision 82e0a1a1a81ad54452823a8eb1e8d743cf38f098
1//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs loop invariant code motion on machine instructions. We 11// attempt to remove as much code from the body of a loop as possible. 12// 13// This pass does not attempt to throttle itself to limit register pressure. 14// The register allocation phases are expected to perform rematerialization 15// to recover when register pressure is high. 16// 17// This pass is not intended to be a replacement or a complete alternative 18// for the LLVM-IR-level LICM pass. It is only designed to hoist simple 19// constructs that are not exposed before lowering and instruction selection. 20// 21//===----------------------------------------------------------------------===// 22 23#define DEBUG_TYPE "machine-licm" 24#include "llvm/CodeGen/Passes.h" 25#include "llvm/CodeGen/MachineDominators.h" 26#include "llvm/CodeGen/MachineFrameInfo.h" 27#include "llvm/CodeGen/MachineLoopInfo.h" 28#include "llvm/CodeGen/MachineMemOperand.h" 29#include "llvm/CodeGen/MachineRegisterInfo.h" 30#include "llvm/CodeGen/PseudoSourceValue.h" 31#include "llvm/Target/TargetRegisterInfo.h" 32#include "llvm/Target/TargetInstrInfo.h" 33#include "llvm/Target/TargetMachine.h" 34#include "llvm/Analysis/AliasAnalysis.h" 35#include "llvm/ADT/DenseMap.h" 36#include "llvm/ADT/SmallSet.h" 37#include "llvm/ADT/Statistic.h" 38#include "llvm/Support/Debug.h" 39#include "llvm/Support/raw_ostream.h" 40 41using namespace llvm; 42 43STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); 44STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed"); 45STATISTIC(NumPostRAHoisted, 46 "Number of machine instructions hoisted out of loops post regalloc"); 47 48namespace { 49 class MachineLICM : public MachineFunctionPass { 50 bool PreRegAlloc; 51 52 const TargetMachine *TM; 53 const TargetInstrInfo *TII; 54 const TargetRegisterInfo *TRI; 55 const MachineFrameInfo *MFI; 56 MachineRegisterInfo *RegInfo; 57 58 // Various analyses that we use... 59 AliasAnalysis *AA; // Alias analysis info. 60 MachineLoopInfo *MLI; // Current MachineLoopInfo 61 MachineDominatorTree *DT; // Machine dominator tree for the cur loop 62 63 // State that is updated as we process loops 64 bool Changed; // True if a loop is changed. 65 bool FirstInLoop; // True if it's the first LICM in the loop. 66 MachineLoop *CurLoop; // The current loop we are working on. 67 MachineBasicBlock *CurPreheader; // The preheader for CurLoop. 68 69 BitVector AllocatableSet; 70 71 // For each opcode, keep a list of potentail CSE instructions. 72 DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; 73 74 public: 75 static char ID; // Pass identification, replacement for typeid 76 MachineLICM() : 77 MachineFunctionPass(&ID), PreRegAlloc(true) {} 78 79 explicit MachineLICM(bool PreRA) : 80 MachineFunctionPass(&ID), PreRegAlloc(PreRA) {} 81 82 virtual bool runOnMachineFunction(MachineFunction &MF); 83 84 const char *getPassName() const { return "Machine Instruction LICM"; } 85 86 // FIXME: Loop preheaders? 87 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 88 AU.setPreservesCFG(); 89 AU.addRequired<MachineLoopInfo>(); 90 AU.addRequired<MachineDominatorTree>(); 91 AU.addRequired<AliasAnalysis>(); 92 AU.addPreserved<MachineLoopInfo>(); 93 AU.addPreserved<MachineDominatorTree>(); 94 MachineFunctionPass::getAnalysisUsage(AU); 95 } 96 97 virtual void releaseMemory() { 98 CSEMap.clear(); 99 } 100 101 private: 102 /// CandidateInfo - Keep track of information about hoisting candidates. 103 struct CandidateInfo { 104 MachineInstr *MI; 105 unsigned Def; 106 int FI; 107 CandidateInfo(MachineInstr *mi, unsigned def, int fi) 108 : MI(mi), Def(def), FI(fi) {} 109 }; 110 111 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 112 /// invariants out to the preheader. 113 void HoistRegionPostRA(); 114 115 /// HoistPostRA - When an instruction is found to only use loop invariant 116 /// operands that is safe to hoist, this instruction is called to do the 117 /// dirty work. 118 void HoistPostRA(MachineInstr *MI, unsigned Def); 119 120 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 121 /// gather register def and frame object update information. 122 void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs, 123 SmallSet<int, 32> &StoredFIs, 124 SmallVector<CandidateInfo, 32> &Candidates); 125 126 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the 127 /// current loop. 128 void AddToLiveIns(unsigned Reg); 129 130 /// IsLICMCandidate - Returns true if the instruction may be a suitable 131 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously 132 /// not safe to hoist it. 133 bool IsLICMCandidate(MachineInstr &I); 134 135 /// IsLoopInvariantInst - Returns true if the instruction is loop 136 /// invariant. I.e., all virtual register operands are defined outside of 137 /// the loop, physical registers aren't accessed (explicitly or implicitly), 138 /// and the instruction is hoistable. 139 /// 140 bool IsLoopInvariantInst(MachineInstr &I); 141 142 /// IsProfitableToHoist - Return true if it is potentially profitable to 143 /// hoist the given loop invariant. 144 bool IsProfitableToHoist(MachineInstr &MI); 145 146 /// HoistRegion - Walk the specified region of the CFG (defined by all 147 /// blocks dominated by the specified block, and that are in the current 148 /// loop) in depth first order w.r.t the DominatorTree. This allows us to 149 /// visit definitions before uses, allowing us to hoist a loop body in one 150 /// pass without iteration. 151 /// 152 void HoistRegion(MachineDomTreeNode *N); 153 154 /// isLoadFromConstantMemory - Return true if the given instruction is a 155 /// load from constant memory. 156 bool isLoadFromConstantMemory(MachineInstr *MI); 157 158 /// ExtractHoistableLoad - Unfold a load from the given machineinstr if 159 /// the load itself could be hoisted. Return the unfolded and hoistable 160 /// load, or null if the load couldn't be unfolded or if it wouldn't 161 /// be hoistable. 162 MachineInstr *ExtractHoistableLoad(MachineInstr *MI); 163 164 /// LookForDuplicate - Find an instruction amount PrevMIs that is a 165 /// duplicate of MI. Return this instruction if it's found. 166 const MachineInstr *LookForDuplicate(const MachineInstr *MI, 167 std::vector<const MachineInstr*> &PrevMIs); 168 169 /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on 170 /// the preheader that compute the same value. If it's found, do a RAU on 171 /// with the definition of the existing instruction rather than hoisting 172 /// the instruction to the preheader. 173 bool EliminateCSE(MachineInstr *MI, 174 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI); 175 176 /// Hoist - When an instruction is found to only use loop invariant operands 177 /// that is safe to hoist, this instruction is called to do the dirty work. 178 /// 179 void Hoist(MachineInstr *MI); 180 181 /// InitCSEMap - Initialize the CSE map with instructions that are in the 182 /// current loop preheader that may become duplicates of instructions that 183 /// are hoisted out of the loop. 184 void InitCSEMap(MachineBasicBlock *BB); 185 }; 186} // end anonymous namespace 187 188char MachineLICM::ID = 0; 189static RegisterPass<MachineLICM> 190X("machinelicm", "Machine Loop Invariant Code Motion"); 191 192FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { 193 return new MachineLICM(PreRegAlloc); 194} 195 196/// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most 197/// loop that has a preheader. 198static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { 199 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) 200 if (L->getLoopPreheader()) 201 return false; 202 return true; 203} 204 205bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { 206 if (PreRegAlloc) 207 DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n"); 208 else 209 DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n"); 210 211 Changed = FirstInLoop = false; 212 TM = &MF.getTarget(); 213 TII = TM->getInstrInfo(); 214 TRI = TM->getRegisterInfo(); 215 MFI = MF.getFrameInfo(); 216 RegInfo = &MF.getRegInfo(); 217 AllocatableSet = TRI->getAllocatableSet(MF); 218 219 // Get our Loop information... 220 MLI = &getAnalysis<MachineLoopInfo>(); 221 DT = &getAnalysis<MachineDominatorTree>(); 222 AA = &getAnalysis<AliasAnalysis>(); 223 224 for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I){ 225 CurLoop = *I; 226 227 // If this is done before regalloc, only visit outer-most preheader-sporting 228 // loops. 229 if (PreRegAlloc && !LoopIsOuterMostWithPreheader(CurLoop)) 230 continue; 231 232 // Determine the block to which to hoist instructions. If we can't find a 233 // suitable loop preheader, we can't do any hoisting. 234 // 235 // FIXME: We are only hoisting if the basic block coming into this loop 236 // has only one successor. This isn't the case in general because we haven't 237 // broken critical edges or added preheaders. 238 CurPreheader = CurLoop->getLoopPreheader(); 239 if (!CurPreheader) 240 continue; 241 242 if (!PreRegAlloc) 243 HoistRegionPostRA(); 244 else { 245 // CSEMap is initialized for loop header when the first instruction is 246 // being hoisted. 247 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); 248 FirstInLoop = true; 249 HoistRegion(N); 250 CSEMap.clear(); 251 } 252 } 253 254 return Changed; 255} 256 257/// InstructionStoresToFI - Return true if instruction stores to the 258/// specified frame. 259static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { 260 for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 261 oe = MI->memoperands_end(); o != oe; ++o) { 262 if (!(*o)->isStore() || !(*o)->getValue()) 263 continue; 264 if (const FixedStackPseudoSourceValue *Value = 265 dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 266 if (Value->getFrameIndex() == FI) 267 return true; 268 } 269 } 270 return false; 271} 272 273/// ProcessMI - Examine the instruction for potentai LICM candidate. Also 274/// gather register def and frame object update information. 275void MachineLICM::ProcessMI(MachineInstr *MI, 276 unsigned *PhysRegDefs, 277 SmallSet<int, 32> &StoredFIs, 278 SmallVector<CandidateInfo, 32> &Candidates) { 279 bool RuledOut = false; 280 bool HasNonInvariantUse = false; 281 unsigned Def = 0; 282 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 283 const MachineOperand &MO = MI->getOperand(i); 284 if (MO.isFI()) { 285 // Remember if the instruction stores to the frame index. 286 int FI = MO.getIndex(); 287 if (!StoredFIs.count(FI) && 288 MFI->isSpillSlotObjectIndex(FI) && 289 InstructionStoresToFI(MI, FI)) 290 StoredFIs.insert(FI); 291 HasNonInvariantUse = true; 292 continue; 293 } 294 295 if (!MO.isReg()) 296 continue; 297 unsigned Reg = MO.getReg(); 298 if (!Reg) 299 continue; 300 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 301 "Not expecting virtual register!"); 302 303 if (!MO.isDef()) { 304 if (Reg && PhysRegDefs[Reg]) 305 // If it's using a non-loop-invariant register, then it's obviously not 306 // safe to hoist. 307 HasNonInvariantUse = true; 308 continue; 309 } 310 311 if (MO.isImplicit()) { 312 ++PhysRegDefs[Reg]; 313 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 314 ++PhysRegDefs[*AS]; 315 if (!MO.isDead()) 316 // Non-dead implicit def? This cannot be hoisted. 317 RuledOut = true; 318 // No need to check if a dead implicit def is also defined by 319 // another instruction. 320 continue; 321 } 322 323 // FIXME: For now, avoid instructions with multiple defs, unless 324 // it's a dead implicit def. 325 if (Def) 326 RuledOut = true; 327 else 328 Def = Reg; 329 330 // If we have already seen another instruction that defines the same 331 // register, then this is not safe. 332 if (++PhysRegDefs[Reg] > 1) 333 // MI defined register is seen defined by another instruction in 334 // the loop, it cannot be a LICM candidate. 335 RuledOut = true; 336 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 337 if (++PhysRegDefs[*AS] > 1) 338 RuledOut = true; 339 } 340 341 // Only consider reloads for now and remats which do not have register 342 // operands. FIXME: Consider unfold load folding instructions. 343 if (Def && !RuledOut) { 344 int FI = INT_MIN; 345 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || 346 (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI))) 347 Candidates.push_back(CandidateInfo(MI, Def, FI)); 348 } 349} 350 351/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 352/// invariants out to the preheader. 353void MachineLICM::HoistRegionPostRA() { 354 unsigned NumRegs = TRI->getNumRegs(); 355 unsigned *PhysRegDefs = new unsigned[NumRegs]; 356 std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0); 357 358 SmallVector<CandidateInfo, 32> Candidates; 359 SmallSet<int, 32> StoredFIs; 360 361 // Walk the entire region, count number of defs for each register, and 362 // collect potential LICM candidates. 363 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 364 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 365 MachineBasicBlock *BB = Blocks[i]; 366 // Conservatively treat live-in's as an external def. 367 // FIXME: That means a reload that're reused in successor block(s) will not 368 // be LICM'ed. 369 for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), 370 E = BB->livein_end(); I != E; ++I) { 371 unsigned Reg = *I; 372 ++PhysRegDefs[Reg]; 373 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 374 ++PhysRegDefs[*AS]; 375 } 376 377 for (MachineBasicBlock::iterator 378 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 379 MachineInstr *MI = &*MII; 380 ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates); 381 } 382 } 383 384 // Now evaluate whether the potential candidates qualify. 385 // 1. Check if the candidate defined register is defined by another 386 // instruction in the loop. 387 // 2. If the candidate is a load from stack slot (always true for now), 388 // check if the slot is stored anywhere in the loop. 389 for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { 390 if (Candidates[i].FI != INT_MIN && 391 StoredFIs.count(Candidates[i].FI)) 392 continue; 393 394 if (PhysRegDefs[Candidates[i].Def] == 1) { 395 bool Safe = true; 396 MachineInstr *MI = Candidates[i].MI; 397 for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { 398 const MachineOperand &MO = MI->getOperand(j); 399 if (!MO.isReg() || MO.isDef() || !MO.getReg()) 400 continue; 401 if (PhysRegDefs[MO.getReg()]) { 402 // If it's using a non-loop-invariant register, then it's obviously 403 // not safe to hoist. 404 Safe = false; 405 break; 406 } 407 } 408 if (Safe) 409 HoistPostRA(MI, Candidates[i].Def); 410 } 411 } 412 413 delete[] PhysRegDefs; 414} 415 416/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current 417/// loop, and make sure it is not killed by any instructions in the loop. 418void MachineLICM::AddToLiveIns(unsigned Reg) { 419 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 420 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 421 MachineBasicBlock *BB = Blocks[i]; 422 if (!BB->isLiveIn(Reg)) 423 BB->addLiveIn(Reg); 424 for (MachineBasicBlock::iterator 425 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 426 MachineInstr *MI = &*MII; 427 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 428 MachineOperand &MO = MI->getOperand(i); 429 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; 430 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) 431 MO.setIsKill(false); 432 } 433 } 434 } 435} 436 437/// HoistPostRA - When an instruction is found to only use loop invariant 438/// operands that is safe to hoist, this instruction is called to do the 439/// dirty work. 440void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { 441 // Now move the instructions to the predecessor, inserting it before any 442 // terminator instructions. 443 DEBUG({ 444 dbgs() << "Hoisting " << *MI; 445 if (CurPreheader->getBasicBlock()) 446 dbgs() << " to MachineBasicBlock " 447 << CurPreheader->getName(); 448 if (MI->getParent()->getBasicBlock()) 449 dbgs() << " from MachineBasicBlock " 450 << MI->getParent()->getName(); 451 dbgs() << "\n"; 452 }); 453 454 // Splice the instruction to the preheader. 455 MachineBasicBlock *MBB = MI->getParent(); 456 CurPreheader->splice(CurPreheader->getFirstTerminator(), MBB, MI); 457 458 // Add register to livein list to all the BBs in the current loop since a 459 // loop invariant must be kept live throughout the whole loop. This is 460 // important to ensure later passes do not scavenge the def register. 461 AddToLiveIns(Def); 462 463 ++NumPostRAHoisted; 464 Changed = true; 465} 466 467/// HoistRegion - Walk the specified region of the CFG (defined by all blocks 468/// dominated by the specified block, and that are in the current loop) in depth 469/// first order w.r.t the DominatorTree. This allows us to visit definitions 470/// before uses, allowing us to hoist a loop body in one pass without iteration. 471/// 472void MachineLICM::HoistRegion(MachineDomTreeNode *N) { 473 assert(N != 0 && "Null dominator tree node?"); 474 MachineBasicBlock *BB = N->getBlock(); 475 476 // If this subregion is not in the top level loop at all, exit. 477 if (!CurLoop->contains(BB)) return; 478 479 for (MachineBasicBlock::iterator 480 MII = BB->begin(), E = BB->end(); MII != E; ) { 481 MachineBasicBlock::iterator NextMII = MII; ++NextMII; 482 Hoist(&*MII); 483 MII = NextMII; 484 } 485 486 const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); 487 for (unsigned I = 0, E = Children.size(); I != E; ++I) 488 HoistRegion(Children[I]); 489} 490 491/// IsLICMCandidate - Returns true if the instruction may be a suitable 492/// candidate for LICM. e.g. If the instruction is a call, then it's obviously 493/// not safe to hoist it. 494bool MachineLICM::IsLICMCandidate(MachineInstr &I) { 495 if (I.isImplicitDef()) 496 return false; 497 498 const TargetInstrDesc &TID = I.getDesc(); 499 500 // Ignore stuff that we obviously can't hoist. 501 if (TID.mayStore() || TID.isCall() || TID.isTerminator() || 502 TID.hasUnmodeledSideEffects()) 503 return false; 504 505 if (TID.mayLoad()) { 506 // Okay, this instruction does a load. As a refinement, we allow the target 507 // to decide whether the loaded value is actually a constant. If so, we can 508 // actually use it as a load. 509 if (!I.isInvariantLoad(AA)) 510 // FIXME: we should be able to hoist loads with no other side effects if 511 // there are no other instructions which can change memory in this loop. 512 // This is a trivial form of alias analysis. 513 return false; 514 } 515 return true; 516} 517 518/// IsLoopInvariantInst - Returns true if the instruction is loop 519/// invariant. I.e., all virtual register operands are defined outside of the 520/// loop, physical registers aren't accessed explicitly, and there are no side 521/// effects that aren't captured by the operands or other flags. 522/// 523bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { 524 if (!IsLICMCandidate(I)) 525 return false; 526 527 // The instruction is loop invariant if all of its operands are. 528 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { 529 const MachineOperand &MO = I.getOperand(i); 530 531 if (!MO.isReg()) 532 continue; 533 534 unsigned Reg = MO.getReg(); 535 if (Reg == 0) continue; 536 537 // Don't hoist an instruction that uses or defines a physical register. 538 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 539 if (MO.isUse()) { 540 // If the physreg has no defs anywhere, it's just an ambient register 541 // and we can freely move its uses. Alternatively, if it's allocatable, 542 // it could get allocated to something with a def during allocation. 543 if (!RegInfo->def_empty(Reg)) 544 return false; 545 if (AllocatableSet.test(Reg)) 546 return false; 547 // Check for a def among the register's aliases too. 548 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { 549 unsigned AliasReg = *Alias; 550 if (!RegInfo->def_empty(AliasReg)) 551 return false; 552 if (AllocatableSet.test(AliasReg)) 553 return false; 554 } 555 // Otherwise it's safe to move. 556 continue; 557 } else if (!MO.isDead()) { 558 // A def that isn't dead. We can't move it. 559 return false; 560 } else if (CurLoop->getHeader()->isLiveIn(Reg)) { 561 // If the reg is live into the loop, we can't hoist an instruction 562 // which would clobber it. 563 return false; 564 } 565 } 566 567 if (!MO.isUse()) 568 continue; 569 570 assert(RegInfo->getVRegDef(Reg) && 571 "Machine instr not mapped for this vreg?!"); 572 573 // If the loop contains the definition of an operand, then the instruction 574 // isn't loop invariant. 575 if (CurLoop->contains(RegInfo->getVRegDef(Reg))) 576 return false; 577 } 578 579 // If we got this far, the instruction is loop invariant! 580 return true; 581} 582 583 584/// HasPHIUses - Return true if the specified register has any PHI use. 585static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) { 586 for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg), 587 UE = RegInfo->use_end(); UI != UE; ++UI) { 588 MachineInstr *UseMI = &*UI; 589 if (UseMI->isPHI()) 590 return true; 591 } 592 return false; 593} 594 595/// isLoadFromConstantMemory - Return true if the given instruction is a 596/// load from constant memory. Machine LICM will hoist these even if they are 597/// not re-materializable. 598bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { 599 if (!MI->getDesc().mayLoad()) return false; 600 if (!MI->hasOneMemOperand()) return false; 601 MachineMemOperand *MMO = *MI->memoperands_begin(); 602 if (MMO->isVolatile()) return false; 603 if (!MMO->getValue()) return false; 604 const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue()); 605 if (PSV) { 606 MachineFunction &MF = *MI->getParent()->getParent(); 607 return PSV->isConstant(MF.getFrameInfo()); 608 } else { 609 return AA->pointsToConstantMemory(MMO->getValue()); 610 } 611} 612 613/// IsProfitableToHoist - Return true if it is potentially profitable to hoist 614/// the given loop invariant. 615bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { 616 // FIXME: For now, only hoist re-materilizable instructions. LICM will 617 // increase register pressure. We want to make sure it doesn't increase 618 // spilling. 619 // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting 620 // these tend to help performance in low register pressure situation. The 621 // trade off is it may cause spill in high pressure situation. It will end up 622 // adding a store in the loop preheader. But the reload is no more expensive. 623 // The side benefit is these loads are frequently CSE'ed. 624 if (!TII->isTriviallyReMaterializable(&MI, AA)) { 625 if (!isLoadFromConstantMemory(&MI)) 626 return false; 627 } 628 629 // If result(s) of this instruction is used by PHIs, then don't hoist it. 630 // The presence of joins makes it difficult for current register allocator 631 // implementation to perform remat. 632 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 633 const MachineOperand &MO = MI.getOperand(i); 634 if (!MO.isReg() || !MO.isDef()) 635 continue; 636 if (HasPHIUses(MO.getReg(), RegInfo)) 637 return false; 638 } 639 640 return true; 641} 642 643MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { 644 // If not, we may be able to unfold a load and hoist that. 645 // First test whether the instruction is loading from an amenable 646 // memory location. 647 if (!isLoadFromConstantMemory(MI)) 648 return 0; 649 650 // Next determine the register class for a temporary register. 651 unsigned LoadRegIndex; 652 unsigned NewOpc = 653 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), 654 /*UnfoldLoad=*/true, 655 /*UnfoldStore=*/false, 656 &LoadRegIndex); 657 if (NewOpc == 0) return 0; 658 const TargetInstrDesc &TID = TII->get(NewOpc); 659 if (TID.getNumDefs() != 1) return 0; 660 const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI); 661 // Ok, we're unfolding. Create a temporary register and do the unfold. 662 unsigned Reg = RegInfo->createVirtualRegister(RC); 663 664 MachineFunction &MF = *MI->getParent()->getParent(); 665 SmallVector<MachineInstr *, 2> NewMIs; 666 bool Success = 667 TII->unfoldMemoryOperand(MF, MI, Reg, 668 /*UnfoldLoad=*/true, /*UnfoldStore=*/false, 669 NewMIs); 670 (void)Success; 671 assert(Success && 672 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " 673 "succeeded!"); 674 assert(NewMIs.size() == 2 && 675 "Unfolded a load into multiple instructions!"); 676 MachineBasicBlock *MBB = MI->getParent(); 677 MBB->insert(MI, NewMIs[0]); 678 MBB->insert(MI, NewMIs[1]); 679 // If unfolding produced a load that wasn't loop-invariant or profitable to 680 // hoist, discard the new instructions and bail. 681 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { 682 NewMIs[0]->eraseFromParent(); 683 NewMIs[1]->eraseFromParent(); 684 return 0; 685 } 686 // Otherwise we successfully unfolded a load that we can hoist. 687 MI->eraseFromParent(); 688 return NewMIs[0]; 689} 690 691void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { 692 for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { 693 const MachineInstr *MI = &*I; 694 // FIXME: For now, only hoist re-materilizable instructions. LICM will 695 // increase register pressure. We want to make sure it doesn't increase 696 // spilling. 697 if (TII->isTriviallyReMaterializable(MI, AA)) { 698 unsigned Opcode = MI->getOpcode(); 699 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 700 CI = CSEMap.find(Opcode); 701 if (CI != CSEMap.end()) 702 CI->second.push_back(MI); 703 else { 704 std::vector<const MachineInstr*> CSEMIs; 705 CSEMIs.push_back(MI); 706 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 707 } 708 } 709 } 710} 711 712const MachineInstr* 713MachineLICM::LookForDuplicate(const MachineInstr *MI, 714 std::vector<const MachineInstr*> &PrevMIs) { 715 for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { 716 const MachineInstr *PrevMI = PrevMIs[i]; 717 if (TII->produceSameValue(MI, PrevMI)) 718 return PrevMI; 719 } 720 return 0; 721} 722 723bool MachineLICM::EliminateCSE(MachineInstr *MI, 724 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { 725 if (CI == CSEMap.end()) 726 return false; 727 728 if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { 729 DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); 730 731 // Replace virtual registers defined by MI by their counterparts defined 732 // by Dup. 733 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 734 const MachineOperand &MO = MI->getOperand(i); 735 736 // Physical registers may not differ here. 737 assert((!MO.isReg() || MO.getReg() == 0 || 738 !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || 739 MO.getReg() == Dup->getOperand(i).getReg()) && 740 "Instructions with different phys regs are not identical!"); 741 742 if (MO.isReg() && MO.isDef() && 743 !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 744 RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); 745 RegInfo->clearKillFlags(Dup->getOperand(i).getReg()); 746 } 747 } 748 MI->eraseFromParent(); 749 ++NumCSEed; 750 return true; 751 } 752 return false; 753} 754 755/// Hoist - When an instruction is found to use only loop invariant operands 756/// that are safe to hoist, this instruction is called to do the dirty work. 757/// 758void MachineLICM::Hoist(MachineInstr *MI) { 759 // First check whether we should hoist this instruction. 760 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { 761 // If not, try unfolding a hoistable load. 762 MI = ExtractHoistableLoad(MI); 763 if (!MI) return; 764 } 765 766 // Now move the instructions to the predecessor, inserting it before any 767 // terminator instructions. 768 DEBUG({ 769 dbgs() << "Hoisting " << *MI; 770 if (CurPreheader->getBasicBlock()) 771 dbgs() << " to MachineBasicBlock " 772 << CurPreheader->getName(); 773 if (MI->getParent()->getBasicBlock()) 774 dbgs() << " from MachineBasicBlock " 775 << MI->getParent()->getName(); 776 dbgs() << "\n"; 777 }); 778 779 // If this is the first instruction being hoisted to the preheader, 780 // initialize the CSE map with potential common expressions. 781 if (FirstInLoop) { 782 InitCSEMap(CurPreheader); 783 FirstInLoop = false; 784 } 785 786 // Look for opportunity to CSE the hoisted instruction. 787 unsigned Opcode = MI->getOpcode(); 788 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 789 CI = CSEMap.find(Opcode); 790 if (!EliminateCSE(MI, CI)) { 791 // Otherwise, splice the instruction to the preheader. 792 CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI); 793 794 // Clear the kill flags of any register this instruction defines, 795 // since they may need to be live throughout the entire loop 796 // rather than just live for part of it. 797 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 798 MachineOperand &MO = MI->getOperand(i); 799 if (MO.isReg() && MO.isDef() && !MO.isDead()) 800 RegInfo->clearKillFlags(MO.getReg()); 801 } 802 803 // Add to the CSE map. 804 if (CI != CSEMap.end()) 805 CI->second.push_back(MI); 806 else { 807 std::vector<const MachineInstr*> CSEMIs; 808 CSEMIs.push_back(MI); 809 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 810 } 811 } 812 813 ++NumHoisted; 814 Changed = true; 815} 816