MachineLICM.cpp revision 03a9fdf2e755c1cebdb8371d79b591d46daa9463
1//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs loop invariant code motion on machine instructions. We 11// attempt to remove as much code from the body of a loop as possible. 12// 13// This pass does not attempt to throttle itself to limit register pressure. 14// The register allocation phases are expected to perform rematerialization 15// to recover when register pressure is high. 16// 17// This pass is not intended to be a replacement or a complete alternative 18// for the LLVM-IR-level LICM pass. It is only designed to hoist simple 19// constructs that are not exposed before lowering and instruction selection. 20// 21//===----------------------------------------------------------------------===// 22 23#define DEBUG_TYPE "machine-licm" 24#include "llvm/CodeGen/Passes.h" 25#include "llvm/CodeGen/MachineDominators.h" 26#include "llvm/CodeGen/MachineFrameInfo.h" 27#include "llvm/CodeGen/MachineLoopInfo.h" 28#include "llvm/CodeGen/MachineMemOperand.h" 29#include "llvm/CodeGen/MachineRegisterInfo.h" 30#include "llvm/CodeGen/PseudoSourceValue.h" 31#include "llvm/Target/TargetLowering.h" 32#include "llvm/Target/TargetRegisterInfo.h" 33#include "llvm/Target/TargetInstrInfo.h" 34#include "llvm/Target/TargetInstrItineraries.h" 35#include "llvm/Target/TargetMachine.h" 36#include "llvm/Analysis/AliasAnalysis.h" 37#include "llvm/ADT/DenseMap.h" 38#include "llvm/ADT/SmallSet.h" 39#include "llvm/ADT/Statistic.h" 40#include "llvm/Support/CommandLine.h" 41#include "llvm/Support/Debug.h" 42#include "llvm/Support/raw_ostream.h" 43 44using namespace llvm; 45 46static cl::opt<bool> 47TrackRegPressure("rp-aware-machine-licm", 48 cl::desc("Register pressure aware machine LICM"), 49 cl::init(false), cl::Hidden); 50 51STATISTIC(NumHoisted, 52 "Number of machine instructions hoisted out of loops"); 53STATISTIC(NumLowRP, 54 "Number of instructions hoisted in low reg pressure situation"); 55STATISTIC(NumHighLatency, 56 "Number of high latency instructions hoisted"); 57STATISTIC(NumCSEed, 58 "Number of hoisted machine instructions CSEed"); 59STATISTIC(NumPostRAHoisted, 60 "Number of machine instructions hoisted out of loops post regalloc"); 61 62namespace { 63 class MachineLICM : public MachineFunctionPass { 64 bool PreRegAlloc; 65 66 const TargetMachine *TM; 67 const TargetInstrInfo *TII; 68 const TargetLowering *TLI; 69 const TargetRegisterInfo *TRI; 70 const MachineFrameInfo *MFI; 71 MachineRegisterInfo *MRI; 72 const InstrItineraryData *InstrItins; 73 74 // Various analyses that we use... 75 AliasAnalysis *AA; // Alias analysis info. 76 MachineLoopInfo *MLI; // Current MachineLoopInfo 77 MachineDominatorTree *DT; // Machine dominator tree for the cur loop 78 79 // State that is updated as we process loops 80 bool Changed; // True if a loop is changed. 81 bool FirstInLoop; // True if it's the first LICM in the loop. 82 MachineLoop *CurLoop; // The current loop we are working on. 83 MachineBasicBlock *CurPreheader; // The preheader for CurLoop. 84 85 BitVector AllocatableSet; 86 87 // Track 'estimated' register pressure. 88 SmallSet<unsigned, 32> RegSeen; 89 SmallVector<unsigned, 8> RegPressure; 90 91 // Register pressure "limit" per register class. If the pressure 92 // is higher than the limit, then it's considered high. 93 SmallVector<unsigned, 8> RegLimit; 94 95 // Register pressure on path leading from loop preheader to current BB. 96 SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; 97 98 // For each opcode, keep a list of potential CSE instructions. 99 DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; 100 101 public: 102 static char ID; // Pass identification, replacement for typeid 103 MachineLICM() : 104 MachineFunctionPass(ID), PreRegAlloc(true) {} 105 106 explicit MachineLICM(bool PreRA) : 107 MachineFunctionPass(ID), PreRegAlloc(PreRA) {} 108 109 virtual bool runOnMachineFunction(MachineFunction &MF); 110 111 const char *getPassName() const { return "Machine Instruction LICM"; } 112 113 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 114 AU.setPreservesCFG(); 115 AU.addRequired<MachineLoopInfo>(); 116 AU.addRequired<MachineDominatorTree>(); 117 AU.addRequired<AliasAnalysis>(); 118 AU.addPreserved<MachineLoopInfo>(); 119 AU.addPreserved<MachineDominatorTree>(); 120 MachineFunctionPass::getAnalysisUsage(AU); 121 } 122 123 virtual void releaseMemory() { 124 RegSeen.clear(); 125 RegPressure.clear(); 126 RegLimit.clear(); 127 for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator 128 CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI) 129 CI->second.clear(); 130 CSEMap.clear(); 131 } 132 133 private: 134 /// CandidateInfo - Keep track of information about hoisting candidates. 135 struct CandidateInfo { 136 MachineInstr *MI; 137 unsigned Def; 138 int FI; 139 CandidateInfo(MachineInstr *mi, unsigned def, int fi) 140 : MI(mi), Def(def), FI(fi) {} 141 }; 142 143 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 144 /// invariants out to the preheader. 145 void HoistRegionPostRA(); 146 147 /// HoistPostRA - When an instruction is found to only use loop invariant 148 /// operands that is safe to hoist, this instruction is called to do the 149 /// dirty work. 150 void HoistPostRA(MachineInstr *MI, unsigned Def); 151 152 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 153 /// gather register def and frame object update information. 154 void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs, 155 SmallSet<int, 32> &StoredFIs, 156 SmallVector<CandidateInfo, 32> &Candidates); 157 158 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the 159 /// current loop. 160 void AddToLiveIns(unsigned Reg); 161 162 /// IsLICMCandidate - Returns true if the instruction may be a suitable 163 /// candidate for LICM. e.g. If the instruction is a call, then it's 164 /// obviously not safe to hoist it. 165 bool IsLICMCandidate(MachineInstr &I); 166 167 /// IsLoopInvariantInst - Returns true if the instruction is loop 168 /// invariant. I.e., all virtual register operands are defined outside of 169 /// the loop, physical registers aren't accessed (explicitly or implicitly), 170 /// and the instruction is hoistable. 171 /// 172 bool IsLoopInvariantInst(MachineInstr &I); 173 174 /// ComputeOperandLatency - Compute operand latency between a def of 'Reg' 175 /// and an use in the current loop. 176 int ComputeOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg); 177 178 /// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check 179 /// if hoisting an instruction of the given cost matrix can cause high 180 /// register pressure. 181 bool IncreaseHighRegPressure(DenseMap<unsigned, int> &Cost); 182 183 /// IsProfitableToHoist - Return true if it is potentially profitable to 184 /// hoist the given loop invariant. 185 bool IsProfitableToHoist(MachineInstr &MI); 186 187 /// HoistRegion - Walk the specified region of the CFG (defined by all 188 /// blocks dominated by the specified block, and that are in the current 189 /// loop) in depth first order w.r.t the DominatorTree. This allows us to 190 /// visit definitions before uses, allowing us to hoist a loop body in one 191 /// pass without iteration. 192 /// 193 void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false); 194 195 /// InitRegPressure - Find all virtual register references that are liveout 196 /// of the preheader to initialize the starting "register pressure". Note 197 /// this does not count live through (livein but not used) registers. 198 void InitRegPressure(MachineBasicBlock *BB); 199 200 /// UpdateRegPressureBefore / UpdateRegPressureAfter - Update estimate of 201 /// register pressure before and after executing a specifi instruction. 202 void UpdateRegPressureBefore(const MachineInstr *MI); 203 void UpdateRegPressureAfter(const MachineInstr *MI); 204 205 /// isLoadFromConstantMemory - Return true if the given instruction is a 206 /// load from constant memory. 207 bool isLoadFromConstantMemory(MachineInstr *MI); 208 209 /// ExtractHoistableLoad - Unfold a load from the given machineinstr if 210 /// the load itself could be hoisted. Return the unfolded and hoistable 211 /// load, or null if the load couldn't be unfolded or if it wouldn't 212 /// be hoistable. 213 MachineInstr *ExtractHoistableLoad(MachineInstr *MI); 214 215 /// LookForDuplicate - Find an instruction amount PrevMIs that is a 216 /// duplicate of MI. Return this instruction if it's found. 217 const MachineInstr *LookForDuplicate(const MachineInstr *MI, 218 std::vector<const MachineInstr*> &PrevMIs); 219 220 /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on 221 /// the preheader that compute the same value. If it's found, do a RAU on 222 /// with the definition of the existing instruction rather than hoisting 223 /// the instruction to the preheader. 224 bool EliminateCSE(MachineInstr *MI, 225 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI); 226 227 /// Hoist - When an instruction is found to only use loop invariant operands 228 /// that is safe to hoist, this instruction is called to do the dirty work. 229 /// 230 void Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); 231 232 /// InitCSEMap - Initialize the CSE map with instructions that are in the 233 /// current loop preheader that may become duplicates of instructions that 234 /// are hoisted out of the loop. 235 void InitCSEMap(MachineBasicBlock *BB); 236 237 /// getCurPreheader - Get the preheader for the current loop, splitting 238 /// a critical edge if needed. 239 MachineBasicBlock *getCurPreheader(); 240 }; 241} // end anonymous namespace 242 243char MachineLICM::ID = 0; 244INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", 245 "Machine Loop Invariant Code Motion", false, false) 246INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 247INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 248INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 249INITIALIZE_PASS_END(MachineLICM, "machinelicm", 250 "Machine Loop Invariant Code Motion", false, false) 251 252FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { 253 return new MachineLICM(PreRegAlloc); 254} 255 256/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most 257/// loop that has a unique predecessor. 258static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { 259 // Check whether this loop even has a unique predecessor. 260 if (!CurLoop->getLoopPredecessor()) 261 return false; 262 // Ok, now check to see if any of its outer loops do. 263 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) 264 if (L->getLoopPredecessor()) 265 return false; 266 // None of them did, so this is the outermost with a unique predecessor. 267 return true; 268} 269 270bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { 271 if (PreRegAlloc) 272 DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); 273 else 274 DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); 275 DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); 276 277 Changed = FirstInLoop = false; 278 TM = &MF.getTarget(); 279 TII = TM->getInstrInfo(); 280 TLI = TM->getTargetLowering(); 281 TRI = TM->getRegisterInfo(); 282 MFI = MF.getFrameInfo(); 283 MRI = &MF.getRegInfo(); 284 InstrItins = TM->getInstrItineraryData(); 285 AllocatableSet = TRI->getAllocatableSet(MF); 286 287 if (PreRegAlloc) { 288 // Estimate register pressure during pre-regalloc pass. 289 unsigned NumRC = TRI->getNumRegClasses(); 290 RegPressure.resize(NumRC); 291 std::fill(RegPressure.begin(), RegPressure.end(), 0); 292 RegLimit.resize(NumRC); 293 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 294 E = TRI->regclass_end(); I != E; ++I) 295 RegLimit[(*I)->getID()] = TLI->getRegPressureLimit(*I, MF); 296 } 297 298 // Get our Loop information... 299 MLI = &getAnalysis<MachineLoopInfo>(); 300 DT = &getAnalysis<MachineDominatorTree>(); 301 AA = &getAnalysis<AliasAnalysis>(); 302 303 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); 304 while (!Worklist.empty()) { 305 CurLoop = Worklist.pop_back_val(); 306 CurPreheader = 0; 307 308 // If this is done before regalloc, only visit outer-most preheader-sporting 309 // loops. 310 if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { 311 Worklist.append(CurLoop->begin(), CurLoop->end()); 312 continue; 313 } 314 315 if (!PreRegAlloc) 316 HoistRegionPostRA(); 317 else { 318 // CSEMap is initialized for loop header when the first instruction is 319 // being hoisted. 320 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); 321 FirstInLoop = true; 322 HoistRegion(N, true); 323 CSEMap.clear(); 324 } 325 } 326 327 return Changed; 328} 329 330/// InstructionStoresToFI - Return true if instruction stores to the 331/// specified frame. 332static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { 333 for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 334 oe = MI->memoperands_end(); o != oe; ++o) { 335 if (!(*o)->isStore() || !(*o)->getValue()) 336 continue; 337 if (const FixedStackPseudoSourceValue *Value = 338 dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 339 if (Value->getFrameIndex() == FI) 340 return true; 341 } 342 } 343 return false; 344} 345 346/// ProcessMI - Examine the instruction for potentai LICM candidate. Also 347/// gather register def and frame object update information. 348void MachineLICM::ProcessMI(MachineInstr *MI, 349 unsigned *PhysRegDefs, 350 SmallSet<int, 32> &StoredFIs, 351 SmallVector<CandidateInfo, 32> &Candidates) { 352 bool RuledOut = false; 353 bool HasNonInvariantUse = false; 354 unsigned Def = 0; 355 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 356 const MachineOperand &MO = MI->getOperand(i); 357 if (MO.isFI()) { 358 // Remember if the instruction stores to the frame index. 359 int FI = MO.getIndex(); 360 if (!StoredFIs.count(FI) && 361 MFI->isSpillSlotObjectIndex(FI) && 362 InstructionStoresToFI(MI, FI)) 363 StoredFIs.insert(FI); 364 HasNonInvariantUse = true; 365 continue; 366 } 367 368 if (!MO.isReg()) 369 continue; 370 unsigned Reg = MO.getReg(); 371 if (!Reg) 372 continue; 373 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 374 "Not expecting virtual register!"); 375 376 if (!MO.isDef()) { 377 if (Reg && PhysRegDefs[Reg]) 378 // If it's using a non-loop-invariant register, then it's obviously not 379 // safe to hoist. 380 HasNonInvariantUse = true; 381 continue; 382 } 383 384 if (MO.isImplicit()) { 385 ++PhysRegDefs[Reg]; 386 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 387 ++PhysRegDefs[*AS]; 388 if (!MO.isDead()) 389 // Non-dead implicit def? This cannot be hoisted. 390 RuledOut = true; 391 // No need to check if a dead implicit def is also defined by 392 // another instruction. 393 continue; 394 } 395 396 // FIXME: For now, avoid instructions with multiple defs, unless 397 // it's a dead implicit def. 398 if (Def) 399 RuledOut = true; 400 else 401 Def = Reg; 402 403 // If we have already seen another instruction that defines the same 404 // register, then this is not safe. 405 if (++PhysRegDefs[Reg] > 1) 406 // MI defined register is seen defined by another instruction in 407 // the loop, it cannot be a LICM candidate. 408 RuledOut = true; 409 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 410 if (++PhysRegDefs[*AS] > 1) 411 RuledOut = true; 412 } 413 414 // Only consider reloads for now and remats which do not have register 415 // operands. FIXME: Consider unfold load folding instructions. 416 if (Def && !RuledOut) { 417 int FI = INT_MIN; 418 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || 419 (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI))) 420 Candidates.push_back(CandidateInfo(MI, Def, FI)); 421 } 422} 423 424/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 425/// invariants out to the preheader. 426void MachineLICM::HoistRegionPostRA() { 427 unsigned NumRegs = TRI->getNumRegs(); 428 unsigned *PhysRegDefs = new unsigned[NumRegs]; 429 std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0); 430 431 SmallVector<CandidateInfo, 32> Candidates; 432 SmallSet<int, 32> StoredFIs; 433 434 // Walk the entire region, count number of defs for each register, and 435 // collect potential LICM candidates. 436 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 437 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 438 MachineBasicBlock *BB = Blocks[i]; 439 // Conservatively treat live-in's as an external def. 440 // FIXME: That means a reload that're reused in successor block(s) will not 441 // be LICM'ed. 442 for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), 443 E = BB->livein_end(); I != E; ++I) { 444 unsigned Reg = *I; 445 ++PhysRegDefs[Reg]; 446 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 447 ++PhysRegDefs[*AS]; 448 } 449 450 for (MachineBasicBlock::iterator 451 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 452 MachineInstr *MI = &*MII; 453 ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates); 454 } 455 } 456 457 // Now evaluate whether the potential candidates qualify. 458 // 1. Check if the candidate defined register is defined by another 459 // instruction in the loop. 460 // 2. If the candidate is a load from stack slot (always true for now), 461 // check if the slot is stored anywhere in the loop. 462 for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { 463 if (Candidates[i].FI != INT_MIN && 464 StoredFIs.count(Candidates[i].FI)) 465 continue; 466 467 if (PhysRegDefs[Candidates[i].Def] == 1) { 468 bool Safe = true; 469 MachineInstr *MI = Candidates[i].MI; 470 for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { 471 const MachineOperand &MO = MI->getOperand(j); 472 if (!MO.isReg() || MO.isDef() || !MO.getReg()) 473 continue; 474 if (PhysRegDefs[MO.getReg()]) { 475 // If it's using a non-loop-invariant register, then it's obviously 476 // not safe to hoist. 477 Safe = false; 478 break; 479 } 480 } 481 if (Safe) 482 HoistPostRA(MI, Candidates[i].Def); 483 } 484 } 485 486 delete[] PhysRegDefs; 487} 488 489/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current 490/// loop, and make sure it is not killed by any instructions in the loop. 491void MachineLICM::AddToLiveIns(unsigned Reg) { 492 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 493 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 494 MachineBasicBlock *BB = Blocks[i]; 495 if (!BB->isLiveIn(Reg)) 496 BB->addLiveIn(Reg); 497 for (MachineBasicBlock::iterator 498 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 499 MachineInstr *MI = &*MII; 500 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 501 MachineOperand &MO = MI->getOperand(i); 502 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; 503 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) 504 MO.setIsKill(false); 505 } 506 } 507 } 508} 509 510/// HoistPostRA - When an instruction is found to only use loop invariant 511/// operands that is safe to hoist, this instruction is called to do the 512/// dirty work. 513void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { 514 MachineBasicBlock *Preheader = getCurPreheader(); 515 if (!Preheader) return; 516 517 // Now move the instructions to the predecessor, inserting it before any 518 // terminator instructions. 519 DEBUG({ 520 dbgs() << "Hoisting " << *MI; 521 if (Preheader->getBasicBlock()) 522 dbgs() << " to MachineBasicBlock " 523 << Preheader->getName(); 524 if (MI->getParent()->getBasicBlock()) 525 dbgs() << " from MachineBasicBlock " 526 << MI->getParent()->getName(); 527 dbgs() << "\n"; 528 }); 529 530 // Splice the instruction to the preheader. 531 MachineBasicBlock *MBB = MI->getParent(); 532 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); 533 534 // Add register to livein list to all the BBs in the current loop since a 535 // loop invariant must be kept live throughout the whole loop. This is 536 // important to ensure later passes do not scavenge the def register. 537 AddToLiveIns(Def); 538 539 ++NumPostRAHoisted; 540 Changed = true; 541} 542 543/// HoistRegion - Walk the specified region of the CFG (defined by all blocks 544/// dominated by the specified block, and that are in the current loop) in depth 545/// first order w.r.t the DominatorTree. This allows us to visit definitions 546/// before uses, allowing us to hoist a loop body in one pass without iteration. 547/// 548void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { 549 assert(N != 0 && "Null dominator tree node?"); 550 MachineBasicBlock *BB = N->getBlock(); 551 552 // If this subregion is not in the top level loop at all, exit. 553 if (!CurLoop->contains(BB)) return; 554 555 MachineBasicBlock *Preheader = getCurPreheader(); 556 if (!Preheader) 557 return; 558 559 if (TrackRegPressure) { 560 if (IsHeader) { 561 // Compute registers which are liveout of preheader. 562 RegSeen.clear(); 563 BackTrace.clear(); 564 InitRegPressure(Preheader); 565 } 566 567 // Remember livein register pressure. 568 BackTrace.push_back(RegPressure); 569 } 570 571 for (MachineBasicBlock::iterator 572 MII = BB->begin(), E = BB->end(); MII != E; ) { 573 MachineBasicBlock::iterator NextMII = MII; ++NextMII; 574 MachineInstr *MI = &*MII; 575 576 if (TrackRegPressure) 577 UpdateRegPressureBefore(MI); 578 Hoist(MI, Preheader); 579 if (TrackRegPressure) 580 UpdateRegPressureAfter(MI); 581 582 MII = NextMII; 583 } 584 585 // Don't hoist things out of a large switch statement. This often causes 586 // code to be hoisted that wasn't going to be executed, and increases 587 // register pressure in a situation where it's likely to matter. 588 if (BB->succ_size() < 25) { 589 const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); 590 for (unsigned I = 0, E = Children.size(); I != E; ++I) 591 HoistRegion(Children[I]); 592 } 593 594 if (TrackRegPressure) 595 BackTrace.pop_back(); 596} 597 598/// InitRegPressure - Find all virtual register references that are liveout of 599/// the preheader to initialize the starting "register pressure". Note this 600/// does not count live through (livein but not used) registers. 601void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { 602 std::fill(RegPressure.begin(), RegPressure.end(), 0); 603 604 for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); 605 MII != E; ++MII) { 606 MachineInstr *MI = &*MII; 607 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 608 const MachineOperand &MO = MI->getOperand(i); 609 if (!MO.isReg() || MO.isImplicit()) 610 continue; 611 unsigned Reg = MO.getReg(); 612 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 613 continue; 614 615 bool isNew = !RegSeen.insert(Reg); 616 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 617 EVT VT = *RC->vt_begin(); 618 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 619 if (MO.isDef()) 620 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 621 else { 622 if (isNew && !MO.isKill()) 623 // Haven't seen this, it must be a livein. 624 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 625 else if (!isNew && MO.isKill()) 626 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 627 } 628 } 629 } 630} 631 632/// UpdateRegPressureBefore / UpdateRegPressureAfter - Update estimate of 633/// register pressure before and after executing a specifi instruction. 634void MachineLICM::UpdateRegPressureBefore(const MachineInstr *MI) { 635 bool NoImpact = MI->isImplicitDef() || MI->isPHI(); 636 637 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 638 const MachineOperand &MO = MI->getOperand(i); 639 if (!MO.isReg() || MO.isImplicit() || !MO.isUse()) 640 continue; 641 unsigned Reg = MO.getReg(); 642 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 643 continue; 644 645 bool isNew = !RegSeen.insert(Reg); 646 if (NoImpact) 647 continue; 648 649 if (!isNew && MO.isKill()) { 650 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 651 EVT VT = *RC->vt_begin(); 652 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 653 unsigned RCCost = TLI->getRepRegClassCostFor(VT); 654 655 assert(RCCost <= RegPressure[RCId]); 656 RegPressure[RCId] -= RCCost; 657 } 658 } 659} 660 661void MachineLICM::UpdateRegPressureAfter(const MachineInstr *MI) { 662 bool NoImpact = MI->isImplicitDef() || MI->isPHI(); 663 664 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 665 const MachineOperand &MO = MI->getOperand(i); 666 if (!MO.isReg() || MO.isImplicit() || !MO.isDef()) 667 continue; 668 unsigned Reg = MO.getReg(); 669 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 670 continue; 671 672 RegSeen.insert(Reg); 673 if (NoImpact) 674 continue; 675 676 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 677 EVT VT = *RC->vt_begin(); 678 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 679 unsigned RCCost = TLI->getRepRegClassCostFor(VT); 680 RegPressure[RCId] += RCCost; 681 } 682} 683 684/// IsLICMCandidate - Returns true if the instruction may be a suitable 685/// candidate for LICM. e.g. If the instruction is a call, then it's obviously 686/// not safe to hoist it. 687bool MachineLICM::IsLICMCandidate(MachineInstr &I) { 688 // Check if it's safe to move the instruction. 689 bool DontMoveAcrossStore = true; 690 if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) 691 return false; 692 693 return true; 694} 695 696/// IsLoopInvariantInst - Returns true if the instruction is loop 697/// invariant. I.e., all virtual register operands are defined outside of the 698/// loop, physical registers aren't accessed explicitly, and there are no side 699/// effects that aren't captured by the operands or other flags. 700/// 701bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { 702 if (!IsLICMCandidate(I)) 703 return false; 704 705 // The instruction is loop invariant if all of its operands are. 706 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { 707 const MachineOperand &MO = I.getOperand(i); 708 709 if (!MO.isReg()) 710 continue; 711 712 unsigned Reg = MO.getReg(); 713 if (Reg == 0) continue; 714 715 // Don't hoist an instruction that uses or defines a physical register. 716 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 717 if (MO.isUse()) { 718 // If the physreg has no defs anywhere, it's just an ambient register 719 // and we can freely move its uses. Alternatively, if it's allocatable, 720 // it could get allocated to something with a def during allocation. 721 if (!MRI->def_empty(Reg)) 722 return false; 723 if (AllocatableSet.test(Reg)) 724 return false; 725 // Check for a def among the register's aliases too. 726 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { 727 unsigned AliasReg = *Alias; 728 if (!MRI->def_empty(AliasReg)) 729 return false; 730 if (AllocatableSet.test(AliasReg)) 731 return false; 732 } 733 // Otherwise it's safe to move. 734 continue; 735 } else if (!MO.isDead()) { 736 // A def that isn't dead. We can't move it. 737 return false; 738 } else if (CurLoop->getHeader()->isLiveIn(Reg)) { 739 // If the reg is live into the loop, we can't hoist an instruction 740 // which would clobber it. 741 return false; 742 } 743 } 744 745 if (!MO.isUse()) 746 continue; 747 748 assert(MRI->getVRegDef(Reg) && 749 "Machine instr not mapped for this vreg?!"); 750 751 // If the loop contains the definition of an operand, then the instruction 752 // isn't loop invariant. 753 if (CurLoop->contains(MRI->getVRegDef(Reg))) 754 return false; 755 } 756 757 // If we got this far, the instruction is loop invariant! 758 return true; 759} 760 761 762/// HasPHIUses - Return true if the specified register has any PHI use. 763static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) { 764 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), 765 UE = MRI->use_end(); UI != UE; ++UI) { 766 MachineInstr *UseMI = &*UI; 767 if (UseMI->isPHI()) 768 return true; 769 } 770 return false; 771} 772 773/// isLoadFromConstantMemory - Return true if the given instruction is a 774/// load from constant memory. Machine LICM will hoist these even if they are 775/// not re-materializable. 776bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { 777 if (!MI->getDesc().mayLoad()) return false; 778 if (!MI->hasOneMemOperand()) return false; 779 MachineMemOperand *MMO = *MI->memoperands_begin(); 780 if (MMO->isVolatile()) return false; 781 if (!MMO->getValue()) return false; 782 const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue()); 783 if (PSV) { 784 MachineFunction &MF = *MI->getParent()->getParent(); 785 return PSV->isConstant(MF.getFrameInfo()); 786 } else { 787 return AA->pointsToConstantMemory(MMO->getValue()); 788 } 789} 790 791/// ComputeOperandLatency - Compute operand latency between a def of 'Reg' 792/// and an use in the current loop. 793int MachineLICM::ComputeOperandLatency(MachineInstr &MI, 794 unsigned DefIdx, unsigned Reg) { 795 if (MRI->use_nodbg_empty(Reg)) 796 // No use? Return arbitrary large number! 797 return 300; 798 799 int Latency = -1; 800 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), 801 E = MRI->use_nodbg_end(); I != E; ++I) { 802 MachineInstr *UseMI = &*I; 803 if (!CurLoop->contains(UseMI->getParent())) 804 continue; 805 for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { 806 const MachineOperand &MO = UseMI->getOperand(i); 807 if (!MO.isReg() || !MO.isUse()) 808 continue; 809 unsigned MOReg = MO.getReg(); 810 if (MOReg != Reg) 811 continue; 812 813 int UseCycle = TII->getOperandLatency(InstrItins, &MI, DefIdx, UseMI, i); 814 Latency = std::max(Latency, UseCycle); 815 } 816 817 if (Latency != -1) 818 break; 819 } 820 821 if (Latency == -1) 822 Latency = InstrItins->getOperandCycle(MI.getDesc().getSchedClass(), DefIdx); 823 824 return Latency; 825} 826 827/// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check 828/// if hoisting an instruction of the given cost matrix can cause high 829/// register pressure. 830bool MachineLICM::IncreaseHighRegPressure(DenseMap<unsigned, int> &Cost) { 831 for (unsigned i = BackTrace.size(); i != 0; --i) { 832 bool AnyIncrease = false; 833 SmallVector<unsigned, 8> &RP = BackTrace[i-1]; 834 for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); 835 CI != CE; ++CI) { 836 if (CI->second <= 0) 837 continue; 838 AnyIncrease = true; 839 unsigned RCId = CI->first; 840 if (RP[RCId] + CI->second >= RegLimit[RCId]) 841 return true; 842 } 843 844 if (!AnyIncrease) 845 // Hoisting the instruction doesn't increase register pressure. 846 return false; 847 } 848 849 return false; 850} 851 852/// IsProfitableToHoist - Return true if it is potentially profitable to hoist 853/// the given loop invariant. 854bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { 855 if (MI.isImplicitDef()) 856 return true; 857 858 // FIXME: For now, only hoist re-materilizable instructions. LICM will 859 // increase register pressure. We want to make sure it doesn't increase 860 // spilling. 861 // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting 862 // these tend to help performance in low register pressure situation. The 863 // trade off is it may cause spill in high pressure situation. It will end up 864 // adding a store in the loop preheader. But the reload is no more expensive. 865 // The side benefit is these loads are frequently CSE'ed. 866 if (!TrackRegPressure || MI.getDesc().isAsCheapAsAMove()) { 867 if (!TII->isTriviallyReMaterializable(&MI, AA) && 868 !isLoadFromConstantMemory(&MI)) 869 return false; 870 } else { 871 // In low register pressure situation, we can be more aggressive about 872 // hoisting. Also, favors hoisting long latency instructions even in 873 // moderately high pressure situation. 874 DenseMap<unsigned, int> Cost; 875 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { 876 const MachineOperand &MO = MI.getOperand(i); 877 if (!MO.isReg() || MO.isImplicit()) 878 continue; 879 unsigned Reg = MO.getReg(); 880 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 881 continue; 882 if (MO.isDef()) { 883 if (InstrItins && !InstrItins->isEmpty()) { 884 int Cycle = ComputeOperandLatency(MI, i, Reg); 885 if (Cycle > 3) { 886 // FIXME: Target specific high latency limit? 887 ++NumHighLatency; 888 return true; 889 } 890 } 891 892 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 893 EVT VT = *RC->vt_begin(); 894 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 895 unsigned RCCost = TLI->getRepRegClassCostFor(VT); 896 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 897 // If the instruction is not register pressure neutrail (or better), 898 // check if hoisting it will cause high register pressure in BB's 899 // leading up to this point. 900 if (CI != Cost.end()) 901 CI->second += RCCost; 902 else 903 Cost.insert(std::make_pair(RCId, RCCost)); 904 } else if (MO.isKill()) { 905 // Is a virtual register use is a kill, hoisting it out of the loop 906 // may actually reduce register pressure or be register pressure 907 // neutral 908 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 909 EVT VT = *RC->vt_begin(); 910 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 911 unsigned RCCost = TLI->getRepRegClassCostFor(VT); 912 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 913 if (CI != Cost.end()) 914 CI->second -= RCCost; 915 else 916 Cost.insert(std::make_pair(RCId, -RCCost)); 917 } 918 } 919 920 // Visit BBs from preheader to current BB, if hoisting this doesn't cause 921 // high register pressure, then it's safe to proceed. 922 if (!IncreaseHighRegPressure(Cost)) { 923 ++NumLowRP; 924 return true; 925 } 926 927 // High register pressure situation, only hoist if the instruction is going to 928 // be remat'ed. 929 if (!TII->isTriviallyReMaterializable(&MI, AA) && 930 !isLoadFromConstantMemory(&MI)) 931 return false; 932 } 933 934 // If result(s) of this instruction is used by PHIs, then don't hoist it. 935 // The presence of joins makes it difficult for current register allocator 936 // implementation to perform remat. 937 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 938 const MachineOperand &MO = MI.getOperand(i); 939 if (!MO.isReg() || !MO.isDef()) 940 continue; 941 if (HasPHIUses(MO.getReg(), MRI)) 942 return false; 943 } 944 945 return true; 946} 947 948MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { 949 // Don't unfold simple loads. 950 if (MI->getDesc().canFoldAsLoad()) 951 return 0; 952 953 // If not, we may be able to unfold a load and hoist that. 954 // First test whether the instruction is loading from an amenable 955 // memory location. 956 if (!isLoadFromConstantMemory(MI)) 957 return 0; 958 959 // Next determine the register class for a temporary register. 960 unsigned LoadRegIndex; 961 unsigned NewOpc = 962 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), 963 /*UnfoldLoad=*/true, 964 /*UnfoldStore=*/false, 965 &LoadRegIndex); 966 if (NewOpc == 0) return 0; 967 const TargetInstrDesc &TID = TII->get(NewOpc); 968 if (TID.getNumDefs() != 1) return 0; 969 const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI); 970 // Ok, we're unfolding. Create a temporary register and do the unfold. 971 unsigned Reg = MRI->createVirtualRegister(RC); 972 973 MachineFunction &MF = *MI->getParent()->getParent(); 974 SmallVector<MachineInstr *, 2> NewMIs; 975 bool Success = 976 TII->unfoldMemoryOperand(MF, MI, Reg, 977 /*UnfoldLoad=*/true, /*UnfoldStore=*/false, 978 NewMIs); 979 (void)Success; 980 assert(Success && 981 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " 982 "succeeded!"); 983 assert(NewMIs.size() == 2 && 984 "Unfolded a load into multiple instructions!"); 985 MachineBasicBlock *MBB = MI->getParent(); 986 MBB->insert(MI, NewMIs[0]); 987 MBB->insert(MI, NewMIs[1]); 988 // If unfolding produced a load that wasn't loop-invariant or profitable to 989 // hoist, discard the new instructions and bail. 990 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { 991 NewMIs[0]->eraseFromParent(); 992 NewMIs[1]->eraseFromParent(); 993 return 0; 994 } 995 // Otherwise we successfully unfolded a load that we can hoist. 996 MI->eraseFromParent(); 997 return NewMIs[0]; 998} 999 1000void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { 1001 for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { 1002 const MachineInstr *MI = &*I; 1003 // FIXME: For now, only hoist re-materilizable instructions. LICM will 1004 // increase register pressure. We want to make sure it doesn't increase 1005 // spilling. 1006 if (TII->isTriviallyReMaterializable(MI, AA)) { 1007 unsigned Opcode = MI->getOpcode(); 1008 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 1009 CI = CSEMap.find(Opcode); 1010 if (CI != CSEMap.end()) 1011 CI->second.push_back(MI); 1012 else { 1013 std::vector<const MachineInstr*> CSEMIs; 1014 CSEMIs.push_back(MI); 1015 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 1016 } 1017 } 1018 } 1019} 1020 1021const MachineInstr* 1022MachineLICM::LookForDuplicate(const MachineInstr *MI, 1023 std::vector<const MachineInstr*> &PrevMIs) { 1024 for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { 1025 const MachineInstr *PrevMI = PrevMIs[i]; 1026 if (TII->produceSameValue(MI, PrevMI)) 1027 return PrevMI; 1028 } 1029 return 0; 1030} 1031 1032bool MachineLICM::EliminateCSE(MachineInstr *MI, 1033 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { 1034 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 1035 // the undef property onto uses. 1036 if (CI == CSEMap.end() || MI->isImplicitDef()) 1037 return false; 1038 1039 if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { 1040 DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); 1041 1042 // Replace virtual registers defined by MI by their counterparts defined 1043 // by Dup. 1044 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1045 const MachineOperand &MO = MI->getOperand(i); 1046 1047 // Physical registers may not differ here. 1048 assert((!MO.isReg() || MO.getReg() == 0 || 1049 !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || 1050 MO.getReg() == Dup->getOperand(i).getReg()) && 1051 "Instructions with different phys regs are not identical!"); 1052 1053 if (MO.isReg() && MO.isDef() && 1054 !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 1055 MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); 1056 MRI->clearKillFlags(Dup->getOperand(i).getReg()); 1057 } 1058 } 1059 MI->eraseFromParent(); 1060 ++NumCSEed; 1061 return true; 1062 } 1063 return false; 1064} 1065 1066/// Hoist - When an instruction is found to use only loop invariant operands 1067/// that are safe to hoist, this instruction is called to do the dirty work. 1068/// 1069void MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { 1070 // First check whether we should hoist this instruction. 1071 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { 1072 // If not, try unfolding a hoistable load. 1073 MI = ExtractHoistableLoad(MI); 1074 if (!MI) return; 1075 } 1076 1077 // Now move the instructions to the predecessor, inserting it before any 1078 // terminator instructions. 1079 DEBUG({ 1080 dbgs() << "Hoisting " << *MI; 1081 if (Preheader->getBasicBlock()) 1082 dbgs() << " to MachineBasicBlock " 1083 << Preheader->getName(); 1084 if (MI->getParent()->getBasicBlock()) 1085 dbgs() << " from MachineBasicBlock " 1086 << MI->getParent()->getName(); 1087 dbgs() << "\n"; 1088 }); 1089 1090 // If this is the first instruction being hoisted to the preheader, 1091 // initialize the CSE map with potential common expressions. 1092 if (FirstInLoop) { 1093 InitCSEMap(Preheader); 1094 FirstInLoop = false; 1095 } 1096 1097 // Look for opportunity to CSE the hoisted instruction. 1098 unsigned Opcode = MI->getOpcode(); 1099 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 1100 CI = CSEMap.find(Opcode); 1101 if (!EliminateCSE(MI, CI)) { 1102 // Otherwise, splice the instruction to the preheader. 1103 Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); 1104 1105 // Clear the kill flags of any register this instruction defines, 1106 // since they may need to be live throughout the entire loop 1107 // rather than just live for part of it. 1108 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1109 MachineOperand &MO = MI->getOperand(i); 1110 if (MO.isReg() && MO.isDef() && !MO.isDead()) 1111 MRI->clearKillFlags(MO.getReg()); 1112 } 1113 1114 // Add to the CSE map. 1115 if (CI != CSEMap.end()) 1116 CI->second.push_back(MI); 1117 else { 1118 std::vector<const MachineInstr*> CSEMIs; 1119 CSEMIs.push_back(MI); 1120 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 1121 } 1122 } 1123 1124 ++NumHoisted; 1125 Changed = true; 1126} 1127 1128MachineBasicBlock *MachineLICM::getCurPreheader() { 1129 // Determine the block to which to hoist instructions. If we can't find a 1130 // suitable loop predecessor, we can't do any hoisting. 1131 1132 // If we've tried to get a preheader and failed, don't try again. 1133 if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) 1134 return 0; 1135 1136 if (!CurPreheader) { 1137 CurPreheader = CurLoop->getLoopPreheader(); 1138 if (!CurPreheader) { 1139 MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); 1140 if (!Pred) { 1141 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1142 return 0; 1143 } 1144 1145 CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); 1146 if (!CurPreheader) { 1147 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1148 return 0; 1149 } 1150 } 1151 } 1152 return CurPreheader; 1153} 1154