MachineLICM.cpp revision ca4762cf48c713cfeda2ca8262853590c25e2a1d
1//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs loop invariant code motion on machine instructions. We 11// attempt to remove as much code from the body of a loop as possible. 12// 13// This pass does not attempt to throttle itself to limit register pressure. 14// The register allocation phases are expected to perform rematerialization 15// to recover when register pressure is high. 16// 17// This pass is not intended to be a replacement or a complete alternative 18// for the LLVM-IR-level LICM pass. It is only designed to hoist simple 19// constructs that are not exposed before lowering and instruction selection. 20// 21//===----------------------------------------------------------------------===// 22 23#define DEBUG_TYPE "machine-licm" 24#include "llvm/CodeGen/Passes.h" 25#include "llvm/CodeGen/MachineDominators.h" 26#include "llvm/CodeGen/MachineFrameInfo.h" 27#include "llvm/CodeGen/MachineLoopInfo.h" 28#include "llvm/CodeGen/MachineMemOperand.h" 29#include "llvm/CodeGen/MachineRegisterInfo.h" 30#include "llvm/CodeGen/PseudoSourceValue.h" 31#include "llvm/Target/TargetLowering.h" 32#include "llvm/Target/TargetRegisterInfo.h" 33#include "llvm/Target/TargetInstrInfo.h" 34#include "llvm/Target/TargetInstrItineraries.h" 35#include "llvm/Target/TargetMachine.h" 36#include "llvm/Analysis/AliasAnalysis.h" 37#include "llvm/ADT/DenseMap.h" 38#include "llvm/ADT/SmallSet.h" 39#include "llvm/ADT/Statistic.h" 40#include "llvm/Support/CommandLine.h" 41#include "llvm/Support/Debug.h" 42#include "llvm/Support/raw_ostream.h" 43 44using namespace llvm; 45 46STATISTIC(NumHoisted, 47 "Number of machine instructions hoisted out of loops"); 48STATISTIC(NumLowRP, 49 "Number of instructions hoisted in low reg pressure situation"); 50STATISTIC(NumHighLatency, 51 "Number of high latency instructions hoisted"); 52STATISTIC(NumCSEed, 53 "Number of hoisted machine instructions CSEed"); 54STATISTIC(NumPostRAHoisted, 55 "Number of machine instructions hoisted out of loops post regalloc"); 56 57namespace { 58 class MachineLICM : public MachineFunctionPass { 59 bool PreRegAlloc; 60 61 const TargetMachine *TM; 62 const TargetInstrInfo *TII; 63 const TargetLowering *TLI; 64 const TargetRegisterInfo *TRI; 65 const MachineFrameInfo *MFI; 66 MachineRegisterInfo *MRI; 67 const InstrItineraryData *InstrItins; 68 69 // Various analyses that we use... 70 AliasAnalysis *AA; // Alias analysis info. 71 MachineLoopInfo *MLI; // Current MachineLoopInfo 72 MachineDominatorTree *DT; // Machine dominator tree for the cur loop 73 74 // State that is updated as we process loops 75 bool Changed; // True if a loop is changed. 76 bool FirstInLoop; // True if it's the first LICM in the loop. 77 MachineLoop *CurLoop; // The current loop we are working on. 78 MachineBasicBlock *CurPreheader; // The preheader for CurLoop. 79 80 BitVector AllocatableSet; 81 82 // Track 'estimated' register pressure. 83 SmallSet<unsigned, 32> RegSeen; 84 SmallVector<unsigned, 8> RegPressure; 85 86 // Register pressure "limit" per register class. If the pressure 87 // is higher than the limit, then it's considered high. 88 SmallVector<unsigned, 8> RegLimit; 89 90 // Register pressure on path leading from loop preheader to current BB. 91 SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; 92 93 // For each opcode, keep a list of potential CSE instructions. 94 DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; 95 96 public: 97 static char ID; // Pass identification, replacement for typeid 98 MachineLICM() : 99 MachineFunctionPass(ID), PreRegAlloc(true) {} 100 101 explicit MachineLICM(bool PreRA) : 102 MachineFunctionPass(ID), PreRegAlloc(PreRA) {} 103 104 virtual bool runOnMachineFunction(MachineFunction &MF); 105 106 const char *getPassName() const { return "Machine Instruction LICM"; } 107 108 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 109 AU.setPreservesCFG(); 110 AU.addRequired<MachineLoopInfo>(); 111 AU.addRequired<MachineDominatorTree>(); 112 AU.addRequired<AliasAnalysis>(); 113 AU.addPreserved<MachineLoopInfo>(); 114 AU.addPreserved<MachineDominatorTree>(); 115 MachineFunctionPass::getAnalysisUsage(AU); 116 } 117 118 virtual void releaseMemory() { 119 RegSeen.clear(); 120 RegPressure.clear(); 121 RegLimit.clear(); 122 BackTrace.clear(); 123 for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator 124 CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI) 125 CI->second.clear(); 126 CSEMap.clear(); 127 } 128 129 private: 130 /// CandidateInfo - Keep track of information about hoisting candidates. 131 struct CandidateInfo { 132 MachineInstr *MI; 133 unsigned Def; 134 int FI; 135 CandidateInfo(MachineInstr *mi, unsigned def, int fi) 136 : MI(mi), Def(def), FI(fi) {} 137 }; 138 139 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 140 /// invariants out to the preheader. 141 void HoistRegionPostRA(); 142 143 /// HoistPostRA - When an instruction is found to only use loop invariant 144 /// operands that is safe to hoist, this instruction is called to do the 145 /// dirty work. 146 void HoistPostRA(MachineInstr *MI, unsigned Def); 147 148 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 149 /// gather register def and frame object update information. 150 void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs, 151 SmallSet<int, 32> &StoredFIs, 152 SmallVector<CandidateInfo, 32> &Candidates); 153 154 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the 155 /// current loop. 156 void AddToLiveIns(unsigned Reg); 157 158 /// IsLICMCandidate - Returns true if the instruction may be a suitable 159 /// candidate for LICM. e.g. If the instruction is a call, then it's 160 /// obviously not safe to hoist it. 161 bool IsLICMCandidate(MachineInstr &I); 162 163 /// IsLoopInvariantInst - Returns true if the instruction is loop 164 /// invariant. I.e., all virtual register operands are defined outside of 165 /// the loop, physical registers aren't accessed (explicitly or implicitly), 166 /// and the instruction is hoistable. 167 /// 168 bool IsLoopInvariantInst(MachineInstr &I); 169 170 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 171 /// and an use in the current loop, return true if the target considered 172 /// it 'high'. 173 bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg); 174 175 /// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check 176 /// if hoisting an instruction of the given cost matrix can cause high 177 /// register pressure. 178 bool IncreaseHighRegPressure(DenseMap<unsigned, int> &Cost); 179 180 /// IsProfitableToHoist - Return true if it is potentially profitable to 181 /// hoist the given loop invariant. 182 bool IsProfitableToHoist(MachineInstr &MI); 183 184 /// HoistRegion - Walk the specified region of the CFG (defined by all 185 /// blocks dominated by the specified block, and that are in the current 186 /// loop) in depth first order w.r.t the DominatorTree. This allows us to 187 /// visit definitions before uses, allowing us to hoist a loop body in one 188 /// pass without iteration. 189 /// 190 void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false); 191 192 /// InitRegPressure - Find all virtual register references that are liveout 193 /// of the preheader to initialize the starting "register pressure". Note 194 /// this does not count live through (livein but not used) registers. 195 void InitRegPressure(MachineBasicBlock *BB); 196 197 /// UpdateRegPressureBefore / UpdateRegPressureAfter - Update estimate of 198 /// register pressure before and after executing a specifi instruction. 199 void UpdateRegPressureBefore(const MachineInstr *MI); 200 void UpdateRegPressureAfter(const MachineInstr *MI); 201 202 /// isLoadFromConstantMemory - Return true if the given instruction is a 203 /// load from constant memory. 204 bool isLoadFromConstantMemory(MachineInstr *MI); 205 206 /// ExtractHoistableLoad - Unfold a load from the given machineinstr if 207 /// the load itself could be hoisted. Return the unfolded and hoistable 208 /// load, or null if the load couldn't be unfolded or if it wouldn't 209 /// be hoistable. 210 MachineInstr *ExtractHoistableLoad(MachineInstr *MI); 211 212 /// LookForDuplicate - Find an instruction amount PrevMIs that is a 213 /// duplicate of MI. Return this instruction if it's found. 214 const MachineInstr *LookForDuplicate(const MachineInstr *MI, 215 std::vector<const MachineInstr*> &PrevMIs); 216 217 /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on 218 /// the preheader that compute the same value. If it's found, do a RAU on 219 /// with the definition of the existing instruction rather than hoisting 220 /// the instruction to the preheader. 221 bool EliminateCSE(MachineInstr *MI, 222 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI); 223 224 /// Hoist - When an instruction is found to only use loop invariant operands 225 /// that is safe to hoist, this instruction is called to do the dirty work. 226 /// 227 void Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); 228 229 /// InitCSEMap - Initialize the CSE map with instructions that are in the 230 /// current loop preheader that may become duplicates of instructions that 231 /// are hoisted out of the loop. 232 void InitCSEMap(MachineBasicBlock *BB); 233 234 /// getCurPreheader - Get the preheader for the current loop, splitting 235 /// a critical edge if needed. 236 MachineBasicBlock *getCurPreheader(); 237 }; 238} // end anonymous namespace 239 240char MachineLICM::ID = 0; 241INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", 242 "Machine Loop Invariant Code Motion", false, false) 243INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 244INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 245INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 246INITIALIZE_PASS_END(MachineLICM, "machinelicm", 247 "Machine Loop Invariant Code Motion", false, false) 248 249FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { 250 return new MachineLICM(PreRegAlloc); 251} 252 253/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most 254/// loop that has a unique predecessor. 255static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { 256 // Check whether this loop even has a unique predecessor. 257 if (!CurLoop->getLoopPredecessor()) 258 return false; 259 // Ok, now check to see if any of its outer loops do. 260 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) 261 if (L->getLoopPredecessor()) 262 return false; 263 // None of them did, so this is the outermost with a unique predecessor. 264 return true; 265} 266 267bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { 268 if (PreRegAlloc) 269 DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); 270 else 271 DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); 272 DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); 273 274 Changed = FirstInLoop = false; 275 TM = &MF.getTarget(); 276 TII = TM->getInstrInfo(); 277 TLI = TM->getTargetLowering(); 278 TRI = TM->getRegisterInfo(); 279 MFI = MF.getFrameInfo(); 280 MRI = &MF.getRegInfo(); 281 InstrItins = TM->getInstrItineraryData(); 282 AllocatableSet = TRI->getAllocatableSet(MF); 283 284 if (PreRegAlloc) { 285 // Estimate register pressure during pre-regalloc pass. 286 unsigned NumRC = TRI->getNumRegClasses(); 287 RegPressure.resize(NumRC); 288 std::fill(RegPressure.begin(), RegPressure.end(), 0); 289 RegLimit.resize(NumRC); 290 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 291 E = TRI->regclass_end(); I != E; ++I) 292 RegLimit[(*I)->getID()] = TLI->getRegPressureLimit(*I, MF); 293 } 294 295 // Get our Loop information... 296 MLI = &getAnalysis<MachineLoopInfo>(); 297 DT = &getAnalysis<MachineDominatorTree>(); 298 AA = &getAnalysis<AliasAnalysis>(); 299 300 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); 301 while (!Worklist.empty()) { 302 CurLoop = Worklist.pop_back_val(); 303 CurPreheader = 0; 304 305 // If this is done before regalloc, only visit outer-most preheader-sporting 306 // loops. 307 if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { 308 Worklist.append(CurLoop->begin(), CurLoop->end()); 309 continue; 310 } 311 312 if (!PreRegAlloc) 313 HoistRegionPostRA(); 314 else { 315 // CSEMap is initialized for loop header when the first instruction is 316 // being hoisted. 317 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); 318 FirstInLoop = true; 319 HoistRegion(N, true); 320 CSEMap.clear(); 321 } 322 } 323 324 return Changed; 325} 326 327/// InstructionStoresToFI - Return true if instruction stores to the 328/// specified frame. 329static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { 330 for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 331 oe = MI->memoperands_end(); o != oe; ++o) { 332 if (!(*o)->isStore() || !(*o)->getValue()) 333 continue; 334 if (const FixedStackPseudoSourceValue *Value = 335 dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 336 if (Value->getFrameIndex() == FI) 337 return true; 338 } 339 } 340 return false; 341} 342 343/// ProcessMI - Examine the instruction for potentai LICM candidate. Also 344/// gather register def and frame object update information. 345void MachineLICM::ProcessMI(MachineInstr *MI, 346 unsigned *PhysRegDefs, 347 SmallSet<int, 32> &StoredFIs, 348 SmallVector<CandidateInfo, 32> &Candidates) { 349 bool RuledOut = false; 350 bool HasNonInvariantUse = false; 351 unsigned Def = 0; 352 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 353 const MachineOperand &MO = MI->getOperand(i); 354 if (MO.isFI()) { 355 // Remember if the instruction stores to the frame index. 356 int FI = MO.getIndex(); 357 if (!StoredFIs.count(FI) && 358 MFI->isSpillSlotObjectIndex(FI) && 359 InstructionStoresToFI(MI, FI)) 360 StoredFIs.insert(FI); 361 HasNonInvariantUse = true; 362 continue; 363 } 364 365 if (!MO.isReg()) 366 continue; 367 unsigned Reg = MO.getReg(); 368 if (!Reg) 369 continue; 370 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 371 "Not expecting virtual register!"); 372 373 if (!MO.isDef()) { 374 if (Reg && PhysRegDefs[Reg]) 375 // If it's using a non-loop-invariant register, then it's obviously not 376 // safe to hoist. 377 HasNonInvariantUse = true; 378 continue; 379 } 380 381 if (MO.isImplicit()) { 382 ++PhysRegDefs[Reg]; 383 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 384 ++PhysRegDefs[*AS]; 385 if (!MO.isDead()) 386 // Non-dead implicit def? This cannot be hoisted. 387 RuledOut = true; 388 // No need to check if a dead implicit def is also defined by 389 // another instruction. 390 continue; 391 } 392 393 // FIXME: For now, avoid instructions with multiple defs, unless 394 // it's a dead implicit def. 395 if (Def) 396 RuledOut = true; 397 else 398 Def = Reg; 399 400 // If we have already seen another instruction that defines the same 401 // register, then this is not safe. 402 if (++PhysRegDefs[Reg] > 1) 403 // MI defined register is seen defined by another instruction in 404 // the loop, it cannot be a LICM candidate. 405 RuledOut = true; 406 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 407 if (++PhysRegDefs[*AS] > 1) 408 RuledOut = true; 409 } 410 411 // Only consider reloads for now and remats which do not have register 412 // operands. FIXME: Consider unfold load folding instructions. 413 if (Def && !RuledOut) { 414 int FI = INT_MIN; 415 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || 416 (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI))) 417 Candidates.push_back(CandidateInfo(MI, Def, FI)); 418 } 419} 420 421/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 422/// invariants out to the preheader. 423void MachineLICM::HoistRegionPostRA() { 424 unsigned NumRegs = TRI->getNumRegs(); 425 unsigned *PhysRegDefs = new unsigned[NumRegs]; 426 std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0); 427 428 SmallVector<CandidateInfo, 32> Candidates; 429 SmallSet<int, 32> StoredFIs; 430 431 // Walk the entire region, count number of defs for each register, and 432 // collect potential LICM candidates. 433 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 434 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 435 MachineBasicBlock *BB = Blocks[i]; 436 // Conservatively treat live-in's as an external def. 437 // FIXME: That means a reload that're reused in successor block(s) will not 438 // be LICM'ed. 439 for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), 440 E = BB->livein_end(); I != E; ++I) { 441 unsigned Reg = *I; 442 ++PhysRegDefs[Reg]; 443 for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) 444 ++PhysRegDefs[*AS]; 445 } 446 447 for (MachineBasicBlock::iterator 448 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 449 MachineInstr *MI = &*MII; 450 ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates); 451 } 452 } 453 454 // Now evaluate whether the potential candidates qualify. 455 // 1. Check if the candidate defined register is defined by another 456 // instruction in the loop. 457 // 2. If the candidate is a load from stack slot (always true for now), 458 // check if the slot is stored anywhere in the loop. 459 for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { 460 if (Candidates[i].FI != INT_MIN && 461 StoredFIs.count(Candidates[i].FI)) 462 continue; 463 464 if (PhysRegDefs[Candidates[i].Def] == 1) { 465 bool Safe = true; 466 MachineInstr *MI = Candidates[i].MI; 467 for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { 468 const MachineOperand &MO = MI->getOperand(j); 469 if (!MO.isReg() || MO.isDef() || !MO.getReg()) 470 continue; 471 if (PhysRegDefs[MO.getReg()]) { 472 // If it's using a non-loop-invariant register, then it's obviously 473 // not safe to hoist. 474 Safe = false; 475 break; 476 } 477 } 478 if (Safe) 479 HoistPostRA(MI, Candidates[i].Def); 480 } 481 } 482 483 delete[] PhysRegDefs; 484} 485 486/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current 487/// loop, and make sure it is not killed by any instructions in the loop. 488void MachineLICM::AddToLiveIns(unsigned Reg) { 489 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 490 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 491 MachineBasicBlock *BB = Blocks[i]; 492 if (!BB->isLiveIn(Reg)) 493 BB->addLiveIn(Reg); 494 for (MachineBasicBlock::iterator 495 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 496 MachineInstr *MI = &*MII; 497 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 498 MachineOperand &MO = MI->getOperand(i); 499 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; 500 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) 501 MO.setIsKill(false); 502 } 503 } 504 } 505} 506 507/// HoistPostRA - When an instruction is found to only use loop invariant 508/// operands that is safe to hoist, this instruction is called to do the 509/// dirty work. 510void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { 511 MachineBasicBlock *Preheader = getCurPreheader(); 512 if (!Preheader) return; 513 514 // Now move the instructions to the predecessor, inserting it before any 515 // terminator instructions. 516 DEBUG({ 517 dbgs() << "Hoisting " << *MI; 518 if (Preheader->getBasicBlock()) 519 dbgs() << " to MachineBasicBlock " 520 << Preheader->getName(); 521 if (MI->getParent()->getBasicBlock()) 522 dbgs() << " from MachineBasicBlock " 523 << MI->getParent()->getName(); 524 dbgs() << "\n"; 525 }); 526 527 // Splice the instruction to the preheader. 528 MachineBasicBlock *MBB = MI->getParent(); 529 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); 530 531 // Add register to livein list to all the BBs in the current loop since a 532 // loop invariant must be kept live throughout the whole loop. This is 533 // important to ensure later passes do not scavenge the def register. 534 AddToLiveIns(Def); 535 536 ++NumPostRAHoisted; 537 Changed = true; 538} 539 540/// HoistRegion - Walk the specified region of the CFG (defined by all blocks 541/// dominated by the specified block, and that are in the current loop) in depth 542/// first order w.r.t the DominatorTree. This allows us to visit definitions 543/// before uses, allowing us to hoist a loop body in one pass without iteration. 544/// 545void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { 546 assert(N != 0 && "Null dominator tree node?"); 547 MachineBasicBlock *BB = N->getBlock(); 548 549 // If this subregion is not in the top level loop at all, exit. 550 if (!CurLoop->contains(BB)) return; 551 552 MachineBasicBlock *Preheader = getCurPreheader(); 553 if (!Preheader) 554 return; 555 556 if (IsHeader) { 557 // Compute registers which are liveout of preheader. 558 RegSeen.clear(); 559 BackTrace.clear(); 560 InitRegPressure(Preheader); 561 } 562 563 // Remember livein register pressure. 564 BackTrace.push_back(RegPressure); 565 566 for (MachineBasicBlock::iterator 567 MII = BB->begin(), E = BB->end(); MII != E; ) { 568 MachineBasicBlock::iterator NextMII = MII; ++NextMII; 569 MachineInstr *MI = &*MII; 570 571 UpdateRegPressureBefore(MI); 572 Hoist(MI, Preheader); 573 UpdateRegPressureAfter(MI); 574 575 MII = NextMII; 576 } 577 578 // Don't hoist things out of a large switch statement. This often causes 579 // code to be hoisted that wasn't going to be executed, and increases 580 // register pressure in a situation where it's likely to matter. 581 if (BB->succ_size() < 25) { 582 const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); 583 for (unsigned I = 0, E = Children.size(); I != E; ++I) 584 HoistRegion(Children[I]); 585 } 586 587 BackTrace.pop_back(); 588} 589 590/// InitRegPressure - Find all virtual register references that are liveout of 591/// the preheader to initialize the starting "register pressure". Note this 592/// does not count live through (livein but not used) registers. 593void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { 594 std::fill(RegPressure.begin(), RegPressure.end(), 0); 595 596 for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); 597 MII != E; ++MII) { 598 MachineInstr *MI = &*MII; 599 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 600 const MachineOperand &MO = MI->getOperand(i); 601 if (!MO.isReg() || MO.isImplicit()) 602 continue; 603 unsigned Reg = MO.getReg(); 604 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 605 continue; 606 607 bool isNew = !RegSeen.insert(Reg); 608 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 609 EVT VT = *RC->vt_begin(); 610 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 611 if (MO.isDef()) 612 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 613 else { 614 if (isNew && !MO.isKill()) 615 // Haven't seen this, it must be a livein. 616 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 617 else if (!isNew && MO.isKill()) 618 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 619 } 620 } 621 } 622} 623 624/// UpdateRegPressureBefore / UpdateRegPressureAfter - Update estimate of 625/// register pressure before and after executing a specifi instruction. 626void MachineLICM::UpdateRegPressureBefore(const MachineInstr *MI) { 627 bool NoImpact = MI->isImplicitDef() || MI->isPHI(); 628 629 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 630 const MachineOperand &MO = MI->getOperand(i); 631 if (!MO.isReg() || MO.isImplicit() || !MO.isUse()) 632 continue; 633 unsigned Reg = MO.getReg(); 634 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 635 continue; 636 637 bool isNew = !RegSeen.insert(Reg); 638 if (NoImpact) 639 continue; 640 641 if (!isNew && MO.isKill()) { 642 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 643 EVT VT = *RC->vt_begin(); 644 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 645 unsigned RCCost = TLI->getRepRegClassCostFor(VT); 646 647 assert(RCCost <= RegPressure[RCId]); 648 RegPressure[RCId] -= RCCost; 649 } 650 } 651} 652 653void MachineLICM::UpdateRegPressureAfter(const MachineInstr *MI) { 654 bool NoImpact = MI->isImplicitDef() || MI->isPHI(); 655 656 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 657 const MachineOperand &MO = MI->getOperand(i); 658 if (!MO.isReg() || MO.isImplicit() || !MO.isDef()) 659 continue; 660 unsigned Reg = MO.getReg(); 661 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 662 continue; 663 664 RegSeen.insert(Reg); 665 if (NoImpact) 666 continue; 667 668 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 669 EVT VT = *RC->vt_begin(); 670 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 671 unsigned RCCost = TLI->getRepRegClassCostFor(VT); 672 RegPressure[RCId] += RCCost; 673 } 674} 675 676/// IsLICMCandidate - Returns true if the instruction may be a suitable 677/// candidate for LICM. e.g. If the instruction is a call, then it's obviously 678/// not safe to hoist it. 679bool MachineLICM::IsLICMCandidate(MachineInstr &I) { 680 // Check if it's safe to move the instruction. 681 bool DontMoveAcrossStore = true; 682 if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) 683 return false; 684 685 return true; 686} 687 688/// IsLoopInvariantInst - Returns true if the instruction is loop 689/// invariant. I.e., all virtual register operands are defined outside of the 690/// loop, physical registers aren't accessed explicitly, and there are no side 691/// effects that aren't captured by the operands or other flags. 692/// 693bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { 694 if (!IsLICMCandidate(I)) 695 return false; 696 697 // The instruction is loop invariant if all of its operands are. 698 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { 699 const MachineOperand &MO = I.getOperand(i); 700 701 if (!MO.isReg()) 702 continue; 703 704 unsigned Reg = MO.getReg(); 705 if (Reg == 0) continue; 706 707 // Don't hoist an instruction that uses or defines a physical register. 708 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 709 if (MO.isUse()) { 710 // If the physreg has no defs anywhere, it's just an ambient register 711 // and we can freely move its uses. Alternatively, if it's allocatable, 712 // it could get allocated to something with a def during allocation. 713 if (!MRI->def_empty(Reg)) 714 return false; 715 if (AllocatableSet.test(Reg)) 716 return false; 717 // Check for a def among the register's aliases too. 718 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { 719 unsigned AliasReg = *Alias; 720 if (!MRI->def_empty(AliasReg)) 721 return false; 722 if (AllocatableSet.test(AliasReg)) 723 return false; 724 } 725 // Otherwise it's safe to move. 726 continue; 727 } else if (!MO.isDead()) { 728 // A def that isn't dead. We can't move it. 729 return false; 730 } else if (CurLoop->getHeader()->isLiveIn(Reg)) { 731 // If the reg is live into the loop, we can't hoist an instruction 732 // which would clobber it. 733 return false; 734 } 735 } 736 737 if (!MO.isUse()) 738 continue; 739 740 assert(MRI->getVRegDef(Reg) && 741 "Machine instr not mapped for this vreg?!"); 742 743 // If the loop contains the definition of an operand, then the instruction 744 // isn't loop invariant. 745 if (CurLoop->contains(MRI->getVRegDef(Reg))) 746 return false; 747 } 748 749 // If we got this far, the instruction is loop invariant! 750 return true; 751} 752 753 754/// HasPHIUses - Return true if the specified register has any PHI use. 755static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) { 756 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), 757 UE = MRI->use_end(); UI != UE; ++UI) { 758 MachineInstr *UseMI = &*UI; 759 if (UseMI->isPHI()) 760 return true; 761 } 762 return false; 763} 764 765/// isLoadFromConstantMemory - Return true if the given instruction is a 766/// load from constant memory. Machine LICM will hoist these even if they are 767/// not re-materializable. 768bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { 769 if (!MI->getDesc().mayLoad()) return false; 770 if (!MI->hasOneMemOperand()) return false; 771 MachineMemOperand *MMO = *MI->memoperands_begin(); 772 if (MMO->isVolatile()) return false; 773 if (!MMO->getValue()) return false; 774 const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue()); 775 if (PSV) { 776 MachineFunction &MF = *MI->getParent()->getParent(); 777 return PSV->isConstant(MF.getFrameInfo()); 778 } else { 779 return AA->pointsToConstantMemory(MMO->getValue()); 780 } 781} 782 783/// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 784/// and an use in the current loop, return true if the target considered 785/// it 'high'. 786bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, 787 unsigned DefIdx, unsigned Reg) { 788 if (MRI->use_nodbg_empty(Reg)) 789 return false; 790 791 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), 792 E = MRI->use_nodbg_end(); I != E; ++I) { 793 MachineInstr *UseMI = &*I; 794 if (!CurLoop->contains(UseMI->getParent())) 795 continue; 796 for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { 797 const MachineOperand &MO = UseMI->getOperand(i); 798 if (!MO.isReg() || !MO.isUse()) 799 continue; 800 unsigned MOReg = MO.getReg(); 801 if (MOReg != Reg) 802 continue; 803 804 if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i)) 805 return true; 806 } 807 808 // Only look at the first in loop use. 809 break; 810 } 811 812 return false; 813} 814 815/// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check 816/// if hoisting an instruction of the given cost matrix can cause high 817/// register pressure. 818bool MachineLICM::IncreaseHighRegPressure(DenseMap<unsigned, int> &Cost) { 819 for (unsigned i = BackTrace.size(); i != 0; --i) { 820 bool AnyIncrease = false; 821 SmallVector<unsigned, 8> &RP = BackTrace[i-1]; 822 for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); 823 CI != CE; ++CI) { 824 if (CI->second <= 0) 825 continue; 826 AnyIncrease = true; 827 unsigned RCId = CI->first; 828 if (RP[RCId] + CI->second >= RegLimit[RCId]) 829 return true; 830 } 831 832 if (!AnyIncrease) 833 // Hoisting the instruction doesn't increase register pressure. 834 return false; 835 } 836 837 return false; 838} 839 840/// IsProfitableToHoist - Return true if it is potentially profitable to hoist 841/// the given loop invariant. 842bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { 843 if (MI.isImplicitDef()) 844 return true; 845 846 // If the instruction is cheap, only hoist if it is re-materilizable. LICM 847 // will increase register pressure. It's probably not worth it if the 848 // instruction is cheap. 849 // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting 850 // these tend to help performance in low register pressure situation. The 851 // trade off is it may cause spill in high pressure situation. It will end up 852 // adding a store in the loop preheader. But the reload is no more expensive. 853 // The side benefit is these loads are frequently CSE'ed. 854 if (MI.getDesc().isAsCheapAsAMove()) { 855 if (!TII->isTriviallyReMaterializable(&MI, AA)) 856 return false; 857 } else { 858 // Estimate register pressure to determine whether to LICM the instruction. 859 // In low register pressure situation, we can be more aggressive about 860 // hoisting. Also, favors hoisting long latency instructions even in 861 // moderately high pressure situation. 862 DenseMap<unsigned, int> Cost; 863 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { 864 const MachineOperand &MO = MI.getOperand(i); 865 if (!MO.isReg() || MO.isImplicit()) 866 continue; 867 unsigned Reg = MO.getReg(); 868 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 869 continue; 870 if (MO.isDef()) { 871 if (HasHighOperandLatency(MI, i, Reg)) { 872 ++NumHighLatency; 873 return true; 874 } 875 876 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 877 EVT VT = *RC->vt_begin(); 878 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 879 unsigned RCCost = TLI->getRepRegClassCostFor(VT); 880 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 881 // If the instruction is not register pressure neutrail (or better), 882 // check if hoisting it will cause high register pressure in BB's 883 // leading up to this point. 884 if (CI != Cost.end()) 885 CI->second += RCCost; 886 else 887 Cost.insert(std::make_pair(RCId, RCCost)); 888 } else if (MO.isKill()) { 889 // Is a virtual register use is a kill, hoisting it out of the loop 890 // may actually reduce register pressure or be register pressure 891 // neutral 892 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 893 EVT VT = *RC->vt_begin(); 894 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 895 unsigned RCCost = TLI->getRepRegClassCostFor(VT); 896 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 897 if (CI != Cost.end()) 898 CI->second -= RCCost; 899 else 900 Cost.insert(std::make_pair(RCId, -RCCost)); 901 } 902 } 903 904 // Visit BBs from preheader to current BB, if hoisting this doesn't cause 905 // high register pressure, then it's safe to proceed. 906 if (!IncreaseHighRegPressure(Cost)) { 907 ++NumLowRP; 908 return true; 909 } 910 911 // High register pressure situation, only hoist if the instruction is going to 912 // be remat'ed. 913 if (!TII->isTriviallyReMaterializable(&MI, AA) && 914 !isLoadFromConstantMemory(&MI)) 915 return false; 916 } 917 918 // If result(s) of this instruction is used by PHIs, then don't hoist it. 919 // The presence of joins makes it difficult for current register allocator 920 // implementation to perform remat. 921 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 922 const MachineOperand &MO = MI.getOperand(i); 923 if (!MO.isReg() || !MO.isDef()) 924 continue; 925 if (HasPHIUses(MO.getReg(), MRI)) 926 return false; 927 } 928 929 return true; 930} 931 932MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { 933 // Don't unfold simple loads. 934 if (MI->getDesc().canFoldAsLoad()) 935 return 0; 936 937 // If not, we may be able to unfold a load and hoist that. 938 // First test whether the instruction is loading from an amenable 939 // memory location. 940 if (!isLoadFromConstantMemory(MI)) 941 return 0; 942 943 // Next determine the register class for a temporary register. 944 unsigned LoadRegIndex; 945 unsigned NewOpc = 946 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), 947 /*UnfoldLoad=*/true, 948 /*UnfoldStore=*/false, 949 &LoadRegIndex); 950 if (NewOpc == 0) return 0; 951 const TargetInstrDesc &TID = TII->get(NewOpc); 952 if (TID.getNumDefs() != 1) return 0; 953 const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI); 954 // Ok, we're unfolding. Create a temporary register and do the unfold. 955 unsigned Reg = MRI->createVirtualRegister(RC); 956 957 MachineFunction &MF = *MI->getParent()->getParent(); 958 SmallVector<MachineInstr *, 2> NewMIs; 959 bool Success = 960 TII->unfoldMemoryOperand(MF, MI, Reg, 961 /*UnfoldLoad=*/true, /*UnfoldStore=*/false, 962 NewMIs); 963 (void)Success; 964 assert(Success && 965 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " 966 "succeeded!"); 967 assert(NewMIs.size() == 2 && 968 "Unfolded a load into multiple instructions!"); 969 MachineBasicBlock *MBB = MI->getParent(); 970 MBB->insert(MI, NewMIs[0]); 971 MBB->insert(MI, NewMIs[1]); 972 // If unfolding produced a load that wasn't loop-invariant or profitable to 973 // hoist, discard the new instructions and bail. 974 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { 975 NewMIs[0]->eraseFromParent(); 976 NewMIs[1]->eraseFromParent(); 977 return 0; 978 } 979 // Otherwise we successfully unfolded a load that we can hoist. 980 MI->eraseFromParent(); 981 return NewMIs[0]; 982} 983 984void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { 985 for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { 986 const MachineInstr *MI = &*I; 987 // FIXME: For now, only hoist re-materilizable instructions. LICM will 988 // increase register pressure. We want to make sure it doesn't increase 989 // spilling. 990 if (TII->isTriviallyReMaterializable(MI, AA)) { 991 unsigned Opcode = MI->getOpcode(); 992 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 993 CI = CSEMap.find(Opcode); 994 if (CI != CSEMap.end()) 995 CI->second.push_back(MI); 996 else { 997 std::vector<const MachineInstr*> CSEMIs; 998 CSEMIs.push_back(MI); 999 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 1000 } 1001 } 1002 } 1003} 1004 1005const MachineInstr* 1006MachineLICM::LookForDuplicate(const MachineInstr *MI, 1007 std::vector<const MachineInstr*> &PrevMIs) { 1008 for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { 1009 const MachineInstr *PrevMI = PrevMIs[i]; 1010 if (TII->produceSameValue(MI, PrevMI)) 1011 return PrevMI; 1012 } 1013 return 0; 1014} 1015 1016bool MachineLICM::EliminateCSE(MachineInstr *MI, 1017 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { 1018 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 1019 // the undef property onto uses. 1020 if (CI == CSEMap.end() || MI->isImplicitDef()) 1021 return false; 1022 1023 if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { 1024 DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); 1025 1026 // Replace virtual registers defined by MI by their counterparts defined 1027 // by Dup. 1028 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1029 const MachineOperand &MO = MI->getOperand(i); 1030 1031 // Physical registers may not differ here. 1032 assert((!MO.isReg() || MO.getReg() == 0 || 1033 !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || 1034 MO.getReg() == Dup->getOperand(i).getReg()) && 1035 "Instructions with different phys regs are not identical!"); 1036 1037 if (MO.isReg() && MO.isDef() && 1038 !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 1039 MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); 1040 MRI->clearKillFlags(Dup->getOperand(i).getReg()); 1041 } 1042 } 1043 MI->eraseFromParent(); 1044 ++NumCSEed; 1045 return true; 1046 } 1047 return false; 1048} 1049 1050/// Hoist - When an instruction is found to use only loop invariant operands 1051/// that are safe to hoist, this instruction is called to do the dirty work. 1052/// 1053void MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { 1054 // First check whether we should hoist this instruction. 1055 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { 1056 // If not, try unfolding a hoistable load. 1057 MI = ExtractHoistableLoad(MI); 1058 if (!MI) return; 1059 } 1060 1061 // Now move the instructions to the predecessor, inserting it before any 1062 // terminator instructions. 1063 DEBUG({ 1064 dbgs() << "Hoisting " << *MI; 1065 if (Preheader->getBasicBlock()) 1066 dbgs() << " to MachineBasicBlock " 1067 << Preheader->getName(); 1068 if (MI->getParent()->getBasicBlock()) 1069 dbgs() << " from MachineBasicBlock " 1070 << MI->getParent()->getName(); 1071 dbgs() << "\n"; 1072 }); 1073 1074 // If this is the first instruction being hoisted to the preheader, 1075 // initialize the CSE map with potential common expressions. 1076 if (FirstInLoop) { 1077 InitCSEMap(Preheader); 1078 FirstInLoop = false; 1079 } 1080 1081 // Look for opportunity to CSE the hoisted instruction. 1082 unsigned Opcode = MI->getOpcode(); 1083 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 1084 CI = CSEMap.find(Opcode); 1085 if (!EliminateCSE(MI, CI)) { 1086 // Otherwise, splice the instruction to the preheader. 1087 Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); 1088 1089 // Clear the kill flags of any register this instruction defines, 1090 // since they may need to be live throughout the entire loop 1091 // rather than just live for part of it. 1092 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1093 MachineOperand &MO = MI->getOperand(i); 1094 if (MO.isReg() && MO.isDef() && !MO.isDead()) 1095 MRI->clearKillFlags(MO.getReg()); 1096 } 1097 1098 // Add to the CSE map. 1099 if (CI != CSEMap.end()) 1100 CI->second.push_back(MI); 1101 else { 1102 std::vector<const MachineInstr*> CSEMIs; 1103 CSEMIs.push_back(MI); 1104 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 1105 } 1106 } 1107 1108 ++NumHoisted; 1109 Changed = true; 1110} 1111 1112MachineBasicBlock *MachineLICM::getCurPreheader() { 1113 // Determine the block to which to hoist instructions. If we can't find a 1114 // suitable loop predecessor, we can't do any hoisting. 1115 1116 // If we've tried to get a preheader and failed, don't try again. 1117 if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) 1118 return 0; 1119 1120 if (!CurPreheader) { 1121 CurPreheader = CurLoop->getLoopPreheader(); 1122 if (!CurPreheader) { 1123 MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); 1124 if (!Pred) { 1125 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1126 return 0; 1127 } 1128 1129 CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); 1130 if (!CurPreheader) { 1131 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1132 return 0; 1133 } 1134 } 1135 } 1136 return CurPreheader; 1137} 1138