LICM.cpp revision 85dadecbd664f60f0c7e4fbb44f083d43d01cfb7
1//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs loop invariant code motion, attempting to remove as much 11// code from the body of a loop as possible. It does this by either hoisting 12// code into the preheader block, or by sinking code to the exit blocks if it is 13// safe. This pass also promotes must-aliased memory locations in the loop to 14// live in registers, thus hoisting and sinking "invariant" loads and stores. 15// 16// This pass uses alias analysis for two purposes: 17// 18// 1. Moving loop invariant loads and calls out of loops. If we can determine 19// that a load or call inside of a loop never aliases anything stored to, 20// we can hoist it or sink it like any other instruction. 21// 2. Scalar Promotion of Memory - If there is a store instruction inside of 22// the loop, we try to move the store to happen AFTER the loop instead of 23// inside of the loop. This can only happen if a few conditions are true: 24// A. The pointer stored through is loop invariant 25// B. There are no stores or loads in the loop which _may_ alias the 26// pointer. There are no calls in the loop which mod/ref the pointer. 27// If these conditions are true, we can promote the loads and stores in the 28// loop of the pointer to use a temporary alloca'd variable. We then use 29// the SSAUpdater to construct the appropriate SSA form for the value. 30// 31//===----------------------------------------------------------------------===// 32 33#define DEBUG_TYPE "licm" 34#include "llvm/Transforms/Scalar.h" 35#include "llvm/Constants.h" 36#include "llvm/DerivedTypes.h" 37#include "llvm/IntrinsicInst.h" 38#include "llvm/Instructions.h" 39#include "llvm/LLVMContext.h" 40#include "llvm/Analysis/AliasAnalysis.h" 41#include "llvm/Analysis/AliasSetTracker.h" 42#include "llvm/Analysis/ConstantFolding.h" 43#include "llvm/Analysis/LoopInfo.h" 44#include "llvm/Analysis/LoopPass.h" 45#include "llvm/Analysis/Dominators.h" 46#include "llvm/Transforms/Utils/Local.h" 47#include "llvm/Transforms/Utils/SSAUpdater.h" 48#include "llvm/Target/TargetData.h" 49#include "llvm/Target/TargetLibraryInfo.h" 50#include "llvm/Support/CFG.h" 51#include "llvm/Support/CommandLine.h" 52#include "llvm/Support/raw_ostream.h" 53#include "llvm/Support/Debug.h" 54#include "llvm/ADT/Statistic.h" 55#include <algorithm> 56using namespace llvm; 57 58STATISTIC(NumSunk , "Number of instructions sunk out of loop"); 59STATISTIC(NumHoisted , "Number of instructions hoisted out of loop"); 60STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); 61STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk"); 62STATISTIC(NumPromoted , "Number of memory locations promoted to registers"); 63 64static cl::opt<bool> 65DisablePromotion("disable-licm-promotion", cl::Hidden, 66 cl::desc("Disable memory promotion in LICM pass")); 67 68namespace { 69 struct LICM : public LoopPass { 70 static char ID; // Pass identification, replacement for typeid 71 LICM() : LoopPass(ID) { 72 initializeLICMPass(*PassRegistry::getPassRegistry()); 73 } 74 75 virtual bool runOnLoop(Loop *L, LPPassManager &LPM); 76 77 /// This transformation requires natural loop information & requires that 78 /// loop preheaders be inserted into the CFG... 79 /// 80 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 81 AU.setPreservesCFG(); 82 AU.addRequired<DominatorTree>(); 83 AU.addRequired<LoopInfo>(); 84 AU.addRequiredID(LoopSimplifyID); 85 AU.addRequired<AliasAnalysis>(); 86 AU.addPreserved<AliasAnalysis>(); 87 AU.addPreserved("scalar-evolution"); 88 AU.addPreservedID(LoopSimplifyID); 89 AU.addRequired<TargetLibraryInfo>(); 90 } 91 92 bool doFinalization() { 93 assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets"); 94 return false; 95 } 96 97 private: 98 AliasAnalysis *AA; // Current AliasAnalysis information 99 LoopInfo *LI; // Current LoopInfo 100 DominatorTree *DT; // Dominator Tree for the current Loop. 101 102 TargetData *TD; // TargetData for constant folding. 103 TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. 104 105 // State that is updated as we process loops. 106 bool Changed; // Set to true when we change anything. 107 BasicBlock *Preheader; // The preheader block of the current loop... 108 Loop *CurLoop; // The current loop we are working on... 109 AliasSetTracker *CurAST; // AliasSet information for the current loop... 110 DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap; 111 112 /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. 113 void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L); 114 115 /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias 116 /// set. 117 void deleteAnalysisValue(Value *V, Loop *L); 118 119 /// SinkRegion - Walk the specified region of the CFG (defined by all blocks 120 /// dominated by the specified block, and that are in the current loop) in 121 /// reverse depth first order w.r.t the DominatorTree. This allows us to 122 /// visit uses before definitions, allowing us to sink a loop body in one 123 /// pass without iteration. 124 /// 125 void SinkRegion(DomTreeNode *N); 126 127 /// HoistRegion - Walk the specified region of the CFG (defined by all 128 /// blocks dominated by the specified block, and that are in the current 129 /// loop) in depth first order w.r.t the DominatorTree. This allows us to 130 /// visit definitions before uses, allowing us to hoist a loop body in one 131 /// pass without iteration. 132 /// 133 void HoistRegion(DomTreeNode *N); 134 135 /// inSubLoop - Little predicate that returns true if the specified basic 136 /// block is in a subloop of the current one, not the current one itself. 137 /// 138 bool inSubLoop(BasicBlock *BB) { 139 assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); 140 return LI->getLoopFor(BB) != CurLoop; 141 } 142 143 /// sink - When an instruction is found to only be used outside of the loop, 144 /// this function moves it to the exit blocks and patches up SSA form as 145 /// needed. 146 /// 147 void sink(Instruction &I); 148 149 /// hoist - When an instruction is found to only use loop invariant operands 150 /// that is safe to hoist, this instruction is called to do the dirty work. 151 /// 152 void hoist(Instruction &I); 153 154 /// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it 155 /// is not a trapping instruction or if it is a trapping instruction and is 156 /// guaranteed to execute. 157 /// 158 bool isSafeToExecuteUnconditionally(Instruction &I); 159 160 /// isGuaranteedToExecute - Check that the instruction is guaranteed to 161 /// execute. 162 /// 163 bool isGuaranteedToExecute(Instruction &I); 164 165 /// pointerInvalidatedByLoop - Return true if the body of this loop may 166 /// store into the memory location pointed to by V. 167 /// 168 bool pointerInvalidatedByLoop(Value *V, uint64_t Size, 169 const MDNode *TBAAInfo) { 170 // Check to see if any of the basic blocks in CurLoop invalidate *V. 171 return CurAST->getAliasSetForPointer(V, Size, TBAAInfo).isMod(); 172 } 173 174 bool canSinkOrHoistInst(Instruction &I); 175 bool isNotUsedInLoop(Instruction &I); 176 177 void PromoteAliasSet(AliasSet &AS); 178 }; 179} 180 181char LICM::ID = 0; 182INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false) 183INITIALIZE_PASS_DEPENDENCY(DominatorTree) 184INITIALIZE_PASS_DEPENDENCY(LoopInfo) 185INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 186INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) 187INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 188INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false) 189 190Pass *llvm::createLICMPass() { return new LICM(); } 191 192/// Hoist expressions out of the specified loop. Note, alias info for inner 193/// loop is not preserved so it is not a good idea to run LICM multiple 194/// times on one loop. 195/// 196bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { 197 Changed = false; 198 199 // Get our Loop and Alias Analysis information... 200 LI = &getAnalysis<LoopInfo>(); 201 AA = &getAnalysis<AliasAnalysis>(); 202 DT = &getAnalysis<DominatorTree>(); 203 204 TD = getAnalysisIfAvailable<TargetData>(); 205 TLI = &getAnalysis<TargetLibraryInfo>(); 206 207 CurAST = new AliasSetTracker(*AA); 208 // Collect Alias info from subloops. 209 for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); 210 LoopItr != LoopItrE; ++LoopItr) { 211 Loop *InnerL = *LoopItr; 212 AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL]; 213 assert(InnerAST && "Where is my AST?"); 214 215 // What if InnerLoop was modified by other passes ? 216 CurAST->add(*InnerAST); 217 218 // Once we've incorporated the inner loop's AST into ours, we don't need the 219 // subloop's anymore. 220 delete InnerAST; 221 LoopToAliasSetMap.erase(InnerL); 222 } 223 224 CurLoop = L; 225 226 // Get the preheader block to move instructions into... 227 Preheader = L->getLoopPreheader(); 228 229 // Loop over the body of this loop, looking for calls, invokes, and stores. 230 // Because subloops have already been incorporated into AST, we skip blocks in 231 // subloops. 232 // 233 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 234 I != E; ++I) { 235 BasicBlock *BB = *I; 236 if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. 237 CurAST->add(*BB); // Incorporate the specified basic block 238 } 239 240 // We want to visit all of the instructions in this loop... that are not parts 241 // of our subloops (they have already had their invariants hoisted out of 242 // their loop, into this loop, so there is no need to process the BODIES of 243 // the subloops). 244 // 245 // Traverse the body of the loop in depth first order on the dominator tree so 246 // that we are guaranteed to see definitions before we see uses. This allows 247 // us to sink instructions in one pass, without iteration. After sinking 248 // instructions, we perform another pass to hoist them out of the loop. 249 // 250 if (L->hasDedicatedExits()) 251 SinkRegion(DT->getNode(L->getHeader())); 252 if (Preheader) 253 HoistRegion(DT->getNode(L->getHeader())); 254 255 // Now that all loop invariants have been removed from the loop, promote any 256 // memory references to scalars that we can. 257 if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { 258 // Loop over all of the alias sets in the tracker object. 259 for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); 260 I != E; ++I) 261 PromoteAliasSet(*I); 262 } 263 264 // Clear out loops state information for the next iteration 265 CurLoop = 0; 266 Preheader = 0; 267 268 // If this loop is nested inside of another one, save the alias information 269 // for when we process the outer loop. 270 if (L->getParentLoop()) 271 LoopToAliasSetMap[L] = CurAST; 272 else 273 delete CurAST; 274 return Changed; 275} 276 277/// SinkRegion - Walk the specified region of the CFG (defined by all blocks 278/// dominated by the specified block, and that are in the current loop) in 279/// reverse depth first order w.r.t the DominatorTree. This allows us to visit 280/// uses before definitions, allowing us to sink a loop body in one pass without 281/// iteration. 282/// 283void LICM::SinkRegion(DomTreeNode *N) { 284 assert(N != 0 && "Null dominator tree node?"); 285 BasicBlock *BB = N->getBlock(); 286 287 // If this subregion is not in the top level loop at all, exit. 288 if (!CurLoop->contains(BB)) return; 289 290 // We are processing blocks in reverse dfo, so process children first. 291 const std::vector<DomTreeNode*> &Children = N->getChildren(); 292 for (unsigned i = 0, e = Children.size(); i != e; ++i) 293 SinkRegion(Children[i]); 294 295 // Only need to process the contents of this block if it is not part of a 296 // subloop (which would already have been processed). 297 if (inSubLoop(BB)) return; 298 299 for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) { 300 Instruction &I = *--II; 301 302 // If the instruction is dead, we would try to sink it because it isn't used 303 // in the loop, instead, just delete it. 304 if (isInstructionTriviallyDead(&I)) { 305 DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); 306 ++II; 307 CurAST->deleteValue(&I); 308 I.eraseFromParent(); 309 Changed = true; 310 continue; 311 } 312 313 // Check to see if we can sink this instruction to the exit blocks 314 // of the loop. We can do this if the all users of the instruction are 315 // outside of the loop. In this case, it doesn't even matter if the 316 // operands of the instruction are loop invariant. 317 // 318 if (isNotUsedInLoop(I) && canSinkOrHoistInst(I)) { 319 ++II; 320 sink(I); 321 } 322 } 323} 324 325/// HoistRegion - Walk the specified region of the CFG (defined by all blocks 326/// dominated by the specified block, and that are in the current loop) in depth 327/// first order w.r.t the DominatorTree. This allows us to visit definitions 328/// before uses, allowing us to hoist a loop body in one pass without iteration. 329/// 330void LICM::HoistRegion(DomTreeNode *N) { 331 assert(N != 0 && "Null dominator tree node?"); 332 BasicBlock *BB = N->getBlock(); 333 334 // If this subregion is not in the top level loop at all, exit. 335 if (!CurLoop->contains(BB)) return; 336 337 // Only need to process the contents of this block if it is not part of a 338 // subloop (which would already have been processed). 339 if (!inSubLoop(BB)) 340 for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) { 341 Instruction &I = *II++; 342 343 // Try constant folding this instruction. If all the operands are 344 // constants, it is technically hoistable, but it would be better to just 345 // fold it. 346 if (Constant *C = ConstantFoldInstruction(&I, TD, TLI)) { 347 DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); 348 CurAST->copyValue(&I, C); 349 CurAST->deleteValue(&I); 350 I.replaceAllUsesWith(C); 351 I.eraseFromParent(); 352 continue; 353 } 354 355 // Try hoisting the instruction out to the preheader. We can only do this 356 // if all of the operands of the instruction are loop invariant and if it 357 // is safe to hoist the instruction. 358 // 359 if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) && 360 isSafeToExecuteUnconditionally(I)) 361 hoist(I); 362 } 363 364 const std::vector<DomTreeNode*> &Children = N->getChildren(); 365 for (unsigned i = 0, e = Children.size(); i != e; ++i) 366 HoistRegion(Children[i]); 367} 368 369/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this 370/// instruction. 371/// 372bool LICM::canSinkOrHoistInst(Instruction &I) { 373 // Loads have extra constraints we have to verify before we can hoist them. 374 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { 375 if (!LI->isUnordered()) 376 return false; // Don't hoist volatile/atomic loads! 377 378 // Loads from constant memory are always safe to move, even if they end up 379 // in the same alias set as something that ends up being modified. 380 if (AA->pointsToConstantMemory(LI->getOperand(0))) 381 return true; 382 if (LI->getMetadata("invariant.load")) 383 return true; 384 385 // Don't hoist loads which have may-aliased stores in loop. 386 uint64_t Size = 0; 387 if (LI->getType()->isSized()) 388 Size = AA->getTypeStoreSize(LI->getType()); 389 return !pointerInvalidatedByLoop(LI->getOperand(0), Size, 390 LI->getMetadata(LLVMContext::MD_tbaa)); 391 } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { 392 // Don't sink or hoist dbg info; it's legal, but not useful. 393 if (isa<DbgInfoIntrinsic>(I)) 394 return false; 395 396 // Handle simple cases by querying alias analysis. 397 AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI); 398 if (Behavior == AliasAnalysis::DoesNotAccessMemory) 399 return true; 400 if (AliasAnalysis::onlyReadsMemory(Behavior)) { 401 // If this call only reads from memory and there are no writes to memory 402 // in the loop, we can hoist or sink the call as appropriate. 403 bool FoundMod = false; 404 for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); 405 I != E; ++I) { 406 AliasSet &AS = *I; 407 if (!AS.isForwardingAliasSet() && AS.isMod()) { 408 FoundMod = true; 409 break; 410 } 411 } 412 if (!FoundMod) return true; 413 } 414 415 // FIXME: This should use mod/ref information to see if we can hoist or sink 416 // the call. 417 418 return false; 419 } 420 421 // Otherwise these instructions are hoistable/sinkable 422 return isa<BinaryOperator>(I) || isa<CastInst>(I) || 423 isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 424 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 425 isa<ShuffleVectorInst>(I); 426} 427 428/// isNotUsedInLoop - Return true if the only users of this instruction are 429/// outside of the loop. If this is true, we can sink the instruction to the 430/// exit blocks of the loop. 431/// 432bool LICM::isNotUsedInLoop(Instruction &I) { 433 for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI) { 434 Instruction *User = cast<Instruction>(*UI); 435 if (PHINode *PN = dyn_cast<PHINode>(User)) { 436 // PHI node uses occur in predecessor blocks! 437 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 438 if (PN->getIncomingValue(i) == &I) 439 if (CurLoop->contains(PN->getIncomingBlock(i))) 440 return false; 441 } else if (CurLoop->contains(User)) { 442 return false; 443 } 444 } 445 return true; 446} 447 448 449/// sink - When an instruction is found to only be used outside of the loop, 450/// this function moves it to the exit blocks and patches up SSA form as needed. 451/// This method is guaranteed to remove the original instruction from its 452/// position, and may either delete it or move it to outside of the loop. 453/// 454void LICM::sink(Instruction &I) { 455 DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); 456 457 SmallVector<BasicBlock*, 8> ExitBlocks; 458 CurLoop->getUniqueExitBlocks(ExitBlocks); 459 460 if (isa<LoadInst>(I)) ++NumMovedLoads; 461 else if (isa<CallInst>(I)) ++NumMovedCalls; 462 ++NumSunk; 463 Changed = true; 464 465 // The case where there is only a single exit node of this loop is common 466 // enough that we handle it as a special (more efficient) case. It is more 467 // efficient to handle because there are no PHI nodes that need to be placed. 468 if (ExitBlocks.size() == 1) { 469 if (!DT->dominates(I.getParent(), ExitBlocks[0])) { 470 // Instruction is not used, just delete it. 471 CurAST->deleteValue(&I); 472 // If I has users in unreachable blocks, eliminate. 473 // If I is not void type then replaceAllUsesWith undef. 474 // This allows ValueHandlers and custom metadata to adjust itself. 475 if (!I.use_empty()) 476 I.replaceAllUsesWith(UndefValue::get(I.getType())); 477 I.eraseFromParent(); 478 } else { 479 // Move the instruction to the start of the exit block, after any PHI 480 // nodes in it. 481 I.moveBefore(ExitBlocks[0]->getFirstInsertionPt()); 482 483 // This instruction is no longer in the AST for the current loop, because 484 // we just sunk it out of the loop. If we just sunk it into an outer 485 // loop, we will rediscover the operation when we process it. 486 CurAST->deleteValue(&I); 487 } 488 return; 489 } 490 491 if (ExitBlocks.empty()) { 492 // The instruction is actually dead if there ARE NO exit blocks. 493 CurAST->deleteValue(&I); 494 // If I has users in unreachable blocks, eliminate. 495 // If I is not void type then replaceAllUsesWith undef. 496 // This allows ValueHandlers and custom metadata to adjust itself. 497 if (!I.use_empty()) 498 I.replaceAllUsesWith(UndefValue::get(I.getType())); 499 I.eraseFromParent(); 500 return; 501 } 502 503 // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the 504 // hard work of inserting PHI nodes as necessary. 505 SmallVector<PHINode*, 8> NewPHIs; 506 SSAUpdater SSA(&NewPHIs); 507 508 if (!I.use_empty()) 509 SSA.Initialize(I.getType(), I.getName()); 510 511 // Insert a copy of the instruction in each exit block of the loop that is 512 // dominated by the instruction. Each exit block is known to only be in the 513 // ExitBlocks list once. 514 BasicBlock *InstOrigBB = I.getParent(); 515 unsigned NumInserted = 0; 516 517 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 518 BasicBlock *ExitBlock = ExitBlocks[i]; 519 520 if (!DT->dominates(InstOrigBB, ExitBlock)) 521 continue; 522 523 // Insert the code after the last PHI node. 524 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 525 526 // If this is the first exit block processed, just move the original 527 // instruction, otherwise clone the original instruction and insert 528 // the copy. 529 Instruction *New; 530 if (NumInserted++ == 0) { 531 I.moveBefore(InsertPt); 532 New = &I; 533 } else { 534 New = I.clone(); 535 if (!I.getName().empty()) 536 New->setName(I.getName()+".le"); 537 ExitBlock->getInstList().insert(InsertPt, New); 538 } 539 540 // Now that we have inserted the instruction, inform SSAUpdater. 541 if (!I.use_empty()) 542 SSA.AddAvailableValue(ExitBlock, New); 543 } 544 545 // If the instruction doesn't dominate any exit blocks, it must be dead. 546 if (NumInserted == 0) { 547 CurAST->deleteValue(&I); 548 if (!I.use_empty()) 549 I.replaceAllUsesWith(UndefValue::get(I.getType())); 550 I.eraseFromParent(); 551 return; 552 } 553 554 // Next, rewrite uses of the instruction, inserting PHI nodes as needed. 555 for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) { 556 // Grab the use before incrementing the iterator. 557 Use &U = UI.getUse(); 558 // Increment the iterator before removing the use from the list. 559 ++UI; 560 SSA.RewriteUseAfterInsertions(U); 561 } 562 563 // Update CurAST for NewPHIs if I had pointer type. 564 if (I.getType()->isPointerTy()) 565 for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) 566 CurAST->copyValue(&I, NewPHIs[i]); 567 568 // Finally, remove the instruction from CurAST. It is no longer in the loop. 569 CurAST->deleteValue(&I); 570} 571 572/// hoist - When an instruction is found to only use loop invariant operands 573/// that is safe to hoist, this instruction is called to do the dirty work. 574/// 575void LICM::hoist(Instruction &I) { 576 DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " 577 << I << "\n"); 578 579 // Move the new node to the Preheader, before its terminator. 580 I.moveBefore(Preheader->getTerminator()); 581 582 if (isa<LoadInst>(I)) ++NumMovedLoads; 583 else if (isa<CallInst>(I)) ++NumMovedCalls; 584 ++NumHoisted; 585 Changed = true; 586} 587 588/// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it is 589/// not a trapping instruction or if it is a trapping instruction and is 590/// guaranteed to execute. 591/// 592bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { 593 // If it is not a trapping instruction, it is always safe to hoist. 594 if (Inst.isSafeToSpeculativelyExecute()) 595 return true; 596 597 return isGuaranteedToExecute(Inst); 598} 599 600bool LICM::isGuaranteedToExecute(Instruction &Inst) { 601 // Otherwise we have to check to make sure that the instruction dominates all 602 // of the exit blocks. If it doesn't, then there is a path out of the loop 603 // which does not execute this instruction, so we can't hoist it. 604 605 // If the instruction is in the header block for the loop (which is very 606 // common), it is always guaranteed to dominate the exit blocks. Since this 607 // is a common case, and can save some work, check it now. 608 if (Inst.getParent() == CurLoop->getHeader()) 609 return true; 610 611 // Get the exit blocks for the current loop. 612 SmallVector<BasicBlock*, 8> ExitBlocks; 613 CurLoop->getExitBlocks(ExitBlocks); 614 615 // Verify that the block dominates each of the exit blocks of the loop. 616 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) 617 if (!DT->dominates(Inst.getParent(), ExitBlocks[i])) 618 return false; 619 620 return true; 621} 622 623namespace { 624 class LoopPromoter : public LoadAndStorePromoter { 625 Value *SomePtr; // Designated pointer to store to. 626 SmallPtrSet<Value*, 4> &PointerMustAliases; 627 SmallVectorImpl<BasicBlock*> &LoopExitBlocks; 628 AliasSetTracker &AST; 629 DebugLoc DL; 630 int Alignment; 631 public: 632 LoopPromoter(Value *SP, 633 const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, 634 SmallPtrSet<Value*, 4> &PMA, 635 SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast, 636 DebugLoc dl, int alignment) 637 : LoadAndStorePromoter(Insts, S), SomePtr(SP), 638 PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl), 639 Alignment(alignment) {} 640 641 virtual bool isInstInList(Instruction *I, 642 const SmallVectorImpl<Instruction*> &) const { 643 Value *Ptr; 644 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 645 Ptr = LI->getOperand(0); 646 else 647 Ptr = cast<StoreInst>(I)->getPointerOperand(); 648 return PointerMustAliases.count(Ptr); 649 } 650 651 virtual void doExtraRewritesBeforeFinalDeletion() const { 652 // Insert stores after in the loop exit blocks. Each exit block gets a 653 // store of the live-out values that feed them. Since we've already told 654 // the SSA updater about the defs in the loop and the preheader 655 // definition, it is all set and we can start using it. 656 for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { 657 BasicBlock *ExitBlock = LoopExitBlocks[i]; 658 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); 659 Instruction *InsertPos = ExitBlock->getFirstInsertionPt(); 660 StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); 661 NewSI->setAlignment(Alignment); 662 NewSI->setDebugLoc(DL); 663 } 664 } 665 666 virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const { 667 // Update alias analysis. 668 AST.copyValue(LI, V); 669 } 670 virtual void instructionDeleted(Instruction *I) const { 671 AST.deleteValue(I); 672 } 673 }; 674} // end anon namespace 675 676/// PromoteAliasSet - Try to promote memory values to scalars by sinking 677/// stores out of the loop and moving loads to before the loop. We do this by 678/// looping over the stores in the loop, looking for stores to Must pointers 679/// which are loop invariant. 680/// 681void LICM::PromoteAliasSet(AliasSet &AS) { 682 // We can promote this alias set if it has a store, if it is a "Must" alias 683 // set, if the pointer is loop invariant, and if we are not eliminating any 684 // volatile loads or stores. 685 if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || 686 AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) 687 return; 688 689 assert(!AS.empty() && 690 "Must alias set should have at least one pointer element in it!"); 691 Value *SomePtr = AS.begin()->getValue(); 692 693 // It isn't safe to promote a load/store from the loop if the load/store is 694 // conditional. For example, turning: 695 // 696 // for () { if (c) *P += 1; } 697 // 698 // into: 699 // 700 // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; 701 // 702 // is not safe, because *P may only be valid to access if 'c' is true. 703 // 704 // It is safe to promote P if all uses are direct load/stores and if at 705 // least one is guaranteed to be executed. 706 bool GuaranteedToExecute = false; 707 708 SmallVector<Instruction*, 64> LoopUses; 709 SmallPtrSet<Value*, 4> PointerMustAliases; 710 711 // We start with an alignment of one and try to find instructions that allow 712 // us to prove better alignment. 713 unsigned Alignment = 1; 714 715 // Check that all of the pointers in the alias set have the same type. We 716 // cannot (yet) promote a memory location that is loaded and stored in 717 // different sizes. 718 for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { 719 Value *ASIV = ASI->getValue(); 720 PointerMustAliases.insert(ASIV); 721 722 // Check that all of the pointers in the alias set have the same type. We 723 // cannot (yet) promote a memory location that is loaded and stored in 724 // different sizes. 725 if (SomePtr->getType() != ASIV->getType()) 726 return; 727 728 for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end(); 729 UI != UE; ++UI) { 730 // Ignore instructions that are outside the loop. 731 Instruction *Use = dyn_cast<Instruction>(*UI); 732 if (!Use || !CurLoop->contains(Use)) 733 continue; 734 735 // If there is an non-load/store instruction in the loop, we can't promote 736 // it. 737 if (LoadInst *load = dyn_cast<LoadInst>(Use)) { 738 assert(!load->isVolatile() && "AST broken"); 739 if (!load->isSimple()) 740 return; 741 } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) { 742 // Stores *of* the pointer are not interesting, only stores *to* the 743 // pointer. 744 if (Use->getOperand(1) != ASIV) 745 continue; 746 assert(!store->isVolatile() && "AST broken"); 747 if (!store->isSimple()) 748 return; 749 750 // Note that we only check GuaranteedToExecute inside the store case 751 // so that we do not introduce stores where they did not exist before 752 // (which would break the LLVM concurrency model). 753 754 // If the alignment of this instruction allows us to specify a more 755 // restrictive (and performant) alignment and if we are sure this 756 // instruction will be executed, update the alignment. 757 // Larger is better, with the exception of 0 being the best alignment. 758 unsigned InstAlignment = store->getAlignment(); 759 if ((InstAlignment > Alignment || InstAlignment == 0) 760 && (Alignment != 0)) 761 if (isGuaranteedToExecute(*Use)) { 762 GuaranteedToExecute = true; 763 Alignment = InstAlignment; 764 } 765 766 if (!GuaranteedToExecute) 767 GuaranteedToExecute = isGuaranteedToExecute(*Use); 768 769 } else 770 return; // Not a load or store. 771 772 LoopUses.push_back(Use); 773 } 774 } 775 776 // If there isn't a guaranteed-to-execute instruction, we can't promote. 777 if (!GuaranteedToExecute) 778 return; 779 780 // Otherwise, this is safe to promote, lets do it! 781 DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); 782 Changed = true; 783 ++NumPromoted; 784 785 // Grab a debug location for the inserted loads/stores; given that the 786 // inserted loads/stores have little relation to the original loads/stores, 787 // this code just arbitrarily picks a location from one, since any debug 788 // location is better than none. 789 DebugLoc DL = LoopUses[0]->getDebugLoc(); 790 791 SmallVector<BasicBlock*, 8> ExitBlocks; 792 CurLoop->getUniqueExitBlocks(ExitBlocks); 793 794 // We use the SSAUpdater interface to insert phi nodes as required. 795 SmallVector<PHINode*, 16> NewPHIs; 796 SSAUpdater SSA(&NewPHIs); 797 LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, 798 *CurAST, DL, Alignment); 799 800 // Set up the preheader to have a definition of the value. It is the live-out 801 // value from the preheader that uses in the loop will use. 802 LoadInst *PreheaderLoad = 803 new LoadInst(SomePtr, SomePtr->getName()+".promoted", 804 Preheader->getTerminator()); 805 PreheaderLoad->setAlignment(Alignment); 806 PreheaderLoad->setDebugLoc(DL); 807 SSA.AddAvailableValue(Preheader, PreheaderLoad); 808 809 // Rewrite all the loads in the loop and remember all the definitions from 810 // stores in the loop. 811 Promoter.run(LoopUses); 812 813 // If the SSAUpdater didn't use the load in the preheader, just zap it now. 814 if (PreheaderLoad->use_empty()) 815 PreheaderLoad->eraseFromParent(); 816} 817 818 819/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. 820void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { 821 AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); 822 if (!AST) 823 return; 824 825 AST->copyValue(From, To); 826} 827 828/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias 829/// set. 830void LICM::deleteAnalysisValue(Value *V, Loop *L) { 831 AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); 832 if (!AST) 833 return; 834 835 AST->deleteValue(V); 836} 837