LICM.cpp revision 1b9c848d0a680de2aed44f43c85f4594416ff15a
1//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs loop invariant code motion, attempting to remove as much 11// code from the body of a loop as possible. It does this by either hoisting 12// code into the preheader block, or by sinking code to the exit blocks if it is 13// safe. This pass also promotes must-aliased memory locations in the loop to 14// live in registers, thus hoisting and sinking "invariant" loads and stores. 15// 16// This pass uses alias analysis for two purposes: 17// 18// 1. Moving loop invariant loads and calls out of loops. If we can determine 19// that a load or call inside of a loop never aliases anything stored to, 20// we can hoist it or sink it like any other instruction. 21// 2. Scalar Promotion of Memory - If there is a store instruction inside of 22// the loop, we try to move the store to happen AFTER the loop instead of 23// inside of the loop. This can only happen if a few conditions are true: 24// A. The pointer stored through is loop invariant 25// B. There are no stores or loads in the loop which _may_ alias the 26// pointer. There are no calls in the loop which mod/ref the pointer. 27// If these conditions are true, we can promote the loads and stores in the 28// loop of the pointer to use a temporary alloca'd variable. We then use 29// the SSAUpdater to construct the appropriate SSA form for the value. 30// 31//===----------------------------------------------------------------------===// 32 33#define DEBUG_TYPE "licm" 34#include "llvm/Transforms/Scalar.h" 35#include "llvm/Constants.h" 36#include "llvm/DerivedTypes.h" 37#include "llvm/IntrinsicInst.h" 38#include "llvm/Instructions.h" 39#include "llvm/Target/TargetData.h" 40#include "llvm/Analysis/LoopInfo.h" 41#include "llvm/Analysis/LoopPass.h" 42#include "llvm/Analysis/AliasAnalysis.h" 43#include "llvm/Analysis/AliasSetTracker.h" 44#include "llvm/Analysis/Dominators.h" 45#include "llvm/Analysis/ScalarEvolution.h" 46#include "llvm/Transforms/Utils/SSAUpdater.h" 47#include "llvm/Support/CFG.h" 48#include "llvm/Support/CommandLine.h" 49#include "llvm/Support/raw_ostream.h" 50#include "llvm/Support/Debug.h" 51#include "llvm/ADT/Statistic.h" 52#include <algorithm> 53using namespace llvm; 54 55STATISTIC(NumSunk , "Number of instructions sunk out of loop"); 56STATISTIC(NumHoisted , "Number of instructions hoisted out of loop"); 57STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); 58STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk"); 59STATISTIC(NumPromoted , "Number of memory locations promoted to registers"); 60 61static cl::opt<bool> 62DisablePromotion("disable-licm-promotion", cl::Hidden, 63 cl::desc("Disable memory promotion in LICM pass")); 64 65namespace { 66 struct LICM : public LoopPass { 67 static char ID; // Pass identification, replacement for typeid 68 LICM() : LoopPass(ID) {} 69 70 virtual bool runOnLoop(Loop *L, LPPassManager &LPM); 71 72 /// This transformation requires natural loop information & requires that 73 /// loop preheaders be inserted into the CFG... 74 /// 75 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 76 AU.setPreservesCFG(); 77 AU.addRequired<DominatorTree>(); 78 AU.addRequired<LoopInfo>(); 79 AU.addRequiredID(LoopSimplifyID); 80 AU.addRequired<AliasAnalysis>(); 81 AU.addPreserved<AliasAnalysis>(); 82 AU.addPreserved<ScalarEvolution>(); 83 AU.addPreservedID(LoopSimplifyID); 84 } 85 86 bool doFinalization() { 87 assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets"); 88 return false; 89 } 90 91 private: 92 AliasAnalysis *AA; // Current AliasAnalysis information 93 LoopInfo *LI; // Current LoopInfo 94 DominatorTree *DT; // Dominator Tree for the current Loop. 95 96 // State that is updated as we process loops. 97 bool Changed; // Set to true when we change anything. 98 BasicBlock *Preheader; // The preheader block of the current loop... 99 Loop *CurLoop; // The current loop we are working on... 100 AliasSetTracker *CurAST; // AliasSet information for the current loop... 101 DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap; 102 103 /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. 104 void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L); 105 106 /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias 107 /// set. 108 void deleteAnalysisValue(Value *V, Loop *L); 109 110 /// SinkRegion - Walk the specified region of the CFG (defined by all blocks 111 /// dominated by the specified block, and that are in the current loop) in 112 /// reverse depth first order w.r.t the DominatorTree. This allows us to 113 /// visit uses before definitions, allowing us to sink a loop body in one 114 /// pass without iteration. 115 /// 116 void SinkRegion(DomTreeNode *N); 117 118 /// HoistRegion - Walk the specified region of the CFG (defined by all 119 /// blocks dominated by the specified block, and that are in the current 120 /// loop) in depth first order w.r.t the DominatorTree. This allows us to 121 /// visit definitions before uses, allowing us to hoist a loop body in one 122 /// pass without iteration. 123 /// 124 void HoistRegion(DomTreeNode *N); 125 126 /// inSubLoop - Little predicate that returns true if the specified basic 127 /// block is in a subloop of the current one, not the current one itself. 128 /// 129 bool inSubLoop(BasicBlock *BB) { 130 assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); 131 for (Loop::iterator I = CurLoop->begin(), E = CurLoop->end(); I != E; ++I) 132 if ((*I)->contains(BB)) 133 return true; // A subloop actually contains this block! 134 return false; 135 } 136 137 /// isExitBlockDominatedByBlockInLoop - This method checks to see if the 138 /// specified exit block of the loop is dominated by the specified block 139 /// that is in the body of the loop. We use these constraints to 140 /// dramatically limit the amount of the dominator tree that needs to be 141 /// searched. 142 bool isExitBlockDominatedByBlockInLoop(BasicBlock *ExitBlock, 143 BasicBlock *BlockInLoop) const { 144 // If the block in the loop is the loop header, it must be dominated! 145 BasicBlock *LoopHeader = CurLoop->getHeader(); 146 if (BlockInLoop == LoopHeader) 147 return true; 148 149 DomTreeNode *BlockInLoopNode = DT->getNode(BlockInLoop); 150 DomTreeNode *IDom = DT->getNode(ExitBlock); 151 152 // Because the exit block is not in the loop, we know we have to get _at 153 // least_ its immediate dominator. 154 IDom = IDom->getIDom(); 155 156 while (IDom && IDom != BlockInLoopNode) { 157 // If we have got to the header of the loop, then the instructions block 158 // did not dominate the exit node, so we can't hoist it. 159 if (IDom->getBlock() == LoopHeader) 160 return false; 161 162 // Get next Immediate Dominator. 163 IDom = IDom->getIDom(); 164 }; 165 166 return true; 167 } 168 169 /// sink - When an instruction is found to only be used outside of the loop, 170 /// this function moves it to the exit blocks and patches up SSA form as 171 /// needed. 172 /// 173 void sink(Instruction &I); 174 175 /// hoist - When an instruction is found to only use loop invariant operands 176 /// that is safe to hoist, this instruction is called to do the dirty work. 177 /// 178 void hoist(Instruction &I); 179 180 /// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it 181 /// is not a trapping instruction or if it is a trapping instruction and is 182 /// guaranteed to execute. 183 /// 184 bool isSafeToExecuteUnconditionally(Instruction &I); 185 186 /// pointerInvalidatedByLoop - Return true if the body of this loop may 187 /// store into the memory location pointed to by V. 188 /// 189 bool pointerInvalidatedByLoop(Value *V, unsigned Size) { 190 // Check to see if any of the basic blocks in CurLoop invalidate *V. 191 return CurAST->getAliasSetForPointer(V, Size).isMod(); 192 } 193 194 bool canSinkOrHoistInst(Instruction &I); 195 bool isLoopInvariantInst(Instruction &I); 196 bool isNotUsedInLoop(Instruction &I); 197 198 void PromoteAliasSet(AliasSet &AS); 199 }; 200} 201 202char LICM::ID = 0; 203INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false); 204 205Pass *llvm::createLICMPass() { return new LICM(); } 206 207/// Hoist expressions out of the specified loop. Note, alias info for inner 208/// loop is not preserved so it is not a good idea to run LICM multiple 209/// times on one loop. 210/// 211bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { 212 Changed = false; 213 214 // Get our Loop and Alias Analysis information... 215 LI = &getAnalysis<LoopInfo>(); 216 AA = &getAnalysis<AliasAnalysis>(); 217 DT = &getAnalysis<DominatorTree>(); 218 219 CurAST = new AliasSetTracker(*AA); 220 // Collect Alias info from subloops. 221 for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); 222 LoopItr != LoopItrE; ++LoopItr) { 223 Loop *InnerL = *LoopItr; 224 AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL]; 225 assert(InnerAST && "Where is my AST?"); 226 227 // What if InnerLoop was modified by other passes ? 228 CurAST->add(*InnerAST); 229 230 // Once we've incorporated the inner loop's AST into ours, we don't need the 231 // subloop's anymore. 232 delete InnerAST; 233 LoopToAliasSetMap.erase(InnerL); 234 } 235 236 CurLoop = L; 237 238 // Get the preheader block to move instructions into... 239 Preheader = L->getLoopPreheader(); 240 241 // Loop over the body of this loop, looking for calls, invokes, and stores. 242 // Because subloops have already been incorporated into AST, we skip blocks in 243 // subloops. 244 // 245 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 246 I != E; ++I) { 247 BasicBlock *BB = *I; 248 if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. 249 CurAST->add(*BB); // Incorporate the specified basic block 250 } 251 252 // We want to visit all of the instructions in this loop... that are not parts 253 // of our subloops (they have already had their invariants hoisted out of 254 // their loop, into this loop, so there is no need to process the BODIES of 255 // the subloops). 256 // 257 // Traverse the body of the loop in depth first order on the dominator tree so 258 // that we are guaranteed to see definitions before we see uses. This allows 259 // us to sink instructions in one pass, without iteration. After sinking 260 // instructions, we perform another pass to hoist them out of the loop. 261 // 262 if (L->hasDedicatedExits()) 263 SinkRegion(DT->getNode(L->getHeader())); 264 if (Preheader) 265 HoistRegion(DT->getNode(L->getHeader())); 266 267 // Now that all loop invariants have been removed from the loop, promote any 268 // memory references to scalars that we can. 269 if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { 270 // Loop over all of the alias sets in the tracker object. 271 for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); 272 I != E; ++I) 273 PromoteAliasSet(*I); 274 } 275 276 // Clear out loops state information for the next iteration 277 CurLoop = 0; 278 Preheader = 0; 279 280 // If this loop is nested inside of another one, save the alias information 281 // for when we process the outer loop. 282 if (L->getParentLoop()) 283 LoopToAliasSetMap[L] = CurAST; 284 else 285 delete CurAST; 286 return Changed; 287} 288 289/// SinkRegion - Walk the specified region of the CFG (defined by all blocks 290/// dominated by the specified block, and that are in the current loop) in 291/// reverse depth first order w.r.t the DominatorTree. This allows us to visit 292/// uses before definitions, allowing us to sink a loop body in one pass without 293/// iteration. 294/// 295void LICM::SinkRegion(DomTreeNode *N) { 296 assert(N != 0 && "Null dominator tree node?"); 297 BasicBlock *BB = N->getBlock(); 298 299 // If this subregion is not in the top level loop at all, exit. 300 if (!CurLoop->contains(BB)) return; 301 302 // We are processing blocks in reverse dfo, so process children first... 303 const std::vector<DomTreeNode*> &Children = N->getChildren(); 304 for (unsigned i = 0, e = Children.size(); i != e; ++i) 305 SinkRegion(Children[i]); 306 307 // Only need to process the contents of this block if it is not part of a 308 // subloop (which would already have been processed). 309 if (inSubLoop(BB)) return; 310 311 for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) { 312 Instruction &I = *--II; 313 314 // Check to see if we can sink this instruction to the exit blocks 315 // of the loop. We can do this if the all users of the instruction are 316 // outside of the loop. In this case, it doesn't even matter if the 317 // operands of the instruction are loop invariant. 318 // 319 if (isNotUsedInLoop(I) && canSinkOrHoistInst(I)) { 320 ++II; 321 sink(I); 322 } 323 } 324} 325 326/// HoistRegion - Walk the specified region of the CFG (defined by all blocks 327/// dominated by the specified block, and that are in the current loop) in depth 328/// first order w.r.t the DominatorTree. This allows us to visit definitions 329/// before uses, allowing us to hoist a loop body in one pass without iteration. 330/// 331void LICM::HoistRegion(DomTreeNode *N) { 332 assert(N != 0 && "Null dominator tree node?"); 333 BasicBlock *BB = N->getBlock(); 334 335 // If this subregion is not in the top level loop at all, exit. 336 if (!CurLoop->contains(BB)) return; 337 338 // Only need to process the contents of this block if it is not part of a 339 // subloop (which would already have been processed). 340 if (!inSubLoop(BB)) 341 for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) { 342 Instruction &I = *II++; 343 344 // Try hoisting the instruction out to the preheader. We can only do this 345 // if all of the operands of the instruction are loop invariant and if it 346 // is safe to hoist the instruction. 347 // 348 if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) && 349 isSafeToExecuteUnconditionally(I)) 350 hoist(I); 351 } 352 353 const std::vector<DomTreeNode*> &Children = N->getChildren(); 354 for (unsigned i = 0, e = Children.size(); i != e; ++i) 355 HoistRegion(Children[i]); 356} 357 358/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this 359/// instruction. 360/// 361bool LICM::canSinkOrHoistInst(Instruction &I) { 362 // Loads have extra constraints we have to verify before we can hoist them. 363 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { 364 if (LI->isVolatile()) 365 return false; // Don't hoist volatile loads! 366 367 // Loads from constant memory are always safe to move, even if they end up 368 // in the same alias set as something that ends up being modified. 369 if (AA->pointsToConstantMemory(LI->getOperand(0))) 370 return true; 371 372 // Don't hoist loads which have may-aliased stores in loop. 373 unsigned Size = 0; 374 if (LI->getType()->isSized()) 375 Size = AA->getTypeStoreSize(LI->getType()); 376 return !pointerInvalidatedByLoop(LI->getOperand(0), Size); 377 } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { 378 // Handle obvious cases efficiently. 379 AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI); 380 if (Behavior == AliasAnalysis::DoesNotAccessMemory) 381 return true; 382 else if (Behavior == AliasAnalysis::OnlyReadsMemory) { 383 // If this call only reads from memory and there are no writes to memory 384 // in the loop, we can hoist or sink the call as appropriate. 385 bool FoundMod = false; 386 for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); 387 I != E; ++I) { 388 AliasSet &AS = *I; 389 if (!AS.isForwardingAliasSet() && AS.isMod()) { 390 FoundMod = true; 391 break; 392 } 393 } 394 if (!FoundMod) return true; 395 } 396 397 // FIXME: This should use mod/ref information to see if we can hoist or sink 398 // the call. 399 400 return false; 401 } 402 403 // Otherwise these instructions are hoistable/sinkable 404 return isa<BinaryOperator>(I) || isa<CastInst>(I) || 405 isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 406 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 407 isa<ShuffleVectorInst>(I); 408} 409 410/// isNotUsedInLoop - Return true if the only users of this instruction are 411/// outside of the loop. If this is true, we can sink the instruction to the 412/// exit blocks of the loop. 413/// 414bool LICM::isNotUsedInLoop(Instruction &I) { 415 for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI) { 416 Instruction *User = cast<Instruction>(*UI); 417 if (PHINode *PN = dyn_cast<PHINode>(User)) { 418 // PHI node uses occur in predecessor blocks! 419 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 420 if (PN->getIncomingValue(i) == &I) 421 if (CurLoop->contains(PN->getIncomingBlock(i))) 422 return false; 423 } else if (CurLoop->contains(User)) { 424 return false; 425 } 426 } 427 return true; 428} 429 430 431/// isLoopInvariantInst - Return true if all operands of this instruction are 432/// loop invariant. We also filter out non-hoistable instructions here just for 433/// efficiency. 434/// 435bool LICM::isLoopInvariantInst(Instruction &I) { 436 // The instruction is loop invariant if all of its operands are loop-invariant 437 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) 438 if (!CurLoop->isLoopInvariant(I.getOperand(i))) 439 return false; 440 441 // If we got this far, the instruction is loop invariant! 442 return true; 443} 444 445/// sink - When an instruction is found to only be used outside of the loop, 446/// this function moves it to the exit blocks and patches up SSA form as needed. 447/// This method is guaranteed to remove the original instruction from its 448/// position, and may either delete it or move it to outside of the loop. 449/// 450void LICM::sink(Instruction &I) { 451 DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); 452 453 SmallVector<BasicBlock*, 8> ExitBlocks; 454 CurLoop->getUniqueExitBlocks(ExitBlocks); 455 456 if (isa<LoadInst>(I)) ++NumMovedLoads; 457 else if (isa<CallInst>(I)) ++NumMovedCalls; 458 ++NumSunk; 459 Changed = true; 460 461 // The case where there is only a single exit node of this loop is common 462 // enough that we handle it as a special (more efficient) case. It is more 463 // efficient to handle because there are no PHI nodes that need to be placed. 464 if (ExitBlocks.size() == 1) { 465 if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) { 466 // Instruction is not used, just delete it. 467 CurAST->deleteValue(&I); 468 // If I has users in unreachable blocks, eliminate. 469 // If I is not void type then replaceAllUsesWith undef. 470 // This allows ValueHandlers and custom metadata to adjust itself. 471 if (!I.use_empty()) 472 I.replaceAllUsesWith(UndefValue::get(I.getType())); 473 I.eraseFromParent(); 474 } else { 475 // Move the instruction to the start of the exit block, after any PHI 476 // nodes in it. 477 I.removeFromParent(); 478 BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI(); 479 ExitBlocks[0]->getInstList().insert(InsertPt, &I); 480 481 // This instruction is no longer in the AST for the current loop, because 482 // we just sunk it out of the loop. If we just sunk it into an outer 483 // loop, we will rediscover the operation when we process it. 484 CurAST->deleteValue(&I); 485 } 486 return; 487 } 488 489 if (ExitBlocks.empty()) { 490 // The instruction is actually dead if there ARE NO exit blocks. 491 CurAST->deleteValue(&I); 492 // If I has users in unreachable blocks, eliminate. 493 // If I is not void type then replaceAllUsesWith undef. 494 // This allows ValueHandlers and custom metadata to adjust itself. 495 if (!I.use_empty()) 496 I.replaceAllUsesWith(UndefValue::get(I.getType())); 497 I.eraseFromParent(); 498 return; 499 } 500 501 // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the 502 // hard work of inserting PHI nodes as necessary. 503 SmallVector<PHINode*, 8> NewPHIs; 504 SSAUpdater SSA(&NewPHIs); 505 506 if (!I.use_empty()) 507 SSA.Initialize(&I); 508 509 // Insert a copy of the instruction in each exit block of the loop that is 510 // dominated by the instruction. Each exit block is known to only be in the 511 // ExitBlocks list once. 512 BasicBlock *InstOrigBB = I.getParent(); 513 unsigned NumInserted = 0; 514 515 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 516 BasicBlock *ExitBlock = ExitBlocks[i]; 517 518 if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) 519 continue; 520 521 // Insert the code after the last PHI node. 522 BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); 523 524 // If this is the first exit block processed, just move the original 525 // instruction, otherwise clone the original instruction and insert 526 // the copy. 527 Instruction *New; 528 if (NumInserted++ == 0) { 529 I.moveBefore(InsertPt); 530 New = &I; 531 } else { 532 New = I.clone(); 533 if (!I.getName().empty()) 534 New->setName(I.getName()+".le"); 535 ExitBlock->getInstList().insert(InsertPt, New); 536 } 537 538 // Now that we have inserted the instruction, inform SSAUpdater. 539 if (!I.use_empty()) 540 SSA.AddAvailableValue(ExitBlock, New); 541 } 542 543 // If the instruction doesn't dominate any exit blocks, it must be dead. 544 if (NumInserted == 0) { 545 CurAST->deleteValue(&I); 546 if (!I.use_empty()) 547 I.replaceAllUsesWith(UndefValue::get(I.getType())); 548 I.eraseFromParent(); 549 return; 550 } 551 552 // Next, rewrite uses of the instruction, inserting PHI nodes as needed. 553 for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) { 554 // Grab the use before incrementing the iterator. 555 Use &U = UI.getUse(); 556 // Increment the iterator before removing the use from the list. 557 ++UI; 558 SSA.RewriteUseAfterInsertions(U); 559 } 560 561 // Update CurAST for NewPHIs if I had pointer type. 562 if (I.getType()->isPointerTy()) 563 for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) 564 CurAST->copyValue(NewPHIs[i], &I); 565 566 // Finally, remove the instruction from CurAST. It is no longer in the loop. 567 CurAST->deleteValue(&I); 568} 569 570/// hoist - When an instruction is found to only use loop invariant operands 571/// that is safe to hoist, this instruction is called to do the dirty work. 572/// 573void LICM::hoist(Instruction &I) { 574 DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " 575 << I << "\n"); 576 577 // Remove the instruction from its current basic block... but don't delete the 578 // instruction. 579 I.removeFromParent(); 580 581 // Insert the new node in Preheader, before the terminator. 582 Preheader->getInstList().insert(Preheader->getTerminator(), &I); 583 584 if (isa<LoadInst>(I)) ++NumMovedLoads; 585 else if (isa<CallInst>(I)) ++NumMovedCalls; 586 ++NumHoisted; 587 Changed = true; 588} 589 590/// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it is 591/// not a trapping instruction or if it is a trapping instruction and is 592/// guaranteed to execute. 593/// 594bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { 595 // If it is not a trapping instruction, it is always safe to hoist. 596 if (Inst.isSafeToSpeculativelyExecute()) 597 return true; 598 599 // Otherwise we have to check to make sure that the instruction dominates all 600 // of the exit blocks. If it doesn't, then there is a path out of the loop 601 // which does not execute this instruction, so we can't hoist it. 602 603 // If the instruction is in the header block for the loop (which is very 604 // common), it is always guaranteed to dominate the exit blocks. Since this 605 // is a common case, and can save some work, check it now. 606 if (Inst.getParent() == CurLoop->getHeader()) 607 return true; 608 609 // Get the exit blocks for the current loop. 610 SmallVector<BasicBlock*, 8> ExitBlocks; 611 CurLoop->getExitBlocks(ExitBlocks); 612 613 // For each exit block, get the DT node and walk up the DT until the 614 // instruction's basic block is found or we exit the loop. 615 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) 616 if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[i], Inst.getParent())) 617 return false; 618 619 return true; 620} 621 622/// PromoteAliasSet - Try to promote memory values to scalars by sinking 623/// stores out of the loop and moving loads to before the loop. We do this by 624/// looping over the stores in the loop, looking for stores to Must pointers 625/// which are loop invariant. 626/// 627void LICM::PromoteAliasSet(AliasSet &AS) { 628 // We can promote this alias set if it has a store, if it is a "Must" alias 629 // set, if the pointer is loop invariant, and if we are not eliminating any 630 // volatile loads or stores. 631 if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || 632 AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) 633 return; 634 635 assert(!AS.empty() && 636 "Must alias set should have at least one pointer element in it!"); 637 Value *SomePtr = AS.begin()->getValue(); 638 639 // It isn't safe to promote a load/store from the loop if the load/store is 640 // conditional. For example, turning: 641 // 642 // for () { if (c) *P += 1; } 643 // 644 // into: 645 // 646 // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; 647 // 648 // is not safe, because *P may only be valid to access if 'c' is true. 649 // 650 // It is safe to promote P if all uses are direct load/stores and if at 651 // least one is guaranteed to be executed. 652 bool GuaranteedToExecute = false; 653 654 SmallVector<Instruction*, 64> LoopUses; 655 SmallPtrSet<Value*, 4> PointerMustAliases; 656 657 // Check that all of the pointers in the alias set have the same type. We 658 // cannot (yet) promote a memory location that is loaded and stored in 659 // different sizes. 660 for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { 661 Value *ASIV = ASI->getValue(); 662 PointerMustAliases.insert(ASIV); 663 664 // Check that all of the pointers in the alias set have the same type. We 665 // cannot (yet) promote a memory location that is loaded and stored in 666 // different sizes. 667 if (SomePtr->getType() != ASIV->getType()) 668 return; 669 670 for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end(); 671 UI != UE; ++UI) { 672 // Ignore instructions that are outside the loop. 673 Instruction *Use = dyn_cast<Instruction>(*UI); 674 if (!Use || !CurLoop->contains(Use)) 675 continue; 676 677 // If there is an non-load/store instruction in the loop, we can't promote 678 // it. 679 if (isa<LoadInst>(Use)) 680 assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken"); 681 else if (isa<StoreInst>(Use)) 682 assert(!cast<StoreInst>(Use)->isVolatile() && 683 Use->getOperand(0) != ASIV && "AST broken"); 684 else 685 return; // Not a load or store. 686 687 if (!GuaranteedToExecute) 688 GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); 689 690 LoopUses.push_back(Use); 691 } 692 } 693 694 // If there isn't a guaranteed-to-execute instruction, we can't promote. 695 if (!GuaranteedToExecute) 696 return; 697 698 // Otherwise, this is safe to promote, lets do it! 699 DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); 700 Changed = true; 701 ++NumPromoted; 702 703 // We use the SSAUpdater interface to insert phi nodes as required. 704 SmallVector<PHINode*, 16> NewPHIs; 705 SSAUpdater SSA(&NewPHIs); 706 707 // It wants to know some value of the same type as what we'll be inserting. 708 Value *SomeValue; 709 if (isa<LoadInst>(LoopUses[0])) 710 SomeValue = LoopUses[0]; 711 else 712 SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0); 713 SSA.Initialize(SomeValue); 714 715 // First step: bucket up uses of the pointers by the block they occur in. 716 // This is important because we have to handle multiple defs/uses in a block 717 // ourselves: SSAUpdater is purely for cross-block references. 718 // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element. 719 DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock; 720 for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { 721 Instruction *User = LoopUses[i]; 722 UsesByBlock[User->getParent()].push_back(User); 723 } 724 725 // Okay, now we can iterate over all the blocks in the loop with uses, 726 // processing them. Keep track of which loads are loading a live-in value. 727 SmallVector<LoadInst*, 32> LiveInLoads; 728 729 for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) { 730 Instruction *User = LoopUses[LoopUse]; 731 std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()]; 732 733 // If this block has already been processed, ignore this repeat use. 734 if (BlockUses.empty()) continue; 735 736 // Okay, this is the first use in the block. If this block just has a 737 // single user in it, we can rewrite it trivially. 738 if (BlockUses.size() == 1) { 739 // If it is a store, it is a trivial def of the value in the block. 740 if (isa<StoreInst>(User)) { 741 SSA.AddAvailableValue(User->getParent(), 742 cast<StoreInst>(User)->getOperand(0)); 743 } else { 744 // Otherwise it is a load, queue it to rewrite as a live-in load. 745 LiveInLoads.push_back(cast<LoadInst>(User)); 746 } 747 BlockUses.clear(); 748 continue; 749 } 750 751 // Otherwise, check to see if this block is all loads. If so, we can queue 752 // them all as live in loads. 753 bool HasStore = false; 754 for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { 755 if (isa<StoreInst>(BlockUses[i])) { 756 HasStore = true; 757 break; 758 } 759 } 760 761 if (!HasStore) { 762 for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) 763 LiveInLoads.push_back(cast<LoadInst>(BlockUses[i])); 764 BlockUses.clear(); 765 continue; 766 } 767 768 // Otherwise, we have mixed loads and stores (or just a bunch of stores). 769 // Since SSAUpdater is purely for cross-block values, we need to determine 770 // the order of these instructions in the block. If the first use in the 771 // block is a load, then it uses the live in value. The last store defines 772 // the live out value. We handle this by doing a linear scan of the block. 773 BasicBlock *BB = User->getParent(); 774 Value *StoredValue = 0; 775 for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { 776 if (LoadInst *L = dyn_cast<LoadInst>(II)) { 777 // If this is a load to an unrelated pointer, ignore it. 778 if (!PointerMustAliases.count(L->getOperand(0))) continue; 779 780 // If we haven't seen a store yet, this is a live in use, otherwise 781 // use the stored value. 782 if (StoredValue) 783 L->replaceAllUsesWith(StoredValue); 784 else 785 LiveInLoads.push_back(L); 786 continue; 787 } 788 789 if (StoreInst *S = dyn_cast<StoreInst>(II)) { 790 // If this is a load to an unrelated pointer, ignore it. 791 if (!PointerMustAliases.count(S->getOperand(1))) continue; 792 793 // Remember that this is the active value in the block. 794 StoredValue = S->getOperand(0); 795 } 796 } 797 798 // The last stored value that happened is the live-out for the block. 799 assert(StoredValue && "Already checked that there is a store in block"); 800 SSA.AddAvailableValue(BB, StoredValue); 801 BlockUses.clear(); 802 } 803 804 // Now that all the intra-loop values are classified, set up the preheader. 805 // It gets a load of the pointer we're promoting, and it is the live-out value 806 // from the preheader. 807 LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted", 808 Preheader->getTerminator()); 809 SSA.AddAvailableValue(Preheader, PreheaderLoad); 810 811 // Now that the preheader is good to go, set up the exit blocks. Each exit 812 // block gets a store of the live-out values that feed them. Since we've 813 // already told the SSA updater about the defs in the loop and the preheader 814 // definition, it is all set and we can start using it. 815 SmallVector<BasicBlock*, 8> ExitBlocks; 816 CurLoop->getUniqueExitBlocks(ExitBlocks); 817 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 818 BasicBlock *ExitBlock = ExitBlocks[i]; 819 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); 820 Instruction *InsertPos = ExitBlock->getFirstNonPHI(); 821 new StoreInst(LiveInValue, SomePtr, InsertPos); 822 } 823 824 // Okay, now we rewrite all loads that use live-in values in the loop, 825 // inserting PHI nodes as necessary. 826 for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { 827 LoadInst *ALoad = LiveInLoads[i]; 828 ALoad->replaceAllUsesWith(SSA.GetValueInMiddleOfBlock(ALoad->getParent())); 829 } 830 831 // Now that everything is rewritten, delete the old instructions from the body 832 // of the loop. They should all be dead now. 833 for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { 834 Instruction *User = LoopUses[i]; 835 CurAST->deleteValue(User); 836 User->eraseFromParent(); 837 } 838 839 // If the preheader load is itself a pointer, we need to tell alias analysis 840 // about the new pointer we created in the preheader block and about any PHI 841 // nodes that just got inserted. 842 if (PreheaderLoad->getType()->isPointerTy()) { 843 // Copy any value stored to or loaded from a must-alias of the pointer. 844 CurAST->copyValue(SomeValue, PreheaderLoad); 845 846 for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) 847 CurAST->copyValue(SomeValue, NewPHIs[i]); 848 } 849 850 // fwew, we're done! 851} 852 853 854/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. 855void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { 856 AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); 857 if (!AST) 858 return; 859 860 AST->copyValue(From, To); 861} 862 863/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias 864/// set. 865void LICM::deleteAnalysisValue(Value *V, Loop *L) { 866 AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); 867 if (!AST) 868 return; 869 870 AST->deleteValue(V); 871} 872