LICM.cpp revision 205942a4a55d568e93480fc22d25cc7dac525fb7
1//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs loop invariant code motion, attempting to remove as much 11// code from the body of a loop as possible. It does this by either hoisting 12// code into the preheader block, or by sinking code to the exit blocks if it is 13// safe. This pass also promotes must-aliased memory locations in the loop to 14// live in registers, thus hoisting and sinking "invariant" loads and stores. 15// 16// This pass uses alias analysis for two purposes: 17// 18// 1. Moving loop invariant loads and calls out of loops. If we can determine 19// that a load or call inside of a loop never aliases anything stored to, 20// we can hoist it or sink it like any other instruction. 21// 2. Scalar Promotion of Memory - If there is a store instruction inside of 22// the loop, we try to move the store to happen AFTER the loop instead of 23// inside of the loop. This can only happen if a few conditions are true: 24// A. The pointer stored through is loop invariant 25// B. There are no stores or loads in the loop which _may_ alias the 26// pointer. There are no calls in the loop which mod/ref the pointer. 27// If these conditions are true, we can promote the loads and stores in the 28// loop of the pointer to use a temporary alloca'd variable. We then use 29// the SSAUpdater to construct the appropriate SSA form for the value. 30// 31//===----------------------------------------------------------------------===// 32 33#define DEBUG_TYPE "licm" 34#include "llvm/Transforms/Scalar.h" 35#include "llvm/Constants.h" 36#include "llvm/DerivedTypes.h" 37#include "llvm/IntrinsicInst.h" 38#include "llvm/Instructions.h" 39#include "llvm/LLVMContext.h" 40#include "llvm/Analysis/AliasAnalysis.h" 41#include "llvm/Analysis/AliasSetTracker.h" 42#include "llvm/Analysis/ConstantFolding.h" 43#include "llvm/Analysis/LoopInfo.h" 44#include "llvm/Analysis/LoopPass.h" 45#include "llvm/Analysis/Dominators.h" 46#include "llvm/Analysis/ScalarEvolution.h" 47#include "llvm/Transforms/Utils/Local.h" 48#include "llvm/Transforms/Utils/SSAUpdater.h" 49#include "llvm/Support/CFG.h" 50#include "llvm/Support/CommandLine.h" 51#include "llvm/Support/raw_ostream.h" 52#include "llvm/Support/Debug.h" 53#include "llvm/ADT/Statistic.h" 54#include <algorithm> 55using namespace llvm; 56 57STATISTIC(NumSunk , "Number of instructions sunk out of loop"); 58STATISTIC(NumHoisted , "Number of instructions hoisted out of loop"); 59STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); 60STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk"); 61STATISTIC(NumPromoted , "Number of memory locations promoted to registers"); 62 63static cl::opt<bool> 64DisablePromotion("disable-licm-promotion", cl::Hidden, 65 cl::desc("Disable memory promotion in LICM pass")); 66 67namespace { 68 struct LICM : public LoopPass { 69 static char ID; // Pass identification, replacement for typeid 70 LICM() : LoopPass(ID) { 71 initializeLICMPass(*PassRegistry::getPassRegistry()); 72 } 73 74 virtual bool runOnLoop(Loop *L, LPPassManager &LPM); 75 76 /// This transformation requires natural loop information & requires that 77 /// loop preheaders be inserted into the CFG... 78 /// 79 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 80 AU.setPreservesCFG(); 81 AU.addRequired<DominatorTree>(); 82 AU.addRequired<LoopInfo>(); 83 AU.addRequiredID(LoopSimplifyID); 84 AU.addRequired<AliasAnalysis>(); 85 AU.addPreserved<AliasAnalysis>(); 86 AU.addPreserved<ScalarEvolution>(); 87 AU.addPreservedID(LoopSimplifyID); 88 } 89 90 bool doFinalization() { 91 assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets"); 92 return false; 93 } 94 95 private: 96 AliasAnalysis *AA; // Current AliasAnalysis information 97 LoopInfo *LI; // Current LoopInfo 98 DominatorTree *DT; // Dominator Tree for the current Loop. 99 100 // State that is updated as we process loops. 101 bool Changed; // Set to true when we change anything. 102 BasicBlock *Preheader; // The preheader block of the current loop... 103 Loop *CurLoop; // The current loop we are working on... 104 AliasSetTracker *CurAST; // AliasSet information for the current loop... 105 DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap; 106 107 /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. 108 void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L); 109 110 /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias 111 /// set. 112 void deleteAnalysisValue(Value *V, Loop *L); 113 114 /// SinkRegion - Walk the specified region of the CFG (defined by all blocks 115 /// dominated by the specified block, and that are in the current loop) in 116 /// reverse depth first order w.r.t the DominatorTree. This allows us to 117 /// visit uses before definitions, allowing us to sink a loop body in one 118 /// pass without iteration. 119 /// 120 void SinkRegion(DomTreeNode *N); 121 122 /// HoistRegion - Walk the specified region of the CFG (defined by all 123 /// blocks dominated by the specified block, and that are in the current 124 /// loop) in depth first order w.r.t the DominatorTree. This allows us to 125 /// visit definitions before uses, allowing us to hoist a loop body in one 126 /// pass without iteration. 127 /// 128 void HoistRegion(DomTreeNode *N); 129 130 /// inSubLoop - Little predicate that returns true if the specified basic 131 /// block is in a subloop of the current one, not the current one itself. 132 /// 133 bool inSubLoop(BasicBlock *BB) { 134 assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); 135 for (Loop::iterator I = CurLoop->begin(), E = CurLoop->end(); I != E; ++I) 136 if ((*I)->contains(BB)) 137 return true; // A subloop actually contains this block! 138 return false; 139 } 140 141 /// isExitBlockDominatedByBlockInLoop - This method checks to see if the 142 /// specified exit block of the loop is dominated by the specified block 143 /// that is in the body of the loop. We use these constraints to 144 /// dramatically limit the amount of the dominator tree that needs to be 145 /// searched. 146 bool isExitBlockDominatedByBlockInLoop(BasicBlock *ExitBlock, 147 BasicBlock *BlockInLoop) const { 148 // If the block in the loop is the loop header, it must be dominated! 149 BasicBlock *LoopHeader = CurLoop->getHeader(); 150 if (BlockInLoop == LoopHeader) 151 return true; 152 153 DomTreeNode *BlockInLoopNode = DT->getNode(BlockInLoop); 154 DomTreeNode *IDom = DT->getNode(ExitBlock); 155 156 // Because the exit block is not in the loop, we know we have to get _at 157 // least_ its immediate dominator. 158 IDom = IDom->getIDom(); 159 160 while (IDom && IDom != BlockInLoopNode) { 161 // If we have got to the header of the loop, then the instructions block 162 // did not dominate the exit node, so we can't hoist it. 163 if (IDom->getBlock() == LoopHeader) 164 return false; 165 166 // Get next Immediate Dominator. 167 IDom = IDom->getIDom(); 168 }; 169 170 return true; 171 } 172 173 /// sink - When an instruction is found to only be used outside of the loop, 174 /// this function moves it to the exit blocks and patches up SSA form as 175 /// needed. 176 /// 177 void sink(Instruction &I); 178 179 /// hoist - When an instruction is found to only use loop invariant operands 180 /// that is safe to hoist, this instruction is called to do the dirty work. 181 /// 182 void hoist(Instruction &I); 183 184 /// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it 185 /// is not a trapping instruction or if it is a trapping instruction and is 186 /// guaranteed to execute. 187 /// 188 bool isSafeToExecuteUnconditionally(Instruction &I); 189 190 /// pointerInvalidatedByLoop - Return true if the body of this loop may 191 /// store into the memory location pointed to by V. 192 /// 193 bool pointerInvalidatedByLoop(Value *V, unsigned Size, 194 const MDNode *TBAAInfo) { 195 // Check to see if any of the basic blocks in CurLoop invalidate *V. 196 return CurAST->getAliasSetForPointer(V, Size, TBAAInfo).isMod(); 197 } 198 199 bool canSinkOrHoistInst(Instruction &I); 200 bool isNotUsedInLoop(Instruction &I); 201 202 void PromoteAliasSet(AliasSet &AS); 203 }; 204} 205 206char LICM::ID = 0; 207INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false) 208INITIALIZE_PASS_DEPENDENCY(DominatorTree) 209INITIALIZE_PASS_DEPENDENCY(LoopInfo) 210INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 211INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 212INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false) 213 214Pass *llvm::createLICMPass() { return new LICM(); } 215 216/// Hoist expressions out of the specified loop. Note, alias info for inner 217/// loop is not preserved so it is not a good idea to run LICM multiple 218/// times on one loop. 219/// 220bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { 221 Changed = false; 222 223 // Get our Loop and Alias Analysis information... 224 LI = &getAnalysis<LoopInfo>(); 225 AA = &getAnalysis<AliasAnalysis>(); 226 DT = &getAnalysis<DominatorTree>(); 227 228 CurAST = new AliasSetTracker(*AA); 229 // Collect Alias info from subloops. 230 for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); 231 LoopItr != LoopItrE; ++LoopItr) { 232 Loop *InnerL = *LoopItr; 233 AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL]; 234 assert(InnerAST && "Where is my AST?"); 235 236 // What if InnerLoop was modified by other passes ? 237 CurAST->add(*InnerAST); 238 239 // Once we've incorporated the inner loop's AST into ours, we don't need the 240 // subloop's anymore. 241 delete InnerAST; 242 LoopToAliasSetMap.erase(InnerL); 243 } 244 245 CurLoop = L; 246 247 // Get the preheader block to move instructions into... 248 Preheader = L->getLoopPreheader(); 249 250 // Loop over the body of this loop, looking for calls, invokes, and stores. 251 // Because subloops have already been incorporated into AST, we skip blocks in 252 // subloops. 253 // 254 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 255 I != E; ++I) { 256 BasicBlock *BB = *I; 257 if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. 258 CurAST->add(*BB); // Incorporate the specified basic block 259 } 260 261 // We want to visit all of the instructions in this loop... that are not parts 262 // of our subloops (they have already had their invariants hoisted out of 263 // their loop, into this loop, so there is no need to process the BODIES of 264 // the subloops). 265 // 266 // Traverse the body of the loop in depth first order on the dominator tree so 267 // that we are guaranteed to see definitions before we see uses. This allows 268 // us to sink instructions in one pass, without iteration. After sinking 269 // instructions, we perform another pass to hoist them out of the loop. 270 // 271 if (L->hasDedicatedExits()) 272 SinkRegion(DT->getNode(L->getHeader())); 273 if (Preheader) 274 HoistRegion(DT->getNode(L->getHeader())); 275 276 // Now that all loop invariants have been removed from the loop, promote any 277 // memory references to scalars that we can. 278 if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { 279 // Loop over all of the alias sets in the tracker object. 280 for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); 281 I != E; ++I) 282 PromoteAliasSet(*I); 283 } 284 285 // Clear out loops state information for the next iteration 286 CurLoop = 0; 287 Preheader = 0; 288 289 // If this loop is nested inside of another one, save the alias information 290 // for when we process the outer loop. 291 if (L->getParentLoop()) 292 LoopToAliasSetMap[L] = CurAST; 293 else 294 delete CurAST; 295 return Changed; 296} 297 298/// SinkRegion - Walk the specified region of the CFG (defined by all blocks 299/// dominated by the specified block, and that are in the current loop) in 300/// reverse depth first order w.r.t the DominatorTree. This allows us to visit 301/// uses before definitions, allowing us to sink a loop body in one pass without 302/// iteration. 303/// 304void LICM::SinkRegion(DomTreeNode *N) { 305 assert(N != 0 && "Null dominator tree node?"); 306 BasicBlock *BB = N->getBlock(); 307 308 // If this subregion is not in the top level loop at all, exit. 309 if (!CurLoop->contains(BB)) return; 310 311 // We are processing blocks in reverse dfo, so process children first. 312 const std::vector<DomTreeNode*> &Children = N->getChildren(); 313 for (unsigned i = 0, e = Children.size(); i != e; ++i) 314 SinkRegion(Children[i]); 315 316 // Only need to process the contents of this block if it is not part of a 317 // subloop (which would already have been processed). 318 if (inSubLoop(BB)) return; 319 320 for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) { 321 Instruction &I = *--II; 322 323 // If the instruction is dead, we would try to sink it because it isn't used 324 // in the loop, instead, just delete it. 325 if (isInstructionTriviallyDead(&I)) { 326 DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); 327 ++II; 328 CurAST->deleteValue(&I); 329 I.eraseFromParent(); 330 Changed = true; 331 continue; 332 } 333 334 // Check to see if we can sink this instruction to the exit blocks 335 // of the loop. We can do this if the all users of the instruction are 336 // outside of the loop. In this case, it doesn't even matter if the 337 // operands of the instruction are loop invariant. 338 // 339 if (isNotUsedInLoop(I) && canSinkOrHoistInst(I)) { 340 ++II; 341 sink(I); 342 } 343 } 344} 345 346/// HoistRegion - Walk the specified region of the CFG (defined by all blocks 347/// dominated by the specified block, and that are in the current loop) in depth 348/// first order w.r.t the DominatorTree. This allows us to visit definitions 349/// before uses, allowing us to hoist a loop body in one pass without iteration. 350/// 351void LICM::HoistRegion(DomTreeNode *N) { 352 assert(N != 0 && "Null dominator tree node?"); 353 BasicBlock *BB = N->getBlock(); 354 355 // If this subregion is not in the top level loop at all, exit. 356 if (!CurLoop->contains(BB)) return; 357 358 // Only need to process the contents of this block if it is not part of a 359 // subloop (which would already have been processed). 360 if (!inSubLoop(BB)) 361 for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) { 362 Instruction &I = *II++; 363 364 // Try constant folding this instruction. If all the operands are 365 // constants, it is technically hoistable, but it would be better to just 366 // fold it. 367 if (Constant *C = ConstantFoldInstruction(&I)) { 368 DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); 369 CurAST->copyValue(&I, C); 370 CurAST->deleteValue(&I); 371 I.replaceAllUsesWith(C); 372 I.eraseFromParent(); 373 continue; 374 } 375 376 // Try hoisting the instruction out to the preheader. We can only do this 377 // if all of the operands of the instruction are loop invariant and if it 378 // is safe to hoist the instruction. 379 // 380 if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) && 381 isSafeToExecuteUnconditionally(I)) 382 hoist(I); 383 } 384 385 const std::vector<DomTreeNode*> &Children = N->getChildren(); 386 for (unsigned i = 0, e = Children.size(); i != e; ++i) 387 HoistRegion(Children[i]); 388} 389 390/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this 391/// instruction. 392/// 393bool LICM::canSinkOrHoistInst(Instruction &I) { 394 // Loads have extra constraints we have to verify before we can hoist them. 395 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { 396 if (LI->isVolatile()) 397 return false; // Don't hoist volatile loads! 398 399 // Loads from constant memory are always safe to move, even if they end up 400 // in the same alias set as something that ends up being modified. 401 if (AA->pointsToConstantMemory(LI->getOperand(0))) 402 return true; 403 404 // Don't hoist loads which have may-aliased stores in loop. 405 unsigned Size = 0; 406 if (LI->getType()->isSized()) 407 Size = AA->getTypeStoreSize(LI->getType()); 408 return !pointerInvalidatedByLoop(LI->getOperand(0), Size, 409 LI->getMetadata(LLVMContext::MD_tbaa)); 410 } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { 411 // Handle obvious cases efficiently. 412 AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI); 413 if (Behavior == AliasAnalysis::DoesNotAccessMemory) 414 return true; 415 else if (Behavior == AliasAnalysis::OnlyReadsMemory) { 416 // If this call only reads from memory and there are no writes to memory 417 // in the loop, we can hoist or sink the call as appropriate. 418 bool FoundMod = false; 419 for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); 420 I != E; ++I) { 421 AliasSet &AS = *I; 422 if (!AS.isForwardingAliasSet() && AS.isMod()) { 423 FoundMod = true; 424 break; 425 } 426 } 427 if (!FoundMod) return true; 428 } 429 430 // FIXME: This should use mod/ref information to see if we can hoist or sink 431 // the call. 432 433 return false; 434 } 435 436 // Otherwise these instructions are hoistable/sinkable 437 return isa<BinaryOperator>(I) || isa<CastInst>(I) || 438 isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 439 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 440 isa<ShuffleVectorInst>(I); 441} 442 443/// isNotUsedInLoop - Return true if the only users of this instruction are 444/// outside of the loop. If this is true, we can sink the instruction to the 445/// exit blocks of the loop. 446/// 447bool LICM::isNotUsedInLoop(Instruction &I) { 448 for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI) { 449 Instruction *User = cast<Instruction>(*UI); 450 if (PHINode *PN = dyn_cast<PHINode>(User)) { 451 // PHI node uses occur in predecessor blocks! 452 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 453 if (PN->getIncomingValue(i) == &I) 454 if (CurLoop->contains(PN->getIncomingBlock(i))) 455 return false; 456 } else if (CurLoop->contains(User)) { 457 return false; 458 } 459 } 460 return true; 461} 462 463 464/// sink - When an instruction is found to only be used outside of the loop, 465/// this function moves it to the exit blocks and patches up SSA form as needed. 466/// This method is guaranteed to remove the original instruction from its 467/// position, and may either delete it or move it to outside of the loop. 468/// 469void LICM::sink(Instruction &I) { 470 DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); 471 472 SmallVector<BasicBlock*, 8> ExitBlocks; 473 CurLoop->getUniqueExitBlocks(ExitBlocks); 474 475 if (isa<LoadInst>(I)) ++NumMovedLoads; 476 else if (isa<CallInst>(I)) ++NumMovedCalls; 477 ++NumSunk; 478 Changed = true; 479 480 // The case where there is only a single exit node of this loop is common 481 // enough that we handle it as a special (more efficient) case. It is more 482 // efficient to handle because there are no PHI nodes that need to be placed. 483 if (ExitBlocks.size() == 1) { 484 if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) { 485 // Instruction is not used, just delete it. 486 CurAST->deleteValue(&I); 487 // If I has users in unreachable blocks, eliminate. 488 // If I is not void type then replaceAllUsesWith undef. 489 // This allows ValueHandlers and custom metadata to adjust itself. 490 if (!I.use_empty()) 491 I.replaceAllUsesWith(UndefValue::get(I.getType())); 492 I.eraseFromParent(); 493 } else { 494 // Move the instruction to the start of the exit block, after any PHI 495 // nodes in it. 496 I.moveBefore(ExitBlocks[0]->getFirstNonPHI()); 497 498 // This instruction is no longer in the AST for the current loop, because 499 // we just sunk it out of the loop. If we just sunk it into an outer 500 // loop, we will rediscover the operation when we process it. 501 CurAST->deleteValue(&I); 502 } 503 return; 504 } 505 506 if (ExitBlocks.empty()) { 507 // The instruction is actually dead if there ARE NO exit blocks. 508 CurAST->deleteValue(&I); 509 // If I has users in unreachable blocks, eliminate. 510 // If I is not void type then replaceAllUsesWith undef. 511 // This allows ValueHandlers and custom metadata to adjust itself. 512 if (!I.use_empty()) 513 I.replaceAllUsesWith(UndefValue::get(I.getType())); 514 I.eraseFromParent(); 515 return; 516 } 517 518 // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the 519 // hard work of inserting PHI nodes as necessary. 520 SmallVector<PHINode*, 8> NewPHIs; 521 SSAUpdater SSA(&NewPHIs); 522 523 if (!I.use_empty()) 524 SSA.Initialize(I.getType(), I.getName()); 525 526 // Insert a copy of the instruction in each exit block of the loop that is 527 // dominated by the instruction. Each exit block is known to only be in the 528 // ExitBlocks list once. 529 BasicBlock *InstOrigBB = I.getParent(); 530 unsigned NumInserted = 0; 531 532 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 533 BasicBlock *ExitBlock = ExitBlocks[i]; 534 535 if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) 536 continue; 537 538 // Insert the code after the last PHI node. 539 BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); 540 541 // If this is the first exit block processed, just move the original 542 // instruction, otherwise clone the original instruction and insert 543 // the copy. 544 Instruction *New; 545 if (NumInserted++ == 0) { 546 I.moveBefore(InsertPt); 547 New = &I; 548 } else { 549 New = I.clone(); 550 if (!I.getName().empty()) 551 New->setName(I.getName()+".le"); 552 ExitBlock->getInstList().insert(InsertPt, New); 553 } 554 555 // Now that we have inserted the instruction, inform SSAUpdater. 556 if (!I.use_empty()) 557 SSA.AddAvailableValue(ExitBlock, New); 558 } 559 560 // If the instruction doesn't dominate any exit blocks, it must be dead. 561 if (NumInserted == 0) { 562 CurAST->deleteValue(&I); 563 if (!I.use_empty()) 564 I.replaceAllUsesWith(UndefValue::get(I.getType())); 565 I.eraseFromParent(); 566 return; 567 } 568 569 // Next, rewrite uses of the instruction, inserting PHI nodes as needed. 570 for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) { 571 // Grab the use before incrementing the iterator. 572 Use &U = UI.getUse(); 573 // Increment the iterator before removing the use from the list. 574 ++UI; 575 SSA.RewriteUseAfterInsertions(U); 576 } 577 578 // Update CurAST for NewPHIs if I had pointer type. 579 if (I.getType()->isPointerTy()) 580 for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) 581 CurAST->copyValue(&I, NewPHIs[i]); 582 583 // Finally, remove the instruction from CurAST. It is no longer in the loop. 584 CurAST->deleteValue(&I); 585} 586 587/// hoist - When an instruction is found to only use loop invariant operands 588/// that is safe to hoist, this instruction is called to do the dirty work. 589/// 590void LICM::hoist(Instruction &I) { 591 DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " 592 << I << "\n"); 593 594 // Move the new node to the Preheader, before its terminator. 595 I.moveBefore(Preheader->getTerminator()); 596 597 if (isa<LoadInst>(I)) ++NumMovedLoads; 598 else if (isa<CallInst>(I)) ++NumMovedCalls; 599 ++NumHoisted; 600 Changed = true; 601} 602 603/// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it is 604/// not a trapping instruction or if it is a trapping instruction and is 605/// guaranteed to execute. 606/// 607bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { 608 // If it is not a trapping instruction, it is always safe to hoist. 609 if (Inst.isSafeToSpeculativelyExecute()) 610 return true; 611 612 // Otherwise we have to check to make sure that the instruction dominates all 613 // of the exit blocks. If it doesn't, then there is a path out of the loop 614 // which does not execute this instruction, so we can't hoist it. 615 616 // If the instruction is in the header block for the loop (which is very 617 // common), it is always guaranteed to dominate the exit blocks. Since this 618 // is a common case, and can save some work, check it now. 619 if (Inst.getParent() == CurLoop->getHeader()) 620 return true; 621 622 // Get the exit blocks for the current loop. 623 SmallVector<BasicBlock*, 8> ExitBlocks; 624 CurLoop->getExitBlocks(ExitBlocks); 625 626 // For each exit block, get the DT node and walk up the DT until the 627 // instruction's basic block is found or we exit the loop. 628 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) 629 if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[i], Inst.getParent())) 630 return false; 631 632 return true; 633} 634 635/// PromoteAliasSet - Try to promote memory values to scalars by sinking 636/// stores out of the loop and moving loads to before the loop. We do this by 637/// looping over the stores in the loop, looking for stores to Must pointers 638/// which are loop invariant. 639/// 640void LICM::PromoteAliasSet(AliasSet &AS) { 641 // We can promote this alias set if it has a store, if it is a "Must" alias 642 // set, if the pointer is loop invariant, and if we are not eliminating any 643 // volatile loads or stores. 644 if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || 645 AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) 646 return; 647 648 assert(!AS.empty() && 649 "Must alias set should have at least one pointer element in it!"); 650 Value *SomePtr = AS.begin()->getValue(); 651 652 // It isn't safe to promote a load/store from the loop if the load/store is 653 // conditional. For example, turning: 654 // 655 // for () { if (c) *P += 1; } 656 // 657 // into: 658 // 659 // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; 660 // 661 // is not safe, because *P may only be valid to access if 'c' is true. 662 // 663 // It is safe to promote P if all uses are direct load/stores and if at 664 // least one is guaranteed to be executed. 665 bool GuaranteedToExecute = false; 666 667 SmallVector<Instruction*, 64> LoopUses; 668 SmallPtrSet<Value*, 4> PointerMustAliases; 669 670 // Check that all of the pointers in the alias set have the same type. We 671 // cannot (yet) promote a memory location that is loaded and stored in 672 // different sizes. 673 for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { 674 Value *ASIV = ASI->getValue(); 675 PointerMustAliases.insert(ASIV); 676 677 // Check that all of the pointers in the alias set have the same type. We 678 // cannot (yet) promote a memory location that is loaded and stored in 679 // different sizes. 680 if (SomePtr->getType() != ASIV->getType()) 681 return; 682 683 for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end(); 684 UI != UE; ++UI) { 685 // Ignore instructions that are outside the loop. 686 Instruction *Use = dyn_cast<Instruction>(*UI); 687 if (!Use || !CurLoop->contains(Use)) 688 continue; 689 690 // If there is an non-load/store instruction in the loop, we can't promote 691 // it. 692 if (isa<LoadInst>(Use)) 693 assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken"); 694 else if (isa<StoreInst>(Use)) { 695 assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken"); 696 if (Use->getOperand(0) == ASIV) return; 697 } else 698 return; // Not a load or store. 699 700 if (!GuaranteedToExecute) 701 GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); 702 703 LoopUses.push_back(Use); 704 } 705 } 706 707 // If there isn't a guaranteed-to-execute instruction, we can't promote. 708 if (!GuaranteedToExecute) 709 return; 710 711 // Otherwise, this is safe to promote, lets do it! 712 DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); 713 Changed = true; 714 ++NumPromoted; 715 716 // We use the SSAUpdater interface to insert phi nodes as required. 717 SmallVector<PHINode*, 16> NewPHIs; 718 SSAUpdater SSA(&NewPHIs); 719 720 // It wants to know some value of the same type as what we'll be inserting. 721 Value *SomeValue; 722 if (isa<LoadInst>(LoopUses[0])) 723 SomeValue = LoopUses[0]; 724 else 725 SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0); 726 SSA.Initialize(SomeValue->getType(), SomeValue->getName()); 727 728 // First step: bucket up uses of the pointers by the block they occur in. 729 // This is important because we have to handle multiple defs/uses in a block 730 // ourselves: SSAUpdater is purely for cross-block references. 731 // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element. 732 DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock; 733 for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { 734 Instruction *User = LoopUses[i]; 735 UsesByBlock[User->getParent()].push_back(User); 736 } 737 738 // Okay, now we can iterate over all the blocks in the loop with uses, 739 // processing them. Keep track of which loads are loading a live-in value. 740 SmallVector<LoadInst*, 32> LiveInLoads; 741 DenseMap<Value*, Value*> ReplacedLoads; 742 743 for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) { 744 Instruction *User = LoopUses[LoopUse]; 745 std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()]; 746 747 // If this block has already been processed, ignore this repeat use. 748 if (BlockUses.empty()) continue; 749 750 // Okay, this is the first use in the block. If this block just has a 751 // single user in it, we can rewrite it trivially. 752 if (BlockUses.size() == 1) { 753 // If it is a store, it is a trivial def of the value in the block. 754 if (isa<StoreInst>(User)) { 755 SSA.AddAvailableValue(User->getParent(), 756 cast<StoreInst>(User)->getOperand(0)); 757 } else { 758 // Otherwise it is a load, queue it to rewrite as a live-in load. 759 LiveInLoads.push_back(cast<LoadInst>(User)); 760 } 761 BlockUses.clear(); 762 continue; 763 } 764 765 // Otherwise, check to see if this block is all loads. If so, we can queue 766 // them all as live in loads. 767 bool HasStore = false; 768 for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { 769 if (isa<StoreInst>(BlockUses[i])) { 770 HasStore = true; 771 break; 772 } 773 } 774 775 if (!HasStore) { 776 for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) 777 LiveInLoads.push_back(cast<LoadInst>(BlockUses[i])); 778 BlockUses.clear(); 779 continue; 780 } 781 782 // Otherwise, we have mixed loads and stores (or just a bunch of stores). 783 // Since SSAUpdater is purely for cross-block values, we need to determine 784 // the order of these instructions in the block. If the first use in the 785 // block is a load, then it uses the live in value. The last store defines 786 // the live out value. We handle this by doing a linear scan of the block. 787 BasicBlock *BB = User->getParent(); 788 Value *StoredValue = 0; 789 for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { 790 if (LoadInst *L = dyn_cast<LoadInst>(II)) { 791 // If this is a load from an unrelated pointer, ignore it. 792 if (!PointerMustAliases.count(L->getOperand(0))) continue; 793 794 // If we haven't seen a store yet, this is a live in use, otherwise 795 // use the stored value. 796 if (StoredValue) { 797 L->replaceAllUsesWith(StoredValue); 798 ReplacedLoads[L] = StoredValue; 799 } else { 800 LiveInLoads.push_back(L); 801 } 802 continue; 803 } 804 805 if (StoreInst *S = dyn_cast<StoreInst>(II)) { 806 // If this is a store to an unrelated pointer, ignore it. 807 if (!PointerMustAliases.count(S->getOperand(1))) continue; 808 809 // Remember that this is the active value in the block. 810 StoredValue = S->getOperand(0); 811 } 812 } 813 814 // The last stored value that happened is the live-out for the block. 815 assert(StoredValue && "Already checked that there is a store in block"); 816 SSA.AddAvailableValue(BB, StoredValue); 817 BlockUses.clear(); 818 } 819 820 // Now that all the intra-loop values are classified, set up the preheader. 821 // It gets a load of the pointer we're promoting, and it is the live-out value 822 // from the preheader. 823 LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted", 824 Preheader->getTerminator()); 825 SSA.AddAvailableValue(Preheader, PreheaderLoad); 826 827 // Now that the preheader is good to go, set up the exit blocks. Each exit 828 // block gets a store of the live-out values that feed them. Since we've 829 // already told the SSA updater about the defs in the loop and the preheader 830 // definition, it is all set and we can start using it. 831 SmallVector<BasicBlock*, 8> ExitBlocks; 832 CurLoop->getUniqueExitBlocks(ExitBlocks); 833 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 834 BasicBlock *ExitBlock = ExitBlocks[i]; 835 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); 836 Instruction *InsertPos = ExitBlock->getFirstNonPHI(); 837 new StoreInst(LiveInValue, SomePtr, InsertPos); 838 } 839 840 // Okay, now we rewrite all loads that use live-in values in the loop, 841 // inserting PHI nodes as necessary. 842 for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { 843 LoadInst *ALoad = LiveInLoads[i]; 844 Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); 845 ALoad->replaceAllUsesWith(NewVal); 846 CurAST->copyValue(ALoad, NewVal); 847 ReplacedLoads[ALoad] = NewVal; 848 } 849 850 // If the preheader load is itself a pointer, we need to tell alias analysis 851 // about the new pointer we created in the preheader block and about any PHI 852 // nodes that just got inserted. 853 if (PreheaderLoad->getType()->isPointerTy()) { 854 // Copy any value stored to or loaded from a must-alias of the pointer. 855 CurAST->copyValue(SomeValue, PreheaderLoad); 856 857 for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) 858 CurAST->copyValue(SomeValue, NewPHIs[i]); 859 } 860 861 // Now that everything is rewritten, delete the old instructions from the body 862 // of the loop. They should all be dead now. 863 for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { 864 Instruction *User = LoopUses[i]; 865 866 // If this is a load that still has uses, then the load must have been added 867 // as a live value in the SSAUpdate data structure for a block (e.g. because 868 // the loaded value was stored later). In this case, we need to recursively 869 // propagate the updates until we get to the real value. 870 if (!User->use_empty()) { 871 Value *NewVal = ReplacedLoads[User]; 872 assert(NewVal && "not a replaced load?"); 873 874 // Propagate down to the ultimate replacee. The intermediately loads 875 // could theoretically already have been deleted, so we don't want to 876 // dereference the Value*'s. 877 DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal); 878 while (RLI != ReplacedLoads.end()) { 879 NewVal = RLI->second; 880 RLI = ReplacedLoads.find(NewVal); 881 } 882 883 User->replaceAllUsesWith(NewVal); 884 CurAST->copyValue(User, NewVal); 885 } 886 887 CurAST->deleteValue(User); 888 User->eraseFromParent(); 889 } 890 891 // fwew, we're done! 892} 893 894 895/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. 896void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { 897 AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); 898 if (!AST) 899 return; 900 901 AST->copyValue(From, To); 902} 903 904/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias 905/// set. 906void LICM::deleteAnalysisValue(Value *V, Loop *L) { 907 AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); 908 if (!AST) 909 return; 910 911 AST->deleteValue(V); 912} 913