LICM.cpp revision 96b651c627160e1ea0f1bb86d352d697e6c1972d
1//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by the LLVM research group and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs loop invariant code motion, attempting to remove as much 11// code from the body of a loop as possible. It does this by either hoisting 12// code into the preheader block, or by sinking code to the exit blocks if it is 13// safe. This pass also promotes must-aliased memory locations in the loop to 14// live in registers, thus hoisting and sinking "invariant" loads and stores. 15// 16// This pass uses alias analysis for two purposes: 17// 18// 1. Moving loop invariant loads and calls out of loops. If we can determine 19// that a load or call inside of a loop never aliases anything stored to, 20// we can hoist it or sink it like any other instruction. 21// 2. Scalar Promotion of Memory - If there is a store instruction inside of 22// the loop, we try to move the store to happen AFTER the loop instead of 23// inside of the loop. This can only happen if a few conditions are true: 24// A. The pointer stored through is loop invariant 25// B. There are no stores or loads in the loop which _may_ alias the 26// pointer. There are no calls in the loop which mod/ref the pointer. 27// If these conditions are true, we can promote the loads and stores in the 28// loop of the pointer to use a temporary alloca'd variable. We then use 29// the mem2reg functionality to construct the appropriate SSA form for the 30// variable. 31// 32//===----------------------------------------------------------------------===// 33 34#define DEBUG_TYPE "licm" 35#include "llvm/Transforms/Scalar.h" 36#include "llvm/Constants.h" 37#include "llvm/DerivedTypes.h" 38#include "llvm/Instructions.h" 39#include "llvm/Target/TargetData.h" 40#include "llvm/Analysis/LoopInfo.h" 41#include "llvm/Analysis/LoopPass.h" 42#include "llvm/Analysis/AliasAnalysis.h" 43#include "llvm/Analysis/AliasSetTracker.h" 44#include "llvm/Analysis/Dominators.h" 45#include "llvm/Analysis/ScalarEvolution.h" 46#include "llvm/Transforms/Utils/PromoteMemToReg.h" 47#include "llvm/Support/CFG.h" 48#include "llvm/Support/Compiler.h" 49#include "llvm/Support/CommandLine.h" 50#include "llvm/Support/Debug.h" 51#include "llvm/ADT/Statistic.h" 52#include <algorithm> 53using namespace llvm; 54 55STATISTIC(NumSunk , "Number of instructions sunk out of loop"); 56STATISTIC(NumHoisted , "Number of instructions hoisted out of loop"); 57STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); 58STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk"); 59STATISTIC(NumPromoted , "Number of memory locations promoted to registers"); 60 61namespace { 62 cl::opt<bool> 63 DisablePromotion("disable-licm-promotion", cl::Hidden, 64 cl::desc("Disable memory promotion in LICM pass")); 65 66 struct VISIBILITY_HIDDEN LICM : public LoopPass { 67 static char ID; // Pass identification, replacement for typeid 68 LICM() : LoopPass((intptr_t)&ID) {} 69 70 virtual bool runOnLoop(Loop *L, LPPassManager &LPM); 71 72 /// This transformation requires natural loop information & requires that 73 /// loop preheaders be inserted into the CFG... 74 /// 75 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 76 AU.setPreservesCFG(); 77 AU.addRequiredID(LoopSimplifyID); 78 AU.addRequired<LoopInfo>(); 79 AU.addRequired<DominatorTree>(); 80 AU.addRequired<DominanceFrontier>(); // For scalar promotion (mem2reg) 81 AU.addRequired<AliasAnalysis>(); 82 AU.addPreserved<ScalarEvolution>(); 83 AU.addPreserved<DominanceFrontier>(); 84 } 85 86 bool doFinalization() { 87 LoopToAliasMap.clear(); 88 return false; 89 } 90 91 private: 92 // Various analyses that we use... 93 AliasAnalysis *AA; // Current AliasAnalysis information 94 LoopInfo *LI; // Current LoopInfo 95 DominatorTree *DT; // Dominator Tree for the current Loop... 96 DominanceFrontier *DF; // Current Dominance Frontier 97 98 // State that is updated as we process loops 99 bool Changed; // Set to true when we change anything. 100 BasicBlock *Preheader; // The preheader block of the current loop... 101 Loop *CurLoop; // The current loop we are working on... 102 AliasSetTracker *CurAST; // AliasSet information for the current loop... 103 std::map<Loop *, AliasSetTracker *> LoopToAliasMap; 104 105 /// SinkRegion - Walk the specified region of the CFG (defined by all blocks 106 /// dominated by the specified block, and that are in the current loop) in 107 /// reverse depth first order w.r.t the DominatorTree. This allows us to 108 /// visit uses before definitions, allowing us to sink a loop body in one 109 /// pass without iteration. 110 /// 111 void SinkRegion(DomTreeNode *N); 112 113 /// HoistRegion - Walk the specified region of the CFG (defined by all 114 /// blocks dominated by the specified block, and that are in the current 115 /// loop) in depth first order w.r.t the DominatorTree. This allows us to 116 /// visit definitions before uses, allowing us to hoist a loop body in one 117 /// pass without iteration. 118 /// 119 void HoistRegion(DomTreeNode *N); 120 121 /// inSubLoop - Little predicate that returns true if the specified basic 122 /// block is in a subloop of the current one, not the current one itself. 123 /// 124 bool inSubLoop(BasicBlock *BB) { 125 assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); 126 for (Loop::iterator I = CurLoop->begin(), E = CurLoop->end(); I != E; ++I) 127 if ((*I)->contains(BB)) 128 return true; // A subloop actually contains this block! 129 return false; 130 } 131 132 /// isExitBlockDominatedByBlockInLoop - This method checks to see if the 133 /// specified exit block of the loop is dominated by the specified block 134 /// that is in the body of the loop. We use these constraints to 135 /// dramatically limit the amount of the dominator tree that needs to be 136 /// searched. 137 bool isExitBlockDominatedByBlockInLoop(BasicBlock *ExitBlock, 138 BasicBlock *BlockInLoop) const { 139 // If the block in the loop is the loop header, it must be dominated! 140 BasicBlock *LoopHeader = CurLoop->getHeader(); 141 if (BlockInLoop == LoopHeader) 142 return true; 143 144 DomTreeNode *BlockInLoopNode = DT->getNode(BlockInLoop); 145 DomTreeNode *IDom = DT->getNode(ExitBlock); 146 147 // Because the exit block is not in the loop, we know we have to get _at 148 // least_ its immediate dominator. 149 do { 150 // Get next Immediate Dominator. 151 IDom = IDom->getIDom(); 152 153 // If we have got to the header of the loop, then the instructions block 154 // did not dominate the exit node, so we can't hoist it. 155 if (IDom->getBlock() == LoopHeader) 156 return false; 157 158 } while (IDom != BlockInLoopNode); 159 160 return true; 161 } 162 163 /// sink - When an instruction is found to only be used outside of the loop, 164 /// this function moves it to the exit blocks and patches up SSA form as 165 /// needed. 166 /// 167 void sink(Instruction &I); 168 169 /// hoist - When an instruction is found to only use loop invariant operands 170 /// that is safe to hoist, this instruction is called to do the dirty work. 171 /// 172 void hoist(Instruction &I); 173 174 /// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it 175 /// is not a trapping instruction or if it is a trapping instruction and is 176 /// guaranteed to execute. 177 /// 178 bool isSafeToExecuteUnconditionally(Instruction &I); 179 180 /// pointerInvalidatedByLoop - Return true if the body of this loop may 181 /// store into the memory location pointed to by V. 182 /// 183 bool pointerInvalidatedByLoop(Value *V, unsigned Size) { 184 // Check to see if any of the basic blocks in CurLoop invalidate *V. 185 return CurAST->getAliasSetForPointer(V, Size).isMod(); 186 } 187 188 bool canSinkOrHoistInst(Instruction &I); 189 bool isLoopInvariantInst(Instruction &I); 190 bool isNotUsedInLoop(Instruction &I); 191 192 /// PromoteValuesInLoop - Look at the stores in the loop and promote as many 193 /// to scalars as we can. 194 /// 195 void PromoteValuesInLoop(); 196 197 /// FindPromotableValuesInLoop - Check the current loop for stores to 198 /// definite pointers, which are not loaded and stored through may aliases. 199 /// If these are found, create an alloca for the value, add it to the 200 /// PromotedValues list, and keep track of the mapping from value to 201 /// alloca... 202 /// 203 void FindPromotableValuesInLoop( 204 std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues, 205 std::map<Value*, AllocaInst*> &Val2AlMap); 206 }; 207 208 char LICM::ID = 0; 209 RegisterPass<LICM> X("licm", "Loop Invariant Code Motion"); 210} 211 212LoopPass *llvm::createLICMPass() { return new LICM(); } 213 214/// Hoist expressions out of the specified loop... 215/// 216bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { 217 Changed = false; 218 219 // Get our Loop and Alias Analysis information... 220 LI = &getAnalysis<LoopInfo>(); 221 AA = &getAnalysis<AliasAnalysis>(); 222 DF = &getAnalysis<DominanceFrontier>(); 223 DT = &getAnalysis<DominatorTree>(); 224 225 CurAST = new AliasSetTracker(*AA); 226 // Collect Alias info from subloops 227 for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); 228 LoopItr != LoopItrE; ++LoopItr) { 229 Loop *InnerL = *LoopItr; 230 AliasSetTracker *InnerAST = LoopToAliasMap[InnerL]; 231 assert (InnerAST && "Where is my AST?"); 232 233 // What if InnerLoop was modified by other passes ? 234 CurAST->add(*InnerAST); 235 } 236 237 CurLoop = L; 238 239 // Get the preheader block to move instructions into... 240 Preheader = L->getLoopPreheader(); 241 assert(Preheader&&"Preheader insertion pass guarantees we have a preheader!"); 242 243 // Loop over the body of this loop, looking for calls, invokes, and stores. 244 // Because subloops have already been incorporated into AST, we skip blocks in 245 // subloops. 246 // 247 for (std::vector<BasicBlock*>::const_iterator I = L->getBlocks().begin(), 248 E = L->getBlocks().end(); I != E; ++I) 249 if (LI->getLoopFor(*I) == L) // Ignore blocks in subloops... 250 CurAST->add(**I); // Incorporate the specified basic block 251 252 // We want to visit all of the instructions in this loop... that are not parts 253 // of our subloops (they have already had their invariants hoisted out of 254 // their loop, into this loop, so there is no need to process the BODIES of 255 // the subloops). 256 // 257 // Traverse the body of the loop in depth first order on the dominator tree so 258 // that we are guaranteed to see definitions before we see uses. This allows 259 // us to sink instructions in one pass, without iteration. AFter sinking 260 // instructions, we perform another pass to hoist them out of the loop. 261 // 262 SinkRegion(DT->getNode(L->getHeader())); 263 HoistRegion(DT->getNode(L->getHeader())); 264 265 // Now that all loop invariants have been removed from the loop, promote any 266 // memory references to scalars that we can... 267 if (!DisablePromotion) 268 PromoteValuesInLoop(); 269 270 // Clear out loops state information for the next iteration 271 CurLoop = 0; 272 Preheader = 0; 273 274 LoopToAliasMap[L] = CurAST; 275 return Changed; 276} 277 278/// SinkRegion - Walk the specified region of the CFG (defined by all blocks 279/// dominated by the specified block, and that are in the current loop) in 280/// reverse depth first order w.r.t the DominatorTree. This allows us to visit 281/// uses before definitions, allowing us to sink a loop body in one pass without 282/// iteration. 283/// 284void LICM::SinkRegion(DomTreeNode *N) { 285 assert(N != 0 && "Null dominator tree node?"); 286 BasicBlock *BB = N->getBlock(); 287 288 // If this subregion is not in the top level loop at all, exit. 289 if (!CurLoop->contains(BB)) return; 290 291 // We are processing blocks in reverse dfo, so process children first... 292 const std::vector<DomTreeNode*> &Children = N->getChildren(); 293 for (unsigned i = 0, e = Children.size(); i != e; ++i) 294 SinkRegion(Children[i]); 295 296 // Only need to process the contents of this block if it is not part of a 297 // subloop (which would already have been processed). 298 if (inSubLoop(BB)) return; 299 300 for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) { 301 Instruction &I = *--II; 302 303 // Check to see if we can sink this instruction to the exit blocks 304 // of the loop. We can do this if the all users of the instruction are 305 // outside of the loop. In this case, it doesn't even matter if the 306 // operands of the instruction are loop invariant. 307 // 308 if (isNotUsedInLoop(I) && canSinkOrHoistInst(I)) { 309 ++II; 310 sink(I); 311 } 312 } 313} 314 315 316/// HoistRegion - Walk the specified region of the CFG (defined by all blocks 317/// dominated by the specified block, and that are in the current loop) in depth 318/// first order w.r.t the DominatorTree. This allows us to visit definitions 319/// before uses, allowing us to hoist a loop body in one pass without iteration. 320/// 321void LICM::HoistRegion(DomTreeNode *N) { 322 assert(N != 0 && "Null dominator tree node?"); 323 BasicBlock *BB = N->getBlock(); 324 325 // If this subregion is not in the top level loop at all, exit. 326 if (!CurLoop->contains(BB)) return; 327 328 // Only need to process the contents of this block if it is not part of a 329 // subloop (which would already have been processed). 330 if (!inSubLoop(BB)) 331 for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) { 332 Instruction &I = *II++; 333 334 // Try hoisting the instruction out to the preheader. We can only do this 335 // if all of the operands of the instruction are loop invariant and if it 336 // is safe to hoist the instruction. 337 // 338 if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) && 339 isSafeToExecuteUnconditionally(I)) 340 hoist(I); 341 } 342 343 const std::vector<DomTreeNode*> &Children = N->getChildren(); 344 for (unsigned i = 0, e = Children.size(); i != e; ++i) 345 HoistRegion(Children[i]); 346} 347 348/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this 349/// instruction. 350/// 351bool LICM::canSinkOrHoistInst(Instruction &I) { 352 // Loads have extra constraints we have to verify before we can hoist them. 353 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { 354 if (LI->isVolatile()) 355 return false; // Don't hoist volatile loads! 356 357 // Don't hoist loads which have may-aliased stores in loop. 358 unsigned Size = 0; 359 if (LI->getType()->isSized()) 360 Size = AA->getTargetData().getTypeSize(LI->getType()); 361 return !pointerInvalidatedByLoop(LI->getOperand(0), Size); 362 } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { 363 // Handle obvious cases efficiently. 364 if (Function *Callee = CI->getCalledFunction()) { 365 AliasAnalysis::ModRefBehavior Behavior =AA->getModRefBehavior(Callee, CI); 366 if (Behavior == AliasAnalysis::DoesNotAccessMemory) 367 return true; 368 else if (Behavior == AliasAnalysis::OnlyReadsMemory) { 369 // If this call only reads from memory and there are no writes to memory 370 // in the loop, we can hoist or sink the call as appropriate. 371 bool FoundMod = false; 372 for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); 373 I != E; ++I) { 374 AliasSet &AS = *I; 375 if (!AS.isForwardingAliasSet() && AS.isMod()) { 376 FoundMod = true; 377 break; 378 } 379 } 380 if (!FoundMod) return true; 381 } 382 } 383 384 // FIXME: This should use mod/ref information to see if we can hoist or sink 385 // the call. 386 387 return false; 388 } 389 390 // Otherwise these instructions are hoistable/sinkable 391 return isa<BinaryOperator>(I) || isa<CastInst>(I) || 392 isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 393 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 394 isa<ShuffleVectorInst>(I); 395} 396 397/// isNotUsedInLoop - Return true if the only users of this instruction are 398/// outside of the loop. If this is true, we can sink the instruction to the 399/// exit blocks of the loop. 400/// 401bool LICM::isNotUsedInLoop(Instruction &I) { 402 for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI) { 403 Instruction *User = cast<Instruction>(*UI); 404 if (PHINode *PN = dyn_cast<PHINode>(User)) { 405 // PHI node uses occur in predecessor blocks! 406 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 407 if (PN->getIncomingValue(i) == &I) 408 if (CurLoop->contains(PN->getIncomingBlock(i))) 409 return false; 410 } else if (CurLoop->contains(User->getParent())) { 411 return false; 412 } 413 } 414 return true; 415} 416 417 418/// isLoopInvariantInst - Return true if all operands of this instruction are 419/// loop invariant. We also filter out non-hoistable instructions here just for 420/// efficiency. 421/// 422bool LICM::isLoopInvariantInst(Instruction &I) { 423 // The instruction is loop invariant if all of its operands are loop-invariant 424 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) 425 if (!CurLoop->isLoopInvariant(I.getOperand(i))) 426 return false; 427 428 // If we got this far, the instruction is loop invariant! 429 return true; 430} 431 432/// sink - When an instruction is found to only be used outside of the loop, 433/// this function moves it to the exit blocks and patches up SSA form as needed. 434/// This method is guaranteed to remove the original instruction from its 435/// position, and may either delete it or move it to outside of the loop. 436/// 437void LICM::sink(Instruction &I) { 438 DOUT << "LICM sinking instruction: " << I; 439 440 std::vector<BasicBlock*> ExitBlocks; 441 CurLoop->getExitBlocks(ExitBlocks); 442 443 if (isa<LoadInst>(I)) ++NumMovedLoads; 444 else if (isa<CallInst>(I)) ++NumMovedCalls; 445 ++NumSunk; 446 Changed = true; 447 448 // The case where there is only a single exit node of this loop is common 449 // enough that we handle it as a special (more efficient) case. It is more 450 // efficient to handle because there are no PHI nodes that need to be placed. 451 if (ExitBlocks.size() == 1) { 452 if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) { 453 // Instruction is not used, just delete it. 454 CurAST->deleteValue(&I); 455 if (!I.use_empty()) // If I has users in unreachable blocks, eliminate. 456 I.replaceAllUsesWith(UndefValue::get(I.getType())); 457 I.eraseFromParent(); 458 } else { 459 // Move the instruction to the start of the exit block, after any PHI 460 // nodes in it. 461 I.removeFromParent(); 462 463 BasicBlock::iterator InsertPt = ExitBlocks[0]->begin(); 464 while (isa<PHINode>(InsertPt)) ++InsertPt; 465 ExitBlocks[0]->getInstList().insert(InsertPt, &I); 466 } 467 } else if (ExitBlocks.size() == 0) { 468 // The instruction is actually dead if there ARE NO exit blocks. 469 CurAST->deleteValue(&I); 470 if (!I.use_empty()) // If I has users in unreachable blocks, eliminate. 471 I.replaceAllUsesWith(UndefValue::get(I.getType())); 472 I.eraseFromParent(); 473 } else { 474 // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to 475 // do all of the hard work of inserting PHI nodes as necessary. We convert 476 // the value into a stack object to get it to do this. 477 478 // Firstly, we create a stack object to hold the value... 479 AllocaInst *AI = 0; 480 481 if (I.getType() != Type::VoidTy) { 482 AI = new AllocaInst(I.getType(), 0, I.getName(), 483 I.getParent()->getParent()->getEntryBlock().begin()); 484 CurAST->add(AI); 485 } 486 487 // Secondly, insert load instructions for each use of the instruction 488 // outside of the loop. 489 while (!I.use_empty()) { 490 Instruction *U = cast<Instruction>(I.use_back()); 491 492 // If the user is a PHI Node, we actually have to insert load instructions 493 // in all predecessor blocks, not in the PHI block itself! 494 if (PHINode *UPN = dyn_cast<PHINode>(U)) { 495 // Only insert into each predecessor once, so that we don't have 496 // different incoming values from the same block! 497 std::map<BasicBlock*, Value*> InsertedBlocks; 498 for (unsigned i = 0, e = UPN->getNumIncomingValues(); i != e; ++i) 499 if (UPN->getIncomingValue(i) == &I) { 500 BasicBlock *Pred = UPN->getIncomingBlock(i); 501 Value *&PredVal = InsertedBlocks[Pred]; 502 if (!PredVal) { 503 // Insert a new load instruction right before the terminator in 504 // the predecessor block. 505 PredVal = new LoadInst(AI, "", Pred->getTerminator()); 506 CurAST->add(cast<LoadInst>(PredVal)); 507 } 508 509 UPN->setIncomingValue(i, PredVal); 510 } 511 512 } else { 513 LoadInst *L = new LoadInst(AI, "", U); 514 U->replaceUsesOfWith(&I, L); 515 CurAST->add(L); 516 } 517 } 518 519 // Thirdly, insert a copy of the instruction in each exit block of the loop 520 // that is dominated by the instruction, storing the result into the memory 521 // location. Be careful not to insert the instruction into any particular 522 // basic block more than once. 523 std::set<BasicBlock*> InsertedBlocks; 524 BasicBlock *InstOrigBB = I.getParent(); 525 526 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 527 BasicBlock *ExitBlock = ExitBlocks[i]; 528 529 if (isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) { 530 // If we haven't already processed this exit block, do so now. 531 if (InsertedBlocks.insert(ExitBlock).second) { 532 // Insert the code after the last PHI node... 533 BasicBlock::iterator InsertPt = ExitBlock->begin(); 534 while (isa<PHINode>(InsertPt)) ++InsertPt; 535 536 // If this is the first exit block processed, just move the original 537 // instruction, otherwise clone the original instruction and insert 538 // the copy. 539 Instruction *New; 540 if (InsertedBlocks.size() == 1) { 541 I.removeFromParent(); 542 ExitBlock->getInstList().insert(InsertPt, &I); 543 New = &I; 544 } else { 545 New = I.clone(); 546 CurAST->copyValue(&I, New); 547 if (!I.getName().empty()) 548 New->setName(I.getName()+".le"); 549 ExitBlock->getInstList().insert(InsertPt, New); 550 } 551 552 // Now that we have inserted the instruction, store it into the alloca 553 if (AI) new StoreInst(New, AI, InsertPt); 554 } 555 } 556 } 557 558 // If the instruction doesn't dominate any exit blocks, it must be dead. 559 if (InsertedBlocks.empty()) { 560 CurAST->deleteValue(&I); 561 I.eraseFromParent(); 562 } 563 564 // Finally, promote the fine value to SSA form. 565 if (AI) { 566 std::vector<AllocaInst*> Allocas; 567 Allocas.push_back(AI); 568 PromoteMemToReg(Allocas, *DT, *DF, CurAST); 569 } 570 } 571} 572 573/// hoist - When an instruction is found to only use loop invariant operands 574/// that is safe to hoist, this instruction is called to do the dirty work. 575/// 576void LICM::hoist(Instruction &I) { 577 DOUT << "LICM hoisting to " << Preheader->getName() << ": " << I; 578 579 // Remove the instruction from its current basic block... but don't delete the 580 // instruction. 581 I.removeFromParent(); 582 583 // Insert the new node in Preheader, before the terminator. 584 Preheader->getInstList().insert(Preheader->getTerminator(), &I); 585 586 if (isa<LoadInst>(I)) ++NumMovedLoads; 587 else if (isa<CallInst>(I)) ++NumMovedCalls; 588 ++NumHoisted; 589 Changed = true; 590} 591 592/// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it is 593/// not a trapping instruction or if it is a trapping instruction and is 594/// guaranteed to execute. 595/// 596bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { 597 // If it is not a trapping instruction, it is always safe to hoist. 598 if (!Inst.isTrapping()) return true; 599 600 // Otherwise we have to check to make sure that the instruction dominates all 601 // of the exit blocks. If it doesn't, then there is a path out of the loop 602 // which does not execute this instruction, so we can't hoist it. 603 604 // If the instruction is in the header block for the loop (which is very 605 // common), it is always guaranteed to dominate the exit blocks. Since this 606 // is a common case, and can save some work, check it now. 607 if (Inst.getParent() == CurLoop->getHeader()) 608 return true; 609 610 // It's always safe to load from a global or alloca. 611 if (isa<LoadInst>(Inst)) 612 if (isa<AllocationInst>(Inst.getOperand(0)) || 613 isa<GlobalVariable>(Inst.getOperand(0))) 614 return true; 615 616 // Get the exit blocks for the current loop. 617 std::vector<BasicBlock*> ExitBlocks; 618 CurLoop->getExitBlocks(ExitBlocks); 619 620 // For each exit block, get the DT node and walk up the DT until the 621 // instruction's basic block is found or we exit the loop. 622 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) 623 if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[i], Inst.getParent())) 624 return false; 625 626 return true; 627} 628 629 630/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking 631/// stores out of the loop and moving loads to before the loop. We do this by 632/// looping over the stores in the loop, looking for stores to Must pointers 633/// which are loop invariant. We promote these memory locations to use allocas 634/// instead. These allocas can easily be raised to register values by the 635/// PromoteMem2Reg functionality. 636/// 637void LICM::PromoteValuesInLoop() { 638 // PromotedValues - List of values that are promoted out of the loop. Each 639 // value has an alloca instruction for it, and a canonical version of the 640 // pointer. 641 std::vector<std::pair<AllocaInst*, Value*> > PromotedValues; 642 std::map<Value*, AllocaInst*> ValueToAllocaMap; // Map of ptr to alloca 643 644 FindPromotableValuesInLoop(PromotedValues, ValueToAllocaMap); 645 if (ValueToAllocaMap.empty()) return; // If there are values to promote. 646 647 Changed = true; 648 NumPromoted += PromotedValues.size(); 649 650 std::vector<Value*> PointerValueNumbers; 651 652 // Emit a copy from the value into the alloca'd value in the loop preheader 653 TerminatorInst *LoopPredInst = Preheader->getTerminator(); 654 for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { 655 Value *Ptr = PromotedValues[i].second; 656 657 // If we are promoting a pointer value, update alias information for the 658 // inserted load. 659 Value *LoadValue = 0; 660 if (isa<PointerType>(cast<PointerType>(Ptr->getType())->getElementType())) { 661 // Locate a load or store through the pointer, and assign the same value 662 // to LI as we are loading or storing. Since we know that the value is 663 // stored in this loop, this will always succeed. 664 for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end(); 665 UI != E; ++UI) 666 if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { 667 LoadValue = LI; 668 break; 669 } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { 670 if (SI->getOperand(1) == Ptr) { 671 LoadValue = SI->getOperand(0); 672 break; 673 } 674 } 675 assert(LoadValue && "No store through the pointer found!"); 676 PointerValueNumbers.push_back(LoadValue); // Remember this for later. 677 } 678 679 // Load from the memory we are promoting. 680 LoadInst *LI = new LoadInst(Ptr, Ptr->getName()+".promoted", LoopPredInst); 681 682 if (LoadValue) CurAST->copyValue(LoadValue, LI); 683 684 // Store into the temporary alloca. 685 new StoreInst(LI, PromotedValues[i].first, LoopPredInst); 686 } 687 688 // Scan the basic blocks in the loop, replacing uses of our pointers with 689 // uses of the allocas in question. 690 // 691 const std::vector<BasicBlock*> &LoopBBs = CurLoop->getBlocks(); 692 for (std::vector<BasicBlock*>::const_iterator I = LoopBBs.begin(), 693 E = LoopBBs.end(); I != E; ++I) { 694 // Rewrite all loads and stores in the block of the pointer... 695 for (BasicBlock::iterator II = (*I)->begin(), E = (*I)->end(); 696 II != E; ++II) { 697 if (LoadInst *L = dyn_cast<LoadInst>(II)) { 698 std::map<Value*, AllocaInst*>::iterator 699 I = ValueToAllocaMap.find(L->getOperand(0)); 700 if (I != ValueToAllocaMap.end()) 701 L->setOperand(0, I->second); // Rewrite load instruction... 702 } else if (StoreInst *S = dyn_cast<StoreInst>(II)) { 703 std::map<Value*, AllocaInst*>::iterator 704 I = ValueToAllocaMap.find(S->getOperand(1)); 705 if (I != ValueToAllocaMap.end()) 706 S->setOperand(1, I->second); // Rewrite store instruction... 707 } 708 } 709 } 710 711 // Now that the body of the loop uses the allocas instead of the original 712 // memory locations, insert code to copy the alloca value back into the 713 // original memory location on all exits from the loop. Note that we only 714 // want to insert one copy of the code in each exit block, though the loop may 715 // exit to the same block more than once. 716 // 717 std::set<BasicBlock*> ProcessedBlocks; 718 719 std::vector<BasicBlock*> ExitBlocks; 720 CurLoop->getExitBlocks(ExitBlocks); 721 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) 722 if (ProcessedBlocks.insert(ExitBlocks[i]).second) { 723 // Copy all of the allocas into their memory locations. 724 BasicBlock::iterator BI = ExitBlocks[i]->begin(); 725 while (isa<PHINode>(*BI)) 726 ++BI; // Skip over all of the phi nodes in the block. 727 Instruction *InsertPos = BI; 728 unsigned PVN = 0; 729 for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { 730 // Load from the alloca. 731 LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos); 732 733 // If this is a pointer type, update alias info appropriately. 734 if (isa<PointerType>(LI->getType())) 735 CurAST->copyValue(PointerValueNumbers[PVN++], LI); 736 737 // Store into the memory we promoted. 738 new StoreInst(LI, PromotedValues[i].second, InsertPos); 739 } 740 } 741 742 // Now that we have done the deed, use the mem2reg functionality to promote 743 // all of the new allocas we just created into real SSA registers. 744 // 745 std::vector<AllocaInst*> PromotedAllocas; 746 PromotedAllocas.reserve(PromotedValues.size()); 747 for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) 748 PromotedAllocas.push_back(PromotedValues[i].first); 749 PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST); 750} 751 752/// FindPromotableValuesInLoop - Check the current loop for stores to definite 753/// pointers, which are not loaded and stored through may aliases. If these are 754/// found, create an alloca for the value, add it to the PromotedValues list, 755/// and keep track of the mapping from value to alloca. 756/// 757void LICM::FindPromotableValuesInLoop( 758 std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues, 759 std::map<Value*, AllocaInst*> &ValueToAllocaMap) { 760 Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin(); 761 762 // Loop over all of the alias sets in the tracker object. 763 for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); 764 I != E; ++I) { 765 AliasSet &AS = *I; 766 // We can promote this alias set if it has a store, if it is a "Must" alias 767 // set, if the pointer is loop invariant, and if we are not eliminating any 768 // volatile loads or stores. 769 if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias() && 770 !AS.isVolatile() && CurLoop->isLoopInvariant(AS.begin()->first)) { 771 assert(AS.begin() != AS.end() && 772 "Must alias set should have at least one pointer element in it!"); 773 Value *V = AS.begin()->first; 774 775 // Check that all of the pointers in the alias set have the same type. We 776 // cannot (yet) promote a memory location that is loaded and stored in 777 // different sizes. 778 bool PointerOk = true; 779 for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) 780 if (V->getType() != I->first->getType()) { 781 PointerOk = false; 782 break; 783 } 784 785 if (PointerOk) { 786 const Type *Ty = cast<PointerType>(V->getType())->getElementType(); 787 AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart); 788 PromotedValues.push_back(std::make_pair(AI, V)); 789 790 // Update the AST and alias analysis. 791 CurAST->copyValue(V, AI); 792 793 for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) 794 ValueToAllocaMap.insert(std::make_pair(I->first, AI)); 795 796 DOUT << "LICM: Promoting value: " << *V << "\n"; 797 } 798 } 799 } 800} 801