LICM.cpp revision ed6dfc2856cd44a8aa608a9074eabbf42dd6cadc
1//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by the LLVM research group and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass performs loop invariant code motion, attempting to remove as much 11// code from the body of a loop as possible. It does this by either hoisting 12// code into the preheader block, or by sinking code to the exit blocks if it is 13// safe. This pass also promotes must-aliased memory locations in the loop to 14// live in registers. 15// 16// This pass uses alias analysis for two purposes: 17// 18// 1. Moving loop invariant loads out of loops. If we can determine that a 19// load inside of a loop never aliases anything stored to, we can hoist it 20// or sink it like any other instruction. 21// 2. Scalar Promotion of Memory - If there is a store instruction inside of 22// the loop, we try to move the store to happen AFTER the loop instead of 23// inside of the loop. This can only happen if a few conditions are true: 24// A. The pointer stored through is loop invariant 25// B. There are no stores or loads in the loop which _may_ alias the 26// pointer. There are no calls in the loop which mod/ref the pointer. 27// If these conditions are true, we can promote the loads and stores in the 28// loop of the pointer to use a temporary alloca'd variable. We then use 29// the mem2reg functionality to construct the appropriate SSA form for the 30// variable. 31// 32//===----------------------------------------------------------------------===// 33 34#include "llvm/Transforms/Scalar.h" 35#include "llvm/Transforms/Utils/PromoteMemToReg.h" 36#include "llvm/Transforms/Utils/Local.h" 37#include "llvm/Analysis/LoopInfo.h" 38#include "llvm/Analysis/AliasAnalysis.h" 39#include "llvm/Analysis/AliasSetTracker.h" 40#include "llvm/Analysis/Dominators.h" 41#include "llvm/Instructions.h" 42#include "llvm/DerivedTypes.h" 43#include "llvm/Target/TargetData.h" 44#include "llvm/Support/CFG.h" 45#include "Support/CommandLine.h" 46#include "Support/Debug.h" 47#include "Support/Statistic.h" 48#include "llvm/Assembly/Writer.h" 49#include <algorithm> 50using namespace llvm; 51 52namespace { 53 cl::opt<bool> 54 DisablePromotion("disable-licm-promotion", cl::Hidden, 55 cl::desc("Disable memory promotion in LICM pass")); 56 57 Statistic<> NumHoisted("licm", "Number of instructions hoisted out of loop"); 58 Statistic<> NumHoistedLoads("licm", "Number of load insts hoisted"); 59 Statistic<> NumPromoted("licm", 60 "Number of memory locations promoted to registers"); 61 62 struct LICM : public FunctionPass { 63 virtual bool runOnFunction(Function &F); 64 65 /// This transformation requires natural loop information & requires that 66 /// loop preheaders be inserted into the CFG... 67 /// 68 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 69 AU.setPreservesCFG(); 70 AU.addRequiredID(LoopSimplifyID); 71 AU.addRequired<LoopInfo>(); 72 AU.addRequired<DominatorTree>(); 73 AU.addRequired<DominanceFrontier>(); // For scalar promotion (mem2reg) 74 AU.addRequired<AliasAnalysis>(); 75 } 76 77 private: 78 // Various analyses that we use... 79 AliasAnalysis *AA; // Current AliasAnalysis information 80 LoopInfo *LI; // Current LoopInfo 81 DominatorTree *DT; // Dominator Tree for the current Loop... 82 DominanceFrontier *DF; // Current Dominance Frontier 83 84 // State that is updated as we process loops 85 bool Changed; // Set to true when we change anything. 86 BasicBlock *Preheader; // The preheader block of the current loop... 87 Loop *CurLoop; // The current loop we are working on... 88 AliasSetTracker *CurAST; // AliasSet information for the current loop... 89 90 /// visitLoop - Hoist expressions out of the specified loop... 91 /// 92 void visitLoop(Loop *L, AliasSetTracker &AST); 93 94 /// HoistRegion - Walk the specified region of the CFG (defined by all 95 /// blocks dominated by the specified block, and that are in the current 96 /// loop) in depth first order w.r.t the DominatorTree. This allows us to 97 /// visit definitions before uses, allowing us to hoist a loop body in one 98 /// pass without iteration. 99 /// 100 void HoistRegion(DominatorTree::Node *N); 101 102 /// inSubLoop - Little predicate that returns true if the specified basic 103 /// block is in a subloop of the current one, not the current one itself. 104 /// 105 bool inSubLoop(BasicBlock *BB) { 106 assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); 107 for (unsigned i = 0, e = CurLoop->getSubLoops().size(); i != e; ++i) 108 if (CurLoop->getSubLoops()[i]->contains(BB)) 109 return true; // A subloop actually contains this block! 110 return false; 111 } 112 113 /// hoist - When an instruction is found to only use loop invariant operands 114 /// that is safe to hoist, this instruction is called to do the dirty work. 115 /// 116 void hoist(Instruction &I); 117 118 /// SafeToHoist - Only hoist an instruction if it is not a trapping 119 /// instruction or if it is a trapping instruction and is guaranteed to 120 /// execute. 121 /// 122 bool SafeToHoist(Instruction &I); 123 124 /// pointerInvalidatedByLoop - Return true if the body of this loop may 125 /// store into the memory location pointed to by V. 126 /// 127 bool pointerInvalidatedByLoop(Value *V) { 128 // Check to see if any of the basic blocks in CurLoop invalidate *V. 129 return CurAST->getAliasSetForPointer(V, 0).isMod(); 130 } 131 132 /// isLoopInvariant - Return true if the specified value is loop invariant 133 /// 134 inline bool isLoopInvariant(Value *V) { 135 if (Instruction *I = dyn_cast<Instruction>(V)) 136 return !CurLoop->contains(I->getParent()); 137 return true; // All non-instructions are loop invariant 138 } 139 bool isLoopInvariantInst(Instruction &Inst); 140 141 /// PromoteValuesInLoop - Look at the stores in the loop and promote as many 142 /// to scalars as we can. 143 /// 144 void PromoteValuesInLoop(); 145 146 /// findPromotableValuesInLoop - Check the current loop for stores to 147 /// definite pointers, which are not loaded and stored through may aliases. 148 /// If these are found, create an alloca for the value, add it to the 149 /// PromotedValues list, and keep track of the mapping from value to 150 /// alloca... 151 /// 152 void findPromotableValuesInLoop( 153 std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues, 154 std::map<Value*, AllocaInst*> &Val2AlMap); 155 }; 156 157 RegisterOpt<LICM> X("licm", "Loop Invariant Code Motion"); 158} 159 160FunctionPass *llvm::createLICMPass() { return new LICM(); } 161 162/// runOnFunction - For LICM, this simply traverses the loop structure of the 163/// function, hoisting expressions out of loops if possible. 164/// 165bool LICM::runOnFunction(Function &) { 166 Changed = false; 167 168 // Get our Loop and Alias Analysis information... 169 LI = &getAnalysis<LoopInfo>(); 170 AA = &getAnalysis<AliasAnalysis>(); 171 DF = &getAnalysis<DominanceFrontier>(); 172 DT = &getAnalysis<DominatorTree>(); 173 174 // Hoist expressions out of all of the top-level loops. 175 const std::vector<Loop*> &TopLevelLoops = LI->getTopLevelLoops(); 176 for (std::vector<Loop*>::const_iterator I = TopLevelLoops.begin(), 177 E = TopLevelLoops.end(); I != E; ++I) { 178 AliasSetTracker AST(*AA); 179 visitLoop(*I, AST); 180 } 181 return Changed; 182} 183 184 185/// visitLoop - Hoist expressions out of the specified loop... 186/// 187void LICM::visitLoop(Loop *L, AliasSetTracker &AST) { 188 // Recurse through all subloops before we process this loop... 189 for (std::vector<Loop*>::const_iterator I = L->getSubLoops().begin(), 190 E = L->getSubLoops().end(); I != E; ++I) { 191 AliasSetTracker SubAST(*AA); 192 visitLoop(*I, SubAST); 193 194 // Incorporate information about the subloops into this loop... 195 AST.add(SubAST); 196 } 197 CurLoop = L; 198 CurAST = &AST; 199 200 // Get the preheader block to move instructions into... 201 Preheader = L->getLoopPreheader(); 202 assert(Preheader&&"Preheader insertion pass guarantees we have a preheader!"); 203 204 // Loop over the body of this loop, looking for calls, invokes, and stores. 205 // Because subloops have already been incorporated into AST, we skip blocks in 206 // subloops. 207 // 208 const std::vector<BasicBlock*> &LoopBBs = L->getBlocks(); 209 for (std::vector<BasicBlock*>::const_iterator I = LoopBBs.begin(), 210 E = LoopBBs.end(); I != E; ++I) 211 if (LI->getLoopFor(*I) == L) // Ignore blocks in subloops... 212 AST.add(**I); // Incorporate the specified basic block 213 214 // We want to visit all of the instructions in this loop... that are not parts 215 // of our subloops (they have already had their invariants hoisted out of 216 // their loop, into this loop, so there is no need to process the BODIES of 217 // the subloops). 218 // 219 // Traverse the body of the loop in depth first order on the dominator tree so 220 // that we are guaranteed to see definitions before we see uses. This allows 221 // us to perform the LICM transformation in one pass, without iteration. 222 // 223 HoistRegion(DT->getNode(L->getHeader())); 224 225 // Now that all loop invariants have been removed from the loop, promote any 226 // memory references to scalars that we can... 227 if (!DisablePromotion) 228 PromoteValuesInLoop(); 229 230 // Clear out loops state information for the next iteration 231 CurLoop = 0; 232 Preheader = 0; 233} 234 235/// HoistRegion - Walk the specified region of the CFG (defined by all blocks 236/// dominated by the specified block, and that are in the current loop) in depth 237/// first order w.r.t the DominatorTree. This allows us to visit definitions 238/// before uses, allowing us to hoist a loop body in one pass without iteration. 239/// 240void LICM::HoistRegion(DominatorTree::Node *N) { 241 assert(N != 0 && "Null dominator tree node?"); 242 BasicBlock *BB = N->getBlock(); 243 244 // If this subregion is not in the top level loop at all, exit. 245 if (!CurLoop->contains(BB)) return; 246 247 // Only need to hoist the contents of this block if it is not part of a 248 // subloop (which would already have been hoisted) 249 if (!inSubLoop(BB)) 250 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ) { 251 Instruction &Inst = *I++; 252 if (isLoopInvariantInst(Inst) && SafeToHoist(Inst)) 253 hoist(Inst); 254 } 255 256 const std::vector<DominatorTree::Node*> &Children = N->getChildren(); 257 for (unsigned i = 0, e = Children.size(); i != e; ++i) 258 HoistRegion(Children[i]); 259} 260 261bool LICM::isLoopInvariantInst(Instruction &I) { 262 assert(!isa<TerminatorInst>(I) && "Can't hoist terminator instructions!"); 263 264 // We can only hoist simple expressions... 265 if (!isa<BinaryOperator>(I) && !isa<ShiftInst>(I) && !isa<LoadInst>(I) && 266 !isa<GetElementPtrInst>(I) && !isa<CastInst>(I) && !isa<VANextInst>(I) && 267 !isa<VAArgInst>(I)) 268 return false; 269 270 // The instruction is loop invariant if all of its operands are loop-invariant 271 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) 272 if (!isLoopInvariant(I.getOperand(i))) 273 return false; 274 275 // Loads have extra constraints we have to verify before we can hoist them. 276 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { 277 if (LI->isVolatile()) 278 return false; // Don't hoist volatile loads! 279 280 // Don't hoist loads which have may-aliased stores in loop. 281 if (pointerInvalidatedByLoop(I.getOperand(0))) 282 return false; 283 } 284 285 // If we got this far, the instruction is loop invariant! 286 return true; 287} 288 289 290/// hoist - When an instruction is found to only use loop invariant operands 291/// that is safe to hoist, this instruction is called to do the dirty work. 292/// 293void LICM::hoist(Instruction &Inst) { 294 DEBUG(std::cerr << "LICM hoisting to"; 295 WriteAsOperand(std::cerr, Preheader, false); 296 std::cerr << ": " << Inst); 297 298 if (isa<LoadInst>(Inst)) 299 ++NumHoistedLoads; 300 301 // Remove the instruction from its current basic block... but don't delete the 302 // instruction. 303 Inst.getParent()->getInstList().remove(&Inst); 304 305 // Insert the new node in Preheader, before the terminator. 306 Preheader->getInstList().insert(Preheader->getTerminator(), &Inst); 307 308 ++NumHoisted; 309 Changed = true; 310} 311 312/// SafeToHoist - Only hoist an instruction if it is not a trapping instruction 313/// or if it is a trapping instruction and is guaranteed to execute 314/// 315bool LICM::SafeToHoist(Instruction &Inst) { 316 // If it is not a trapping instruction, it is always safe to hoist. 317 if (!Inst.isTrapping()) return true; 318 319 // Otherwise we have to check to make sure that the instruction dominates all 320 // of the exit blocks. If it doesn't, then there is a path out of the loop 321 // which does not execute this instruction, so we can't hoist it. 322 323 // If the instruction is in the header block for the loop (which is very 324 // common), it is always guaranteed to dominate the exit blocks. Since this 325 // is a common case, and can save some work, check it now. 326 BasicBlock *LoopHeader = CurLoop->getHeader(); 327 if (Inst.getParent() == LoopHeader) 328 return true; 329 330 // Get the Dominator Tree Node for the instruction's basic block. 331 DominatorTree::Node *InstDTNode = DT->getNode(Inst.getParent()); 332 333 // Get the exit blocks for the current loop. 334 const std::vector<BasicBlock* > &ExitBlocks = CurLoop->getExitBlocks(); 335 336 // For each exit block, get the DT node and walk up the DT until the 337 // instruction's basic block is found or we exit the loop. 338 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 339 DominatorTree::Node *IDom = DT->getNode(ExitBlocks[i]); 340 341 do { 342 // Get next Immediate Dominator. 343 IDom = IDom->getIDom(); 344 345 // If we have got to the header of the loop, then the instructions block 346 // did not dominate the exit node, so we can't hoist it. 347 if (IDom->getBlock() == LoopHeader) 348 return false; 349 350 } while(IDom != InstDTNode); 351 } 352 353 return true; 354} 355 356 357/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking 358/// stores out of the loop and moving loads to before the loop. We do this by 359/// looping over the stores in the loop, looking for stores to Must pointers 360/// which are loop invariant. We promote these memory locations to use allocas 361/// instead. These allocas can easily be raised to register values by the 362/// PromoteMem2Reg functionality. 363/// 364void LICM::PromoteValuesInLoop() { 365 // PromotedValues - List of values that are promoted out of the loop. Each 366 // value has an alloca instruction for it, and a canonical version of the 367 // pointer. 368 std::vector<std::pair<AllocaInst*, Value*> > PromotedValues; 369 std::map<Value*, AllocaInst*> ValueToAllocaMap; // Map of ptr to alloca 370 371 findPromotableValuesInLoop(PromotedValues, ValueToAllocaMap); 372 if (ValueToAllocaMap.empty()) return; // If there are values to promote... 373 374 Changed = true; 375 NumPromoted += PromotedValues.size(); 376 377 // Emit a copy from the value into the alloca'd value in the loop preheader 378 TerminatorInst *LoopPredInst = Preheader->getTerminator(); 379 for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { 380 // Load from the memory we are promoting... 381 LoadInst *LI = new LoadInst(PromotedValues[i].second, 382 PromotedValues[i].second->getName()+".promoted", 383 LoopPredInst); 384 // Store into the temporary alloca... 385 new StoreInst(LI, PromotedValues[i].first, LoopPredInst); 386 } 387 388 // Scan the basic blocks in the loop, replacing uses of our pointers with 389 // uses of the allocas in question. If we find a branch that exits the 390 // loop, make sure to put reload code into all of the successors of the 391 // loop. 392 // 393 const std::vector<BasicBlock*> &LoopBBs = CurLoop->getBlocks(); 394 for (std::vector<BasicBlock*>::const_iterator I = LoopBBs.begin(), 395 E = LoopBBs.end(); I != E; ++I) { 396 // Rewrite all loads and stores in the block of the pointer... 397 for (BasicBlock::iterator II = (*I)->begin(), E = (*I)->end(); 398 II != E; ++II) { 399 if (LoadInst *L = dyn_cast<LoadInst>(II)) { 400 std::map<Value*, AllocaInst*>::iterator 401 I = ValueToAllocaMap.find(L->getOperand(0)); 402 if (I != ValueToAllocaMap.end()) 403 L->setOperand(0, I->second); // Rewrite load instruction... 404 } else if (StoreInst *S = dyn_cast<StoreInst>(II)) { 405 std::map<Value*, AllocaInst*>::iterator 406 I = ValueToAllocaMap.find(S->getOperand(1)); 407 if (I != ValueToAllocaMap.end()) 408 S->setOperand(1, I->second); // Rewrite store instruction... 409 } 410 } 411 412 // Check to see if any successors of this block are outside of the loop. 413 // If so, we need to copy the value from the alloca back into the memory 414 // location... 415 // 416 for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) 417 if (!CurLoop->contains(*SI)) { 418 // Copy all of the allocas into their memory locations... 419 BasicBlock::iterator BI = (*SI)->begin(); 420 while (isa<PHINode>(*BI)) 421 ++BI; // Skip over all of the phi nodes in the block... 422 Instruction *InsertPos = BI; 423 for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { 424 // Load from the alloca... 425 LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos); 426 // Store into the memory we promoted... 427 new StoreInst(LI, PromotedValues[i].second, InsertPos); 428 } 429 } 430 } 431 432 // Now that we have done the deed, use the mem2reg functionality to promote 433 // all of the new allocas we just created into real SSA registers... 434 // 435 std::vector<AllocaInst*> PromotedAllocas; 436 PromotedAllocas.reserve(PromotedValues.size()); 437 for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) 438 PromotedAllocas.push_back(PromotedValues[i].first); 439 PromoteMemToReg(PromotedAllocas, *DT, *DF, AA->getTargetData()); 440} 441 442/// findPromotableValuesInLoop - Check the current loop for stores to definite 443/// pointers, which are not loaded and stored through may aliases. If these are 444/// found, create an alloca for the value, add it to the PromotedValues list, 445/// and keep track of the mapping from value to alloca... 446/// 447void LICM::findPromotableValuesInLoop( 448 std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues, 449 std::map<Value*, AllocaInst*> &ValueToAllocaMap) { 450 Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin(); 451 452 // Loop over all of the alias sets in the tracker object... 453 for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); 454 I != E; ++I) { 455 AliasSet &AS = *I; 456 // We can promote this alias set if it has a store, if it is a "Must" alias 457 // set, and if the pointer is loop invariant. 458 if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias() && 459 isLoopInvariant(AS.begin()->first)) { 460 assert(AS.begin() != AS.end() && 461 "Must alias set should have at least one pointer element in it!"); 462 Value *V = AS.begin()->first; 463 464 // Check that all of the pointers in the alias set have the same type. We 465 // cannot (yet) promote a memory location that is loaded and stored in 466 // different sizes. 467 bool PointerOk = true; 468 for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) 469 if (V->getType() != I->first->getType()) { 470 PointerOk = false; 471 break; 472 } 473 474 if (PointerOk) { 475 const Type *Ty = cast<PointerType>(V->getType())->getElementType(); 476 AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart); 477 PromotedValues.push_back(std::make_pair(AI, V)); 478 479 for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) 480 ValueToAllocaMap.insert(std::make_pair(I->first, AI)); 481 482 DEBUG(std::cerr << "LICM: Promoting value: " << *V << "\n"); 483 } 484 } 485 } 486} 487