LoopUnswitch.cpp revision 825cb98d9a9a28c007d6a10d5ebec858e669a685
149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski//===-- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop ------===// 249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// 349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// The LLVM Compiler Infrastructure 449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// 549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// This file is distributed under the University of Illinois Open Source 649683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// License. See LICENSE.TXT for details. 749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// 849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski//===----------------------------------------------------------------------===// 949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// 1049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// This pass transforms loops that contain branches on loop-invariant conditions 1149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// to have multiple loops. For example, it turns the left into the right code: 1249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// 1349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// for (...) if (lic) 1449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// A for (...) 1549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// if (lic) A; B; C 1649683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// B else 17a1514e24cc24b050f53a12650e047799358833a1Chandler Carruth// C for (...) 18a1514e24cc24b050f53a12650e047799358833a1Chandler Carruth// A; C 1949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// 20a1514e24cc24b050f53a12650e047799358833a1Chandler Carruth// This can increase the size of the code exponentially (doubling it every time 2149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// a loop is unswitched) so we only unswitch if the resultant code will be 2249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// smaller than a threshold. 230b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth// 2449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// This pass expects LICM to be run before it to hoist invariant conditions out 2549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// of the loop, to make the unswitching opportunity obvious. 2649683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// 2749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski//===----------------------------------------------------------------------===// 2849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 2949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#define DEBUG_TYPE "loop-unswitch" 3049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Transforms/Scalar.h" 3149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Constants.h" 3249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/DerivedTypes.h" 333639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski#include "llvm/Function.h" 343639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski#include "llvm/Instructions.h" 353639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski#include "llvm/Analysis/ConstantFolding.h" 363639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski#include "llvm/Analysis/LoopInfo.h" 373639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski#include "llvm/Analysis/LoopPass.h" 3849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Analysis/Dominators.h" 3949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Transforms/Utils/Cloning.h" 4049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Transforms/Utils/Local.h" 413639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski#include "llvm/Transforms/Utils/BasicBlockUtils.h" 4249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/ADT/Statistic.h" 4349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/ADT/SmallPtrSet.h" 443639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski#include "llvm/Support/CommandLine.h" 4549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Support/Compiler.h" 4649683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Support/Debug.h" 4749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include <algorithm> 4849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include <set> 4949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskiusing namespace llvm; 503639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski 513639ce2575660a0e6938d2e84e8bd9a738fd7051Justin HolewinskiSTATISTIC(NumBranches, "Number of branches unswitched"); 523639ce2575660a0e6938d2e84e8bd9a738fd7051Justin HolewinskiSTATISTIC(NumSwitches, "Number of switches unswitched"); 5349683f3c961379fbc088871a5d6304950f1f1cbcJustin HolewinskiSTATISTIC(NumSelects , "Number of selects unswitched"); 5449683f3c961379fbc088871a5d6304950f1f1cbcJustin HolewinskiSTATISTIC(NumTrivial , "Number of unswitches that are trivial"); 5549683f3c961379fbc088871a5d6304950f1f1cbcJustin HolewinskiSTATISTIC(NumSimplify, "Number of simplifications of unswitched code"); 5649683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 573639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinskistatic cl::opt<unsigned> 583639ce2575660a0e6938d2e84e8bd9a738fd7051Justin HolewinskiThreshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), 593639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski cl::init(10), cl::Hidden); 6049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 6149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskinamespace { 6249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski class VISIBILITY_HIDDEN LoopUnswitch : public LoopPass { 6349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski LoopInfo *LI; // Loop information 6449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski LPPassManager *LPM; 6549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 663639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski // LoopProcessWorklist - Used to check if second loop needs processing 6749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski // after RewriteLoopBodyWithConditionConstant rewrites first loop. 6849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski std::vector<Loop*> LoopProcessWorklist; 6949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski SmallPtrSet<Value *,8> UnswitchedVals; 7049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 7149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski bool OptimizeForSize; 7249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski bool redoLoop; 7349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 7449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski Loop *currentLoop; 7549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski DominanceFrontier *DF; 7649683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski DominatorTree *DT; 7749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski BasicBlock *loopHeader; 7849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski BasicBlock *loopPreheader; 793639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski 8049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski /// LoopDF - Loop's dominance frontier. This set is a collection of 8149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski /// loop exiting blocks' DF member blocks. However this does set does not 8249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski /// includes basic blocks that are inside loop. 8349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski SmallPtrSet<BasicBlock *, 8> LoopDF; 8449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 853639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski /// OrigLoopExitMap - This is used to map loop exiting block with 8649683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski /// corresponding loop exit block, before updating CFG. 8749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski DenseMap<BasicBlock *, BasicBlock *> OrigLoopExitMap; 8849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 8949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski // LoopBlocks contains all of the basic blocks of the loop, including the 9049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski // preheader of the loop, the body of the loop, and the exit blocks of the 913639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski // loop, in that order. 9249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski std::vector<BasicBlock*> LoopBlocks; 9349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski // NewBlocks contained cloned copy of basic blocks from LoopBlocks. 9449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski std::vector<BasicBlock*> NewBlocks; 9549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski public: 9649683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski static char ID; // Pass ID, replacement for typeid 9749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski explicit LoopUnswitch(bool Os = false) : 9849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski LoopPass((intptr_t)&ID), OptimizeForSize(Os), redoLoop(false), 9949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski currentLoop(NULL), DF(NULL), DT(NULL), loopHeader(NULL), 10049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski loopPreheader(NULL) {} 10149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 10249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski bool runOnLoop(Loop *L, LPPassManager &LPM); 10349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski bool processCurrentLoop(); 10449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 10549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski /// This transformation requires natural loop information & requires that 10649683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski /// loop preheaders be inserted into the CFG... 10749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski /// 10849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski virtual void getAnalysisUsage(AnalysisUsage &AU) const { 10949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski AU.addRequiredID(LoopSimplifyID); 11049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski AU.addPreservedID(LoopSimplifyID); 11149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski AU.addRequired<LoopInfo>(); 11249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski AU.addPreserved<LoopInfo>(); 11349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski AU.addRequiredID(LCSSAID); 11449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski AU.addPreservedID(LCSSAID); 11549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski AU.addPreserved<DominatorTree>(); 11649683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski AU.addPreserved<DominanceFrontier>(); 11749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski } 11849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 119 private: 120 121 /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist, 122 /// remove it. 123 void RemoveLoopFromWorklist(Loop *L) { 124 std::vector<Loop*>::iterator I = std::find(LoopProcessWorklist.begin(), 125 LoopProcessWorklist.end(), L); 126 if (I != LoopProcessWorklist.end()) 127 LoopProcessWorklist.erase(I); 128 } 129 130 void initLoopData() { 131 loopHeader = currentLoop->getHeader(); 132 loopPreheader = currentLoop->getLoopPreheader(); 133 } 134 135 /// Split all of the edges from inside the loop to their exit blocks. 136 /// Update the appropriate Phi nodes as we do so. 137 void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks, 138 SmallVector<BasicBlock *, 8> &MiddleBlocks); 139 140 /// If BB's dominance frontier has a member that is not part of loop L then 141 /// remove it. Add NewDFMember in BB's dominance frontier. 142 void ReplaceLoopExternalDFMember(Loop *L, BasicBlock *BB, 143 BasicBlock *NewDFMember); 144 145 bool UnswitchIfProfitable(Value *LoopCond, Constant *Val); 146 unsigned getLoopUnswitchCost(Value *LIC); 147 void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, 148 BasicBlock *ExitBlock); 149 void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L); 150 151 void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, 152 Constant *Val, bool isEqual); 153 154 void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, 155 BasicBlock *TrueDest, 156 BasicBlock *FalseDest, 157 Instruction *InsertPt); 158 159 void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L); 160 void RemoveBlockIfDead(BasicBlock *BB, 161 std::vector<Instruction*> &Worklist, Loop *l); 162 void RemoveLoopFromHierarchy(Loop *L); 163 bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0, 164 BasicBlock **LoopExit = 0); 165 166 }; 167} 168char LoopUnswitch::ID = 0; 169static RegisterPass<LoopUnswitch> X("loop-unswitch", "Unswitch loops"); 170 171LoopPass *llvm::createLoopUnswitchPass(bool Os) { 172 return new LoopUnswitch(Os); 173} 174 175/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is 176/// invariant in the loop, or has an invariant piece, return the invariant. 177/// Otherwise, return null. 178static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { 179 // Constants should be folded, not unswitched on! 180 if (isa<Constant>(Cond)) return false; 181 182 // TODO: Handle: br (VARIANT|INVARIANT). 183 // TODO: Hoist simple expressions out of loops. 184 if (L->isLoopInvariant(Cond)) return Cond; 185 186 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond)) 187 if (BO->getOpcode() == Instruction::And || 188 BO->getOpcode() == Instruction::Or) { 189 // If either the left or right side is invariant, we can unswitch on this, 190 // which will cause the branch to go away in one loop and the condition to 191 // simplify in the other one. 192 if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed)) 193 return LHS; 194 if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed)) 195 return RHS; 196 } 197 198 return 0; 199} 200 201bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { 202 LI = &getAnalysis<LoopInfo>(); 203 LPM = &LPM_Ref; 204 DF = getAnalysisToUpdate<DominanceFrontier>(); 205 DT = getAnalysisToUpdate<DominatorTree>(); 206 currentLoop = L; 207 bool Changed = false; 208 209 do { 210 assert(currentLoop->isLCSSAForm()); 211 redoLoop = false; 212 Changed |= processCurrentLoop(); 213 } while(redoLoop); 214 215 return Changed; 216} 217 218/// processCurrentLoop - Do actual work and unswitch loop if possible 219/// and profitable. 220bool LoopUnswitch::processCurrentLoop() { 221 bool Changed = false; 222 223 // Loop over all of the basic blocks in the loop. If we find an interior 224 // block that is branching on a loop-invariant condition, we can unswitch this 225 // loop. 226 for (Loop::block_iterator I = currentLoop->block_begin(), 227 E = currentLoop->block_end(); 228 I != E; ++I) { 229 TerminatorInst *TI = (*I)->getTerminator(); 230 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 231 // If this isn't branching on an invariant condition, we can't unswitch 232 // it. 233 if (BI->isConditional()) { 234 // See if this, or some part of it, is loop invariant. If so, we can 235 // unswitch on it if we desire. 236 Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), 237 currentLoop, Changed); 238 if (LoopCond && UnswitchIfProfitable(LoopCond, 239 ConstantInt::getTrue())) { 240 ++NumBranches; 241 return true; 242 } 243 } 244 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { 245 Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), 246 currentLoop, Changed); 247 if (LoopCond && SI->getNumCases() > 1) { 248 // Find a value to unswitch on: 249 // FIXME: this should chose the most expensive case! 250 Constant *UnswitchVal = SI->getCaseValue(1); 251 // Do not process same value again and again. 252 if (!UnswitchedVals.insert(UnswitchVal)) 253 continue; 254 255 if (UnswitchIfProfitable(LoopCond, UnswitchVal)) { 256 ++NumSwitches; 257 return true; 258 } 259 } 260 } 261 262 // Scan the instructions to check for unswitchable values. 263 for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); 264 BBI != E; ++BBI) 265 if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) { 266 Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), 267 currentLoop, Changed); 268 if (LoopCond && UnswitchIfProfitable(LoopCond, 269 ConstantInt::getTrue())) { 270 ++NumSelects; 271 return true; 272 } 273 } 274 } 275 return Changed; 276} 277 278/// isTrivialLoopExitBlock - Check to see if all paths from BB either: 279/// 1. Exit the loop with no side effects. 280/// 2. Branch to the latch block with no side-effects. 281/// 282/// If these conditions are true, we return true and set ExitBB to the block we 283/// exit through. 284/// 285static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, 286 BasicBlock *&ExitBB, 287 std::set<BasicBlock*> &Visited) { 288 if (!Visited.insert(BB).second) { 289 // Already visited and Ok, end of recursion. 290 return true; 291 } else if (!L->contains(BB)) { 292 // Otherwise, this is a loop exit, this is fine so long as this is the 293 // first exit. 294 if (ExitBB != 0) return false; 295 ExitBB = BB; 296 return true; 297 } 298 299 // Otherwise, this is an unvisited intra-loop node. Check all successors. 300 for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) { 301 // Check to see if the successor is a trivial loop exit. 302 if (!isTrivialLoopExitBlockHelper(L, *SI, ExitBB, Visited)) 303 return false; 304 } 305 306 // Okay, everything after this looks good, check to make sure that this block 307 // doesn't include any side effects. 308 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) 309 if (I->mayWriteToMemory()) 310 return false; 311 312 return true; 313} 314 315/// isTrivialLoopExitBlock - Return true if the specified block unconditionally 316/// leads to an exit from the specified loop, and has no side-effects in the 317/// process. If so, return the block that is exited to, otherwise return null. 318static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { 319 std::set<BasicBlock*> Visited; 320 Visited.insert(L->getHeader()); // Branches to header are ok. 321 BasicBlock *ExitBB = 0; 322 if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited)) 323 return ExitBB; 324 return 0; 325} 326 327/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is 328/// trivial: that is, that the condition controls whether or not the loop does 329/// anything at all. If this is a trivial condition, unswitching produces no 330/// code duplications (equivalently, it produces a simpler loop and a new empty 331/// loop, which gets deleted). 332/// 333/// If this is a trivial condition, return true, otherwise return false. When 334/// returning true, this sets Cond and Val to the condition that controls the 335/// trivial condition: when Cond dynamically equals Val, the loop is known to 336/// exit. Finally, this sets LoopExit to the BB that the loop exits to when 337/// Cond == Val. 338/// 339bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, 340 BasicBlock **LoopExit) { 341 BasicBlock *Header = currentLoop->getHeader(); 342 TerminatorInst *HeaderTerm = Header->getTerminator(); 343 344 BasicBlock *LoopExitBB = 0; 345 if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) { 346 // If the header block doesn't end with a conditional branch on Cond, we 347 // can't handle it. 348 if (!BI->isConditional() || BI->getCondition() != Cond) 349 return false; 350 351 // Check to see if a successor of the branch is guaranteed to go to the 352 // latch block or exit through a one exit block without having any 353 // side-effects. If so, determine the value of Cond that causes it to do 354 // this. 355 if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, 356 BI->getSuccessor(0)))) { 357 if (Val) *Val = ConstantInt::getTrue(); 358 } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, 359 BI->getSuccessor(1)))) { 360 if (Val) *Val = ConstantInt::getFalse(); 361 } 362 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) { 363 // If this isn't a switch on Cond, we can't handle it. 364 if (SI->getCondition() != Cond) return false; 365 366 // Check to see if a successor of the switch is guaranteed to go to the 367 // latch block or exit through a one exit block without having any 368 // side-effects. If so, determine the value of Cond that causes it to do 369 // this. Note that we can't trivially unswitch on the default case. 370 for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) 371 if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, 372 SI->getSuccessor(i)))) { 373 // Okay, we found a trivial case, remember the value that is trivial. 374 if (Val) *Val = SI->getCaseValue(i); 375 break; 376 } 377 } 378 379 // If we didn't find a single unique LoopExit block, or if the loop exit block 380 // contains phi nodes, this isn't trivial. 381 if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin())) 382 return false; // Can't handle this. 383 384 if (LoopExit) *LoopExit = LoopExitBB; 385 386 // We already know that nothing uses any scalar values defined inside of this 387 // loop. As such, we just have to check to see if this loop will execute any 388 // side-effecting instructions (e.g. stores, calls, volatile loads) in the 389 // part of the loop that the code *would* execute. We already checked the 390 // tail, check the header now. 391 for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I) 392 if (I->mayWriteToMemory()) 393 return false; 394 return true; 395} 396 397/// getLoopUnswitchCost - Return the cost (code size growth) that will happen if 398/// we choose to unswitch current loop on the specified value. 399/// 400unsigned LoopUnswitch::getLoopUnswitchCost(Value *LIC) { 401 // If the condition is trivial, always unswitch. There is no code growth for 402 // this case. 403 if (IsTrivialUnswitchCondition(LIC)) 404 return 0; 405 406 // FIXME: This is really overly conservative. However, more liberal 407 // estimations have thus far resulted in excessive unswitching, which is bad 408 // both in compile time and in code size. This should be replaced once 409 // someone figures out how a good estimation. 410 return currentLoop->getBlocks().size(); 411 412 unsigned Cost = 0; 413 // FIXME: this is brain dead. It should take into consideration code 414 // shrinkage. 415 for (Loop::block_iterator I = currentLoop->block_begin(), 416 E = currentLoop->block_end(); 417 I != E; ++I) { 418 BasicBlock *BB = *I; 419 // Do not include empty blocks in the cost calculation. This happen due to 420 // loop canonicalization and will be removed. 421 if (BB->begin() == BasicBlock::iterator(BB->getTerminator())) 422 continue; 423 424 // Count basic blocks. 425 ++Cost; 426 } 427 428 return Cost; 429} 430 431/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when 432/// LoopCond == Val to simplify the loop. If we decide that this is profitable, 433/// unswitch the loop, reprocess the pieces, then return true. 434bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){ 435 // Check to see if it would be profitable to unswitch current loop. 436 unsigned Cost = getLoopUnswitchCost(LoopCond); 437 438 // Do not do non-trivial unswitch while optimizing for size. 439 if (Cost && OptimizeForSize) 440 return false; 441 442 if (Cost > Threshold) { 443 // FIXME: this should estimate growth by the amount of code shared by the 444 // resultant unswitched loops. 445 // 446 DOUT << "NOT unswitching loop %" 447 << currentLoop->getHeader()->getName() << ", cost too high: " 448 << currentLoop->getBlocks().size() << "\n"; 449 return false; 450 } 451 452 initLoopData(); 453 454 Constant *CondVal; 455 BasicBlock *ExitBlock; 456 if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) { 457 UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock); 458 } else { 459 UnswitchNontrivialCondition(LoopCond, Val, currentLoop); 460 } 461 462 return true; 463} 464 465// RemapInstruction - Convert the instruction operands from referencing the 466// current values into those specified by ValueMap. 467// 468static inline void RemapInstruction(Instruction *I, 469 DenseMap<const Value *, Value*> &ValueMap) { 470 for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { 471 Value *Op = I->getOperand(op); 472 DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op); 473 if (It != ValueMap.end()) Op = It->second; 474 I->setOperand(op, Op); 475 } 476} 477 478// CloneDomInfo - NewBB is cloned from Orig basic block. Now clone Dominator 479// Info. 480// 481// If Orig block's immediate dominator is mapped in VM then use corresponding 482// immediate dominator from the map. Otherwise Orig block's dominator is also 483// NewBB's dominator. 484// 485// OrigPreheader is loop pre-header before this pass started 486// updating CFG. NewPrehader is loops new pre-header. However, after CFG 487// manipulation, loop L may not exist. So rely on input parameter NewPreheader. 488static void CloneDomInfo(BasicBlock *NewBB, BasicBlock *Orig, 489 BasicBlock *NewPreheader, BasicBlock *OrigPreheader, 490 BasicBlock *OrigHeader, 491 DominatorTree *DT, DominanceFrontier *DF, 492 DenseMap<const Value*, Value*> &VM) { 493 494 // If NewBB alreay has found its place in domiantor tree then no need to do 495 // anything. 496 if (DT->getNode(NewBB)) 497 return; 498 499 // If Orig does not have any immediate domiantor then its clone, NewBB, does 500 // not need any immediate dominator. 501 DomTreeNode *OrigNode = DT->getNode(Orig); 502 if (!OrigNode) 503 return; 504 DomTreeNode *OrigIDomNode = OrigNode->getIDom(); 505 if (!OrigIDomNode) 506 return; 507 508 BasicBlock *OrigIDom = NULL; 509 510 // If Orig is original loop header then its immediate dominator is 511 // NewPreheader. 512 if (Orig == OrigHeader) 513 OrigIDom = NewPreheader; 514 515 // If Orig is new pre-header then its immediate dominator is 516 // original pre-header. 517 else if (Orig == NewPreheader) 518 OrigIDom = OrigPreheader; 519 520 // Otherwise ask DT to find Orig's immediate dominator. 521 else 522 OrigIDom = OrigIDomNode->getBlock(); 523 524 // Initially use Orig's immediate dominator as NewBB's immediate dominator. 525 BasicBlock *NewIDom = OrigIDom; 526 DenseMap<const Value*, Value*>::iterator I = VM.find(OrigIDom); 527 if (I != VM.end()) { 528 NewIDom = cast<BasicBlock>(I->second); 529 530 // If NewIDom does not have corresponding dominatore tree node then 531 // get one. 532 if (!DT->getNode(NewIDom)) 533 CloneDomInfo(NewIDom, OrigIDom, NewPreheader, OrigPreheader, 534 OrigHeader, DT, DF, VM); 535 } 536 537 DT->addNewBlock(NewBB, NewIDom); 538 539 // Copy cloned dominance frontiner set 540 DominanceFrontier::DomSetType NewDFSet; 541 if (DF) { 542 DominanceFrontier::iterator DFI = DF->find(Orig); 543 if ( DFI != DF->end()) { 544 DominanceFrontier::DomSetType S = DFI->second; 545 for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end(); 546 I != E; ++I) { 547 BasicBlock *BB = *I; 548 DenseMap<const Value*, Value*>::iterator IDM = VM.find(BB); 549 if (IDM != VM.end()) 550 NewDFSet.insert(cast<BasicBlock>(IDM->second)); 551 else 552 NewDFSet.insert(BB); 553 } 554 } 555 DF->addBasicBlock(NewBB, NewDFSet); 556 } 557} 558 559/// CloneLoop - Recursively clone the specified loop and all of its children, 560/// mapping the blocks with the specified map. 561static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap<const Value*, Value*> &VM, 562 LoopInfo *LI, LPPassManager *LPM) { 563 Loop *New = new Loop(); 564 565 LPM->insertLoop(New, PL); 566 567 // Add all of the blocks in L to the new loop. 568 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 569 I != E; ++I) 570 if (LI->getLoopFor(*I) == L) 571 New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), LI->getBase()); 572 573 // Add all of the subloops to the new loop. 574 for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) 575 CloneLoop(*I, New, VM, LI, LPM); 576 577 return New; 578} 579 580/// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values 581/// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest. Insert the 582/// code immediately before InsertPt. 583void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, 584 BasicBlock *TrueDest, 585 BasicBlock *FalseDest, 586 Instruction *InsertPt) { 587 // Insert a conditional branch on LIC to the two preheaders. The original 588 // code is the true version and the new code is the false version. 589 Value *BranchVal = LIC; 590 if (!isa<ConstantInt>(Val) || Val->getType() != Type::Int1Ty) 591 BranchVal = new ICmpInst(ICmpInst::ICMP_EQ, LIC, Val, "tmp", InsertPt); 592 else if (Val != ConstantInt::getTrue()) 593 // We want to enter the new loop when the condition is true. 594 std::swap(TrueDest, FalseDest); 595 596 // Insert the new branch. 597 BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt); 598} 599 600 601/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable 602/// condition in it (a cond branch from its header block to its latch block, 603/// where the path through the loop that doesn't execute its body has no 604/// side-effects), unswitch it. This doesn't involve any code duplication, just 605/// moving the conditional branch outside of the loop and updating loop info. 606void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, 607 Constant *Val, 608 BasicBlock *ExitBlock) { 609 DOUT << "loop-unswitch: Trivial-Unswitch loop %" 610 << loopHeader->getName() << " [" << L->getBlocks().size() 611 << " blocks] in Function " << L->getHeader()->getParent()->getName() 612 << " on cond: " << *Val << " == " << *Cond << "\n"; 613 614 // First step, split the preheader, so that we know that there is a safe place 615 // to insert the conditional branch. We will change loopPreheader to have a 616 // conditional branch on Cond. 617 BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, this); 618 619 // Now that we have a place to insert the conditional branch, create a place 620 // to branch to: this is the exit block out of the loop that we should 621 // short-circuit to. 622 623 // Split this block now, so that the loop maintains its exit block, and so 624 // that the jump from the preheader can execute the contents of the exit block 625 // without actually branching to it (the exit block should be dominated by the 626 // loop header, not the preheader). 627 assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); 628 BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this); 629 630 // Okay, now we have a position to branch from and a position to branch to, 631 // insert the new conditional branch. 632 EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, 633 loopPreheader->getTerminator()); 634 if (DT) { 635 DT->changeImmediateDominator(NewExit, loopPreheader); 636 DT->changeImmediateDominator(NewPH, loopPreheader); 637 } 638 639 if (DF) { 640 // NewExit is now part of NewPH and Loop Header's dominance 641 // frontier. 642 DominanceFrontier::iterator DFI = DF->find(NewPH); 643 if (DFI != DF->end()) 644 DF->addToFrontier(DFI, NewExit); 645 DFI = DF->find(loopHeader); 646 DF->addToFrontier(DFI, NewExit); 647 648 // ExitBlock does not have successors then NewExit is part of 649 // its dominance frontier. 650 if (succ_begin(ExitBlock) == succ_end(ExitBlock)) { 651 DFI = DF->find(ExitBlock); 652 DF->addToFrontier(DFI, NewExit); 653 } 654 } 655 LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L); 656 loopPreheader->getTerminator()->eraseFromParent(); 657 658 // We need to reprocess this loop, it could be unswitched again. 659 redoLoop = true; 660 661 // Now that we know that the loop is never entered when this condition is a 662 // particular value, rewrite the loop with this info. We know that this will 663 // at least eliminate the old branch. 664 RewriteLoopBodyWithConditionConstant(L, Cond, Val, false); 665 ++NumTrivial; 666} 667 668/// ReplaceLoopExternalDFMember - 669/// If BB's dominance frontier has a member that is not part of loop L then 670/// remove it. Add NewDFMember in BB's dominance frontier. 671void LoopUnswitch::ReplaceLoopExternalDFMember(Loop *L, BasicBlock *BB, 672 BasicBlock *NewDFMember) { 673 674 DominanceFrontier::iterator DFI = DF->find(BB); 675 if (DFI == DF->end()) 676 return; 677 678 DominanceFrontier::DomSetType &DFSet = DFI->second; 679 for (DominanceFrontier::DomSetType::iterator DI = DFSet.begin(), 680 DE = DFSet.end(); DI != DE;) { 681 BasicBlock *B = *DI++; 682 if (L->contains(B)) 683 continue; 684 685 DF->removeFromFrontier(DFI, B); 686 LoopDF.insert(B); 687 } 688 689 DF->addToFrontier(DFI, NewDFMember); 690} 691 692/// SplitExitEdges - Split all of the edges from inside the loop to their exit 693/// blocks. Update the appropriate Phi nodes as we do so. 694void LoopUnswitch::SplitExitEdges(Loop *L, 695 const SmallVector<BasicBlock *, 8> &ExitBlocks, 696 SmallVector<BasicBlock *, 8> &MiddleBlocks) { 697 698 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 699 BasicBlock *ExitBlock = ExitBlocks[i]; 700 std::vector<BasicBlock*> Preds(pred_begin(ExitBlock), pred_end(ExitBlock)); 701 702 for (unsigned j = 0, e = Preds.size(); j != e; ++j) { 703 BasicBlock* MiddleBlock = SplitEdge(Preds[j], ExitBlock, this); 704 MiddleBlocks.push_back(MiddleBlock); 705 BasicBlock* StartBlock = Preds[j]; 706 BasicBlock* EndBlock; 707 if (MiddleBlock->getSinglePredecessor() == ExitBlock) { 708 EndBlock = MiddleBlock; 709 MiddleBlock = EndBlock->getSinglePredecessor();; 710 } else { 711 EndBlock = ExitBlock; 712 } 713 714 OrigLoopExitMap[StartBlock] = EndBlock; 715 716 std::set<PHINode*> InsertedPHIs; 717 PHINode* OldLCSSA = 0; 718 for (BasicBlock::iterator I = EndBlock->begin(); 719 (OldLCSSA = dyn_cast<PHINode>(I)); ++I) { 720 Value* OldValue = OldLCSSA->getIncomingValueForBlock(MiddleBlock); 721 PHINode* NewLCSSA = PHINode::Create(OldLCSSA->getType(), 722 OldLCSSA->getName() + ".us-lcssa", 723 MiddleBlock->getTerminator()); 724 NewLCSSA->addIncoming(OldValue, StartBlock); 725 OldLCSSA->setIncomingValue(OldLCSSA->getBasicBlockIndex(MiddleBlock), 726 NewLCSSA); 727 InsertedPHIs.insert(NewLCSSA); 728 } 729 730 BasicBlock::iterator InsertPt = EndBlock->getFirstNonPHI(); 731 for (BasicBlock::iterator I = MiddleBlock->begin(); 732 (OldLCSSA = dyn_cast<PHINode>(I)) && InsertedPHIs.count(OldLCSSA) == 0; 733 ++I) { 734 PHINode *NewLCSSA = PHINode::Create(OldLCSSA->getType(), 735 OldLCSSA->getName() + ".us-lcssa", 736 InsertPt); 737 OldLCSSA->replaceAllUsesWith(NewLCSSA); 738 NewLCSSA->addIncoming(OldLCSSA, MiddleBlock); 739 } 740 741 if (DF && DT) { 742 // StartBlock -- > MiddleBlock -- > EndBlock 743 // StartBlock is loop exiting block. EndBlock will become merge point 744 // of two loop exits after loop unswitch. 745 746 // If StartBlock's DF member includes a block that is not loop member 747 // then replace that DF member with EndBlock. 748 749 // If MiddleBlock's DF member includes a block that is not loop member 750 // tnen replace that DF member with EndBlock. 751 752 ReplaceLoopExternalDFMember(L, StartBlock, EndBlock); 753 ReplaceLoopExternalDFMember(L, MiddleBlock, EndBlock); 754 } 755 } 756 } 757 758} 759 760/// addBBToDomFrontier - Helper function. Insert DFBB in Basic Block BB's 761/// dominance frontier using iterator DFI. 762static void addBBToDomFrontier(DominanceFrontier &DF, 763 DominanceFrontier::iterator &DFI, 764 BasicBlock *BB, BasicBlock *DFBB) { 765 if (DFI != DF.end()) { 766 DF.addToFrontier(DFI, DFBB); 767 return; 768 } 769 770 DominanceFrontier::DomSetType NSet; 771 NSet.insert(DFBB); 772 DF.addBasicBlock(BB, NSet); 773 DFI = DF.find(BB); 774} 775 776/// UnswitchNontrivialCondition - We determined that the loop is profitable 777/// to unswitch when LIC equal Val. Split it into loop versions and test the 778/// condition outside of either loop. Return the loops created as Out1/Out2. 779void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, 780 Loop *L) { 781 Function *F = loopHeader->getParent(); 782 DOUT << "loop-unswitch: Unswitching loop %" 783 << loopHeader->getName() << " [" << L->getBlocks().size() 784 << " blocks] in Function " << F->getName() 785 << " when '" << *Val << "' == " << *LIC << "\n"; 786 787 LoopBlocks.clear(); 788 NewBlocks.clear(); 789 790 // First step, split the preheader and exit blocks, and add these blocks to 791 // the LoopBlocks list. 792 BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this); 793 LoopBlocks.push_back(NewPreheader); 794 795 // We want the loop to come after the preheader, but before the exit blocks. 796 LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end()); 797 798 SmallVector<BasicBlock*, 8> ExitBlocks; 799 L->getUniqueExitBlocks(ExitBlocks); 800 801 // Split all of the edges from inside the loop to their exit blocks. Update 802 // the appropriate Phi nodes as we do so. 803 SmallVector<BasicBlock *,8> MiddleBlocks; 804 SplitExitEdges(L, ExitBlocks, MiddleBlocks); 805 806 // The exit blocks may have been changed due to edge splitting, recompute. 807 ExitBlocks.clear(); 808 L->getUniqueExitBlocks(ExitBlocks); 809 810 // Add exit blocks to the loop blocks. 811 LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end()); 812 813 // Next step, clone all of the basic blocks that make up the loop (including 814 // the loop preheader and exit blocks), keeping track of the mapping between 815 // the instructions and blocks. 816 NewBlocks.reserve(LoopBlocks.size()); 817 DenseMap<const Value*, Value*> ValueMap; 818 for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { 819 BasicBlock *New = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F); 820 NewBlocks.push_back(New); 821 ValueMap[LoopBlocks[i]] = New; // Keep the BB mapping. 822 LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], New, L); 823 } 824 825 // OutSiders are basic block that are dominated by original header and 826 // at the same time they are not part of loop. 827 SmallPtrSet<BasicBlock *, 8> OutSiders; 828 if (DT) { 829 DomTreeNode *OrigHeaderNode = DT->getNode(loopHeader); 830 for(std::vector<DomTreeNode*>::iterator DI = OrigHeaderNode->begin(), 831 DE = OrigHeaderNode->end(); DI != DE; ++DI) { 832 BasicBlock *B = (*DI)->getBlock(); 833 834 DenseMap<const Value*, Value*>::iterator VI = ValueMap.find(B); 835 if (VI == ValueMap.end()) 836 OutSiders.insert(B); 837 } 838 } 839 840 // Splice the newly inserted blocks into the function right before the 841 // original preheader. 842 F->getBasicBlockList().splice(LoopBlocks[0], F->getBasicBlockList(), 843 NewBlocks[0], F->end()); 844 845 // Now we create the new Loop object for the versioned loop. 846 Loop *NewLoop = CloneLoop(L, L->getParentLoop(), ValueMap, LI, LPM); 847 Loop *ParentLoop = L->getParentLoop(); 848 if (ParentLoop) { 849 // Make sure to add the cloned preheader and exit blocks to the parent loop 850 // as well. 851 ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase()); 852 } 853 854 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 855 BasicBlock *NewExit = cast<BasicBlock>(ValueMap[ExitBlocks[i]]); 856 // The new exit block should be in the same loop as the old one. 857 if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i])) 858 ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase()); 859 860 assert(NewExit->getTerminator()->getNumSuccessors() == 1 && 861 "Exit block should have been split to have one successor!"); 862 BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0); 863 864 // If the successor of the exit block had PHI nodes, add an entry for 865 // NewExit. 866 PHINode *PN; 867 for (BasicBlock::iterator I = ExitSucc->begin(); 868 (PN = dyn_cast<PHINode>(I)); ++I) { 869 Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); 870 DenseMap<const Value *, Value*>::iterator It = ValueMap.find(V); 871 if (It != ValueMap.end()) V = It->second; 872 PN->addIncoming(V, NewExit); 873 } 874 } 875 876 // Rewrite the code to refer to itself. 877 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 878 for (BasicBlock::iterator I = NewBlocks[i]->begin(), 879 E = NewBlocks[i]->end(); I != E; ++I) 880 RemapInstruction(I, ValueMap); 881 882 // Rewrite the original preheader to select between versions of the loop. 883 BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator()); 884 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] && 885 "Preheader splitting did not work correctly!"); 886 887 // Emit the new branch that selects between the two versions of this loop. 888 EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR); 889 LPM->deleteSimpleAnalysisValue(OldBR, L); 890 OldBR->eraseFromParent(); 891 892 // Update dominator info 893 if (DF && DT) { 894 895 SmallVector<BasicBlock *,4> ExitingBlocks; 896 L->getExitingBlocks(ExitingBlocks); 897 898 // Clone dominator info for all cloned basic block. 899 for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { 900 BasicBlock *LBB = LoopBlocks[i]; 901 BasicBlock *NBB = NewBlocks[i]; 902 CloneDomInfo(NBB, LBB, NewPreheader, loopPreheader, 903 loopHeader, DT, DF, ValueMap); 904 905 // If LBB's dominance frontier includes DFMember 906 // such that DFMember is also a member of LoopDF then 907 // - Remove DFMember from LBB's dominance frontier 908 // - Copy loop exiting blocks', that are dominated by BB, 909 // dominance frontier member in BB's dominance frontier 910 911 DominanceFrontier::iterator LBBI = DF->find(LBB); 912 DominanceFrontier::iterator NBBI = DF->find(NBB); 913 if (LBBI == DF->end()) 914 continue; 915 916 DominanceFrontier::DomSetType &LBSet = LBBI->second; 917 for (DominanceFrontier::DomSetType::iterator LI = LBSet.begin(), 918 LE = LBSet.end(); LI != LE; /* NULL */) { 919 BasicBlock *B = *LI++; 920 if (B == LBB && B == loopHeader) 921 continue; 922 bool removeB = false; 923 if (!LoopDF.count(B)) 924 continue; 925 926 // If LBB dominates loop exits then insert loop exit block's DF 927 // into B's DF. 928 for(SmallVector<BasicBlock *, 4>::iterator 929 LExitI = ExitingBlocks.begin(), 930 LExitE = ExitingBlocks.end(); LExitI != LExitE; ++LExitI) { 931 BasicBlock *E = *LExitI; 932 933 if (!DT->dominates(LBB,E)) 934 continue; 935 936 DenseMap<BasicBlock *, BasicBlock *>::iterator DFBI = 937 OrigLoopExitMap.find(E); 938 if (DFBI == OrigLoopExitMap.end()) 939 continue; 940 941 BasicBlock *DFB = DFBI->second; 942 DF->addToFrontier(LBBI, DFB); 943 DF->addToFrontier(NBBI, DFB); 944 removeB = true; 945 } 946 947 // If B's replacement is inserted in DF then now is the time to remove 948 // B. 949 if (removeB) { 950 DF->removeFromFrontier(LBBI, B); 951 if (L->contains(B)) 952 DF->removeFromFrontier(NBBI, cast<BasicBlock>(ValueMap[B])); 953 else 954 DF->removeFromFrontier(NBBI, B); 955 } 956 } 957 958 } 959 960 // MiddleBlocks are dominated by original pre header. SplitEdge updated 961 // MiddleBlocks' dominance frontier appropriately. 962 for (unsigned i = 0, e = MiddleBlocks.size(); i != e; ++i) { 963 BasicBlock *MBB = MiddleBlocks[i]; 964 if (!MBB->getSinglePredecessor()) 965 DT->changeImmediateDominator(MBB, loopPreheader); 966 } 967 968 // All Outsiders are now dominated by original pre header. 969 for (SmallPtrSet<BasicBlock *, 8>::iterator OI = OutSiders.begin(), 970 OE = OutSiders.end(); OI != OE; ++OI) { 971 BasicBlock *OB = *OI; 972 DT->changeImmediateDominator(OB, loopPreheader); 973 } 974 975 // New loop headers are dominated by original preheader 976 DT->changeImmediateDominator(NewBlocks[0], loopPreheader); 977 DT->changeImmediateDominator(LoopBlocks[0], loopPreheader); 978 } 979 980 LoopProcessWorklist.push_back(NewLoop); 981 redoLoop = true; 982 983 // Now we rewrite the original code to know that the condition is true and the 984 // new code to know that the condition is false. 985 RewriteLoopBodyWithConditionConstant(L , LIC, Val, false); 986 987 // It's possible that simplifying one loop could cause the other to be 988 // deleted. If so, don't simplify it. 989 if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop) 990 RewriteLoopBodyWithConditionConstant(NewLoop, LIC, Val, true); 991} 992 993/// RemoveFromWorklist - Remove all instances of I from the worklist vector 994/// specified. 995static void RemoveFromWorklist(Instruction *I, 996 std::vector<Instruction*> &Worklist) { 997 std::vector<Instruction*>::iterator WI = std::find(Worklist.begin(), 998 Worklist.end(), I); 999 while (WI != Worklist.end()) { 1000 unsigned Offset = WI-Worklist.begin(); 1001 Worklist.erase(WI); 1002 WI = std::find(Worklist.begin()+Offset, Worklist.end(), I); 1003 } 1004} 1005 1006/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the 1007/// program, replacing all uses with V and update the worklist. 1008static void ReplaceUsesOfWith(Instruction *I, Value *V, 1009 std::vector<Instruction*> &Worklist, 1010 Loop *L, LPPassManager *LPM) { 1011 DOUT << "Replace with '" << *V << "': " << *I; 1012 1013 // Add uses to the worklist, which may be dead now. 1014 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) 1015 if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) 1016 Worklist.push_back(Use); 1017 1018 // Add users to the worklist which may be simplified now. 1019 for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); 1020 UI != E; ++UI) 1021 Worklist.push_back(cast<Instruction>(*UI)); 1022 LPM->deleteSimpleAnalysisValue(I, L); 1023 RemoveFromWorklist(I, Worklist); 1024 I->replaceAllUsesWith(V); 1025 I->eraseFromParent(); 1026 ++NumSimplify; 1027} 1028 1029/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop 1030/// information, and remove any dead successors it has. 1031/// 1032void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, 1033 std::vector<Instruction*> &Worklist, 1034 Loop *L) { 1035 if (pred_begin(BB) != pred_end(BB)) { 1036 // This block isn't dead, since an edge to BB was just removed, see if there 1037 // are any easy simplifications we can do now. 1038 if (BasicBlock *Pred = BB->getSinglePredecessor()) { 1039 // If it has one pred, fold phi nodes in BB. 1040 while (isa<PHINode>(BB->begin())) 1041 ReplaceUsesOfWith(BB->begin(), 1042 cast<PHINode>(BB->begin())->getIncomingValue(0), 1043 Worklist, L, LPM); 1044 1045 // If this is the header of a loop and the only pred is the latch, we now 1046 // have an unreachable loop. 1047 if (Loop *L = LI->getLoopFor(BB)) 1048 if (loopHeader == BB && L->contains(Pred)) { 1049 // Remove the branch from the latch to the header block, this makes 1050 // the header dead, which will make the latch dead (because the header 1051 // dominates the latch). 1052 LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L); 1053 Pred->getTerminator()->eraseFromParent(); 1054 new UnreachableInst(Pred); 1055 1056 // The loop is now broken, remove it from LI. 1057 RemoveLoopFromHierarchy(L); 1058 1059 // Reprocess the header, which now IS dead. 1060 RemoveBlockIfDead(BB, Worklist, L); 1061 return; 1062 } 1063 1064 // If pred ends in a uncond branch, add uncond branch to worklist so that 1065 // the two blocks will get merged. 1066 if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator())) 1067 if (BI->isUnconditional()) 1068 Worklist.push_back(BI); 1069 } 1070 return; 1071 } 1072 1073 DOUT << "Nuking dead block: " << *BB; 1074 1075 // Remove the instructions in the basic block from the worklist. 1076 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { 1077 RemoveFromWorklist(I, Worklist); 1078 1079 // Anything that uses the instructions in this basic block should have their 1080 // uses replaced with undefs. 1081 if (!I->use_empty()) 1082 I->replaceAllUsesWith(UndefValue::get(I->getType())); 1083 } 1084 1085 // If this is the edge to the header block for a loop, remove the loop and 1086 // promote all subloops. 1087 if (Loop *BBLoop = LI->getLoopFor(BB)) { 1088 if (BBLoop->getLoopLatch() == BB) 1089 RemoveLoopFromHierarchy(BBLoop); 1090 } 1091 1092 // Remove the block from the loop info, which removes it from any loops it 1093 // was in. 1094 LI->removeBlock(BB); 1095 1096 1097 // Remove phi node entries in successors for this block. 1098 TerminatorInst *TI = BB->getTerminator(); 1099 std::vector<BasicBlock*> Succs; 1100 for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { 1101 Succs.push_back(TI->getSuccessor(i)); 1102 TI->getSuccessor(i)->removePredecessor(BB); 1103 } 1104 1105 // Unique the successors, remove anything with multiple uses. 1106 std::sort(Succs.begin(), Succs.end()); 1107 Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end()); 1108 1109 // Remove the basic block, including all of the instructions contained in it. 1110 LPM->deleteSimpleAnalysisValue(BB, L); 1111 BB->eraseFromParent(); 1112 // Remove successor blocks here that are not dead, so that we know we only 1113 // have dead blocks in this list. Nondead blocks have a way of becoming dead, 1114 // then getting removed before we revisit them, which is badness. 1115 // 1116 for (unsigned i = 0; i != Succs.size(); ++i) 1117 if (pred_begin(Succs[i]) != pred_end(Succs[i])) { 1118 // One exception is loop headers. If this block was the preheader for a 1119 // loop, then we DO want to visit the loop so the loop gets deleted. 1120 // We know that if the successor is a loop header, that this loop had to 1121 // be the preheader: the case where this was the latch block was handled 1122 // above and headers can only have two predecessors. 1123 if (!LI->isLoopHeader(Succs[i])) { 1124 Succs.erase(Succs.begin()+i); 1125 --i; 1126 } 1127 } 1128 1129 for (unsigned i = 0, e = Succs.size(); i != e; ++i) 1130 RemoveBlockIfDead(Succs[i], Worklist, L); 1131} 1132 1133/// RemoveLoopFromHierarchy - We have discovered that the specified loop has 1134/// become unwrapped, either because the backedge was deleted, or because the 1135/// edge into the header was removed. If the edge into the header from the 1136/// latch block was removed, the loop is unwrapped but subloops are still alive, 1137/// so they just reparent loops. If the loops are actually dead, they will be 1138/// removed later. 1139void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) { 1140 LPM->deleteLoopFromQueue(L); 1141 RemoveLoopFromWorklist(L); 1142} 1143 1144 1145 1146// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has 1147// the value specified by Val in the specified loop, or we know it does NOT have 1148// that value. Rewrite any uses of LIC or of properties correlated to it. 1149void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, 1150 Constant *Val, 1151 bool IsEqual) { 1152 assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?"); 1153 1154 // FIXME: Support correlated properties, like: 1155 // for (...) 1156 // if (li1 < li2) 1157 // ... 1158 // if (li1 > li2) 1159 // ... 1160 1161 // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches, 1162 // selects, switches. 1163 std::vector<User*> Users(LIC->use_begin(), LIC->use_end()); 1164 std::vector<Instruction*> Worklist; 1165 1166 // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC 1167 // in the loop with the appropriate one directly. 1168 if (IsEqual || (isa<ConstantInt>(Val) && Val->getType() == Type::Int1Ty)) { 1169 Value *Replacement; 1170 if (IsEqual) 1171 Replacement = Val; 1172 else 1173 Replacement = ConstantInt::get(Type::Int1Ty, 1174 !cast<ConstantInt>(Val)->getZExtValue()); 1175 1176 for (unsigned i = 0, e = Users.size(); i != e; ++i) 1177 if (Instruction *U = cast<Instruction>(Users[i])) { 1178 if (!L->contains(U->getParent())) 1179 continue; 1180 U->replaceUsesOfWith(LIC, Replacement); 1181 Worklist.push_back(U); 1182 } 1183 } else { 1184 // Otherwise, we don't know the precise value of LIC, but we do know that it 1185 // is certainly NOT "Val". As such, simplify any uses in the loop that we 1186 // can. This case occurs when we unswitch switch statements. 1187 for (unsigned i = 0, e = Users.size(); i != e; ++i) 1188 if (Instruction *U = cast<Instruction>(Users[i])) { 1189 if (!L->contains(U->getParent())) 1190 continue; 1191 1192 Worklist.push_back(U); 1193 1194 // If we know that LIC is not Val, use this info to simplify code. 1195 if (SwitchInst *SI = dyn_cast<SwitchInst>(U)) { 1196 for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) { 1197 if (SI->getCaseValue(i) == Val) { 1198 // Found a dead case value. Don't remove PHI nodes in the 1199 // successor if they become single-entry, those PHI nodes may 1200 // be in the Users list. 1201 1202 // FIXME: This is a hack. We need to keep the successor around 1203 // and hooked up so as to preserve the loop structure, because 1204 // trying to update it is complicated. So instead we preserve the 1205 // loop structure and put the block on an dead code path. 1206 1207 BasicBlock* Old = SI->getParent(); 1208 BasicBlock* Split = SplitBlock(Old, SI, this); 1209 1210 Instruction* OldTerm = Old->getTerminator(); 1211 BranchInst::Create(Split, SI->getSuccessor(i), 1212 ConstantInt::getTrue(), OldTerm); 1213 1214 LPM->deleteSimpleAnalysisValue(Old->getTerminator(), L); 1215 Old->getTerminator()->eraseFromParent(); 1216 1217 PHINode *PN; 1218 for (BasicBlock::iterator II = SI->getSuccessor(i)->begin(); 1219 (PN = dyn_cast<PHINode>(II)); ++II) { 1220 Value *InVal = PN->removeIncomingValue(Split, false); 1221 PN->addIncoming(InVal, Old); 1222 } 1223 1224 SI->removeCase(i); 1225 break; 1226 } 1227 } 1228 } 1229 1230 // TODO: We could do other simplifications, for example, turning 1231 // LIC == Val -> false. 1232 } 1233 } 1234 1235 SimplifyCode(Worklist, L); 1236} 1237 1238/// SimplifyCode - Okay, now that we have simplified some instructions in the 1239/// loop, walk over it and constant prop, dce, and fold control flow where 1240/// possible. Note that this is effectively a very simple loop-structure-aware 1241/// optimizer. During processing of this loop, L could very well be deleted, so 1242/// it must not be used. 1243/// 1244/// FIXME: When the loop optimizer is more mature, separate this out to a new 1245/// pass. 1246/// 1247void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { 1248 while (!Worklist.empty()) { 1249 Instruction *I = Worklist.back(); 1250 Worklist.pop_back(); 1251 1252 // Simple constant folding. 1253 if (Constant *C = ConstantFoldInstruction(I)) { 1254 ReplaceUsesOfWith(I, C, Worklist, L, LPM); 1255 continue; 1256 } 1257 1258 // Simple DCE. 1259 if (isInstructionTriviallyDead(I)) { 1260 DOUT << "Remove dead instruction '" << *I; 1261 1262 // Add uses to the worklist, which may be dead now. 1263 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) 1264 if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) 1265 Worklist.push_back(Use); 1266 LPM->deleteSimpleAnalysisValue(I, L); 1267 RemoveFromWorklist(I, Worklist); 1268 I->eraseFromParent(); 1269 ++NumSimplify; 1270 continue; 1271 } 1272 1273 // Special case hacks that appear commonly in unswitched code. 1274 switch (I->getOpcode()) { 1275 case Instruction::Select: 1276 if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(0))) { 1277 ReplaceUsesOfWith(I, I->getOperand(!CB->getZExtValue()+1), Worklist, L, 1278 LPM); 1279 continue; 1280 } 1281 break; 1282 case Instruction::And: 1283 if (isa<ConstantInt>(I->getOperand(0)) && 1284 I->getOperand(0)->getType() == Type::Int1Ty) // constant -> RHS 1285 cast<BinaryOperator>(I)->swapOperands(); 1286 if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) 1287 if (CB->getType() == Type::Int1Ty) { 1288 if (CB->isOne()) // X & 1 -> X 1289 ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM); 1290 else // X & 0 -> 0 1291 ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM); 1292 continue; 1293 } 1294 break; 1295 case Instruction::Or: 1296 if (isa<ConstantInt>(I->getOperand(0)) && 1297 I->getOperand(0)->getType() == Type::Int1Ty) // constant -> RHS 1298 cast<BinaryOperator>(I)->swapOperands(); 1299 if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) 1300 if (CB->getType() == Type::Int1Ty) { 1301 if (CB->isOne()) // X | 1 -> 1 1302 ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM); 1303 else // X | 0 -> X 1304 ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM); 1305 continue; 1306 } 1307 break; 1308 case Instruction::Br: { 1309 BranchInst *BI = cast<BranchInst>(I); 1310 if (BI->isUnconditional()) { 1311 // If BI's parent is the only pred of the successor, fold the two blocks 1312 // together. 1313 BasicBlock *Pred = BI->getParent(); 1314 BasicBlock *Succ = BI->getSuccessor(0); 1315 BasicBlock *SinglePred = Succ->getSinglePredecessor(); 1316 if (!SinglePred) continue; // Nothing to do. 1317 assert(SinglePred == Pred && "CFG broken"); 1318 1319 DOUT << "Merging blocks: " << Pred->getName() << " <- " 1320 << Succ->getName() << "\n"; 1321 1322 // Resolve any single entry PHI nodes in Succ. 1323 while (PHINode *PN = dyn_cast<PHINode>(Succ->begin())) 1324 ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM); 1325 1326 // Move all of the successor contents from Succ to Pred. 1327 Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(), 1328 Succ->end()); 1329 LPM->deleteSimpleAnalysisValue(BI, L); 1330 BI->eraseFromParent(); 1331 RemoveFromWorklist(BI, Worklist); 1332 1333 // If Succ has any successors with PHI nodes, update them to have 1334 // entries coming from Pred instead of Succ. 1335 Succ->replaceAllUsesWith(Pred); 1336 1337 // Remove Succ from the loop tree. 1338 LI->removeBlock(Succ); 1339 LPM->deleteSimpleAnalysisValue(Succ, L); 1340 Succ->eraseFromParent(); 1341 ++NumSimplify; 1342 } else if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){ 1343 // Conditional branch. Turn it into an unconditional branch, then 1344 // remove dead blocks. 1345 break; // FIXME: Enable. 1346 1347 DOUT << "Folded branch: " << *BI; 1348 BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue()); 1349 BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue()); 1350 DeadSucc->removePredecessor(BI->getParent(), true); 1351 Worklist.push_back(BranchInst::Create(LiveSucc, BI)); 1352 LPM->deleteSimpleAnalysisValue(BI, L); 1353 BI->eraseFromParent(); 1354 RemoveFromWorklist(BI, Worklist); 1355 ++NumSimplify; 1356 1357 RemoveBlockIfDead(DeadSucc, Worklist, L); 1358 } 1359 break; 1360 } 1361 } 1362 } 1363} 1364