IndVarSimplify.cpp revision dd1f9e4bf6e7d427fd581728f3d2e431e12e6e71
1//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This transformation analyzes and transforms the induction variables (and 11// computations derived from them) into simpler forms suitable for subsequent 12// analysis and transformation. 13// 14// This transformation makes the following changes to each loop with an 15// identifiable induction variable: 16// 1. All loops are transformed to have a SINGLE canonical induction variable 17// which starts at zero and steps by one. 18// 2. The canonical induction variable is guaranteed to be the first PHI node 19// in the loop header block. 20// 3. Any pointer arithmetic recurrences are raised to use array subscripts. 21// 22// If the trip count of a loop is computable, this pass also makes the following 23// changes: 24// 1. The exit condition for the loop is canonicalized to compare the 25// induction value against the exit value. This turns loops like: 26// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' 27// 2. Any use outside of the loop of an expression derived from the indvar 28// is changed to compute the derived value outside of the loop, eliminating 29// the dependence on the exit value of the induction variable. If the only 30// purpose of the loop is to compute the exit value of some derived 31// expression, this transformation will make the loop dead. 32// 33// This transformation should be followed by strength reduction after all of the 34// desired loop transformations have been performed. Additionally, on targets 35// where it is profitable, the loop could be transformed to count down to zero 36// (the "do loop" optimization). 37// 38//===----------------------------------------------------------------------===// 39 40#define DEBUG_TYPE "indvars" 41#include "llvm/Transforms/Scalar.h" 42#include "llvm/BasicBlock.h" 43#include "llvm/Constants.h" 44#include "llvm/Instructions.h" 45#include "llvm/Type.h" 46#include "llvm/Analysis/ScalarEvolutionExpander.h" 47#include "llvm/Analysis/LoopInfo.h" 48#include "llvm/Analysis/LoopPass.h" 49#include "llvm/Support/CFG.h" 50#include "llvm/Support/Compiler.h" 51#include "llvm/Support/Debug.h" 52#include "llvm/Support/GetElementPtrTypeIterator.h" 53#include "llvm/Transforms/Utils/Local.h" 54#include "llvm/Support/CommandLine.h" 55#include "llvm/ADT/SmallVector.h" 56#include "llvm/ADT/SetVector.h" 57#include "llvm/ADT/SmallPtrSet.h" 58#include "llvm/ADT/Statistic.h" 59using namespace llvm; 60 61STATISTIC(NumRemoved , "Number of aux indvars removed"); 62STATISTIC(NumPointer , "Number of pointer indvars promoted"); 63STATISTIC(NumInserted, "Number of canonical indvars added"); 64STATISTIC(NumReplaced, "Number of exit values replaced"); 65STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 66 67namespace { 68 class VISIBILITY_HIDDEN IndVarSimplify : public LoopPass { 69 LoopInfo *LI; 70 ScalarEvolution *SE; 71 bool Changed; 72 public: 73 74 static char ID; // Pass identification, replacement for typeid 75 IndVarSimplify() : LoopPass(&ID) {} 76 77 virtual bool runOnLoop(Loop *L, LPPassManager &LPM); 78 79 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 80 AU.addRequired<ScalarEvolution>(); 81 AU.addRequiredID(LCSSAID); 82 AU.addRequiredID(LoopSimplifyID); 83 AU.addRequired<LoopInfo>(); 84 AU.addPreserved<ScalarEvolution>(); 85 AU.addPreservedID(LoopSimplifyID); 86 AU.addPreservedID(LCSSAID); 87 AU.setPreservesCFG(); 88 } 89 90 private: 91 92 void RewriteNonIntegerIVs(Loop *L); 93 94 void EliminatePointerRecurrence(PHINode *PN, BasicBlock *Preheader, 95 SmallPtrSet<Instruction*, 16> &DeadInsts); 96 void LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount, 97 Value *IndVar, 98 BasicBlock *ExitingBlock, 99 BranchInst *BI, 100 SCEVExpander &Rewriter); 101 void RewriteLoopExitValues(Loop *L, SCEV *BackedgeTakenCount); 102 103 void DeleteTriviallyDeadInstructions(SmallPtrSet<Instruction*, 16> &Insts); 104 105 void HandleFloatingPointIV(Loop *L, PHINode *PH, 106 SmallPtrSet<Instruction*, 16> &DeadInsts); 107 }; 108} 109 110char IndVarSimplify::ID = 0; 111static RegisterPass<IndVarSimplify> 112X("indvars", "Canonicalize Induction Variables"); 113 114Pass *llvm::createIndVarSimplifyPass() { 115 return new IndVarSimplify(); 116} 117 118/// DeleteTriviallyDeadInstructions - If any of the instructions is the 119/// specified set are trivially dead, delete them and see if this makes any of 120/// their operands subsequently dead. 121void IndVarSimplify:: 122DeleteTriviallyDeadInstructions(SmallPtrSet<Instruction*, 16> &Insts) { 123 while (!Insts.empty()) { 124 Instruction *I = *Insts.begin(); 125 Insts.erase(I); 126 if (isInstructionTriviallyDead(I)) { 127 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) 128 if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i))) 129 Insts.insert(U); 130 SE->deleteValueFromRecords(I); 131 DOUT << "INDVARS: Deleting: " << *I; 132 I->eraseFromParent(); 133 Changed = true; 134 } 135 } 136} 137 138 139/// EliminatePointerRecurrence - Check to see if this is a trivial GEP pointer 140/// recurrence. If so, change it into an integer recurrence, permitting 141/// analysis by the SCEV routines. 142void IndVarSimplify::EliminatePointerRecurrence(PHINode *PN, 143 BasicBlock *Preheader, 144 SmallPtrSet<Instruction*, 16> &DeadInsts) { 145 assert(PN->getNumIncomingValues() == 2 && "Noncanonicalized loop!"); 146 unsigned PreheaderIdx = PN->getBasicBlockIndex(Preheader); 147 unsigned BackedgeIdx = PreheaderIdx^1; 148 if (GetElementPtrInst *GEPI = 149 dyn_cast<GetElementPtrInst>(PN->getIncomingValue(BackedgeIdx))) 150 if (GEPI->getOperand(0) == PN) { 151 assert(GEPI->getNumOperands() == 2 && "GEP types must match!"); 152 DOUT << "INDVARS: Eliminating pointer recurrence: " << *GEPI; 153 154 // Okay, we found a pointer recurrence. Transform this pointer 155 // recurrence into an integer recurrence. Compute the value that gets 156 // added to the pointer at every iteration. 157 Value *AddedVal = GEPI->getOperand(1); 158 159 // Insert a new integer PHI node into the top of the block. 160 PHINode *NewPhi = PHINode::Create(AddedVal->getType(), 161 PN->getName()+".rec", PN); 162 NewPhi->addIncoming(Constant::getNullValue(NewPhi->getType()), Preheader); 163 164 // Create the new add instruction. 165 Value *NewAdd = BinaryOperator::CreateAdd(NewPhi, AddedVal, 166 GEPI->getName()+".rec", GEPI); 167 NewPhi->addIncoming(NewAdd, PN->getIncomingBlock(BackedgeIdx)); 168 169 // Update the existing GEP to use the recurrence. 170 GEPI->setOperand(0, PN->getIncomingValue(PreheaderIdx)); 171 172 // Update the GEP to use the new recurrence we just inserted. 173 GEPI->setOperand(1, NewAdd); 174 175 // If the incoming value is a constant expr GEP, try peeling out the array 176 // 0 index if possible to make things simpler. 177 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEPI->getOperand(0))) 178 if (CE->getOpcode() == Instruction::GetElementPtr) { 179 unsigned NumOps = CE->getNumOperands(); 180 assert(NumOps > 1 && "CE folding didn't work!"); 181 if (CE->getOperand(NumOps-1)->isNullValue()) { 182 // Check to make sure the last index really is an array index. 183 gep_type_iterator GTI = gep_type_begin(CE); 184 for (unsigned i = 1, e = CE->getNumOperands()-1; 185 i != e; ++i, ++GTI) 186 /*empty*/; 187 if (isa<SequentialType>(*GTI)) { 188 // Pull the last index out of the constant expr GEP. 189 SmallVector<Value*, 8> CEIdxs(CE->op_begin()+1, CE->op_end()-1); 190 Constant *NCE = ConstantExpr::getGetElementPtr(CE->getOperand(0), 191 &CEIdxs[0], 192 CEIdxs.size()); 193 Value *Idx[2]; 194 Idx[0] = Constant::getNullValue(Type::Int32Ty); 195 Idx[1] = NewAdd; 196 GetElementPtrInst *NGEPI = GetElementPtrInst::Create( 197 NCE, Idx, Idx + 2, 198 GEPI->getName(), GEPI); 199 SE->deleteValueFromRecords(GEPI); 200 GEPI->replaceAllUsesWith(NGEPI); 201 GEPI->eraseFromParent(); 202 GEPI = NGEPI; 203 } 204 } 205 } 206 207 208 // Finally, if there are any other users of the PHI node, we must 209 // insert a new GEP instruction that uses the pre-incremented version 210 // of the induction amount. 211 if (!PN->use_empty()) { 212 BasicBlock::iterator InsertPos = PN; ++InsertPos; 213 while (isa<PHINode>(InsertPos)) ++InsertPos; 214 Value *PreInc = 215 GetElementPtrInst::Create(PN->getIncomingValue(PreheaderIdx), 216 NewPhi, "", InsertPos); 217 PreInc->takeName(PN); 218 PN->replaceAllUsesWith(PreInc); 219 } 220 221 // Delete the old PHI for sure, and the GEP if its otherwise unused. 222 DeadInsts.insert(PN); 223 224 ++NumPointer; 225 Changed = true; 226 } 227} 228 229/// LinearFunctionTestReplace - This method rewrites the exit condition of the 230/// loop to be a canonical != comparison against the incremented loop induction 231/// variable. This pass is able to rewrite the exit tests of any loop where the 232/// SCEV analysis can determine a loop-invariant trip count of the loop, which 233/// is actually a much broader range than just linear tests. 234void IndVarSimplify::LinearFunctionTestReplace(Loop *L, 235 SCEVHandle BackedgeTakenCount, 236 Value *IndVar, 237 BasicBlock *ExitingBlock, 238 BranchInst *BI, 239 SCEVExpander &Rewriter) { 240 // If the exiting block is not the same as the backedge block, we must compare 241 // against the preincremented value, otherwise we prefer to compare against 242 // the post-incremented value. 243 Value *CmpIndVar; 244 SCEVHandle RHS = BackedgeTakenCount; 245 if (ExitingBlock == L->getLoopLatch()) { 246 // Add one to the "backedge-taken" count to get the trip count. 247 // If this addition may overflow, we have to be more pessimistic and 248 // cast the induction variable before doing the add. 249 SCEVHandle Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType()); 250 SCEVHandle N = 251 SE->getAddExpr(BackedgeTakenCount, 252 SE->getIntegerSCEV(1, BackedgeTakenCount->getType())); 253 if ((isa<SCEVConstant>(N) && !N->isZero()) || 254 SE->isLoopGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { 255 // No overflow. Cast the sum. 256 RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType()); 257 } else { 258 // Potential overflow. Cast before doing the add. 259 RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, 260 IndVar->getType()); 261 RHS = SE->getAddExpr(RHS, 262 SE->getIntegerSCEV(1, IndVar->getType())); 263 } 264 265 // The BackedgeTaken expression contains the number of times that the 266 // backedge branches to the loop header. This is one less than the 267 // number of times the loop executes, so use the incremented indvar. 268 CmpIndVar = L->getCanonicalInductionVariableIncrement(); 269 } else { 270 // We have to use the preincremented value... 271 RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, 272 IndVar->getType()); 273 CmpIndVar = IndVar; 274 } 275 276 // Expand the code for the iteration count into the preheader of the loop. 277 BasicBlock *Preheader = L->getLoopPreheader(); 278 Value *ExitCnt = Rewriter.expandCodeFor(RHS, 279 Preheader->getTerminator()); 280 281 // Insert a new icmp_ne or icmp_eq instruction before the branch. 282 ICmpInst::Predicate Opcode; 283 if (L->contains(BI->getSuccessor(0))) 284 Opcode = ICmpInst::ICMP_NE; 285 else 286 Opcode = ICmpInst::ICMP_EQ; 287 288 DOUT << "INDVARS: Rewriting loop exit condition to:\n" 289 << " LHS:" << *CmpIndVar // includes a newline 290 << " op:\t" 291 << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" 292 << " RHS:\t" << *RHS << "\n"; 293 294 Value *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI); 295 BI->setCondition(Cond); 296 ++NumLFTR; 297 Changed = true; 298} 299 300/// RewriteLoopExitValues - Check to see if this loop has a computable 301/// loop-invariant execution count. If so, this means that we can compute the 302/// final value of any expressions that are recurrent in the loop, and 303/// substitute the exit values from the loop into any instructions outside of 304/// the loop that use the final values of the current expressions. 305void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEV *BackedgeTakenCount) { 306 BasicBlock *Preheader = L->getLoopPreheader(); 307 308 // Scan all of the instructions in the loop, looking at those that have 309 // extra-loop users and which are recurrences. 310 SCEVExpander Rewriter(*SE, *LI); 311 312 // We insert the code into the preheader of the loop if the loop contains 313 // multiple exit blocks, or in the exit block if there is exactly one. 314 BasicBlock *BlockToInsertInto; 315 SmallVector<BasicBlock*, 8> ExitBlocks; 316 L->getUniqueExitBlocks(ExitBlocks); 317 if (ExitBlocks.size() == 1) 318 BlockToInsertInto = ExitBlocks[0]; 319 else 320 BlockToInsertInto = Preheader; 321 BasicBlock::iterator InsertPt = BlockToInsertInto->getFirstNonPHI(); 322 323 bool HasConstantItCount = isa<SCEVConstant>(BackedgeTakenCount); 324 325 SmallPtrSet<Instruction*, 16> InstructionsToDelete; 326 std::map<Instruction*, Value*> ExitValues; 327 328 // Find all values that are computed inside the loop, but used outside of it. 329 // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan 330 // the exit blocks of the loop to find them. 331 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 332 BasicBlock *ExitBB = ExitBlocks[i]; 333 334 // If there are no PHI nodes in this exit block, then no values defined 335 // inside the loop are used on this path, skip it. 336 PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); 337 if (!PN) continue; 338 339 unsigned NumPreds = PN->getNumIncomingValues(); 340 341 // Iterate over all of the PHI nodes. 342 BasicBlock::iterator BBI = ExitBB->begin(); 343 while ((PN = dyn_cast<PHINode>(BBI++))) { 344 345 // Iterate over all of the values in all the PHI nodes. 346 for (unsigned i = 0; i != NumPreds; ++i) { 347 // If the value being merged in is not integer or is not defined 348 // in the loop, skip it. 349 Value *InVal = PN->getIncomingValue(i); 350 if (!isa<Instruction>(InVal) || 351 // SCEV only supports integer expressions for now. 352 !isa<IntegerType>(InVal->getType())) 353 continue; 354 355 // If this pred is for a subloop, not L itself, skip it. 356 if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) 357 continue; // The Block is in a subloop, skip it. 358 359 // Check that InVal is defined in the loop. 360 Instruction *Inst = cast<Instruction>(InVal); 361 if (!L->contains(Inst->getParent())) 362 continue; 363 364 // We require that this value either have a computable evolution or that 365 // the loop have a constant iteration count. In the case where the loop 366 // has a constant iteration count, we can sometimes force evaluation of 367 // the exit value through brute force. 368 SCEVHandle SH = SE->getSCEV(Inst); 369 if (!SH->hasComputableLoopEvolution(L) && !HasConstantItCount) 370 continue; // Cannot get exit evolution for the loop value. 371 372 // Okay, this instruction has a user outside of the current loop 373 // and varies predictably *inside* the loop. Evaluate the value it 374 // contains when the loop exits, if possible. 375 SCEVHandle ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); 376 if (isa<SCEVCouldNotCompute>(ExitValue) || 377 !ExitValue->isLoopInvariant(L)) 378 continue; 379 380 Changed = true; 381 ++NumReplaced; 382 383 // See if we already computed the exit value for the instruction, if so, 384 // just reuse it. 385 Value *&ExitVal = ExitValues[Inst]; 386 if (!ExitVal) 387 ExitVal = Rewriter.expandCodeFor(ExitValue, InsertPt); 388 389 DOUT << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal 390 << " LoopVal = " << *Inst << "\n"; 391 392 PN->setIncomingValue(i, ExitVal); 393 394 // If this instruction is dead now, schedule it to be removed. 395 if (Inst->use_empty()) 396 InstructionsToDelete.insert(Inst); 397 398 // See if this is a single-entry LCSSA PHI node. If so, we can (and 399 // have to) remove 400 // the PHI entirely. This is safe, because the NewVal won't be variant 401 // in the loop, so we don't need an LCSSA phi node anymore. 402 if (NumPreds == 1) { 403 SE->deleteValueFromRecords(PN); 404 PN->replaceAllUsesWith(ExitVal); 405 PN->eraseFromParent(); 406 break; 407 } 408 } 409 } 410 } 411 412 DeleteTriviallyDeadInstructions(InstructionsToDelete); 413} 414 415void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { 416 // First step. Check to see if there are any trivial GEP pointer recurrences. 417 // If there are, change them into integer recurrences, permitting analysis by 418 // the SCEV routines. 419 // 420 BasicBlock *Header = L->getHeader(); 421 BasicBlock *Preheader = L->getLoopPreheader(); 422 423 SmallPtrSet<Instruction*, 16> DeadInsts; 424 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { 425 PHINode *PN = cast<PHINode>(I); 426 if (isa<PointerType>(PN->getType())) 427 EliminatePointerRecurrence(PN, Preheader, DeadInsts); 428 else 429 HandleFloatingPointIV(L, PN, DeadInsts); 430 } 431 432 // If the loop previously had a pointer or floating-point IV, ScalarEvolution 433 // may not have been able to compute a trip count. Now that we've done some 434 // re-writing, the trip count may be computable. 435 if (Changed) 436 SE->forgetLoopBackedgeTakenCount(L); 437 438 if (!DeadInsts.empty()) 439 DeleteTriviallyDeadInstructions(DeadInsts); 440} 441 442/// getEffectiveIndvarType - Determine the widest type that the 443/// induction-variable PHINode Phi is cast to. 444/// 445static const Type *getEffectiveIndvarType(const PHINode *Phi) { 446 const Type *Ty = Phi->getType(); 447 448 for (Value::use_const_iterator UI = Phi->use_begin(), UE = Phi->use_end(); 449 UI != UE; ++UI) { 450 const Type *CandidateType = NULL; 451 if (const ZExtInst *ZI = dyn_cast<ZExtInst>(UI)) 452 CandidateType = ZI->getDestTy(); 453 else if (const SExtInst *SI = dyn_cast<SExtInst>(UI)) 454 CandidateType = SI->getDestTy(); 455 if (CandidateType && 456 CandidateType->getPrimitiveSizeInBits() > 457 Ty->getPrimitiveSizeInBits()) 458 Ty = CandidateType; 459 } 460 461 return Ty; 462} 463 464/// TestOrigIVForWrap - Analyze the original induction variable 465/// that controls the loop's iteration to determine whether it 466/// would ever undergo signed or unsigned overflow. Also, check 467/// whether an induction variable in the same type that starts 468/// at 0 would undergo signed overflow. 469/// 470/// In addition to setting the NoSignedWrap and NoUnsignedWrap 471/// variables to true when appropriate (they are not set to false here), 472/// return the PHI for this induction variable. Also record the initial 473/// and final values and the increment; these are not meaningful unless 474/// either NoSignedWrap or NoUnsignedWrap is true, and are always meaningful 475/// in that case, although the final value may be 0 indicating a nonconstant. 476/// 477/// TODO: This duplicates a fair amount of ScalarEvolution logic. 478/// Perhaps this can be merged with 479/// ScalarEvolution::getBackedgeTakenCount 480/// and/or ScalarEvolution::get{Sign,Zero}ExtendExpr. 481/// 482static const PHINode *TestOrigIVForWrap(const Loop *L, 483 const BranchInst *BI, 484 const Instruction *OrigCond, 485 bool &NoSignedWrap, 486 bool &NoUnsignedWrap, 487 const ConstantInt* &InitialVal, 488 const ConstantInt* &IncrVal, 489 const ConstantInt* &LimitVal) { 490 // Verify that the loop is sane and find the exit condition. 491 const ICmpInst *Cmp = dyn_cast<ICmpInst>(OrigCond); 492 if (!Cmp) return 0; 493 494 const Value *CmpLHS = Cmp->getOperand(0); 495 const Value *CmpRHS = Cmp->getOperand(1); 496 const BasicBlock *TrueBB = BI->getSuccessor(0); 497 const BasicBlock *FalseBB = BI->getSuccessor(1); 498 ICmpInst::Predicate Pred = Cmp->getPredicate(); 499 500 // Canonicalize a constant to the RHS. 501 if (isa<ConstantInt>(CmpLHS)) { 502 Pred = ICmpInst::getSwappedPredicate(Pred); 503 std::swap(CmpLHS, CmpRHS); 504 } 505 // Canonicalize SLE to SLT. 506 if (Pred == ICmpInst::ICMP_SLE) 507 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) 508 if (!CI->getValue().isMaxSignedValue()) { 509 CmpRHS = ConstantInt::get(CI->getValue() + 1); 510 Pred = ICmpInst::ICMP_SLT; 511 } 512 // Canonicalize SGT to SGE. 513 if (Pred == ICmpInst::ICMP_SGT) 514 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) 515 if (!CI->getValue().isMaxSignedValue()) { 516 CmpRHS = ConstantInt::get(CI->getValue() + 1); 517 Pred = ICmpInst::ICMP_SGE; 518 } 519 // Canonicalize SGE to SLT. 520 if (Pred == ICmpInst::ICMP_SGE) { 521 std::swap(TrueBB, FalseBB); 522 Pred = ICmpInst::ICMP_SLT; 523 } 524 // Canonicalize ULE to ULT. 525 if (Pred == ICmpInst::ICMP_ULE) 526 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) 527 if (!CI->getValue().isMaxValue()) { 528 CmpRHS = ConstantInt::get(CI->getValue() + 1); 529 Pred = ICmpInst::ICMP_ULT; 530 } 531 // Canonicalize UGT to UGE. 532 if (Pred == ICmpInst::ICMP_UGT) 533 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) 534 if (!CI->getValue().isMaxValue()) { 535 CmpRHS = ConstantInt::get(CI->getValue() + 1); 536 Pred = ICmpInst::ICMP_UGE; 537 } 538 // Canonicalize UGE to ULT. 539 if (Pred == ICmpInst::ICMP_UGE) { 540 std::swap(TrueBB, FalseBB); 541 Pred = ICmpInst::ICMP_ULT; 542 } 543 // For now, analyze only LT loops for signed overflow. 544 if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_ULT) 545 return 0; 546 547 bool isSigned = Pred == ICmpInst::ICMP_SLT; 548 549 // Get the increment instruction. Look past casts if we will 550 // be able to prove that the original induction variable doesn't 551 // undergo signed or unsigned overflow, respectively. 552 const Value *IncrInst = CmpLHS; 553 if (isSigned) { 554 if (const SExtInst *SI = dyn_cast<SExtInst>(CmpLHS)) { 555 if (!isa<ConstantInt>(CmpRHS) || 556 !cast<ConstantInt>(CmpRHS)->getValue() 557 .isSignedIntN(IncrInst->getType()->getPrimitiveSizeInBits())) 558 return 0; 559 IncrInst = SI->getOperand(0); 560 } 561 } else { 562 if (const ZExtInst *ZI = dyn_cast<ZExtInst>(CmpLHS)) { 563 if (!isa<ConstantInt>(CmpRHS) || 564 !cast<ConstantInt>(CmpRHS)->getValue() 565 .isIntN(IncrInst->getType()->getPrimitiveSizeInBits())) 566 return 0; 567 IncrInst = ZI->getOperand(0); 568 } 569 } 570 571 // For now, only analyze induction variables that have simple increments. 572 const BinaryOperator *IncrOp = dyn_cast<BinaryOperator>(IncrInst); 573 if (!IncrOp || IncrOp->getOpcode() != Instruction::Add) 574 return 0; 575 IncrVal = dyn_cast<ConstantInt>(IncrOp->getOperand(1)); 576 if (!IncrVal) 577 return 0; 578 579 // Make sure the PHI looks like a normal IV. 580 const PHINode *PN = dyn_cast<PHINode>(IncrOp->getOperand(0)); 581 if (!PN || PN->getNumIncomingValues() != 2) 582 return 0; 583 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); 584 unsigned BackEdge = !IncomingEdge; 585 if (!L->contains(PN->getIncomingBlock(BackEdge)) || 586 PN->getIncomingValue(BackEdge) != IncrOp) 587 return 0; 588 if (!L->contains(TrueBB)) 589 return 0; 590 591 // For now, only analyze loops with a constant start value, so that 592 // we can easily determine if the start value is not a maximum value 593 // which would wrap on the first iteration. 594 InitialVal = dyn_cast<ConstantInt>(PN->getIncomingValue(IncomingEdge)); 595 if (!InitialVal) 596 return 0; 597 598 // The upper limit need not be a constant; we'll check later. 599 LimitVal = dyn_cast<ConstantInt>(CmpRHS); 600 601 // We detect the impossibility of wrapping in two cases, both of 602 // which require starting with a non-max value: 603 // - The IV counts up by one, and the loop iterates only while it remains 604 // less than a limiting value (any) in the same type. 605 // - The IV counts up by a positive increment other than 1, and the 606 // constant limiting value + the increment is less than the max value 607 // (computed as max-increment to avoid overflow) 608 if (isSigned && !InitialVal->getValue().isMaxSignedValue()) { 609 if (IncrVal->equalsInt(1)) 610 NoSignedWrap = true; // LimitVal need not be constant 611 else if (LimitVal) { 612 uint64_t numBits = LimitVal->getValue().getBitWidth(); 613 if (IncrVal->getValue().sgt(APInt::getNullValue(numBits)) && 614 (APInt::getSignedMaxValue(numBits) - IncrVal->getValue()) 615 .sgt(LimitVal->getValue())) 616 NoSignedWrap = true; 617 } 618 } else if (!isSigned && !InitialVal->getValue().isMaxValue()) { 619 if (IncrVal->equalsInt(1)) 620 NoUnsignedWrap = true; // LimitVal need not be constant 621 else if (LimitVal) { 622 uint64_t numBits = LimitVal->getValue().getBitWidth(); 623 if (IncrVal->getValue().ugt(APInt::getNullValue(numBits)) && 624 (APInt::getMaxValue(numBits) - IncrVal->getValue()) 625 .ugt(LimitVal->getValue())) 626 NoUnsignedWrap = true; 627 } 628 } 629 return PN; 630} 631 632static Value *getSignExtendedTruncVar(const SCEVAddRecExpr *AR, 633 ScalarEvolution *SE, 634 const Type *LargestType, Loop *L, 635 const Type *myType, 636 SCEVExpander &Rewriter, 637 BasicBlock::iterator InsertPt) { 638 SCEVHandle ExtendedStart = 639 SE->getSignExtendExpr(AR->getStart(), LargestType); 640 SCEVHandle ExtendedStep = 641 SE->getSignExtendExpr(AR->getStepRecurrence(*SE), LargestType); 642 SCEVHandle ExtendedAddRec = 643 SE->getAddRecExpr(ExtendedStart, ExtendedStep, L); 644 if (LargestType != myType) 645 ExtendedAddRec = SE->getTruncateExpr(ExtendedAddRec, myType); 646 return Rewriter.expandCodeFor(ExtendedAddRec, InsertPt); 647} 648 649static Value *getZeroExtendedTruncVar(const SCEVAddRecExpr *AR, 650 ScalarEvolution *SE, 651 const Type *LargestType, Loop *L, 652 const Type *myType, 653 SCEVExpander &Rewriter, 654 BasicBlock::iterator InsertPt) { 655 SCEVHandle ExtendedStart = 656 SE->getZeroExtendExpr(AR->getStart(), LargestType); 657 SCEVHandle ExtendedStep = 658 SE->getZeroExtendExpr(AR->getStepRecurrence(*SE), LargestType); 659 SCEVHandle ExtendedAddRec = 660 SE->getAddRecExpr(ExtendedStart, ExtendedStep, L); 661 if (LargestType != myType) 662 ExtendedAddRec = SE->getTruncateExpr(ExtendedAddRec, myType); 663 return Rewriter.expandCodeFor(ExtendedAddRec, InsertPt); 664} 665 666bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { 667 LI = &getAnalysis<LoopInfo>(); 668 SE = &getAnalysis<ScalarEvolution>(); 669 Changed = false; 670 671 // If there are any floating-point or pointer recurrences, attempt to 672 // transform them to use integer recurrences. 673 RewriteNonIntegerIVs(L); 674 675 BasicBlock *Header = L->getHeader(); 676 BasicBlock *ExitingBlock = L->getExitingBlock(); 677 SmallPtrSet<Instruction*, 16> DeadInsts; 678 679 // Verify the input to the pass in already in LCSSA form. 680 assert(L->isLCSSAForm()); 681 682 // Check to see if this loop has a computable loop-invariant execution count. 683 // If so, this means that we can compute the final value of any expressions 684 // that are recurrent in the loop, and substitute the exit values from the 685 // loop into any instructions outside of the loop that use the final values of 686 // the current expressions. 687 // 688 SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L); 689 if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) 690 RewriteLoopExitValues(L, BackedgeTakenCount); 691 692 // Next, analyze all of the induction variables in the loop, canonicalizing 693 // auxillary induction variables. 694 std::vector<std::pair<PHINode*, SCEVHandle> > IndVars; 695 696 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { 697 PHINode *PN = cast<PHINode>(I); 698 if (PN->getType()->isInteger()) { // FIXME: when we have fast-math, enable! 699 SCEVHandle SCEV = SE->getSCEV(PN); 700 // FIXME: It is an extremely bad idea to indvar substitute anything more 701 // complex than affine induction variables. Doing so will put expensive 702 // polynomial evaluations inside of the loop, and the str reduction pass 703 // currently can only reduce affine polynomials. For now just disable 704 // indvar subst on anything more complex than an affine addrec. 705 if (SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SCEV)) 706 if (AR->getLoop() == L && AR->isAffine()) 707 IndVars.push_back(std::make_pair(PN, SCEV)); 708 } 709 } 710 711 // Compute the type of the largest recurrence expression, and collect 712 // the set of the types of the other recurrence expressions. 713 const Type *LargestType = 0; 714 SmallSetVector<const Type *, 4> SizesToInsert; 715 if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 716 LargestType = BackedgeTakenCount->getType(); 717 SizesToInsert.insert(BackedgeTakenCount->getType()); 718 } 719 for (unsigned i = 0, e = IndVars.size(); i != e; ++i) { 720 const PHINode *PN = IndVars[i].first; 721 SizesToInsert.insert(PN->getType()); 722 const Type *EffTy = getEffectiveIndvarType(PN); 723 SizesToInsert.insert(EffTy); 724 if (!LargestType || 725 EffTy->getPrimitiveSizeInBits() > 726 LargestType->getPrimitiveSizeInBits()) 727 LargestType = EffTy; 728 } 729 730 // Create a rewriter object which we'll use to transform the code with. 731 SCEVExpander Rewriter(*SE, *LI); 732 733 // Now that we know the largest of of the induction variables in this loop, 734 // insert a canonical induction variable of the largest size. 735 Value *IndVar = 0; 736 if (!SizesToInsert.empty()) { 737 IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType); 738 ++NumInserted; 739 Changed = true; 740 DOUT << "INDVARS: New CanIV: " << *IndVar; 741 } 742 743 // If we have a trip count expression, rewrite the loop's exit condition 744 // using it. We can currently only handle loops with a single exit. 745 bool NoSignedWrap = false; 746 bool NoUnsignedWrap = false; 747 const ConstantInt* InitialVal, * IncrVal, * LimitVal; 748 const PHINode *OrigControllingPHI = 0; 749 if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && ExitingBlock) 750 // Can't rewrite non-branch yet. 751 if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) { 752 if (Instruction *OrigCond = dyn_cast<Instruction>(BI->getCondition())) { 753 // Determine if the OrigIV will ever undergo overflow. 754 OrigControllingPHI = 755 TestOrigIVForWrap(L, BI, OrigCond, 756 NoSignedWrap, NoUnsignedWrap, 757 InitialVal, IncrVal, LimitVal); 758 759 // We'll be replacing the original condition, so it'll be dead. 760 DeadInsts.insert(OrigCond); 761 } 762 763 LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, 764 ExitingBlock, BI, Rewriter); 765 } 766 767 // Now that we have a canonical induction variable, we can rewrite any 768 // recurrences in terms of the induction variable. Start with the auxillary 769 // induction variables, and recursively rewrite any of their uses. 770 BasicBlock::iterator InsertPt = Header->getFirstNonPHI(); 771 772 // If there were induction variables of other sizes, cast the primary 773 // induction variable to the right size for them, avoiding the need for the 774 // code evaluation methods to insert induction variables of different sizes. 775 for (unsigned i = 0, e = SizesToInsert.size(); i != e; ++i) { 776 const Type *Ty = SizesToInsert[i]; 777 if (Ty != LargestType) { 778 Instruction *New = new TruncInst(IndVar, Ty, "indvar", InsertPt); 779 Rewriter.addInsertedValue(New, SE->getSCEV(New)); 780 DOUT << "INDVARS: Made trunc IV for type " << *Ty << ": " 781 << *New << "\n"; 782 } 783 } 784 785 // Rewrite all induction variables in terms of the canonical induction 786 // variable. 787 while (!IndVars.empty()) { 788 PHINode *PN = IndVars.back().first; 789 SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(IndVars.back().second); 790 Value *NewVal = Rewriter.expandCodeFor(AR, InsertPt); 791 DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *PN 792 << " into = " << *NewVal << "\n"; 793 NewVal->takeName(PN); 794 795 /// If the new canonical induction variable is wider than the original, 796 /// and the original has uses that are casts to wider types, see if the 797 /// truncate and extend can be omitted. 798 if (PN == OrigControllingPHI && PN->getType() != LargestType) 799 for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end(); 800 UI != UE; ++UI) { 801 if (isa<SExtInst>(UI) && NoSignedWrap) { 802 Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType, L, 803 UI->getType(), Rewriter, InsertPt); 804 UI->replaceAllUsesWith(TruncIndVar); 805 if (Instruction *DeadUse = dyn_cast<Instruction>(*UI)) 806 DeadInsts.insert(DeadUse); 807 } 808 // See if we can figure out sext(i+constant) doesn't wrap, so we can 809 // use a larger add. This is common in subscripting. 810 Instruction *UInst = dyn_cast<Instruction>(*UI); 811 if (UInst && UInst->getOpcode()==Instruction::Add && 812 UInst->hasOneUse() && 813 isa<ConstantInt>(UInst->getOperand(1)) && 814 isa<SExtInst>(UInst->use_begin()) && NoSignedWrap && LimitVal) { 815 uint64_t numBits = LimitVal->getValue().getBitWidth(); 816 ConstantInt* RHS = dyn_cast<ConstantInt>(UInst->getOperand(1)); 817 if (((APInt::getSignedMaxValue(numBits) - IncrVal->getValue()) - 818 RHS->getValue()).sgt(LimitVal->getValue())) { 819 SExtInst* oldSext = dyn_cast<SExtInst>(UInst->use_begin()); 820 Value *TruncIndVar = getSignExtendedTruncVar(AR, SE, LargestType, L, 821 oldSext->getType(), Rewriter, 822 InsertPt); 823 APInt APcopy = APInt(RHS->getValue()); 824 ConstantInt* newRHS = 825 ConstantInt::get(APcopy.sext(oldSext->getType()-> 826 getPrimitiveSizeInBits())); 827 Value *NewAdd = BinaryOperator::CreateAdd(TruncIndVar, newRHS, 828 UInst->getName()+".nosex", 829 UInst); 830 oldSext->replaceAllUsesWith(NewAdd); 831 if (Instruction *DeadUse = dyn_cast<Instruction>(oldSext)) 832 DeadInsts.insert(DeadUse); 833 if (Instruction *DeadUse = dyn_cast<Instruction>(UInst)) 834 DeadInsts.insert(DeadUse); 835 } 836 } 837 if (isa<ZExtInst>(UI) && NoUnsignedWrap) { 838 Value *TruncIndVar = getZeroExtendedTruncVar(AR, SE, LargestType, L, 839 UI->getType(), Rewriter, InsertPt); 840 UI->replaceAllUsesWith(TruncIndVar); 841 if (Instruction *DeadUse = dyn_cast<Instruction>(*UI)) 842 DeadInsts.insert(DeadUse); 843 } 844 } 845 846 // Replace the old PHI Node with the inserted computation. 847 PN->replaceAllUsesWith(NewVal); 848 DeadInsts.insert(PN); 849 IndVars.pop_back(); 850 ++NumRemoved; 851 Changed = true; 852 } 853 854 DeleteTriviallyDeadInstructions(DeadInsts); 855 assert(L->isLCSSAForm()); 856 return Changed; 857} 858 859/// Return true if it is OK to use SIToFPInst for an inducation variable 860/// with given inital and exit values. 861static bool useSIToFPInst(ConstantFP &InitV, ConstantFP &ExitV, 862 uint64_t intIV, uint64_t intEV) { 863 864 if (InitV.getValueAPF().isNegative() || ExitV.getValueAPF().isNegative()) 865 return true; 866 867 // If the iteration range can be handled by SIToFPInst then use it. 868 APInt Max = APInt::getSignedMaxValue(32); 869 if (Max.getZExtValue() > static_cast<uint64_t>(abs(intEV - intIV))) 870 return true; 871 872 return false; 873} 874 875/// convertToInt - Convert APF to an integer, if possible. 876static bool convertToInt(const APFloat &APF, uint64_t *intVal) { 877 878 bool isExact = false; 879 if (&APF.getSemantics() == &APFloat::PPCDoubleDouble) 880 return false; 881 if (APF.convertToInteger(intVal, 32, APF.isNegative(), 882 APFloat::rmTowardZero, &isExact) 883 != APFloat::opOK) 884 return false; 885 if (!isExact) 886 return false; 887 return true; 888 889} 890 891/// HandleFloatingPointIV - If the loop has floating induction variable 892/// then insert corresponding integer induction variable if possible. 893/// For example, 894/// for(double i = 0; i < 10000; ++i) 895/// bar(i) 896/// is converted into 897/// for(int i = 0; i < 10000; ++i) 898/// bar((double)i); 899/// 900void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH, 901 SmallPtrSet<Instruction*, 16> &DeadInsts) { 902 903 unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0)); 904 unsigned BackEdge = IncomingEdge^1; 905 906 // Check incoming value. 907 ConstantFP *InitValue = dyn_cast<ConstantFP>(PH->getIncomingValue(IncomingEdge)); 908 if (!InitValue) return; 909 uint64_t newInitValue = Type::Int32Ty->getPrimitiveSizeInBits(); 910 if (!convertToInt(InitValue->getValueAPF(), &newInitValue)) 911 return; 912 913 // Check IV increment. Reject this PH if increement operation is not 914 // an add or increment value can not be represented by an integer. 915 BinaryOperator *Incr = 916 dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge)); 917 if (!Incr) return; 918 if (Incr->getOpcode() != Instruction::Add) return; 919 ConstantFP *IncrValue = NULL; 920 unsigned IncrVIndex = 1; 921 if (Incr->getOperand(1) == PH) 922 IncrVIndex = 0; 923 IncrValue = dyn_cast<ConstantFP>(Incr->getOperand(IncrVIndex)); 924 if (!IncrValue) return; 925 uint64_t newIncrValue = Type::Int32Ty->getPrimitiveSizeInBits(); 926 if (!convertToInt(IncrValue->getValueAPF(), &newIncrValue)) 927 return; 928 929 // Check Incr uses. One user is PH and the other users is exit condition used 930 // by the conditional terminator. 931 Value::use_iterator IncrUse = Incr->use_begin(); 932 Instruction *U1 = cast<Instruction>(IncrUse++); 933 if (IncrUse == Incr->use_end()) return; 934 Instruction *U2 = cast<Instruction>(IncrUse++); 935 if (IncrUse != Incr->use_end()) return; 936 937 // Find exit condition. 938 FCmpInst *EC = dyn_cast<FCmpInst>(U1); 939 if (!EC) 940 EC = dyn_cast<FCmpInst>(U2); 941 if (!EC) return; 942 943 if (BranchInst *BI = dyn_cast<BranchInst>(EC->getParent()->getTerminator())) { 944 if (!BI->isConditional()) return; 945 if (BI->getCondition() != EC) return; 946 } 947 948 // Find exit value. If exit value can not be represented as an interger then 949 // do not handle this floating point PH. 950 ConstantFP *EV = NULL; 951 unsigned EVIndex = 1; 952 if (EC->getOperand(1) == Incr) 953 EVIndex = 0; 954 EV = dyn_cast<ConstantFP>(EC->getOperand(EVIndex)); 955 if (!EV) return; 956 uint64_t intEV = Type::Int32Ty->getPrimitiveSizeInBits(); 957 if (!convertToInt(EV->getValueAPF(), &intEV)) 958 return; 959 960 // Find new predicate for integer comparison. 961 CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; 962 switch (EC->getPredicate()) { 963 case CmpInst::FCMP_OEQ: 964 case CmpInst::FCMP_UEQ: 965 NewPred = CmpInst::ICMP_EQ; 966 break; 967 case CmpInst::FCMP_OGT: 968 case CmpInst::FCMP_UGT: 969 NewPred = CmpInst::ICMP_UGT; 970 break; 971 case CmpInst::FCMP_OGE: 972 case CmpInst::FCMP_UGE: 973 NewPred = CmpInst::ICMP_UGE; 974 break; 975 case CmpInst::FCMP_OLT: 976 case CmpInst::FCMP_ULT: 977 NewPred = CmpInst::ICMP_ULT; 978 break; 979 case CmpInst::FCMP_OLE: 980 case CmpInst::FCMP_ULE: 981 NewPred = CmpInst::ICMP_ULE; 982 break; 983 default: 984 break; 985 } 986 if (NewPred == CmpInst::BAD_ICMP_PREDICATE) return; 987 988 // Insert new integer induction variable. 989 PHINode *NewPHI = PHINode::Create(Type::Int32Ty, 990 PH->getName()+".int", PH); 991 NewPHI->addIncoming(ConstantInt::get(Type::Int32Ty, newInitValue), 992 PH->getIncomingBlock(IncomingEdge)); 993 994 Value *NewAdd = BinaryOperator::CreateAdd(NewPHI, 995 ConstantInt::get(Type::Int32Ty, 996 newIncrValue), 997 Incr->getName()+".int", Incr); 998 NewPHI->addIncoming(NewAdd, PH->getIncomingBlock(BackEdge)); 999 1000 ConstantInt *NewEV = ConstantInt::get(Type::Int32Ty, intEV); 1001 Value *LHS = (EVIndex == 1 ? NewPHI->getIncomingValue(BackEdge) : NewEV); 1002 Value *RHS = (EVIndex == 1 ? NewEV : NewPHI->getIncomingValue(BackEdge)); 1003 ICmpInst *NewEC = new ICmpInst(NewPred, LHS, RHS, EC->getNameStart(), 1004 EC->getParent()->getTerminator()); 1005 1006 // Delete old, floating point, exit comparision instruction. 1007 EC->replaceAllUsesWith(NewEC); 1008 DeadInsts.insert(EC); 1009 1010 // Delete old, floating point, increment instruction. 1011 Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); 1012 DeadInsts.insert(Incr); 1013 1014 // Replace floating induction variable. Give SIToFPInst preference over 1015 // UIToFPInst because it is faster on platforms that are widely used. 1016 if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV)) { 1017 SIToFPInst *Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv", 1018 PH->getParent()->getFirstNonPHI()); 1019 PH->replaceAllUsesWith(Conv); 1020 } else { 1021 UIToFPInst *Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv", 1022 PH->getParent()->getFirstNonPHI()); 1023 PH->replaceAllUsesWith(Conv); 1024 } 1025 DeadInsts.insert(PH); 1026} 1027 1028