IndVarSimplify.cpp revision db125cfaf57cc83e7dd7453de2d509bc8efd0e5e
1//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This transformation analyzes and transforms the induction variables (and 11// computations derived from them) into simpler forms suitable for subsequent 12// analysis and transformation. 13// 14// This transformation makes the following changes to each loop with an 15// identifiable induction variable: 16// 1. All loops are transformed to have a SINGLE canonical induction variable 17// which starts at zero and steps by one. 18// 2. The canonical induction variable is guaranteed to be the first PHI node 19// in the loop header block. 20// 3. The canonical induction variable is guaranteed to be in a wide enough 21// type so that IV expressions need not be (directly) zero-extended or 22// sign-extended. 23// 4. Any pointer arithmetic recurrences are raised to use array subscripts. 24// 25// If the trip count of a loop is computable, this pass also makes the following 26// changes: 27// 1. The exit condition for the loop is canonicalized to compare the 28// induction value against the exit value. This turns loops like: 29// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' 30// 2. Any use outside of the loop of an expression derived from the indvar 31// is changed to compute the derived value outside of the loop, eliminating 32// the dependence on the exit value of the induction variable. If the only 33// purpose of the loop is to compute the exit value of some derived 34// expression, this transformation will make the loop dead. 35// 36// This transformation should be followed by strength reduction after all of the 37// desired loop transformations have been performed. 38// 39//===----------------------------------------------------------------------===// 40 41#define DEBUG_TYPE "indvars" 42#include "llvm/Transforms/Scalar.h" 43#include "llvm/BasicBlock.h" 44#include "llvm/Constants.h" 45#include "llvm/Instructions.h" 46#include "llvm/IntrinsicInst.h" 47#include "llvm/LLVMContext.h" 48#include "llvm/Type.h" 49#include "llvm/Analysis/Dominators.h" 50#include "llvm/Analysis/IVUsers.h" 51#include "llvm/Analysis/ScalarEvolutionExpander.h" 52#include "llvm/Analysis/LoopInfo.h" 53#include "llvm/Analysis/LoopPass.h" 54#include "llvm/Support/CFG.h" 55#include "llvm/Support/CommandLine.h" 56#include "llvm/Support/Debug.h" 57#include "llvm/Support/raw_ostream.h" 58#include "llvm/Transforms/Utils/Local.h" 59#include "llvm/Transforms/Utils/BasicBlockUtils.h" 60#include "llvm/Target/TargetData.h" 61#include "llvm/ADT/DenseMap.h" 62#include "llvm/ADT/SmallVector.h" 63#include "llvm/ADT/Statistic.h" 64#include "llvm/ADT/STLExtras.h" 65using namespace llvm; 66 67STATISTIC(NumRemoved , "Number of aux indvars removed"); 68STATISTIC(NumWidened , "Number of indvars widened"); 69STATISTIC(NumInserted , "Number of canonical indvars added"); 70STATISTIC(NumReplaced , "Number of exit values replaced"); 71STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 72STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); 73STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 74STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); 75STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); 76STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 77 78static cl::opt<bool> DisableIVRewrite( 79 "disable-iv-rewrite", cl::Hidden, 80 cl::desc("Disable canonical induction variable rewriting")); 81 82namespace { 83 class IndVarSimplify : public LoopPass { 84 IVUsers *IU; 85 LoopInfo *LI; 86 ScalarEvolution *SE; 87 DominatorTree *DT; 88 TargetData *TD; 89 90 SmallVector<WeakVH, 16> DeadInsts; 91 bool Changed; 92 public: 93 94 static char ID; // Pass identification, replacement for typeid 95 IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0), 96 Changed(false) { 97 initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); 98 } 99 100 virtual bool runOnLoop(Loop *L, LPPassManager &LPM); 101 102 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 103 AU.addRequired<DominatorTree>(); 104 AU.addRequired<LoopInfo>(); 105 AU.addRequired<ScalarEvolution>(); 106 AU.addRequiredID(LoopSimplifyID); 107 AU.addRequiredID(LCSSAID); 108 if (!DisableIVRewrite) 109 AU.addRequired<IVUsers>(); 110 AU.addPreserved<ScalarEvolution>(); 111 AU.addPreservedID(LoopSimplifyID); 112 AU.addPreservedID(LCSSAID); 113 if (!DisableIVRewrite) 114 AU.addPreserved<IVUsers>(); 115 AU.setPreservesCFG(); 116 } 117 118 private: 119 virtual void releaseMemory() { 120 DeadInsts.clear(); 121 } 122 123 bool isValidRewrite(Value *FromVal, Value *ToVal); 124 125 void HandleFloatingPointIV(Loop *L, PHINode *PH); 126 void RewriteNonIntegerIVs(Loop *L); 127 128 void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); 129 130 void SimplifyIVUsers(SCEVExpander &Rewriter); 131 void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter); 132 133 bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand); 134 void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); 135 void EliminateIVRemainder(BinaryOperator *Rem, 136 Value *IVOperand, 137 bool IsSigned); 138 139 void SimplifyCongruentIVs(Loop *L); 140 141 void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter); 142 143 ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, 144 PHINode *IndVar, 145 SCEVExpander &Rewriter); 146 147 void SinkUnusedInvariants(Loop *L); 148 }; 149} 150 151char IndVarSimplify::ID = 0; 152INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars", 153 "Induction Variable Simplification", false, false) 154INITIALIZE_PASS_DEPENDENCY(DominatorTree) 155INITIALIZE_PASS_DEPENDENCY(LoopInfo) 156INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 157INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 158INITIALIZE_PASS_DEPENDENCY(LCSSA) 159INITIALIZE_PASS_DEPENDENCY(IVUsers) 160INITIALIZE_PASS_END(IndVarSimplify, "indvars", 161 "Induction Variable Simplification", false, false) 162 163Pass *llvm::createIndVarSimplifyPass() { 164 return new IndVarSimplify(); 165} 166 167/// isValidRewrite - Return true if the SCEV expansion generated by the 168/// rewriter can replace the original value. SCEV guarantees that it 169/// produces the same value, but the way it is produced may be illegal IR. 170/// Ideally, this function will only be called for verification. 171bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { 172 // If an SCEV expression subsumed multiple pointers, its expansion could 173 // reassociate the GEP changing the base pointer. This is illegal because the 174 // final address produced by a GEP chain must be inbounds relative to its 175 // underlying object. Otherwise basic alias analysis, among other things, 176 // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid 177 // producing an expression involving multiple pointers. Until then, we must 178 // bail out here. 179 // 180 // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject 181 // because it understands lcssa phis while SCEV does not. 182 Value *FromPtr = FromVal; 183 Value *ToPtr = ToVal; 184 if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) { 185 FromPtr = GEP->getPointerOperand(); 186 } 187 if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) { 188 ToPtr = GEP->getPointerOperand(); 189 } 190 if (FromPtr != FromVal || ToPtr != ToVal) { 191 // Quickly check the common case 192 if (FromPtr == ToPtr) 193 return true; 194 195 // SCEV may have rewritten an expression that produces the GEP's pointer 196 // operand. That's ok as long as the pointer operand has the same base 197 // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the 198 // base of a recurrence. This handles the case in which SCEV expansion 199 // converts a pointer type recurrence into a nonrecurrent pointer base 200 // indexed by an integer recurrence. 201 const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); 202 const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); 203 if (FromBase == ToBase) 204 return true; 205 206 DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " 207 << *FromBase << " != " << *ToBase << "\n"); 208 209 return false; 210 } 211 return true; 212} 213 214//===----------------------------------------------------------------------===// 215// RewriteNonIntegerIVs and helpers. Prefer integer IVs. 216//===----------------------------------------------------------------------===// 217 218/// ConvertToSInt - Convert APF to an integer, if possible. 219static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { 220 bool isExact = false; 221 if (&APF.getSemantics() == &APFloat::PPCDoubleDouble) 222 return false; 223 // See if we can convert this to an int64_t 224 uint64_t UIntVal; 225 if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero, 226 &isExact) != APFloat::opOK || !isExact) 227 return false; 228 IntVal = UIntVal; 229 return true; 230} 231 232/// HandleFloatingPointIV - If the loop has floating induction variable 233/// then insert corresponding integer induction variable if possible. 234/// For example, 235/// for(double i = 0; i < 10000; ++i) 236/// bar(i) 237/// is converted into 238/// for(int i = 0; i < 10000; ++i) 239/// bar((double)i); 240/// 241void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { 242 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); 243 unsigned BackEdge = IncomingEdge^1; 244 245 // Check incoming value. 246 ConstantFP *InitValueVal = 247 dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); 248 249 int64_t InitValue; 250 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) 251 return; 252 253 // Check IV increment. Reject this PN if increment operation is not 254 // an add or increment value can not be represented by an integer. 255 BinaryOperator *Incr = 256 dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); 257 if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; 258 259 // If this is not an add of the PHI with a constantfp, or if the constant fp 260 // is not an integer, bail out. 261 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); 262 int64_t IncValue; 263 if (IncValueVal == 0 || Incr->getOperand(0) != PN || 264 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) 265 return; 266 267 // Check Incr uses. One user is PN and the other user is an exit condition 268 // used by the conditional terminator. 269 Value::use_iterator IncrUse = Incr->use_begin(); 270 Instruction *U1 = cast<Instruction>(*IncrUse++); 271 if (IncrUse == Incr->use_end()) return; 272 Instruction *U2 = cast<Instruction>(*IncrUse++); 273 if (IncrUse != Incr->use_end()) return; 274 275 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't 276 // only used by a branch, we can't transform it. 277 FCmpInst *Compare = dyn_cast<FCmpInst>(U1); 278 if (!Compare) 279 Compare = dyn_cast<FCmpInst>(U2); 280 if (Compare == 0 || !Compare->hasOneUse() || 281 !isa<BranchInst>(Compare->use_back())) 282 return; 283 284 BranchInst *TheBr = cast<BranchInst>(Compare->use_back()); 285 286 // We need to verify that the branch actually controls the iteration count 287 // of the loop. If not, the new IV can overflow and no one will notice. 288 // The branch block must be in the loop and one of the successors must be out 289 // of the loop. 290 assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); 291 if (!L->contains(TheBr->getParent()) || 292 (L->contains(TheBr->getSuccessor(0)) && 293 L->contains(TheBr->getSuccessor(1)))) 294 return; 295 296 297 // If it isn't a comparison with an integer-as-fp (the exit value), we can't 298 // transform it. 299 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); 300 int64_t ExitValue; 301 if (ExitValueVal == 0 || 302 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) 303 return; 304 305 // Find new predicate for integer comparison. 306 CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; 307 switch (Compare->getPredicate()) { 308 default: return; // Unknown comparison. 309 case CmpInst::FCMP_OEQ: 310 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; 311 case CmpInst::FCMP_ONE: 312 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; 313 case CmpInst::FCMP_OGT: 314 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; 315 case CmpInst::FCMP_OGE: 316 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; 317 case CmpInst::FCMP_OLT: 318 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; 319 case CmpInst::FCMP_OLE: 320 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; 321 } 322 323 // We convert the floating point induction variable to a signed i32 value if 324 // we can. This is only safe if the comparison will not overflow in a way 325 // that won't be trapped by the integer equivalent operations. Check for this 326 // now. 327 // TODO: We could use i64 if it is native and the range requires it. 328 329 // The start/stride/exit values must all fit in signed i32. 330 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) 331 return; 332 333 // If not actually striding (add x, 0.0), avoid touching the code. 334 if (IncValue == 0) 335 return; 336 337 // Positive and negative strides have different safety conditions. 338 if (IncValue > 0) { 339 // If we have a positive stride, we require the init to be less than the 340 // exit value and an equality or less than comparison. 341 if (InitValue >= ExitValue || 342 NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) 343 return; 344 345 uint32_t Range = uint32_t(ExitValue-InitValue); 346 if (NewPred == CmpInst::ICMP_SLE) { 347 // Normalize SLE -> SLT, check for infinite loop. 348 if (++Range == 0) return; // Range overflows. 349 } 350 351 unsigned Leftover = Range % uint32_t(IncValue); 352 353 // If this is an equality comparison, we require that the strided value 354 // exactly land on the exit value, otherwise the IV condition will wrap 355 // around and do things the fp IV wouldn't. 356 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 357 Leftover != 0) 358 return; 359 360 // If the stride would wrap around the i32 before exiting, we can't 361 // transform the IV. 362 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) 363 return; 364 365 } else { 366 // If we have a negative stride, we require the init to be greater than the 367 // exit value and an equality or greater than comparison. 368 if (InitValue >= ExitValue || 369 NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) 370 return; 371 372 uint32_t Range = uint32_t(InitValue-ExitValue); 373 if (NewPred == CmpInst::ICMP_SGE) { 374 // Normalize SGE -> SGT, check for infinite loop. 375 if (++Range == 0) return; // Range overflows. 376 } 377 378 unsigned Leftover = Range % uint32_t(-IncValue); 379 380 // If this is an equality comparison, we require that the strided value 381 // exactly land on the exit value, otherwise the IV condition will wrap 382 // around and do things the fp IV wouldn't. 383 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 384 Leftover != 0) 385 return; 386 387 // If the stride would wrap around the i32 before exiting, we can't 388 // transform the IV. 389 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) 390 return; 391 } 392 393 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); 394 395 // Insert new integer induction variable. 396 PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); 397 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), 398 PN->getIncomingBlock(IncomingEdge)); 399 400 Value *NewAdd = 401 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), 402 Incr->getName()+".int", Incr); 403 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); 404 405 ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, 406 ConstantInt::get(Int32Ty, ExitValue), 407 Compare->getName()); 408 409 // In the following deletions, PN may become dead and may be deleted. 410 // Use a WeakVH to observe whether this happens. 411 WeakVH WeakPH = PN; 412 413 // Delete the old floating point exit comparison. The branch starts using the 414 // new comparison. 415 NewCompare->takeName(Compare); 416 Compare->replaceAllUsesWith(NewCompare); 417 RecursivelyDeleteTriviallyDeadInstructions(Compare); 418 419 // Delete the old floating point increment. 420 Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); 421 RecursivelyDeleteTriviallyDeadInstructions(Incr); 422 423 // If the FP induction variable still has uses, this is because something else 424 // in the loop uses its value. In order to canonicalize the induction 425 // variable, we chose to eliminate the IV and rewrite it in terms of an 426 // int->fp cast. 427 // 428 // We give preference to sitofp over uitofp because it is faster on most 429 // platforms. 430 if (WeakPH) { 431 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", 432 PN->getParent()->getFirstNonPHI()); 433 PN->replaceAllUsesWith(Conv); 434 RecursivelyDeleteTriviallyDeadInstructions(PN); 435 } 436 437 // Add a new IVUsers entry for the newly-created integer PHI. 438 if (IU) 439 IU->AddUsersIfInteresting(NewPHI); 440} 441 442void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { 443 // First step. Check to see if there are any floating-point recurrences. 444 // If there are, change them into integer recurrences, permitting analysis by 445 // the SCEV routines. 446 // 447 BasicBlock *Header = L->getHeader(); 448 449 SmallVector<WeakVH, 8> PHIs; 450 for (BasicBlock::iterator I = Header->begin(); 451 PHINode *PN = dyn_cast<PHINode>(I); ++I) 452 PHIs.push_back(PN); 453 454 for (unsigned i = 0, e = PHIs.size(); i != e; ++i) 455 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) 456 HandleFloatingPointIV(L, PN); 457 458 // If the loop previously had floating-point IV, ScalarEvolution 459 // may not have been able to compute a trip count. Now that we've done some 460 // re-writing, the trip count may be computable. 461 if (Changed) 462 SE->forgetLoop(L); 463} 464 465//===----------------------------------------------------------------------===// 466// RewriteLoopExitValues - Optimize IV users outside the loop. 467// As a side effect, reduces the amount of IV processing within the loop. 468//===----------------------------------------------------------------------===// 469 470/// RewriteLoopExitValues - Check to see if this loop has a computable 471/// loop-invariant execution count. If so, this means that we can compute the 472/// final value of any expressions that are recurrent in the loop, and 473/// substitute the exit values from the loop into any instructions outside of 474/// the loop that use the final values of the current expressions. 475/// 476/// This is mostly redundant with the regular IndVarSimplify activities that 477/// happen later, except that it's more powerful in some cases, because it's 478/// able to brute-force evaluate arbitrary instructions as long as they have 479/// constant operands at the beginning of the loop. 480void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { 481 // Verify the input to the pass in already in LCSSA form. 482 assert(L->isLCSSAForm(*DT)); 483 484 SmallVector<BasicBlock*, 8> ExitBlocks; 485 L->getUniqueExitBlocks(ExitBlocks); 486 487 // Find all values that are computed inside the loop, but used outside of it. 488 // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan 489 // the exit blocks of the loop to find them. 490 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 491 BasicBlock *ExitBB = ExitBlocks[i]; 492 493 // If there are no PHI nodes in this exit block, then no values defined 494 // inside the loop are used on this path, skip it. 495 PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); 496 if (!PN) continue; 497 498 unsigned NumPreds = PN->getNumIncomingValues(); 499 500 // Iterate over all of the PHI nodes. 501 BasicBlock::iterator BBI = ExitBB->begin(); 502 while ((PN = dyn_cast<PHINode>(BBI++))) { 503 if (PN->use_empty()) 504 continue; // dead use, don't replace it 505 506 // SCEV only supports integer expressions for now. 507 if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy()) 508 continue; 509 510 // It's necessary to tell ScalarEvolution about this explicitly so that 511 // it can walk the def-use list and forget all SCEVs, as it may not be 512 // watching the PHI itself. Once the new exit value is in place, there 513 // may not be a def-use connection between the loop and every instruction 514 // which got a SCEVAddRecExpr for that loop. 515 SE->forgetValue(PN); 516 517 // Iterate over all of the values in all the PHI nodes. 518 for (unsigned i = 0; i != NumPreds; ++i) { 519 // If the value being merged in is not integer or is not defined 520 // in the loop, skip it. 521 Value *InVal = PN->getIncomingValue(i); 522 if (!isa<Instruction>(InVal)) 523 continue; 524 525 // If this pred is for a subloop, not L itself, skip it. 526 if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) 527 continue; // The Block is in a subloop, skip it. 528 529 // Check that InVal is defined in the loop. 530 Instruction *Inst = cast<Instruction>(InVal); 531 if (!L->contains(Inst)) 532 continue; 533 534 // Okay, this instruction has a user outside of the current loop 535 // and varies predictably *inside* the loop. Evaluate the value it 536 // contains when the loop exits, if possible. 537 const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); 538 if (!SE->isLoopInvariant(ExitValue, L)) 539 continue; 540 541 Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); 542 543 DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' 544 << " LoopVal = " << *Inst << "\n"); 545 546 if (!isValidRewrite(Inst, ExitVal)) { 547 DeadInsts.push_back(ExitVal); 548 continue; 549 } 550 Changed = true; 551 ++NumReplaced; 552 553 PN->setIncomingValue(i, ExitVal); 554 555 // If this instruction is dead now, delete it. 556 RecursivelyDeleteTriviallyDeadInstructions(Inst); 557 558 if (NumPreds == 1) { 559 // Completely replace a single-pred PHI. This is safe, because the 560 // NewVal won't be variant in the loop, so we don't need an LCSSA phi 561 // node anymore. 562 PN->replaceAllUsesWith(ExitVal); 563 RecursivelyDeleteTriviallyDeadInstructions(PN); 564 } 565 } 566 if (NumPreds != 1) { 567 // Clone the PHI and delete the original one. This lets IVUsers and 568 // any other maps purge the original user from their records. 569 PHINode *NewPN = cast<PHINode>(PN->clone()); 570 NewPN->takeName(PN); 571 NewPN->insertBefore(PN); 572 PN->replaceAllUsesWith(NewPN); 573 PN->eraseFromParent(); 574 } 575 } 576 } 577 578 // The insertion point instruction may have been deleted; clear it out 579 // so that the rewriter doesn't trip over it later. 580 Rewriter.clearInsertPoint(); 581} 582 583//===----------------------------------------------------------------------===// 584// Rewrite IV users based on a canonical IV. 585// To be replaced by -disable-iv-rewrite. 586//===----------------------------------------------------------------------===// 587 588/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this 589/// loop. IVUsers is treated as a worklist. Each successive simplification may 590/// push more users which may themselves be candidates for simplification. 591/// 592/// This is the old approach to IV simplification to be replaced by 593/// SimplifyIVUsersNoRewrite. 594/// 595void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) { 596 // Each round of simplification involves a round of eliminating operations 597 // followed by a round of widening IVs. A single IVUsers worklist is used 598 // across all rounds. The inner loop advances the user. If widening exposes 599 // more uses, then another pass through the outer loop is triggered. 600 for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) { 601 Instruction *UseInst = I->getUser(); 602 Value *IVOperand = I->getOperandValToReplace(); 603 604 if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { 605 EliminateIVComparison(ICmp, IVOperand); 606 continue; 607 } 608 if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { 609 bool IsSigned = Rem->getOpcode() == Instruction::SRem; 610 if (IsSigned || Rem->getOpcode() == Instruction::URem) { 611 EliminateIVRemainder(Rem, IVOperand, IsSigned); 612 continue; 613 } 614 } 615 } 616} 617 618// FIXME: It is an extremely bad idea to indvar substitute anything more 619// complex than affine induction variables. Doing so will put expensive 620// polynomial evaluations inside of the loop, and the str reduction pass 621// currently can only reduce affine polynomials. For now just disable 622// indvar subst on anything more complex than an affine addrec, unless 623// it can be expanded to a trivial value. 624static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { 625 // Loop-invariant values are safe. 626 if (SE->isLoopInvariant(S, L)) return true; 627 628 // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how 629 // to transform them into efficient code. 630 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) 631 return AR->isAffine(); 632 633 // An add is safe it all its operands are safe. 634 if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) { 635 for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), 636 E = Commutative->op_end(); I != E; ++I) 637 if (!isSafe(*I, L, SE)) return false; 638 return true; 639 } 640 641 // A cast is safe if its operand is. 642 if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) 643 return isSafe(C->getOperand(), L, SE); 644 645 // A udiv is safe if its operands are. 646 if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S)) 647 return isSafe(UD->getLHS(), L, SE) && 648 isSafe(UD->getRHS(), L, SE); 649 650 // SCEVUnknown is always safe. 651 if (isa<SCEVUnknown>(S)) 652 return true; 653 654 // Nothing else is safe. 655 return false; 656} 657 658void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { 659 // Rewrite all induction variable expressions in terms of the canonical 660 // induction variable. 661 // 662 // If there were induction variables of other sizes or offsets, manually 663 // add the offsets to the primary induction variable and cast, avoiding 664 // the need for the code evaluation methods to insert induction variables 665 // of different sizes. 666 for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { 667 Value *Op = UI->getOperandValToReplace(); 668 Type *UseTy = Op->getType(); 669 Instruction *User = UI->getUser(); 670 671 // Compute the final addrec to expand into code. 672 const SCEV *AR = IU->getReplacementExpr(*UI); 673 674 // Evaluate the expression out of the loop, if possible. 675 if (!L->contains(UI->getUser())) { 676 const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); 677 if (SE->isLoopInvariant(ExitVal, L)) 678 AR = ExitVal; 679 } 680 681 // FIXME: It is an extremely bad idea to indvar substitute anything more 682 // complex than affine induction variables. Doing so will put expensive 683 // polynomial evaluations inside of the loop, and the str reduction pass 684 // currently can only reduce affine polynomials. For now just disable 685 // indvar subst on anything more complex than an affine addrec, unless 686 // it can be expanded to a trivial value. 687 if (!isSafe(AR, L, SE)) 688 continue; 689 690 // Determine the insertion point for this user. By default, insert 691 // immediately before the user. The SCEVExpander class will automatically 692 // hoist loop invariants out of the loop. For PHI nodes, there may be 693 // multiple uses, so compute the nearest common dominator for the 694 // incoming blocks. 695 Instruction *InsertPt = User; 696 if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) 697 for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) 698 if (PHI->getIncomingValue(i) == Op) { 699 if (InsertPt == User) 700 InsertPt = PHI->getIncomingBlock(i)->getTerminator(); 701 else 702 InsertPt = 703 DT->findNearestCommonDominator(InsertPt->getParent(), 704 PHI->getIncomingBlock(i)) 705 ->getTerminator(); 706 } 707 708 // Now expand it into actual Instructions and patch it into place. 709 Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); 710 711 DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' 712 << " into = " << *NewVal << "\n"); 713 714 if (!isValidRewrite(Op, NewVal)) { 715 DeadInsts.push_back(NewVal); 716 continue; 717 } 718 // Inform ScalarEvolution that this value is changing. The change doesn't 719 // affect its value, but it does potentially affect which use lists the 720 // value will be on after the replacement, which affects ScalarEvolution's 721 // ability to walk use lists and drop dangling pointers when a value is 722 // deleted. 723 SE->forgetValue(User); 724 725 // Patch the new value into place. 726 if (Op->hasName()) 727 NewVal->takeName(Op); 728 if (Instruction *NewValI = dyn_cast<Instruction>(NewVal)) 729 NewValI->setDebugLoc(User->getDebugLoc()); 730 User->replaceUsesOfWith(Op, NewVal); 731 UI->setOperandValToReplace(NewVal); 732 733 ++NumRemoved; 734 Changed = true; 735 736 // The old value may be dead now. 737 DeadInsts.push_back(Op); 738 } 739} 740 741//===----------------------------------------------------------------------===// 742// IV Widening - Extend the width of an IV to cover its widest uses. 743//===----------------------------------------------------------------------===// 744 745namespace { 746 // Collect information about induction variables that are used by sign/zero 747 // extend operations. This information is recorded by CollectExtend and 748 // provides the input to WidenIV. 749 struct WideIVInfo { 750 Type *WidestNativeType; // Widest integer type created [sz]ext 751 bool IsSigned; // Was an sext user seen before a zext? 752 753 WideIVInfo() : WidestNativeType(0), IsSigned(false) {} 754 }; 755} 756 757/// CollectExtend - Update information about the induction variable that is 758/// extended by this sign or zero extend operation. This is used to determine 759/// the final width of the IV before actually widening it. 760static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI, 761 ScalarEvolution *SE, const TargetData *TD) { 762 Type *Ty = Cast->getType(); 763 uint64_t Width = SE->getTypeSizeInBits(Ty); 764 if (TD && !TD->isLegalInteger(Width)) 765 return; 766 767 if (!WI.WidestNativeType) { 768 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 769 WI.IsSigned = IsSigned; 770 return; 771 } 772 773 // We extend the IV to satisfy the sign of its first user, arbitrarily. 774 if (WI.IsSigned != IsSigned) 775 return; 776 777 if (Width > SE->getTypeSizeInBits(WI.WidestNativeType)) 778 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 779} 780 781namespace { 782/// WidenIV - The goal of this transform is to remove sign and zero extends 783/// without creating any new induction variables. To do this, it creates a new 784/// phi of the wider type and redirects all users, either removing extends or 785/// inserting truncs whenever we stop propagating the type. 786/// 787class WidenIV { 788 // Parameters 789 PHINode *OrigPhi; 790 Type *WideType; 791 bool IsSigned; 792 793 // Context 794 LoopInfo *LI; 795 Loop *L; 796 ScalarEvolution *SE; 797 DominatorTree *DT; 798 799 // Result 800 PHINode *WidePhi; 801 Instruction *WideInc; 802 const SCEV *WideIncExpr; 803 SmallVectorImpl<WeakVH> &DeadInsts; 804 805 SmallPtrSet<Instruction*,16> Widened; 806 SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers; 807 808public: 809 WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo, 810 ScalarEvolution *SEv, DominatorTree *DTree, 811 SmallVectorImpl<WeakVH> &DI) : 812 OrigPhi(PN), 813 WideType(WI.WidestNativeType), 814 IsSigned(WI.IsSigned), 815 LI(LInfo), 816 L(LI->getLoopFor(OrigPhi->getParent())), 817 SE(SEv), 818 DT(DTree), 819 WidePhi(0), 820 WideInc(0), 821 WideIncExpr(0), 822 DeadInsts(DI) { 823 assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); 824 } 825 826 PHINode *CreateWideIV(SCEVExpander &Rewriter); 827 828protected: 829 Instruction *CloneIVUser(Instruction *NarrowUse, 830 Instruction *NarrowDef, 831 Instruction *WideDef); 832 833 const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); 834 835 Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, 836 Instruction *WideDef); 837 838 void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); 839}; 840} // anonymous namespace 841 842static Value *getExtend( Value *NarrowOper, Type *WideType, 843 bool IsSigned, IRBuilder<> &Builder) { 844 return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : 845 Builder.CreateZExt(NarrowOper, WideType); 846} 847 848/// CloneIVUser - Instantiate a wide operation to replace a narrow 849/// operation. This only needs to handle operations that can evaluation to 850/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone. 851Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse, 852 Instruction *NarrowDef, 853 Instruction *WideDef) { 854 unsigned Opcode = NarrowUse->getOpcode(); 855 switch (Opcode) { 856 default: 857 return 0; 858 case Instruction::Add: 859 case Instruction::Mul: 860 case Instruction::UDiv: 861 case Instruction::Sub: 862 case Instruction::And: 863 case Instruction::Or: 864 case Instruction::Xor: 865 case Instruction::Shl: 866 case Instruction::LShr: 867 case Instruction::AShr: 868 DEBUG(dbgs() << "Cloning IVUser: " << *NarrowUse << "\n"); 869 870 IRBuilder<> Builder(NarrowUse); 871 872 // Replace NarrowDef operands with WideDef. Otherwise, we don't know 873 // anything about the narrow operand yet so must insert a [sz]ext. It is 874 // probably loop invariant and will be folded or hoisted. If it actually 875 // comes from a widened IV, it should be removed during a future call to 876 // WidenIVUse. 877 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef : 878 getExtend(NarrowUse->getOperand(0), WideType, IsSigned, Builder); 879 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef : 880 getExtend(NarrowUse->getOperand(1), WideType, IsSigned, Builder); 881 882 BinaryOperator *NarrowBO = cast<BinaryOperator>(NarrowUse); 883 BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), 884 LHS, RHS, 885 NarrowBO->getName()); 886 Builder.Insert(WideBO); 887 if (const OverflowingBinaryOperator *OBO = 888 dyn_cast<OverflowingBinaryOperator>(NarrowBO)) { 889 if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap(); 890 if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap(); 891 } 892 return WideBO; 893 } 894 llvm_unreachable(0); 895} 896 897/// HoistStep - Attempt to hoist an IV increment above a potential use. 898/// 899/// To successfully hoist, two criteria must be met: 900/// - IncV operands dominate InsertPos and 901/// - InsertPos dominates IncV 902/// 903/// Meeting the second condition means that we don't need to check all of IncV's 904/// existing uses (it's moving up in the domtree). 905/// 906/// This does not yet recursively hoist the operands, although that would 907/// not be difficult. 908static bool HoistStep(Instruction *IncV, Instruction *InsertPos, 909 const DominatorTree *DT) 910{ 911 if (DT->dominates(IncV, InsertPos)) 912 return true; 913 914 if (!DT->dominates(InsertPos->getParent(), IncV->getParent())) 915 return false; 916 917 if (IncV->mayHaveSideEffects()) 918 return false; 919 920 // Attempt to hoist IncV 921 for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end(); 922 OI != OE; ++OI) { 923 Instruction *OInst = dyn_cast<Instruction>(OI); 924 if (OInst && !DT->dominates(OInst, InsertPos)) 925 return false; 926 } 927 IncV->moveBefore(InsertPos); 928 return true; 929} 930 931// GetWideRecurrence - Is this instruction potentially interesting from IVUsers' 932// perspective after widening it's type? In other words, can the extend be 933// safely hoisted out of the loop with SCEV reducing the value to a recurrence 934// on the same loop. If so, return the sign or zero extended 935// recurrence. Otherwise return NULL. 936const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { 937 if (!SE->isSCEVable(NarrowUse->getType())) 938 return 0; 939 940 const SCEV *NarrowExpr = SE->getSCEV(NarrowUse); 941 if (SE->getTypeSizeInBits(NarrowExpr->getType()) 942 >= SE->getTypeSizeInBits(WideType)) { 943 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow 944 // index. So don't follow this use. 945 return 0; 946 } 947 948 const SCEV *WideExpr = IsSigned ? 949 SE->getSignExtendExpr(NarrowExpr, WideType) : 950 SE->getZeroExtendExpr(NarrowExpr, WideType); 951 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); 952 if (!AddRec || AddRec->getLoop() != L) 953 return 0; 954 955 return AddRec; 956} 957 958/// WidenIVUse - Determine whether an individual user of the narrow IV can be 959/// widened. If so, return the wide clone of the user. 960Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, 961 Instruction *WideDef) { 962 Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser()); 963 964 // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 965 if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L) 966 return 0; 967 968 // Our raison d'etre! Eliminate sign and zero extension. 969 if (IsSigned ? isa<SExtInst>(NarrowUse) : isa<ZExtInst>(NarrowUse)) { 970 Value *NewDef = WideDef; 971 if (NarrowUse->getType() != WideType) { 972 unsigned CastWidth = SE->getTypeSizeInBits(NarrowUse->getType()); 973 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 974 if (CastWidth < IVWidth) { 975 // The cast isn't as wide as the IV, so insert a Trunc. 976 IRBuilder<> Builder(NarrowDefUse); 977 NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType()); 978 } 979 else { 980 // A wider extend was hidden behind a narrower one. This may induce 981 // another round of IV widening in which the intermediate IV becomes 982 // dead. It should be very rare. 983 DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi 984 << " not wide enough to subsume " << *NarrowUse << "\n"); 985 NarrowUse->replaceUsesOfWith(NarrowDef, WideDef); 986 NewDef = NarrowUse; 987 } 988 } 989 if (NewDef != NarrowUse) { 990 DEBUG(dbgs() << "INDVARS: eliminating " << *NarrowUse 991 << " replaced by " << *WideDef << "\n"); 992 ++NumElimExt; 993 NarrowUse->replaceAllUsesWith(NewDef); 994 DeadInsts.push_back(NarrowUse); 995 } 996 // Now that the extend is gone, we want to expose it's uses for potential 997 // further simplification. We don't need to directly inform SimplifyIVUsers 998 // of the new users, because their parent IV will be processed later as a 999 // new loop phi. If we preserved IVUsers analysis, we would also want to 1000 // push the uses of WideDef here. 1001 1002 // No further widening is needed. The deceased [sz]ext had done it for us. 1003 return 0; 1004 } 1005 1006 // Does this user itself evaluate to a recurrence after widening? 1007 const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse); 1008 if (!WideAddRec) { 1009 // This user does not evaluate to a recurence after widening, so don't 1010 // follow it. Instead insert a Trunc to kill off the original use, 1011 // eventually isolating the original narrow IV so it can be removed. 1012 IRBuilder<> Builder(NarrowDefUse); 1013 Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType()); 1014 NarrowUse->replaceUsesOfWith(NarrowDef, Trunc); 1015 return 0; 1016 } 1017 // We assume that block terminators are not SCEVable. We wouldn't want to 1018 // insert a Trunc after a terminator if there happens to be a critical edge. 1019 assert(NarrowUse != NarrowUse->getParent()->getTerminator() && 1020 "SCEV is not expected to evaluate a block terminator"); 1021 1022 // Reuse the IV increment that SCEVExpander created as long as it dominates 1023 // NarrowUse. 1024 Instruction *WideUse = 0; 1025 if (WideAddRec == WideIncExpr && HoistStep(WideInc, NarrowUse, DT)) { 1026 WideUse = WideInc; 1027 } 1028 else { 1029 WideUse = CloneIVUser(NarrowUse, NarrowDef, WideDef); 1030 if (!WideUse) 1031 return 0; 1032 } 1033 // Evaluation of WideAddRec ensured that the narrow expression could be 1034 // extended outside the loop without overflow. This suggests that the wide use 1035 // evaluates to the same expression as the extended narrow use, but doesn't 1036 // absolutely guarantee it. Hence the following failsafe check. In rare cases 1037 // where it fails, we simply throw away the newly created wide use. 1038 if (WideAddRec != SE->getSCEV(WideUse)) { 1039 DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse 1040 << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n"); 1041 DeadInsts.push_back(WideUse); 1042 return 0; 1043 } 1044 1045 // Returning WideUse pushes it on the worklist. 1046 return WideUse; 1047} 1048 1049/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers. 1050/// 1051void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { 1052 for (Value::use_iterator UI = NarrowDef->use_begin(), 1053 UE = NarrowDef->use_end(); UI != UE; ++UI) { 1054 Use &U = UI.getUse(); 1055 1056 // Handle data flow merges and bizarre phi cycles. 1057 if (!Widened.insert(cast<Instruction>(U.getUser()))) 1058 continue; 1059 1060 NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideDef)); 1061 } 1062} 1063 1064/// CreateWideIV - Process a single induction variable. First use the 1065/// SCEVExpander to create a wide induction variable that evaluates to the same 1066/// recurrence as the original narrow IV. Then use a worklist to forward 1067/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all 1068/// interesting IV users, the narrow IV will be isolated for removal by 1069/// DeleteDeadPHIs. 1070/// 1071/// It would be simpler to delete uses as they are processed, but we must avoid 1072/// invalidating SCEV expressions. 1073/// 1074PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { 1075 // Is this phi an induction variable? 1076 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1077 if (!AddRec) 1078 return NULL; 1079 1080 // Widen the induction variable expression. 1081 const SCEV *WideIVExpr = IsSigned ? 1082 SE->getSignExtendExpr(AddRec, WideType) : 1083 SE->getZeroExtendExpr(AddRec, WideType); 1084 1085 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1086 "Expect the new IV expression to preserve its type"); 1087 1088 // Can the IV be extended outside the loop without overflow? 1089 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1090 if (!AddRec || AddRec->getLoop() != L) 1091 return NULL; 1092 1093 // An AddRec must have loop-invariant operands. Since this AddRec is 1094 // materialized by a loop header phi, the expression cannot have any post-loop 1095 // operands, so they must dominate the loop header. 1096 assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1097 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) 1098 && "Loop header phi recurrence inputs do not dominate the loop"); 1099 1100 // The rewriter provides a value for the desired IV expression. This may 1101 // either find an existing phi or materialize a new one. Either way, we 1102 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1103 // of the phi-SCC dominates the loop entry. 1104 Instruction *InsertPt = L->getHeader()->begin(); 1105 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); 1106 1107 // Remembering the WideIV increment generated by SCEVExpander allows 1108 // WidenIVUse to reuse it when widening the narrow IV's increment. We don't 1109 // employ a general reuse mechanism because the call above is the only call to 1110 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1111 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1112 WideInc = 1113 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1114 WideIncExpr = SE->getSCEV(WideInc); 1115 } 1116 1117 DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1118 ++NumWidened; 1119 1120 // Traverse the def-use chain using a worklist starting at the original IV. 1121 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1122 1123 Widened.insert(OrigPhi); 1124 pushNarrowIVUsers(OrigPhi, WidePhi); 1125 1126 while (!NarrowIVUsers.empty()) { 1127 Use *UsePtr; 1128 Instruction *WideDef; 1129 tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val(); 1130 Use &NarrowDefUse = *UsePtr; 1131 1132 // Process a def-use edge. This may replace the use, so don't hold a 1133 // use_iterator across it. 1134 Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get()); 1135 Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef); 1136 1137 // Follow all def-use edges from the previous narrow use. 1138 if (WideUse) 1139 pushNarrowIVUsers(cast<Instruction>(NarrowDefUse.getUser()), WideUse); 1140 1141 // WidenIVUse may have removed the def-use edge. 1142 if (NarrowDef->use_empty()) 1143 DeadInsts.push_back(NarrowDef); 1144 } 1145 return WidePhi; 1146} 1147 1148//===----------------------------------------------------------------------===// 1149// Simplification of IV users based on SCEV evaluation. 1150//===----------------------------------------------------------------------===// 1151 1152void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { 1153 unsigned IVOperIdx = 0; 1154 ICmpInst::Predicate Pred = ICmp->getPredicate(); 1155 if (IVOperand != ICmp->getOperand(0)) { 1156 // Swapped 1157 assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); 1158 IVOperIdx = 1; 1159 Pred = ICmpInst::getSwappedPredicate(Pred); 1160 } 1161 1162 // Get the SCEVs for the ICmp operands. 1163 const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx)); 1164 const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx)); 1165 1166 // Simplify unnecessary loops away. 1167 const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); 1168 S = SE->getSCEVAtScope(S, ICmpLoop); 1169 X = SE->getSCEVAtScope(X, ICmpLoop); 1170 1171 // If the condition is always true or always false, replace it with 1172 // a constant value. 1173 if (SE->isKnownPredicate(Pred, S, X)) 1174 ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); 1175 else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) 1176 ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); 1177 else 1178 return; 1179 1180 DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); 1181 ++NumElimCmp; 1182 Changed = true; 1183 DeadInsts.push_back(ICmp); 1184} 1185 1186void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem, 1187 Value *IVOperand, 1188 bool IsSigned) { 1189 // We're only interested in the case where we know something about 1190 // the numerator. 1191 if (IVOperand != Rem->getOperand(0)) 1192 return; 1193 1194 // Get the SCEVs for the ICmp operands. 1195 const SCEV *S = SE->getSCEV(Rem->getOperand(0)); 1196 const SCEV *X = SE->getSCEV(Rem->getOperand(1)); 1197 1198 // Simplify unnecessary loops away. 1199 const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); 1200 S = SE->getSCEVAtScope(S, ICmpLoop); 1201 X = SE->getSCEVAtScope(X, ICmpLoop); 1202 1203 // i % n --> i if i is in [0,n). 1204 if ((!IsSigned || SE->isKnownNonNegative(S)) && 1205 SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, 1206 S, X)) 1207 Rem->replaceAllUsesWith(Rem->getOperand(0)); 1208 else { 1209 // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). 1210 const SCEV *LessOne = 1211 SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); 1212 if (IsSigned && !SE->isKnownNonNegative(LessOne)) 1213 return; 1214 1215 if (!SE->isKnownPredicate(IsSigned ? 1216 ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, 1217 LessOne, X)) 1218 return; 1219 1220 ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, 1221 Rem->getOperand(0), Rem->getOperand(1), 1222 "tmp"); 1223 SelectInst *Sel = 1224 SelectInst::Create(ICmp, 1225 ConstantInt::get(Rem->getType(), 0), 1226 Rem->getOperand(0), "tmp", Rem); 1227 Rem->replaceAllUsesWith(Sel); 1228 } 1229 1230 // Inform IVUsers about the new users. 1231 if (IU) { 1232 if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) 1233 IU->AddUsersIfInteresting(I); 1234 } 1235 DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); 1236 ++NumElimRem; 1237 Changed = true; 1238 DeadInsts.push_back(Rem); 1239} 1240 1241/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has 1242/// no observable side-effect given the range of IV values. 1243bool IndVarSimplify::EliminateIVUser(Instruction *UseInst, 1244 Instruction *IVOperand) { 1245 if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { 1246 EliminateIVComparison(ICmp, IVOperand); 1247 return true; 1248 } 1249 if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { 1250 bool IsSigned = Rem->getOpcode() == Instruction::SRem; 1251 if (IsSigned || Rem->getOpcode() == Instruction::URem) { 1252 EliminateIVRemainder(Rem, IVOperand, IsSigned); 1253 return true; 1254 } 1255 } 1256 1257 // Eliminate any operation that SCEV can prove is an identity function. 1258 if (!SE->isSCEVable(UseInst->getType()) || 1259 (UseInst->getType() != IVOperand->getType()) || 1260 (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) 1261 return false; 1262 1263 DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); 1264 1265 UseInst->replaceAllUsesWith(IVOperand); 1266 ++NumElimIdentity; 1267 Changed = true; 1268 DeadInsts.push_back(UseInst); 1269 return true; 1270} 1271 1272/// pushIVUsers - Add all uses of Def to the current IV's worklist. 1273/// 1274static void pushIVUsers( 1275 Instruction *Def, 1276 SmallPtrSet<Instruction*,16> &Simplified, 1277 SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { 1278 1279 for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end(); 1280 UI != E; ++UI) { 1281 Instruction *User = cast<Instruction>(*UI); 1282 1283 // Avoid infinite or exponential worklist processing. 1284 // Also ensure unique worklist users. 1285 // If Def is a LoopPhi, it may not be in the Simplified set, so check for 1286 // self edges first. 1287 if (User != Def && Simplified.insert(User)) 1288 SimpleIVUsers.push_back(std::make_pair(User, Def)); 1289 } 1290} 1291 1292/// isSimpleIVUser - Return true if this instruction generates a simple SCEV 1293/// expression in terms of that IV. 1294/// 1295/// This is similar to IVUsers' isInsteresting() but processes each instruction 1296/// non-recursively when the operand is already known to be a simpleIVUser. 1297/// 1298static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { 1299 if (!SE->isSCEVable(I->getType())) 1300 return false; 1301 1302 // Get the symbolic expression for this instruction. 1303 const SCEV *S = SE->getSCEV(I); 1304 1305 // We assume that terminators are not SCEVable. 1306 assert((!S || I != I->getParent()->getTerminator()) && 1307 "can't fold terminators"); 1308 1309 // Only consider affine recurrences. 1310 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S); 1311 if (AR && AR->getLoop() == L) 1312 return true; 1313 1314 return false; 1315} 1316 1317/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist 1318/// of IV users. Each successive simplification may push more users which may 1319/// themselves be candidates for simplification. 1320/// 1321/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it 1322/// simplifies instructions in-place during analysis. Rather than rewriting 1323/// induction variables bottom-up from their users, it transforms a chain of 1324/// IVUsers top-down, updating the IR only when it encouters a clear 1325/// optimization opportunitiy. A SCEVExpander "Rewriter" instance is still 1326/// needed, but only used to generate a new IV (phi) of wider type for sign/zero 1327/// extend elimination. 1328/// 1329/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. 1330/// 1331void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) { 1332 std::map<PHINode *, WideIVInfo> WideIVMap; 1333 1334 SmallVector<PHINode*, 8> LoopPhis; 1335 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1336 LoopPhis.push_back(cast<PHINode>(I)); 1337 } 1338 // Each round of simplification iterates through the SimplifyIVUsers worklist 1339 // for all current phis, then determines whether any IVs can be 1340 // widened. Widening adds new phis to LoopPhis, inducing another round of 1341 // simplification on the wide IVs. 1342 while (!LoopPhis.empty()) { 1343 // Evaluate as many IV expressions as possible before widening any IVs. This 1344 // forces SCEV to set no-wrap flags before evaluating sign/zero 1345 // extension. The first time SCEV attempts to normalize sign/zero extension, 1346 // the result becomes final. So for the most predictable results, we delay 1347 // evaluation of sign/zero extend evaluation until needed, and avoid running 1348 // other SCEV based analysis prior to SimplifyIVUsersNoRewrite. 1349 do { 1350 PHINode *CurrIV = LoopPhis.pop_back_val(); 1351 1352 // Information about sign/zero extensions of CurrIV. 1353 WideIVInfo WI; 1354 1355 // Instructions processed by SimplifyIVUsers for CurrIV. 1356 SmallPtrSet<Instruction*,16> Simplified; 1357 1358 // Use-def pairs if IV users waiting to be processed for CurrIV. 1359 SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; 1360 1361 // Push users of the current LoopPhi. In rare cases, pushIVUsers may be 1362 // called multiple times for the same LoopPhi. This is the proper thing to 1363 // do for loop header phis that use each other. 1364 pushIVUsers(CurrIV, Simplified, SimpleIVUsers); 1365 1366 while (!SimpleIVUsers.empty()) { 1367 Instruction *UseInst, *Operand; 1368 tie(UseInst, Operand) = SimpleIVUsers.pop_back_val(); 1369 // Bypass back edges to avoid extra work. 1370 if (UseInst == CurrIV) continue; 1371 1372 if (EliminateIVUser(UseInst, Operand)) { 1373 pushIVUsers(Operand, Simplified, SimpleIVUsers); 1374 continue; 1375 } 1376 if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) { 1377 bool IsSigned = Cast->getOpcode() == Instruction::SExt; 1378 if (IsSigned || Cast->getOpcode() == Instruction::ZExt) { 1379 CollectExtend(Cast, IsSigned, WI, SE, TD); 1380 } 1381 continue; 1382 } 1383 if (isSimpleIVUser(UseInst, L, SE)) { 1384 pushIVUsers(UseInst, Simplified, SimpleIVUsers); 1385 } 1386 } 1387 if (WI.WidestNativeType) { 1388 WideIVMap[CurrIV] = WI; 1389 } 1390 } while(!LoopPhis.empty()); 1391 1392 for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(), 1393 E = WideIVMap.end(); I != E; ++I) { 1394 WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts); 1395 if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) { 1396 Changed = true; 1397 LoopPhis.push_back(WidePhi); 1398 } 1399 } 1400 WideIVMap.clear(); 1401 } 1402} 1403 1404/// SimplifyCongruentIVs - Check for congruent phis in this loop header and 1405/// populate ExprToIVMap for use later. 1406/// 1407void IndVarSimplify::SimplifyCongruentIVs(Loop *L) { 1408 DenseMap<const SCEV *, PHINode *> ExprToIVMap; 1409 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1410 PHINode *Phi = cast<PHINode>(I); 1411 if (!SE->isSCEVable(Phi->getType())) 1412 continue; 1413 1414 const SCEV *S = SE->getSCEV(Phi); 1415 DenseMap<const SCEV *, PHINode *>::const_iterator Pos; 1416 bool Inserted; 1417 tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi)); 1418 if (Inserted) 1419 continue; 1420 PHINode *OrigPhi = Pos->second; 1421 // Replacing the congruent phi is sufficient because acyclic redundancy 1422 // elimination, CSE/GVN, should handle the rest. However, once SCEV proves 1423 // that a phi is congruent, it's almost certain to be the head of an IV 1424 // user cycle that is isomorphic with the original phi. So it's worth 1425 // eagerly cleaning up the common case of a single IV increment. 1426 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1427 Instruction *OrigInc = 1428 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1429 Instruction *IsomorphicInc = 1430 cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); 1431 if (OrigInc != IsomorphicInc && 1432 SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) && 1433 HoistStep(OrigInc, IsomorphicInc, DT)) { 1434 DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: " 1435 << *IsomorphicInc << '\n'); 1436 IsomorphicInc->replaceAllUsesWith(OrigInc); 1437 DeadInsts.push_back(IsomorphicInc); 1438 } 1439 } 1440 DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); 1441 ++NumElimIV; 1442 Phi->replaceAllUsesWith(OrigPhi); 1443 DeadInsts.push_back(Phi); 1444 } 1445} 1446 1447//===----------------------------------------------------------------------===// 1448// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. 1449//===----------------------------------------------------------------------===// 1450 1451/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken 1452/// count expression can be safely and cheaply expanded into an instruction 1453/// sequence that can be used by LinearFunctionTestReplace. 1454static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { 1455 const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); 1456 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || 1457 BackedgeTakenCount->isZero()) 1458 return false; 1459 1460 if (!L->getExitingBlock()) 1461 return false; 1462 1463 // Can't rewrite non-branch yet. 1464 BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); 1465 if (!BI) 1466 return false; 1467 1468 // Special case: If the backedge-taken count is a UDiv, it's very likely a 1469 // UDiv that ScalarEvolution produced in order to compute a precise 1470 // expression, rather than a UDiv from the user's code. If we can't find a 1471 // UDiv in the code with some simple searching, assume the former and forego 1472 // rewriting the loop. 1473 if (isa<SCEVUDivExpr>(BackedgeTakenCount)) { 1474 ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); 1475 if (!OrigCond) return false; 1476 const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); 1477 R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); 1478 if (R != BackedgeTakenCount) { 1479 const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); 1480 L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); 1481 if (L != BackedgeTakenCount) 1482 return false; 1483 } 1484 } 1485 return true; 1486} 1487 1488/// getBackedgeIVType - Get the widest type used by the loop test after peeking 1489/// through Truncs. 1490/// 1491/// TODO: Unnecessary if LFTR does not force a canonical IV. 1492static Type *getBackedgeIVType(Loop *L) { 1493 if (!L->getExitingBlock()) 1494 return 0; 1495 1496 // Can't rewrite non-branch yet. 1497 BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); 1498 if (!BI) 1499 return 0; 1500 1501 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); 1502 if (!Cond) 1503 return 0; 1504 1505 Type *Ty = 0; 1506 for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end(); 1507 OI != OE; ++OI) { 1508 assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types"); 1509 TruncInst *Trunc = dyn_cast<TruncInst>(*OI); 1510 if (!Trunc) 1511 continue; 1512 1513 return Trunc->getSrcTy(); 1514 } 1515 return Ty; 1516} 1517 1518/// LinearFunctionTestReplace - This method rewrites the exit condition of the 1519/// loop to be a canonical != comparison against the incremented loop induction 1520/// variable. This pass is able to rewrite the exit tests of any loop where the 1521/// SCEV analysis can determine a loop-invariant trip count of the loop, which 1522/// is actually a much broader range than just linear tests. 1523ICmpInst *IndVarSimplify:: 1524LinearFunctionTestReplace(Loop *L, 1525 const SCEV *BackedgeTakenCount, 1526 PHINode *IndVar, 1527 SCEVExpander &Rewriter) { 1528 assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); 1529 BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); 1530 1531 // If the exiting block is not the same as the backedge block, we must compare 1532 // against the preincremented value, otherwise we prefer to compare against 1533 // the post-incremented value. 1534 Value *CmpIndVar; 1535 const SCEV *RHS = BackedgeTakenCount; 1536 if (L->getExitingBlock() == L->getLoopLatch()) { 1537 // Add one to the "backedge-taken" count to get the trip count. 1538 // If this addition may overflow, we have to be more pessimistic and 1539 // cast the induction variable before doing the add. 1540 const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0); 1541 const SCEV *N = 1542 SE->getAddExpr(BackedgeTakenCount, 1543 SE->getConstant(BackedgeTakenCount->getType(), 1)); 1544 if ((isa<SCEVConstant>(N) && !N->isZero()) || 1545 SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { 1546 // No overflow. Cast the sum. 1547 RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType()); 1548 } else { 1549 // Potential overflow. Cast before doing the add. 1550 RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, 1551 IndVar->getType()); 1552 RHS = SE->getAddExpr(RHS, 1553 SE->getConstant(IndVar->getType(), 1)); 1554 } 1555 1556 // The BackedgeTaken expression contains the number of times that the 1557 // backedge branches to the loop header. This is one less than the 1558 // number of times the loop executes, so use the incremented indvar. 1559 CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); 1560 } else { 1561 // We have to use the preincremented value... 1562 RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, 1563 IndVar->getType()); 1564 CmpIndVar = IndVar; 1565 } 1566 1567 // Expand the code for the iteration count. 1568 assert(SE->isLoopInvariant(RHS, L) && 1569 "Computed iteration count is not loop invariant!"); 1570 Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI); 1571 1572 // Insert a new icmp_ne or icmp_eq instruction before the branch. 1573 ICmpInst::Predicate Opcode; 1574 if (L->contains(BI->getSuccessor(0))) 1575 Opcode = ICmpInst::ICMP_NE; 1576 else 1577 Opcode = ICmpInst::ICMP_EQ; 1578 1579 DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 1580 << " LHS:" << *CmpIndVar << '\n' 1581 << " op:\t" 1582 << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" 1583 << " RHS:\t" << *RHS << "\n"); 1584 1585 ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); 1586 Cond->setDebugLoc(BI->getDebugLoc()); 1587 Value *OrigCond = BI->getCondition(); 1588 // It's tempting to use replaceAllUsesWith here to fully replace the old 1589 // comparison, but that's not immediately safe, since users of the old 1590 // comparison may not be dominated by the new comparison. Instead, just 1591 // update the branch to use the new comparison; in the common case this 1592 // will make old comparison dead. 1593 BI->setCondition(Cond); 1594 DeadInsts.push_back(OrigCond); 1595 1596 ++NumLFTR; 1597 Changed = true; 1598 return Cond; 1599} 1600 1601//===----------------------------------------------------------------------===// 1602// SinkUnusedInvariants. A late subpass to cleanup loop preheaders. 1603//===----------------------------------------------------------------------===// 1604 1605/// If there's a single exit block, sink any loop-invariant values that 1606/// were defined in the preheader but not used inside the loop into the 1607/// exit block to reduce register pressure in the loop. 1608void IndVarSimplify::SinkUnusedInvariants(Loop *L) { 1609 BasicBlock *ExitBlock = L->getExitBlock(); 1610 if (!ExitBlock) return; 1611 1612 BasicBlock *Preheader = L->getLoopPreheader(); 1613 if (!Preheader) return; 1614 1615 Instruction *InsertPt = ExitBlock->getFirstNonPHI(); 1616 BasicBlock::iterator I = Preheader->getTerminator(); 1617 while (I != Preheader->begin()) { 1618 --I; 1619 // New instructions were inserted at the end of the preheader. 1620 if (isa<PHINode>(I)) 1621 break; 1622 1623 // Don't move instructions which might have side effects, since the side 1624 // effects need to complete before instructions inside the loop. Also don't 1625 // move instructions which might read memory, since the loop may modify 1626 // memory. Note that it's okay if the instruction might have undefined 1627 // behavior: LoopSimplify guarantees that the preheader dominates the exit 1628 // block. 1629 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 1630 continue; 1631 1632 // Skip debug info intrinsics. 1633 if (isa<DbgInfoIntrinsic>(I)) 1634 continue; 1635 1636 // Don't sink static AllocaInsts out of the entry block, which would 1637 // turn them into dynamic allocas! 1638 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) 1639 if (AI->isStaticAlloca()) 1640 continue; 1641 1642 // Determine if there is a use in or before the loop (direct or 1643 // otherwise). 1644 bool UsedInLoop = false; 1645 for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); 1646 UI != UE; ++UI) { 1647 User *U = *UI; 1648 BasicBlock *UseBB = cast<Instruction>(U)->getParent(); 1649 if (PHINode *P = dyn_cast<PHINode>(U)) { 1650 unsigned i = 1651 PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); 1652 UseBB = P->getIncomingBlock(i); 1653 } 1654 if (UseBB == Preheader || L->contains(UseBB)) { 1655 UsedInLoop = true; 1656 break; 1657 } 1658 } 1659 1660 // If there is, the def must remain in the preheader. 1661 if (UsedInLoop) 1662 continue; 1663 1664 // Otherwise, sink it to the exit block. 1665 Instruction *ToMove = I; 1666 bool Done = false; 1667 1668 if (I != Preheader->begin()) { 1669 // Skip debug info intrinsics. 1670 do { 1671 --I; 1672 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 1673 1674 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 1675 Done = true; 1676 } else { 1677 Done = true; 1678 } 1679 1680 ToMove->moveBefore(InsertPt); 1681 if (Done) break; 1682 InsertPt = ToMove; 1683 } 1684} 1685 1686//===----------------------------------------------------------------------===// 1687// IndVarSimplify driver. Manage several subpasses of IV simplification. 1688//===----------------------------------------------------------------------===// 1689 1690bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { 1691 // If LoopSimplify form is not available, stay out of trouble. Some notes: 1692 // - LSR currently only supports LoopSimplify-form loops. Indvars' 1693 // canonicalization can be a pessimization without LSR to "clean up" 1694 // afterwards. 1695 // - We depend on having a preheader; in particular, 1696 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 1697 // and we're in trouble if we can't find the induction variable even when 1698 // we've manually inserted one. 1699 if (!L->isLoopSimplifyForm()) 1700 return false; 1701 1702 if (!DisableIVRewrite) 1703 IU = &getAnalysis<IVUsers>(); 1704 LI = &getAnalysis<LoopInfo>(); 1705 SE = &getAnalysis<ScalarEvolution>(); 1706 DT = &getAnalysis<DominatorTree>(); 1707 TD = getAnalysisIfAvailable<TargetData>(); 1708 1709 DeadInsts.clear(); 1710 Changed = false; 1711 1712 // If there are any floating-point recurrences, attempt to 1713 // transform them to use integer recurrences. 1714 RewriteNonIntegerIVs(L); 1715 1716 const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); 1717 1718 // Create a rewriter object which we'll use to transform the code with. 1719 SCEVExpander Rewriter(*SE, "indvars"); 1720 1721 // Eliminate redundant IV users. 1722 // 1723 // Simplification works best when run before other consumers of SCEV. We 1724 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 1725 // other expressions involving loop IVs have been evaluated. This helps SCEV 1726 // set no-wrap flags before normalizing sign/zero extension. 1727 if (DisableIVRewrite) { 1728 Rewriter.disableCanonicalMode(); 1729 SimplifyIVUsersNoRewrite(L, Rewriter); 1730 } 1731 1732 // Check to see if this loop has a computable loop-invariant execution count. 1733 // If so, this means that we can compute the final value of any expressions 1734 // that are recurrent in the loop, and substitute the exit values from the 1735 // loop into any instructions outside of the loop that use the final values of 1736 // the current expressions. 1737 // 1738 if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) 1739 RewriteLoopExitValues(L, Rewriter); 1740 1741 // Eliminate redundant IV users. 1742 if (!DisableIVRewrite) 1743 SimplifyIVUsers(Rewriter); 1744 1745 // Eliminate redundant IV cycles. 1746 if (DisableIVRewrite) 1747 SimplifyCongruentIVs(L); 1748 1749 // Compute the type of the largest recurrence expression, and decide whether 1750 // a canonical induction variable should be inserted. 1751 Type *LargestType = 0; 1752 bool NeedCannIV = false; 1753 bool ExpandBECount = canExpandBackedgeTakenCount(L, SE); 1754 if (ExpandBECount) { 1755 // If we have a known trip count and a single exit block, we'll be 1756 // rewriting the loop exit test condition below, which requires a 1757 // canonical induction variable. 1758 NeedCannIV = true; 1759 Type *Ty = BackedgeTakenCount->getType(); 1760 if (DisableIVRewrite) { 1761 // In this mode, SimplifyIVUsers may have already widened the IV used by 1762 // the backedge test and inserted a Trunc on the compare's operand. Get 1763 // the wider type to avoid creating a redundant narrow IV only used by the 1764 // loop test. 1765 LargestType = getBackedgeIVType(L); 1766 } 1767 if (!LargestType || 1768 SE->getTypeSizeInBits(Ty) > 1769 SE->getTypeSizeInBits(LargestType)) 1770 LargestType = SE->getEffectiveSCEVType(Ty); 1771 } 1772 if (!DisableIVRewrite) { 1773 for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { 1774 NeedCannIV = true; 1775 Type *Ty = 1776 SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); 1777 if (!LargestType || 1778 SE->getTypeSizeInBits(Ty) > 1779 SE->getTypeSizeInBits(LargestType)) 1780 LargestType = Ty; 1781 } 1782 } 1783 1784 // Now that we know the largest of the induction variable expressions 1785 // in this loop, insert a canonical induction variable of the largest size. 1786 PHINode *IndVar = 0; 1787 if (NeedCannIV) { 1788 // Check to see if the loop already has any canonical-looking induction 1789 // variables. If any are present and wider than the planned canonical 1790 // induction variable, temporarily remove them, so that the Rewriter 1791 // doesn't attempt to reuse them. 1792 SmallVector<PHINode *, 2> OldCannIVs; 1793 while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) { 1794 if (SE->getTypeSizeInBits(OldCannIV->getType()) > 1795 SE->getTypeSizeInBits(LargestType)) 1796 OldCannIV->removeFromParent(); 1797 else 1798 break; 1799 OldCannIVs.push_back(OldCannIV); 1800 } 1801 1802 IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType); 1803 1804 ++NumInserted; 1805 Changed = true; 1806 DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n'); 1807 1808 // Now that the official induction variable is established, reinsert 1809 // any old canonical-looking variables after it so that the IR remains 1810 // consistent. They will be deleted as part of the dead-PHI deletion at 1811 // the end of the pass. 1812 while (!OldCannIVs.empty()) { 1813 PHINode *OldCannIV = OldCannIVs.pop_back_val(); 1814 OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI()); 1815 } 1816 } 1817 1818 // If we have a trip count expression, rewrite the loop's exit condition 1819 // using it. We can currently only handle loops with a single exit. 1820 ICmpInst *NewICmp = 0; 1821 if (ExpandBECount) { 1822 assert(canExpandBackedgeTakenCount(L, SE) && 1823 "canonical IV disrupted BackedgeTaken expansion"); 1824 assert(NeedCannIV && 1825 "LinearFunctionTestReplace requires a canonical induction variable"); 1826 // Check preconditions for proper SCEVExpander operation. SCEV does not 1827 // express SCEVExpander's dependencies, such as LoopSimplify. Instead any 1828 // pass that uses the SCEVExpander must do it. This does not work well for 1829 // loop passes because SCEVExpander makes assumptions about all loops, while 1830 // LoopPassManager only forces the current loop to be simplified. 1831 // 1832 // FIXME: SCEV expansion has no way to bail out, so the caller must 1833 // explicitly check any assumptions made by SCEV. Brittle. 1834 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount); 1835 if (!AR || AR->getLoop()->getLoopPreheader()) 1836 NewICmp = 1837 LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter); 1838 } 1839 // Rewrite IV-derived expressions. 1840 if (!DisableIVRewrite) 1841 RewriteIVExpressions(L, Rewriter); 1842 1843 // Clear the rewriter cache, because values that are in the rewriter's cache 1844 // can be deleted in the loop below, causing the AssertingVH in the cache to 1845 // trigger. 1846 Rewriter.clear(); 1847 1848 // Now that we're done iterating through lists, clean up any instructions 1849 // which are now dead. 1850 while (!DeadInsts.empty()) 1851 if (Instruction *Inst = 1852 dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) 1853 RecursivelyDeleteTriviallyDeadInstructions(Inst); 1854 1855 // The Rewriter may not be used from this point on. 1856 1857 // Loop-invariant instructions in the preheader that aren't used in the 1858 // loop may be sunk below the loop to reduce register pressure. 1859 SinkUnusedInvariants(L); 1860 1861 // For completeness, inform IVUsers of the IV use in the newly-created 1862 // loop exit test instruction. 1863 if (NewICmp && IU) 1864 IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0))); 1865 1866 // Clean up dead instructions. 1867 Changed |= DeleteDeadPHIs(L->getHeader()); 1868 // Check a post-condition. 1869 assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!"); 1870 return Changed; 1871} 1872