1//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This transformation analyzes and transforms the induction variables (and
11// computations derived from them) into simpler forms suitable for subsequent
12// analysis and transformation.
13//
14// If the trip count of a loop is computable, this pass also makes the following
15// changes:
16//   1. The exit condition for the loop is canonicalized to compare the
17//      induction value against the exit value.  This turns loops like:
18//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
19//   2. Any use outside of the loop of an expression derived from the indvar
20//      is changed to compute the derived value outside of the loop, eliminating
21//      the dependence on the exit value of the induction variable.  If the only
22//      purpose of the loop is to compute the exit value of some derived
23//      expression, this transformation will make the loop dead.
24//
25//===----------------------------------------------------------------------===//
26
27#define DEBUG_TYPE "indvars"
28#include "llvm/Transforms/Scalar.h"
29#include "llvm/BasicBlock.h"
30#include "llvm/Constants.h"
31#include "llvm/Instructions.h"
32#include "llvm/IntrinsicInst.h"
33#include "llvm/LLVMContext.h"
34#include "llvm/Type.h"
35#include "llvm/Analysis/Dominators.h"
36#include "llvm/Analysis/IVUsers.h"
37#include "llvm/Analysis/ScalarEvolutionExpander.h"
38#include "llvm/Analysis/LoopInfo.h"
39#include "llvm/Analysis/LoopPass.h"
40#include "llvm/Support/CFG.h"
41#include "llvm/Support/CommandLine.h"
42#include "llvm/Support/Debug.h"
43#include "llvm/Support/raw_ostream.h"
44#include "llvm/Transforms/Utils/Local.h"
45#include "llvm/Transforms/Utils/BasicBlockUtils.h"
46#include "llvm/Transforms/Utils/SimplifyIndVar.h"
47#include "llvm/Target/TargetData.h"
48#include "llvm/ADT/DenseMap.h"
49#include "llvm/ADT/SmallVector.h"
50#include "llvm/ADT/Statistic.h"
51using namespace llvm;
52
53STATISTIC(NumRemoved     , "Number of aux indvars removed");
54STATISTIC(NumWidened     , "Number of indvars widened");
55STATISTIC(NumInserted    , "Number of canonical indvars added");
56STATISTIC(NumReplaced    , "Number of exit values replaced");
57STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
58STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
59STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
60
61namespace llvm {
62  cl::opt<bool> EnableIVRewrite(
63    "enable-iv-rewrite", cl::Hidden,
64    cl::desc("Enable canonical induction variable rewriting"));
65
66  // Trip count verification can be enabled by default under NDEBUG if we
67  // implement a strong expression equivalence checker in SCEV. Until then, we
68  // use the verify-indvars flag, which may assert in some cases.
69  cl::opt<bool> VerifyIndvars(
70    "verify-indvars", cl::Hidden,
71    cl::desc("Verify the ScalarEvolution result after running indvars"));
72}
73
74namespace {
75  class IndVarSimplify : public LoopPass {
76    IVUsers         *IU;
77    LoopInfo        *LI;
78    ScalarEvolution *SE;
79    DominatorTree   *DT;
80    TargetData      *TD;
81
82    SmallVector<WeakVH, 16> DeadInsts;
83    bool Changed;
84  public:
85
86    static char ID; // Pass identification, replacement for typeid
87    IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
88                       Changed(false) {
89      initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
90    }
91
92    virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
93
94    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
95      AU.addRequired<DominatorTree>();
96      AU.addRequired<LoopInfo>();
97      AU.addRequired<ScalarEvolution>();
98      AU.addRequiredID(LoopSimplifyID);
99      AU.addRequiredID(LCSSAID);
100      if (EnableIVRewrite)
101        AU.addRequired<IVUsers>();
102      AU.addPreserved<ScalarEvolution>();
103      AU.addPreservedID(LoopSimplifyID);
104      AU.addPreservedID(LCSSAID);
105      if (EnableIVRewrite)
106        AU.addPreserved<IVUsers>();
107      AU.setPreservesCFG();
108    }
109
110  private:
111    virtual void releaseMemory() {
112      DeadInsts.clear();
113    }
114
115    bool isValidRewrite(Value *FromVal, Value *ToVal);
116
117    void HandleFloatingPointIV(Loop *L, PHINode *PH);
118    void RewriteNonIntegerIVs(Loop *L);
119
120    void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
121
122    void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
123
124    void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
125
126    Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
127                                     PHINode *IndVar, SCEVExpander &Rewriter);
128
129    void SinkUnusedInvariants(Loop *L);
130  };
131}
132
133char IndVarSimplify::ID = 0;
134INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
135                "Induction Variable Simplification", false, false)
136INITIALIZE_PASS_DEPENDENCY(DominatorTree)
137INITIALIZE_PASS_DEPENDENCY(LoopInfo)
138INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
139INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
140INITIALIZE_PASS_DEPENDENCY(LCSSA)
141INITIALIZE_PASS_DEPENDENCY(IVUsers)
142INITIALIZE_PASS_END(IndVarSimplify, "indvars",
143                "Induction Variable Simplification", false, false)
144
145Pass *llvm::createIndVarSimplifyPass() {
146  return new IndVarSimplify();
147}
148
149/// isValidRewrite - Return true if the SCEV expansion generated by the
150/// rewriter can replace the original value. SCEV guarantees that it
151/// produces the same value, but the way it is produced may be illegal IR.
152/// Ideally, this function will only be called for verification.
153bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
154  // If an SCEV expression subsumed multiple pointers, its expansion could
155  // reassociate the GEP changing the base pointer. This is illegal because the
156  // final address produced by a GEP chain must be inbounds relative to its
157  // underlying object. Otherwise basic alias analysis, among other things,
158  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
159  // producing an expression involving multiple pointers. Until then, we must
160  // bail out here.
161  //
162  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
163  // because it understands lcssa phis while SCEV does not.
164  Value *FromPtr = FromVal;
165  Value *ToPtr = ToVal;
166  if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
167    FromPtr = GEP->getPointerOperand();
168  }
169  if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
170    ToPtr = GEP->getPointerOperand();
171  }
172  if (FromPtr != FromVal || ToPtr != ToVal) {
173    // Quickly check the common case
174    if (FromPtr == ToPtr)
175      return true;
176
177    // SCEV may have rewritten an expression that produces the GEP's pointer
178    // operand. That's ok as long as the pointer operand has the same base
179    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
180    // base of a recurrence. This handles the case in which SCEV expansion
181    // converts a pointer type recurrence into a nonrecurrent pointer base
182    // indexed by an integer recurrence.
183    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
184    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
185    if (FromBase == ToBase)
186      return true;
187
188    DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
189          << *FromBase << " != " << *ToBase << "\n");
190
191    return false;
192  }
193  return true;
194}
195
196/// Determine the insertion point for this user. By default, insert immediately
197/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
198/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
199/// common dominator for the incoming blocks.
200static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
201                                          DominatorTree *DT) {
202  PHINode *PHI = dyn_cast<PHINode>(User);
203  if (!PHI)
204    return User;
205
206  Instruction *InsertPt = 0;
207  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
208    if (PHI->getIncomingValue(i) != Def)
209      continue;
210
211    BasicBlock *InsertBB = PHI->getIncomingBlock(i);
212    if (!InsertPt) {
213      InsertPt = InsertBB->getTerminator();
214      continue;
215    }
216    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
217    InsertPt = InsertBB->getTerminator();
218  }
219  assert(InsertPt && "Missing phi operand");
220  assert((!isa<Instruction>(Def) ||
221          DT->dominates(cast<Instruction>(Def), InsertPt)) &&
222         "def does not dominate all uses");
223  return InsertPt;
224}
225
226//===----------------------------------------------------------------------===//
227// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
228//===----------------------------------------------------------------------===//
229
230/// ConvertToSInt - Convert APF to an integer, if possible.
231static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
232  bool isExact = false;
233  if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
234    return false;
235  // See if we can convert this to an int64_t
236  uint64_t UIntVal;
237  if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
238                           &isExact) != APFloat::opOK || !isExact)
239    return false;
240  IntVal = UIntVal;
241  return true;
242}
243
244/// HandleFloatingPointIV - If the loop has floating induction variable
245/// then insert corresponding integer induction variable if possible.
246/// For example,
247/// for(double i = 0; i < 10000; ++i)
248///   bar(i)
249/// is converted into
250/// for(int i = 0; i < 10000; ++i)
251///   bar((double)i);
252///
253void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
254  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
255  unsigned BackEdge     = IncomingEdge^1;
256
257  // Check incoming value.
258  ConstantFP *InitValueVal =
259    dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
260
261  int64_t InitValue;
262  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
263    return;
264
265  // Check IV increment. Reject this PN if increment operation is not
266  // an add or increment value can not be represented by an integer.
267  BinaryOperator *Incr =
268    dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
269  if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
270
271  // If this is not an add of the PHI with a constantfp, or if the constant fp
272  // is not an integer, bail out.
273  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
274  int64_t IncValue;
275  if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
276      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
277    return;
278
279  // Check Incr uses. One user is PN and the other user is an exit condition
280  // used by the conditional terminator.
281  Value::use_iterator IncrUse = Incr->use_begin();
282  Instruction *U1 = cast<Instruction>(*IncrUse++);
283  if (IncrUse == Incr->use_end()) return;
284  Instruction *U2 = cast<Instruction>(*IncrUse++);
285  if (IncrUse != Incr->use_end()) return;
286
287  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
288  // only used by a branch, we can't transform it.
289  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
290  if (!Compare)
291    Compare = dyn_cast<FCmpInst>(U2);
292  if (Compare == 0 || !Compare->hasOneUse() ||
293      !isa<BranchInst>(Compare->use_back()))
294    return;
295
296  BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
297
298  // We need to verify that the branch actually controls the iteration count
299  // of the loop.  If not, the new IV can overflow and no one will notice.
300  // The branch block must be in the loop and one of the successors must be out
301  // of the loop.
302  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
303  if (!L->contains(TheBr->getParent()) ||
304      (L->contains(TheBr->getSuccessor(0)) &&
305       L->contains(TheBr->getSuccessor(1))))
306    return;
307
308
309  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
310  // transform it.
311  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
312  int64_t ExitValue;
313  if (ExitValueVal == 0 ||
314      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
315    return;
316
317  // Find new predicate for integer comparison.
318  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
319  switch (Compare->getPredicate()) {
320  default: return;  // Unknown comparison.
321  case CmpInst::FCMP_OEQ:
322  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
323  case CmpInst::FCMP_ONE:
324  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
325  case CmpInst::FCMP_OGT:
326  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
327  case CmpInst::FCMP_OGE:
328  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
329  case CmpInst::FCMP_OLT:
330  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
331  case CmpInst::FCMP_OLE:
332  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
333  }
334
335  // We convert the floating point induction variable to a signed i32 value if
336  // we can.  This is only safe if the comparison will not overflow in a way
337  // that won't be trapped by the integer equivalent operations.  Check for this
338  // now.
339  // TODO: We could use i64 if it is native and the range requires it.
340
341  // The start/stride/exit values must all fit in signed i32.
342  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
343    return;
344
345  // If not actually striding (add x, 0.0), avoid touching the code.
346  if (IncValue == 0)
347    return;
348
349  // Positive and negative strides have different safety conditions.
350  if (IncValue > 0) {
351    // If we have a positive stride, we require the init to be less than the
352    // exit value.
353    if (InitValue >= ExitValue)
354      return;
355
356    uint32_t Range = uint32_t(ExitValue-InitValue);
357    // Check for infinite loop, either:
358    // while (i <= Exit) or until (i > Exit)
359    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
360      if (++Range == 0) return;  // Range overflows.
361    }
362
363    unsigned Leftover = Range % uint32_t(IncValue);
364
365    // If this is an equality comparison, we require that the strided value
366    // exactly land on the exit value, otherwise the IV condition will wrap
367    // around and do things the fp IV wouldn't.
368    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
369        Leftover != 0)
370      return;
371
372    // If the stride would wrap around the i32 before exiting, we can't
373    // transform the IV.
374    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
375      return;
376
377  } else {
378    // If we have a negative stride, we require the init to be greater than the
379    // exit value.
380    if (InitValue <= ExitValue)
381      return;
382
383    uint32_t Range = uint32_t(InitValue-ExitValue);
384    // Check for infinite loop, either:
385    // while (i >= Exit) or until (i < Exit)
386    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
387      if (++Range == 0) return;  // Range overflows.
388    }
389
390    unsigned Leftover = Range % uint32_t(-IncValue);
391
392    // If this is an equality comparison, we require that the strided value
393    // exactly land on the exit value, otherwise the IV condition will wrap
394    // around and do things the fp IV wouldn't.
395    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
396        Leftover != 0)
397      return;
398
399    // If the stride would wrap around the i32 before exiting, we can't
400    // transform the IV.
401    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
402      return;
403  }
404
405  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
406
407  // Insert new integer induction variable.
408  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
409  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
410                      PN->getIncomingBlock(IncomingEdge));
411
412  Value *NewAdd =
413    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
414                              Incr->getName()+".int", Incr);
415  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
416
417  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
418                                      ConstantInt::get(Int32Ty, ExitValue),
419                                      Compare->getName());
420
421  // In the following deletions, PN may become dead and may be deleted.
422  // Use a WeakVH to observe whether this happens.
423  WeakVH WeakPH = PN;
424
425  // Delete the old floating point exit comparison.  The branch starts using the
426  // new comparison.
427  NewCompare->takeName(Compare);
428  Compare->replaceAllUsesWith(NewCompare);
429  RecursivelyDeleteTriviallyDeadInstructions(Compare);
430
431  // Delete the old floating point increment.
432  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
433  RecursivelyDeleteTriviallyDeadInstructions(Incr);
434
435  // If the FP induction variable still has uses, this is because something else
436  // in the loop uses its value.  In order to canonicalize the induction
437  // variable, we chose to eliminate the IV and rewrite it in terms of an
438  // int->fp cast.
439  //
440  // We give preference to sitofp over uitofp because it is faster on most
441  // platforms.
442  if (WeakPH) {
443    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
444                                 PN->getParent()->getFirstInsertionPt());
445    PN->replaceAllUsesWith(Conv);
446    RecursivelyDeleteTriviallyDeadInstructions(PN);
447  }
448
449  // Add a new IVUsers entry for the newly-created integer PHI.
450  if (IU)
451    IU->AddUsersIfInteresting(NewPHI);
452
453  Changed = true;
454}
455
456void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
457  // First step.  Check to see if there are any floating-point recurrences.
458  // If there are, change them into integer recurrences, permitting analysis by
459  // the SCEV routines.
460  //
461  BasicBlock *Header = L->getHeader();
462
463  SmallVector<WeakVH, 8> PHIs;
464  for (BasicBlock::iterator I = Header->begin();
465       PHINode *PN = dyn_cast<PHINode>(I); ++I)
466    PHIs.push_back(PN);
467
468  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
469    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
470      HandleFloatingPointIV(L, PN);
471
472  // If the loop previously had floating-point IV, ScalarEvolution
473  // may not have been able to compute a trip count. Now that we've done some
474  // re-writing, the trip count may be computable.
475  if (Changed)
476    SE->forgetLoop(L);
477}
478
479//===----------------------------------------------------------------------===//
480// RewriteLoopExitValues - Optimize IV users outside the loop.
481// As a side effect, reduces the amount of IV processing within the loop.
482//===----------------------------------------------------------------------===//
483
484/// RewriteLoopExitValues - Check to see if this loop has a computable
485/// loop-invariant execution count.  If so, this means that we can compute the
486/// final value of any expressions that are recurrent in the loop, and
487/// substitute the exit values from the loop into any instructions outside of
488/// the loop that use the final values of the current expressions.
489///
490/// This is mostly redundant with the regular IndVarSimplify activities that
491/// happen later, except that it's more powerful in some cases, because it's
492/// able to brute-force evaluate arbitrary instructions as long as they have
493/// constant operands at the beginning of the loop.
494void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
495  // Verify the input to the pass in already in LCSSA form.
496  assert(L->isLCSSAForm(*DT));
497
498  SmallVector<BasicBlock*, 8> ExitBlocks;
499  L->getUniqueExitBlocks(ExitBlocks);
500
501  // Find all values that are computed inside the loop, but used outside of it.
502  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
503  // the exit blocks of the loop to find them.
504  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
505    BasicBlock *ExitBB = ExitBlocks[i];
506
507    // If there are no PHI nodes in this exit block, then no values defined
508    // inside the loop are used on this path, skip it.
509    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
510    if (!PN) continue;
511
512    unsigned NumPreds = PN->getNumIncomingValues();
513
514    // Iterate over all of the PHI nodes.
515    BasicBlock::iterator BBI = ExitBB->begin();
516    while ((PN = dyn_cast<PHINode>(BBI++))) {
517      if (PN->use_empty())
518        continue; // dead use, don't replace it
519
520      // SCEV only supports integer expressions for now.
521      if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
522        continue;
523
524      // It's necessary to tell ScalarEvolution about this explicitly so that
525      // it can walk the def-use list and forget all SCEVs, as it may not be
526      // watching the PHI itself. Once the new exit value is in place, there
527      // may not be a def-use connection between the loop and every instruction
528      // which got a SCEVAddRecExpr for that loop.
529      SE->forgetValue(PN);
530
531      // Iterate over all of the values in all the PHI nodes.
532      for (unsigned i = 0; i != NumPreds; ++i) {
533        // If the value being merged in is not integer or is not defined
534        // in the loop, skip it.
535        Value *InVal = PN->getIncomingValue(i);
536        if (!isa<Instruction>(InVal))
537          continue;
538
539        // If this pred is for a subloop, not L itself, skip it.
540        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
541          continue; // The Block is in a subloop, skip it.
542
543        // Check that InVal is defined in the loop.
544        Instruction *Inst = cast<Instruction>(InVal);
545        if (!L->contains(Inst))
546          continue;
547
548        // Okay, this instruction has a user outside of the current loop
549        // and varies predictably *inside* the loop.  Evaluate the value it
550        // contains when the loop exits, if possible.
551        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
552        if (!SE->isLoopInvariant(ExitValue, L))
553          continue;
554
555        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
556
557        DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
558                     << "  LoopVal = " << *Inst << "\n");
559
560        if (!isValidRewrite(Inst, ExitVal)) {
561          DeadInsts.push_back(ExitVal);
562          continue;
563        }
564        Changed = true;
565        ++NumReplaced;
566
567        PN->setIncomingValue(i, ExitVal);
568
569        // If this instruction is dead now, delete it.
570        RecursivelyDeleteTriviallyDeadInstructions(Inst);
571
572        if (NumPreds == 1) {
573          // Completely replace a single-pred PHI. This is safe, because the
574          // NewVal won't be variant in the loop, so we don't need an LCSSA phi
575          // node anymore.
576          PN->replaceAllUsesWith(ExitVal);
577          RecursivelyDeleteTriviallyDeadInstructions(PN);
578        }
579      }
580      if (NumPreds != 1) {
581        // Clone the PHI and delete the original one. This lets IVUsers and
582        // any other maps purge the original user from their records.
583        PHINode *NewPN = cast<PHINode>(PN->clone());
584        NewPN->takeName(PN);
585        NewPN->insertBefore(PN);
586        PN->replaceAllUsesWith(NewPN);
587        PN->eraseFromParent();
588      }
589    }
590  }
591
592  // The insertion point instruction may have been deleted; clear it out
593  // so that the rewriter doesn't trip over it later.
594  Rewriter.clearInsertPoint();
595}
596
597//===----------------------------------------------------------------------===//
598//  Rewrite IV users based on a canonical IV.
599//  Only for use with -enable-iv-rewrite.
600//===----------------------------------------------------------------------===//
601
602/// FIXME: It is an extremely bad idea to indvar substitute anything more
603/// complex than affine induction variables.  Doing so will put expensive
604/// polynomial evaluations inside of the loop, and the str reduction pass
605/// currently can only reduce affine polynomials.  For now just disable
606/// indvar subst on anything more complex than an affine addrec, unless
607/// it can be expanded to a trivial value.
608static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
609  // Loop-invariant values are safe.
610  if (SE->isLoopInvariant(S, L)) return true;
611
612  // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
613  // to transform them into efficient code.
614  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
615    return AR->isAffine();
616
617  // An add is safe it all its operands are safe.
618  if (const SCEVCommutativeExpr *Commutative
619      = dyn_cast<SCEVCommutativeExpr>(S)) {
620    for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
621         E = Commutative->op_end(); I != E; ++I)
622      if (!isSafe(*I, L, SE)) return false;
623    return true;
624  }
625
626  // A cast is safe if its operand is.
627  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
628    return isSafe(C->getOperand(), L, SE);
629
630  // A udiv is safe if its operands are.
631  if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
632    return isSafe(UD->getLHS(), L, SE) &&
633           isSafe(UD->getRHS(), L, SE);
634
635  // SCEVUnknown is always safe.
636  if (isa<SCEVUnknown>(S))
637    return true;
638
639  // Nothing else is safe.
640  return false;
641}
642
643void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
644  // Rewrite all induction variable expressions in terms of the canonical
645  // induction variable.
646  //
647  // If there were induction variables of other sizes or offsets, manually
648  // add the offsets to the primary induction variable and cast, avoiding
649  // the need for the code evaluation methods to insert induction variables
650  // of different sizes.
651  for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
652    Value *Op = UI->getOperandValToReplace();
653    Type *UseTy = Op->getType();
654    Instruction *User = UI->getUser();
655
656    // Compute the final addrec to expand into code.
657    const SCEV *AR = IU->getReplacementExpr(*UI);
658
659    // Evaluate the expression out of the loop, if possible.
660    if (!L->contains(UI->getUser())) {
661      const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
662      if (SE->isLoopInvariant(ExitVal, L))
663        AR = ExitVal;
664    }
665
666    // FIXME: It is an extremely bad idea to indvar substitute anything more
667    // complex than affine induction variables.  Doing so will put expensive
668    // polynomial evaluations inside of the loop, and the str reduction pass
669    // currently can only reduce affine polynomials.  For now just disable
670    // indvar subst on anything more complex than an affine addrec, unless
671    // it can be expanded to a trivial value.
672    if (!isSafe(AR, L, SE))
673      continue;
674
675    // Determine the insertion point for this user. By default, insert
676    // immediately before the user. The SCEVExpander class will automatically
677    // hoist loop invariants out of the loop. For PHI nodes, there may be
678    // multiple uses, so compute the nearest common dominator for the
679    // incoming blocks.
680    Instruction *InsertPt = getInsertPointForUses(User, Op, DT);
681
682    // Now expand it into actual Instructions and patch it into place.
683    Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
684
685    DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
686                 << "   into = " << *NewVal << "\n");
687
688    if (!isValidRewrite(Op, NewVal)) {
689      DeadInsts.push_back(NewVal);
690      continue;
691    }
692    // Inform ScalarEvolution that this value is changing. The change doesn't
693    // affect its value, but it does potentially affect which use lists the
694    // value will be on after the replacement, which affects ScalarEvolution's
695    // ability to walk use lists and drop dangling pointers when a value is
696    // deleted.
697    SE->forgetValue(User);
698
699    // Patch the new value into place.
700    if (Op->hasName())
701      NewVal->takeName(Op);
702    if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
703      NewValI->setDebugLoc(User->getDebugLoc());
704    User->replaceUsesOfWith(Op, NewVal);
705    UI->setOperandValToReplace(NewVal);
706
707    ++NumRemoved;
708    Changed = true;
709
710    // The old value may be dead now.
711    DeadInsts.push_back(Op);
712  }
713}
714
715//===----------------------------------------------------------------------===//
716//  IV Widening - Extend the width of an IV to cover its widest uses.
717//===----------------------------------------------------------------------===//
718
719namespace {
720  // Collect information about induction variables that are used by sign/zero
721  // extend operations. This information is recorded by CollectExtend and
722  // provides the input to WidenIV.
723  struct WideIVInfo {
724    PHINode *NarrowIV;
725    Type *WidestNativeType; // Widest integer type created [sz]ext
726    bool IsSigned;          // Was an sext user seen before a zext?
727
728    WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
729  };
730
731  class WideIVVisitor : public IVVisitor {
732    ScalarEvolution *SE;
733    const TargetData *TD;
734
735  public:
736    WideIVInfo WI;
737
738    WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
739                  const TargetData *TData) :
740      SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
741
742    // Implement the interface used by simplifyUsersOfIV.
743    virtual void visitCast(CastInst *Cast);
744  };
745}
746
747/// visitCast - Update information about the induction variable that is
748/// extended by this sign or zero extend operation. This is used to determine
749/// the final width of the IV before actually widening it.
750void WideIVVisitor::visitCast(CastInst *Cast) {
751  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
752  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
753    return;
754
755  Type *Ty = Cast->getType();
756  uint64_t Width = SE->getTypeSizeInBits(Ty);
757  if (TD && !TD->isLegalInteger(Width))
758    return;
759
760  if (!WI.WidestNativeType) {
761    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
762    WI.IsSigned = IsSigned;
763    return;
764  }
765
766  // We extend the IV to satisfy the sign of its first user, arbitrarily.
767  if (WI.IsSigned != IsSigned)
768    return;
769
770  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
771    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
772}
773
774namespace {
775
776/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the
777/// WideIV that computes the same value as the Narrow IV def.  This avoids
778/// caching Use* pointers.
779struct NarrowIVDefUse {
780  Instruction *NarrowDef;
781  Instruction *NarrowUse;
782  Instruction *WideDef;
783
784  NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {}
785
786  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
787    NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
788};
789
790/// WidenIV - The goal of this transform is to remove sign and zero extends
791/// without creating any new induction variables. To do this, it creates a new
792/// phi of the wider type and redirects all users, either removing extends or
793/// inserting truncs whenever we stop propagating the type.
794///
795class WidenIV {
796  // Parameters
797  PHINode *OrigPhi;
798  Type *WideType;
799  bool IsSigned;
800
801  // Context
802  LoopInfo        *LI;
803  Loop            *L;
804  ScalarEvolution *SE;
805  DominatorTree   *DT;
806
807  // Result
808  PHINode *WidePhi;
809  Instruction *WideInc;
810  const SCEV *WideIncExpr;
811  SmallVectorImpl<WeakVH> &DeadInsts;
812
813  SmallPtrSet<Instruction*,16> Widened;
814  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
815
816public:
817  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
818          ScalarEvolution *SEv, DominatorTree *DTree,
819          SmallVectorImpl<WeakVH> &DI) :
820    OrigPhi(WI.NarrowIV),
821    WideType(WI.WidestNativeType),
822    IsSigned(WI.IsSigned),
823    LI(LInfo),
824    L(LI->getLoopFor(OrigPhi->getParent())),
825    SE(SEv),
826    DT(DTree),
827    WidePhi(0),
828    WideInc(0),
829    WideIncExpr(0),
830    DeadInsts(DI) {
831    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
832  }
833
834  PHINode *CreateWideIV(SCEVExpander &Rewriter);
835
836protected:
837  Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
838                   Instruction *Use);
839
840  Instruction *CloneIVUser(NarrowIVDefUse DU);
841
842  const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
843
844  const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
845
846  Instruction *WidenIVUse(NarrowIVDefUse DU);
847
848  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
849};
850} // anonymous namespace
851
852/// isLoopInvariant - Perform a quick domtree based check for loop invariance
853/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
854/// gratuitous for this purpose.
855static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
856  Instruction *Inst = dyn_cast<Instruction>(V);
857  if (!Inst)
858    return true;
859
860  return DT->properlyDominates(Inst->getParent(), L->getHeader());
861}
862
863Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
864                          Instruction *Use) {
865  // Set the debug location and conservative insertion point.
866  IRBuilder<> Builder(Use);
867  // Hoist the insertion point into loop preheaders as far as possible.
868  for (const Loop *L = LI->getLoopFor(Use->getParent());
869       L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
870       L = L->getParentLoop())
871    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
872
873  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
874                    Builder.CreateZExt(NarrowOper, WideType);
875}
876
877/// CloneIVUser - Instantiate a wide operation to replace a narrow
878/// operation. This only needs to handle operations that can evaluation to
879/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
880Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
881  unsigned Opcode = DU.NarrowUse->getOpcode();
882  switch (Opcode) {
883  default:
884    return 0;
885  case Instruction::Add:
886  case Instruction::Mul:
887  case Instruction::UDiv:
888  case Instruction::Sub:
889  case Instruction::And:
890  case Instruction::Or:
891  case Instruction::Xor:
892  case Instruction::Shl:
893  case Instruction::LShr:
894  case Instruction::AShr:
895    DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n");
896
897    // Replace NarrowDef operands with WideDef. Otherwise, we don't know
898    // anything about the narrow operand yet so must insert a [sz]ext. It is
899    // probably loop invariant and will be folded or hoisted. If it actually
900    // comes from a widened IV, it should be removed during a future call to
901    // WidenIVUse.
902    Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef :
903      getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse);
904    Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef :
905      getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse);
906
907    BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse);
908    BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
909                                                    LHS, RHS,
910                                                    NarrowBO->getName());
911    IRBuilder<> Builder(DU.NarrowUse);
912    Builder.Insert(WideBO);
913    if (const OverflowingBinaryOperator *OBO =
914        dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
915      if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
916      if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
917    }
918    return WideBO;
919  }
920  llvm_unreachable(0);
921}
922
923/// No-wrap operations can transfer sign extension of their result to their
924/// operands. Generate the SCEV value for the widened operation without
925/// actually modifying the IR yet. If the expression after extending the
926/// operands is an AddRec for this loop, return it.
927const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
928  // Handle the common case of add<nsw/nuw>
929  if (DU.NarrowUse->getOpcode() != Instruction::Add)
930    return 0;
931
932  // One operand (NarrowDef) has already been extended to WideDef. Now determine
933  // if extending the other will lead to a recurrence.
934  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
935  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
936
937  const SCEV *ExtendOperExpr = 0;
938  const OverflowingBinaryOperator *OBO =
939    cast<OverflowingBinaryOperator>(DU.NarrowUse);
940  if (IsSigned && OBO->hasNoSignedWrap())
941    ExtendOperExpr = SE->getSignExtendExpr(
942      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
943  else if(!IsSigned && OBO->hasNoUnsignedWrap())
944    ExtendOperExpr = SE->getZeroExtendExpr(
945      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
946  else
947    return 0;
948
949  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
950    SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr,
951                   IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW));
952
953  if (!AddRec || AddRec->getLoop() != L)
954    return 0;
955  return AddRec;
956}
957
958/// GetWideRecurrence - Is this instruction potentially interesting from
959/// IVUsers' perspective after widening it's type? In other words, can the
960/// extend be safely hoisted out of the loop with SCEV reducing the value to a
961/// recurrence on the same loop. If so, return the sign or zero extended
962/// recurrence. Otherwise return NULL.
963const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
964  if (!SE->isSCEVable(NarrowUse->getType()))
965    return 0;
966
967  const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
968  if (SE->getTypeSizeInBits(NarrowExpr->getType())
969      >= SE->getTypeSizeInBits(WideType)) {
970    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
971    // index. So don't follow this use.
972    return 0;
973  }
974
975  const SCEV *WideExpr = IsSigned ?
976    SE->getSignExtendExpr(NarrowExpr, WideType) :
977    SE->getZeroExtendExpr(NarrowExpr, WideType);
978  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
979  if (!AddRec || AddRec->getLoop() != L)
980    return 0;
981  return AddRec;
982}
983
984/// WidenIVUse - Determine whether an individual user of the narrow IV can be
985/// widened. If so, return the wide clone of the user.
986Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) {
987
988  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
989  if (isa<PHINode>(DU.NarrowUse) &&
990      LI->getLoopFor(DU.NarrowUse->getParent()) != L)
991    return 0;
992
993  // Our raison d'etre! Eliminate sign and zero extension.
994  if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
995    Value *NewDef = DU.WideDef;
996    if (DU.NarrowUse->getType() != WideType) {
997      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
998      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
999      if (CastWidth < IVWidth) {
1000        // The cast isn't as wide as the IV, so insert a Trunc.
1001        IRBuilder<> Builder(DU.NarrowUse);
1002        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1003      }
1004      else {
1005        // A wider extend was hidden behind a narrower one. This may induce
1006        // another round of IV widening in which the intermediate IV becomes
1007        // dead. It should be very rare.
1008        DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1009              << " not wide enough to subsume " << *DU.NarrowUse << "\n");
1010        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1011        NewDef = DU.NarrowUse;
1012      }
1013    }
1014    if (NewDef != DU.NarrowUse) {
1015      DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1016            << " replaced by " << *DU.WideDef << "\n");
1017      ++NumElimExt;
1018      DU.NarrowUse->replaceAllUsesWith(NewDef);
1019      DeadInsts.push_back(DU.NarrowUse);
1020    }
1021    // Now that the extend is gone, we want to expose it's uses for potential
1022    // further simplification. We don't need to directly inform SimplifyIVUsers
1023    // of the new users, because their parent IV will be processed later as a
1024    // new loop phi. If we preserved IVUsers analysis, we would also want to
1025    // push the uses of WideDef here.
1026
1027    // No further widening is needed. The deceased [sz]ext had done it for us.
1028    return 0;
1029  }
1030
1031  // Does this user itself evaluate to a recurrence after widening?
1032  const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
1033  if (!WideAddRec) {
1034      WideAddRec = GetExtendedOperandRecurrence(DU);
1035  }
1036  if (!WideAddRec) {
1037    // This user does not evaluate to a recurence after widening, so don't
1038    // follow it. Instead insert a Trunc to kill off the original use,
1039    // eventually isolating the original narrow IV so it can be removed.
1040    IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
1041    Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1042    DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1043    return 0;
1044  }
1045  // Assume block terminators cannot evaluate to a recurrence. We can't to
1046  // insert a Trunc after a terminator if there happens to be a critical edge.
1047  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1048         "SCEV is not expected to evaluate a block terminator");
1049
1050  // Reuse the IV increment that SCEVExpander created as long as it dominates
1051  // NarrowUse.
1052  Instruction *WideUse = 0;
1053  if (WideAddRec == WideIncExpr
1054      && SCEVExpander::hoistStep(WideInc, DU.NarrowUse, DT))
1055    WideUse = WideInc;
1056  else {
1057    WideUse = CloneIVUser(DU);
1058    if (!WideUse)
1059      return 0;
1060  }
1061  // Evaluation of WideAddRec ensured that the narrow expression could be
1062  // extended outside the loop without overflow. This suggests that the wide use
1063  // evaluates to the same expression as the extended narrow use, but doesn't
1064  // absolutely guarantee it. Hence the following failsafe check. In rare cases
1065  // where it fails, we simply throw away the newly created wide use.
1066  if (WideAddRec != SE->getSCEV(WideUse)) {
1067    DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
1068          << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
1069    DeadInsts.push_back(WideUse);
1070    return 0;
1071  }
1072
1073  // Returning WideUse pushes it on the worklist.
1074  return WideUse;
1075}
1076
1077/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
1078///
1079void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1080  for (Value::use_iterator UI = NarrowDef->use_begin(),
1081         UE = NarrowDef->use_end(); UI != UE; ++UI) {
1082    Instruction *NarrowUse = cast<Instruction>(*UI);
1083
1084    // Handle data flow merges and bizarre phi cycles.
1085    if (!Widened.insert(NarrowUse))
1086      continue;
1087
1088    NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef));
1089  }
1090}
1091
1092/// CreateWideIV - Process a single induction variable. First use the
1093/// SCEVExpander to create a wide induction variable that evaluates to the same
1094/// recurrence as the original narrow IV. Then use a worklist to forward
1095/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
1096/// interesting IV users, the narrow IV will be isolated for removal by
1097/// DeleteDeadPHIs.
1098///
1099/// It would be simpler to delete uses as they are processed, but we must avoid
1100/// invalidating SCEV expressions.
1101///
1102PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
1103  // Is this phi an induction variable?
1104  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1105  if (!AddRec)
1106    return NULL;
1107
1108  // Widen the induction variable expression.
1109  const SCEV *WideIVExpr = IsSigned ?
1110    SE->getSignExtendExpr(AddRec, WideType) :
1111    SE->getZeroExtendExpr(AddRec, WideType);
1112
1113  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1114         "Expect the new IV expression to preserve its type");
1115
1116  // Can the IV be extended outside the loop without overflow?
1117  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1118  if (!AddRec || AddRec->getLoop() != L)
1119    return NULL;
1120
1121  // An AddRec must have loop-invariant operands. Since this AddRec is
1122  // materialized by a loop header phi, the expression cannot have any post-loop
1123  // operands, so they must dominate the loop header.
1124  assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1125         SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
1126         && "Loop header phi recurrence inputs do not dominate the loop");
1127
1128  // The rewriter provides a value for the desired IV expression. This may
1129  // either find an existing phi or materialize a new one. Either way, we
1130  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1131  // of the phi-SCC dominates the loop entry.
1132  Instruction *InsertPt = L->getHeader()->begin();
1133  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1134
1135  // Remembering the WideIV increment generated by SCEVExpander allows
1136  // WidenIVUse to reuse it when widening the narrow IV's increment. We don't
1137  // employ a general reuse mechanism because the call above is the only call to
1138  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1139  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1140    WideInc =
1141      cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1142    WideIncExpr = SE->getSCEV(WideInc);
1143  }
1144
1145  DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1146  ++NumWidened;
1147
1148  // Traverse the def-use chain using a worklist starting at the original IV.
1149  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1150
1151  Widened.insert(OrigPhi);
1152  pushNarrowIVUsers(OrigPhi, WidePhi);
1153
1154  while (!NarrowIVUsers.empty()) {
1155    NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1156
1157    // Process a def-use edge. This may replace the use, so don't hold a
1158    // use_iterator across it.
1159    Instruction *WideUse = WidenIVUse(DU);
1160
1161    // Follow all def-use edges from the previous narrow use.
1162    if (WideUse)
1163      pushNarrowIVUsers(DU.NarrowUse, WideUse);
1164
1165    // WidenIVUse may have removed the def-use edge.
1166    if (DU.NarrowDef->use_empty())
1167      DeadInsts.push_back(DU.NarrowDef);
1168  }
1169  return WidePhi;
1170}
1171
1172//===----------------------------------------------------------------------===//
1173//  Simplification of IV users based on SCEV evaluation.
1174//===----------------------------------------------------------------------===//
1175
1176
1177/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
1178/// users. Each successive simplification may push more users which may
1179/// themselves be candidates for simplification.
1180///
1181/// Sign/Zero extend elimination is interleaved with IV simplification.
1182///
1183void IndVarSimplify::SimplifyAndExtend(Loop *L,
1184                                       SCEVExpander &Rewriter,
1185                                       LPPassManager &LPM) {
1186  SmallVector<WideIVInfo, 8> WideIVs;
1187
1188  SmallVector<PHINode*, 8> LoopPhis;
1189  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1190    LoopPhis.push_back(cast<PHINode>(I));
1191  }
1192  // Each round of simplification iterates through the SimplifyIVUsers worklist
1193  // for all current phis, then determines whether any IVs can be
1194  // widened. Widening adds new phis to LoopPhis, inducing another round of
1195  // simplification on the wide IVs.
1196  while (!LoopPhis.empty()) {
1197    // Evaluate as many IV expressions as possible before widening any IVs. This
1198    // forces SCEV to set no-wrap flags before evaluating sign/zero
1199    // extension. The first time SCEV attempts to normalize sign/zero extension,
1200    // the result becomes final. So for the most predictable results, we delay
1201    // evaluation of sign/zero extend evaluation until needed, and avoid running
1202    // other SCEV based analysis prior to SimplifyAndExtend.
1203    do {
1204      PHINode *CurrIV = LoopPhis.pop_back_val();
1205
1206      // Information about sign/zero extensions of CurrIV.
1207      WideIVVisitor WIV(CurrIV, SE, TD);
1208
1209      Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
1210
1211      if (WIV.WI.WidestNativeType) {
1212        WideIVs.push_back(WIV.WI);
1213      }
1214    } while(!LoopPhis.empty());
1215
1216    for (; !WideIVs.empty(); WideIVs.pop_back()) {
1217      WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
1218      if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
1219        Changed = true;
1220        LoopPhis.push_back(WidePhi);
1221      }
1222    }
1223  }
1224}
1225
1226//===----------------------------------------------------------------------===//
1227//  LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1228//===----------------------------------------------------------------------===//
1229
1230/// Check for expressions that ScalarEvolution generates to compute
1231/// BackedgeTakenInfo. If these expressions have not been reduced, then
1232/// expanding them may incur additional cost (albeit in the loop preheader).
1233static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
1234                                ScalarEvolution *SE) {
1235  // If the backedge-taken count is a UDiv, it's very likely a UDiv that
1236  // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
1237  // precise expression, rather than a UDiv from the user's code. If we can't
1238  // find a UDiv in the code with some simple searching, assume the former and
1239  // forego rewriting the loop.
1240  if (isa<SCEVUDivExpr>(S)) {
1241    ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
1242    if (!OrigCond) return true;
1243    const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
1244    R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
1245    if (R != S) {
1246      const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
1247      L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
1248      if (L != S)
1249        return true;
1250    }
1251  }
1252
1253  if (EnableIVRewrite)
1254    return false;
1255
1256  // Recurse past add expressions, which commonly occur in the
1257  // BackedgeTakenCount. They may already exist in program code, and if not,
1258  // they are not too expensive rematerialize.
1259  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
1260    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
1261         I != E; ++I) {
1262      if (isHighCostExpansion(*I, BI, SE))
1263        return true;
1264    }
1265    return false;
1266  }
1267
1268  // HowManyLessThans uses a Max expression whenever the loop is not guarded by
1269  // the exit condition.
1270  if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
1271    return true;
1272
1273  // If we haven't recognized an expensive SCEV patter, assume its an expression
1274  // produced by program code.
1275  return false;
1276}
1277
1278/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
1279/// count expression can be safely and cheaply expanded into an instruction
1280/// sequence that can be used by LinearFunctionTestReplace.
1281static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
1282  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1283  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
1284      BackedgeTakenCount->isZero())
1285    return false;
1286
1287  if (!L->getExitingBlock())
1288    return false;
1289
1290  // Can't rewrite non-branch yet.
1291  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
1292  if (!BI)
1293    return false;
1294
1295  if (isHighCostExpansion(BackedgeTakenCount, BI, SE))
1296    return false;
1297
1298  return true;
1299}
1300
1301/// getBackedgeIVType - Get the widest type used by the loop test after peeking
1302/// through Truncs.
1303///
1304/// TODO: Unnecessary when ForceLFTR is removed.
1305static Type *getBackedgeIVType(Loop *L) {
1306  if (!L->getExitingBlock())
1307    return 0;
1308
1309  // Can't rewrite non-branch yet.
1310  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
1311  if (!BI)
1312    return 0;
1313
1314  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1315  if (!Cond)
1316    return 0;
1317
1318  Type *Ty = 0;
1319  for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
1320      OI != OE; ++OI) {
1321    assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
1322    TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
1323    if (!Trunc)
1324      continue;
1325
1326    return Trunc->getSrcTy();
1327  }
1328  return Ty;
1329}
1330
1331/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
1332/// invariant value to the phi.
1333static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
1334  Instruction *IncI = dyn_cast<Instruction>(IncV);
1335  if (!IncI)
1336    return 0;
1337
1338  switch (IncI->getOpcode()) {
1339  case Instruction::Add:
1340  case Instruction::Sub:
1341    break;
1342  case Instruction::GetElementPtr:
1343    // An IV counter must preserve its type.
1344    if (IncI->getNumOperands() == 2)
1345      break;
1346  default:
1347    return 0;
1348  }
1349
1350  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1351  if (Phi && Phi->getParent() == L->getHeader()) {
1352    if (isLoopInvariant(IncI->getOperand(1), L, DT))
1353      return Phi;
1354    return 0;
1355  }
1356  if (IncI->getOpcode() == Instruction::GetElementPtr)
1357    return 0;
1358
1359  // Allow add/sub to be commuted.
1360  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1361  if (Phi && Phi->getParent() == L->getHeader()) {
1362    if (isLoopInvariant(IncI->getOperand(0), L, DT))
1363      return Phi;
1364  }
1365  return 0;
1366}
1367
1368/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
1369/// that the current exit test is already sufficiently canonical.
1370static bool needsLFTR(Loop *L, DominatorTree *DT) {
1371  assert(L->getExitingBlock() && "expected loop exit");
1372
1373  BasicBlock *LatchBlock = L->getLoopLatch();
1374  // Don't bother with LFTR if the loop is not properly simplified.
1375  if (!LatchBlock)
1376    return false;
1377
1378  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
1379  assert(BI && "expected exit branch");
1380
1381  // Do LFTR to simplify the exit condition to an ICMP.
1382  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1383  if (!Cond)
1384    return true;
1385
1386  // Do LFTR to simplify the exit ICMP to EQ/NE
1387  ICmpInst::Predicate Pred = Cond->getPredicate();
1388  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1389    return true;
1390
1391  // Look for a loop invariant RHS
1392  Value *LHS = Cond->getOperand(0);
1393  Value *RHS = Cond->getOperand(1);
1394  if (!isLoopInvariant(RHS, L, DT)) {
1395    if (!isLoopInvariant(LHS, L, DT))
1396      return true;
1397    std::swap(LHS, RHS);
1398  }
1399  // Look for a simple IV counter LHS
1400  PHINode *Phi = dyn_cast<PHINode>(LHS);
1401  if (!Phi)
1402    Phi = getLoopPhiForCounter(LHS, L, DT);
1403
1404  if (!Phi)
1405    return true;
1406
1407  // Do LFTR if the exit condition's IV is *not* a simple counter.
1408  Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch());
1409  return Phi != getLoopPhiForCounter(IncV, L, DT);
1410}
1411
1412/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to
1413/// be rewritten) loop exit test.
1414static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1415  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1416  Value *IncV = Phi->getIncomingValue(LatchIdx);
1417
1418  for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
1419       UI != UE; ++UI) {
1420    if (*UI != Cond && *UI != IncV) return false;
1421  }
1422
1423  for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
1424       UI != UE; ++UI) {
1425    if (*UI != Cond && *UI != Phi) return false;
1426  }
1427  return true;
1428}
1429
1430/// FindLoopCounter - Find an affine IV in canonical form.
1431///
1432/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
1433///
1434/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
1435/// This is difficult in general for SCEV because of potential overflow. But we
1436/// could at least handle constant BECounts.
1437static PHINode *
1438FindLoopCounter(Loop *L, const SCEV *BECount,
1439                ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
1440  // I'm not sure how BECount could be a pointer type, but we definitely don't
1441  // want to LFTR that.
1442  if (BECount->getType()->isPointerTy())
1443    return 0;
1444
1445  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1446
1447  Value *Cond =
1448    cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
1449
1450  // Loop over all of the PHI nodes, looking for a simple counter.
1451  PHINode *BestPhi = 0;
1452  const SCEV *BestInit = 0;
1453  BasicBlock *LatchBlock = L->getLoopLatch();
1454  assert(LatchBlock && "needsLFTR should guarantee a loop latch");
1455
1456  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1457    PHINode *Phi = cast<PHINode>(I);
1458    if (!SE->isSCEVable(Phi->getType()))
1459      continue;
1460
1461    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1462    if (!AR || AR->getLoop() != L || !AR->isAffine())
1463      continue;
1464
1465    // AR may be a pointer type, while BECount is an integer type.
1466    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1467    // AR may not be a narrower type, or we may never exit.
1468    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1469    if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
1470      continue;
1471
1472    const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1473    if (!Step || !Step->isOne())
1474      continue;
1475
1476    int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1477    Value *IncV = Phi->getIncomingValue(LatchIdx);
1478    if (getLoopPhiForCounter(IncV, L, DT) != Phi)
1479      continue;
1480
1481    const SCEV *Init = AR->getStart();
1482
1483    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
1484      // Don't force a live loop counter if another IV can be used.
1485      if (AlmostDeadIV(Phi, LatchBlock, Cond))
1486        continue;
1487
1488      // Prefer to count-from-zero. This is a more "canonical" counter form. It
1489      // also prefers integer to pointer IVs.
1490      if (BestInit->isZero() != Init->isZero()) {
1491        if (BestInit->isZero())
1492          continue;
1493      }
1494      // If two IVs both count from zero or both count from nonzero then the
1495      // narrower is likely a dead phi that has been widened. Use the wider phi
1496      // to allow the other to be eliminated.
1497      if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
1498        continue;
1499    }
1500    BestPhi = Phi;
1501    BestInit = Init;
1502  }
1503  return BestPhi;
1504}
1505
1506/// LinearFunctionTestReplace - This method rewrites the exit condition of the
1507/// loop to be a canonical != comparison against the incremented loop induction
1508/// variable.  This pass is able to rewrite the exit tests of any loop where the
1509/// SCEV analysis can determine a loop-invariant trip count of the loop, which
1510/// is actually a much broader range than just linear tests.
1511Value *IndVarSimplify::
1512LinearFunctionTestReplace(Loop *L,
1513                          const SCEV *BackedgeTakenCount,
1514                          PHINode *IndVar,
1515                          SCEVExpander &Rewriter) {
1516  assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
1517  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
1518
1519  // LFTR can ignore IV overflow and truncate to the width of
1520  // BECount. This avoids materializing the add(zext(add)) expression.
1521  Type *CntTy = !EnableIVRewrite ?
1522    BackedgeTakenCount->getType() : IndVar->getType();
1523
1524  const SCEV *IVLimit = BackedgeTakenCount;
1525
1526  // If the exiting block is not the same as the backedge block, we must compare
1527  // against the preincremented value, otherwise we prefer to compare against
1528  // the post-incremented value.
1529  Value *CmpIndVar;
1530  if (L->getExitingBlock() == L->getLoopLatch()) {
1531    // Add one to the "backedge-taken" count to get the trip count.
1532    // If this addition may overflow, we have to be more pessimistic and
1533    // cast the induction variable before doing the add.
1534    const SCEV *N =
1535      SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1));
1536    if (CntTy == IVLimit->getType())
1537      IVLimit = N;
1538    else {
1539      const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0);
1540      if ((isa<SCEVConstant>(N) && !N->isZero()) ||
1541          SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
1542        // No overflow. Cast the sum.
1543        IVLimit = SE->getTruncateOrZeroExtend(N, CntTy);
1544      } else {
1545        // Potential overflow. Cast before doing the add.
1546        IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
1547        IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1));
1548      }
1549    }
1550    // The BackedgeTaken expression contains the number of times that the
1551    // backedge branches to the loop header.  This is one less than the
1552    // number of times the loop executes, so use the incremented indvar.
1553    CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
1554  } else {
1555    // We have to use the preincremented value...
1556    IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
1557    CmpIndVar = IndVar;
1558  }
1559
1560  // For unit stride, IVLimit = Start + BECount with 2's complement overflow.
1561  // So for, non-zero start compute the IVLimit here.
1562  bool isPtrIV = false;
1563  Type *CmpTy = CntTy;
1564  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
1565  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
1566  if (!AR->getStart()->isZero()) {
1567    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
1568    const SCEV *IVInit = AR->getStart();
1569
1570    // For pointer types, sign extend BECount in order to materialize a GEP.
1571    // Note that for without EnableIVRewrite, we never run SCEVExpander on a
1572    // pointer type, because we must preserve the existing GEPs. Instead we
1573    // directly generate a GEP later.
1574    if (IVInit->getType()->isPointerTy()) {
1575      isPtrIV = true;
1576      CmpTy = SE->getEffectiveSCEVType(IVInit->getType());
1577      IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy);
1578    }
1579    // For integer types, truncate the IV before computing IVInit + BECount.
1580    else {
1581      if (SE->getTypeSizeInBits(IVInit->getType())
1582          > SE->getTypeSizeInBits(CmpTy))
1583        IVInit = SE->getTruncateExpr(IVInit, CmpTy);
1584
1585      IVLimit = SE->getAddExpr(IVInit, IVLimit);
1586    }
1587  }
1588  // Expand the code for the iteration count.
1589  IRBuilder<> Builder(BI);
1590
1591  assert(SE->isLoopInvariant(IVLimit, L) &&
1592         "Computed iteration count is not loop invariant!");
1593  Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI);
1594
1595  // Create a gep for IVInit + IVLimit from on an existing pointer base.
1596  assert(isPtrIV == IndVar->getType()->isPointerTy() &&
1597         "IndVar type must match IVInit type");
1598  if (isPtrIV) {
1599      Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
1600      assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter");
1601      assert(SE->getSizeOfExpr(
1602               cast<PointerType>(IVStart->getType())->getElementType())->isOne()
1603             && "unit stride pointer IV must be i8*");
1604
1605      Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
1606      ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit");
1607      Builder.SetInsertPoint(BI);
1608  }
1609
1610  // Insert a new icmp_ne or icmp_eq instruction before the branch.
1611  ICmpInst::Predicate P;
1612  if (L->contains(BI->getSuccessor(0)))
1613    P = ICmpInst::ICMP_NE;
1614  else
1615    P = ICmpInst::ICMP_EQ;
1616
1617  DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1618               << "      LHS:" << *CmpIndVar << '\n'
1619               << "       op:\t"
1620               << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
1621               << "      RHS:\t" << *ExitCnt << "\n"
1622               << "     Expr:\t" << *IVLimit << "\n");
1623
1624  if (SE->getTypeSizeInBits(CmpIndVar->getType())
1625      > SE->getTypeSizeInBits(CmpTy)) {
1626    CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv");
1627  }
1628
1629  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1630  Value *OrigCond = BI->getCondition();
1631  // It's tempting to use replaceAllUsesWith here to fully replace the old
1632  // comparison, but that's not immediately safe, since users of the old
1633  // comparison may not be dominated by the new comparison. Instead, just
1634  // update the branch to use the new comparison; in the common case this
1635  // will make old comparison dead.
1636  BI->setCondition(Cond);
1637  DeadInsts.push_back(OrigCond);
1638
1639  ++NumLFTR;
1640  Changed = true;
1641  return Cond;
1642}
1643
1644//===----------------------------------------------------------------------===//
1645//  SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1646//===----------------------------------------------------------------------===//
1647
1648/// If there's a single exit block, sink any loop-invariant values that
1649/// were defined in the preheader but not used inside the loop into the
1650/// exit block to reduce register pressure in the loop.
1651void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
1652  BasicBlock *ExitBlock = L->getExitBlock();
1653  if (!ExitBlock) return;
1654
1655  BasicBlock *Preheader = L->getLoopPreheader();
1656  if (!Preheader) return;
1657
1658  Instruction *InsertPt = ExitBlock->getFirstInsertionPt();
1659  BasicBlock::iterator I = Preheader->getTerminator();
1660  while (I != Preheader->begin()) {
1661    --I;
1662    // New instructions were inserted at the end of the preheader.
1663    if (isa<PHINode>(I))
1664      break;
1665
1666    // Don't move instructions which might have side effects, since the side
1667    // effects need to complete before instructions inside the loop.  Also don't
1668    // move instructions which might read memory, since the loop may modify
1669    // memory. Note that it's okay if the instruction might have undefined
1670    // behavior: LoopSimplify guarantees that the preheader dominates the exit
1671    // block.
1672    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1673      continue;
1674
1675    // Skip debug info intrinsics.
1676    if (isa<DbgInfoIntrinsic>(I))
1677      continue;
1678
1679    // Skip landingpad instructions.
1680    if (isa<LandingPadInst>(I))
1681      continue;
1682
1683    // Don't sink static AllocaInsts out of the entry block, which would
1684    // turn them into dynamic allocas!
1685    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
1686      if (AI->isStaticAlloca())
1687        continue;
1688
1689    // Determine if there is a use in or before the loop (direct or
1690    // otherwise).
1691    bool UsedInLoop = false;
1692    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
1693         UI != UE; ++UI) {
1694      User *U = *UI;
1695      BasicBlock *UseBB = cast<Instruction>(U)->getParent();
1696      if (PHINode *P = dyn_cast<PHINode>(U)) {
1697        unsigned i =
1698          PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
1699        UseBB = P->getIncomingBlock(i);
1700      }
1701      if (UseBB == Preheader || L->contains(UseBB)) {
1702        UsedInLoop = true;
1703        break;
1704      }
1705    }
1706
1707    // If there is, the def must remain in the preheader.
1708    if (UsedInLoop)
1709      continue;
1710
1711    // Otherwise, sink it to the exit block.
1712    Instruction *ToMove = I;
1713    bool Done = false;
1714
1715    if (I != Preheader->begin()) {
1716      // Skip debug info intrinsics.
1717      do {
1718        --I;
1719      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
1720
1721      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
1722        Done = true;
1723    } else {
1724      Done = true;
1725    }
1726
1727    ToMove->moveBefore(InsertPt);
1728    if (Done) break;
1729    InsertPt = ToMove;
1730  }
1731}
1732
1733//===----------------------------------------------------------------------===//
1734//  IndVarSimplify driver. Manage several subpasses of IV simplification.
1735//===----------------------------------------------------------------------===//
1736
1737bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
1738  // If LoopSimplify form is not available, stay out of trouble. Some notes:
1739  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
1740  //    canonicalization can be a pessimization without LSR to "clean up"
1741  //    afterwards.
1742  //  - We depend on having a preheader; in particular,
1743  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
1744  //    and we're in trouble if we can't find the induction variable even when
1745  //    we've manually inserted one.
1746  if (!L->isLoopSimplifyForm())
1747    return false;
1748
1749  if (EnableIVRewrite)
1750    IU = &getAnalysis<IVUsers>();
1751  LI = &getAnalysis<LoopInfo>();
1752  SE = &getAnalysis<ScalarEvolution>();
1753  DT = &getAnalysis<DominatorTree>();
1754  TD = getAnalysisIfAvailable<TargetData>();
1755
1756  DeadInsts.clear();
1757  Changed = false;
1758
1759  // If there are any floating-point recurrences, attempt to
1760  // transform them to use integer recurrences.
1761  RewriteNonIntegerIVs(L);
1762
1763  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1764
1765  // Create a rewriter object which we'll use to transform the code with.
1766  SCEVExpander Rewriter(*SE, "indvars");
1767#ifndef NDEBUG
1768  Rewriter.setDebugType(DEBUG_TYPE);
1769#endif
1770
1771  // Eliminate redundant IV users.
1772  //
1773  // Simplification works best when run before other consumers of SCEV. We
1774  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1775  // other expressions involving loop IVs have been evaluated. This helps SCEV
1776  // set no-wrap flags before normalizing sign/zero extension.
1777  if (!EnableIVRewrite) {
1778    Rewriter.disableCanonicalMode();
1779    SimplifyAndExtend(L, Rewriter, LPM);
1780  }
1781
1782  // Check to see if this loop has a computable loop-invariant execution count.
1783  // If so, this means that we can compute the final value of any expressions
1784  // that are recurrent in the loop, and substitute the exit values from the
1785  // loop into any instructions outside of the loop that use the final values of
1786  // the current expressions.
1787  //
1788  if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1789    RewriteLoopExitValues(L, Rewriter);
1790
1791  // Eliminate redundant IV users.
1792  if (EnableIVRewrite)
1793    Changed |= simplifyIVUsers(IU, SE, &LPM, DeadInsts);
1794
1795  // Eliminate redundant IV cycles.
1796  if (!EnableIVRewrite)
1797    NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
1798
1799  // Compute the type of the largest recurrence expression, and decide whether
1800  // a canonical induction variable should be inserted.
1801  Type *LargestType = 0;
1802  bool NeedCannIV = false;
1803  bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
1804  if (EnableIVRewrite && ExpandBECount) {
1805    // If we have a known trip count and a single exit block, we'll be
1806    // rewriting the loop exit test condition below, which requires a
1807    // canonical induction variable.
1808    NeedCannIV = true;
1809    Type *Ty = BackedgeTakenCount->getType();
1810    if (!EnableIVRewrite) {
1811      // In this mode, SimplifyIVUsers may have already widened the IV used by
1812      // the backedge test and inserted a Trunc on the compare's operand. Get
1813      // the wider type to avoid creating a redundant narrow IV only used by the
1814      // loop test.
1815      LargestType = getBackedgeIVType(L);
1816    }
1817    if (!LargestType ||
1818        SE->getTypeSizeInBits(Ty) >
1819        SE->getTypeSizeInBits(LargestType))
1820      LargestType = SE->getEffectiveSCEVType(Ty);
1821  }
1822  if (EnableIVRewrite) {
1823    for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
1824      NeedCannIV = true;
1825      Type *Ty =
1826        SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
1827      if (!LargestType ||
1828          SE->getTypeSizeInBits(Ty) >
1829          SE->getTypeSizeInBits(LargestType))
1830        LargestType = Ty;
1831    }
1832  }
1833
1834  // Now that we know the largest of the induction variable expressions
1835  // in this loop, insert a canonical induction variable of the largest size.
1836  PHINode *IndVar = 0;
1837  if (NeedCannIV) {
1838    // Check to see if the loop already has any canonical-looking induction
1839    // variables. If any are present and wider than the planned canonical
1840    // induction variable, temporarily remove them, so that the Rewriter
1841    // doesn't attempt to reuse them.
1842    SmallVector<PHINode *, 2> OldCannIVs;
1843    while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) {
1844      if (SE->getTypeSizeInBits(OldCannIV->getType()) >
1845          SE->getTypeSizeInBits(LargestType))
1846        OldCannIV->removeFromParent();
1847      else
1848        break;
1849      OldCannIVs.push_back(OldCannIV);
1850    }
1851
1852    IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
1853
1854    ++NumInserted;
1855    Changed = true;
1856    DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n');
1857
1858    // Now that the official induction variable is established, reinsert
1859    // any old canonical-looking variables after it so that the IR remains
1860    // consistent. They will be deleted as part of the dead-PHI deletion at
1861    // the end of the pass.
1862    while (!OldCannIVs.empty()) {
1863      PHINode *OldCannIV = OldCannIVs.pop_back_val();
1864      OldCannIV->insertBefore(L->getHeader()->getFirstInsertionPt());
1865    }
1866  }
1867  else if (!EnableIVRewrite && ExpandBECount && needsLFTR(L, DT)) {
1868    IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
1869  }
1870  // If we have a trip count expression, rewrite the loop's exit condition
1871  // using it.  We can currently only handle loops with a single exit.
1872  Value *NewICmp = 0;
1873  if (ExpandBECount && IndVar) {
1874    // Check preconditions for proper SCEVExpander operation. SCEV does not
1875    // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
1876    // pass that uses the SCEVExpander must do it. This does not work well for
1877    // loop passes because SCEVExpander makes assumptions about all loops, while
1878    // LoopPassManager only forces the current loop to be simplified.
1879    //
1880    // FIXME: SCEV expansion has no way to bail out, so the caller must
1881    // explicitly check any assumptions made by SCEV. Brittle.
1882    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
1883    if (!AR || AR->getLoop()->getLoopPreheader())
1884      NewICmp =
1885        LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
1886  }
1887  // Rewrite IV-derived expressions.
1888  if (EnableIVRewrite)
1889    RewriteIVExpressions(L, Rewriter);
1890
1891  // Clear the rewriter cache, because values that are in the rewriter's cache
1892  // can be deleted in the loop below, causing the AssertingVH in the cache to
1893  // trigger.
1894  Rewriter.clear();
1895
1896  // Now that we're done iterating through lists, clean up any instructions
1897  // which are now dead.
1898  while (!DeadInsts.empty())
1899    if (Instruction *Inst =
1900          dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
1901      RecursivelyDeleteTriviallyDeadInstructions(Inst);
1902
1903  // The Rewriter may not be used from this point on.
1904
1905  // Loop-invariant instructions in the preheader that aren't used in the
1906  // loop may be sunk below the loop to reduce register pressure.
1907  SinkUnusedInvariants(L);
1908
1909  // For completeness, inform IVUsers of the IV use in the newly-created
1910  // loop exit test instruction.
1911  if (IU && NewICmp) {
1912    ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
1913    if (NewICmpInst)
1914      IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)));
1915  }
1916  // Clean up dead instructions.
1917  Changed |= DeleteDeadPHIs(L->getHeader());
1918  // Check a post-condition.
1919  assert(L->isLCSSAForm(*DT) &&
1920         "Indvars did not leave the loop in lcssa form!");
1921
1922  // Verify that LFTR, and any other change have not interfered with SCEV's
1923  // ability to compute trip count.
1924#ifndef NDEBUG
1925  if (!EnableIVRewrite && VerifyIndvars &&
1926      !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
1927    SE->forgetLoop(L);
1928    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
1929    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
1930        SE->getTypeSizeInBits(NewBECount->getType()))
1931      NewBECount = SE->getTruncateOrNoop(NewBECount,
1932                                         BackedgeTakenCount->getType());
1933    else
1934      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
1935                                                 NewBECount->getType());
1936    assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
1937  }
1938#endif
1939
1940  return Changed;
1941}
1942