ScalarEvolutionExpander.cpp revision 3ecfc861b4365f341c5c969b40e1afccde676e6f
1//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the implementation of the scalar evolution expander,
11// which is used to generate the code corresponding to a given scalar evolution
12// expression.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/Analysis/ScalarEvolutionExpander.h"
17#include "llvm/Analysis/LoopInfo.h"
18#include "llvm/IntrinsicInst.h"
19#include "llvm/LLVMContext.h"
20#include "llvm/Target/TargetData.h"
21#include "llvm/ADT/STLExtras.h"
22using namespace llvm;
23
24/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
25/// reusing an existing cast if a suitable one exists, replacing an existing
26/// cast with a new one at IP if a suitable cast exists but isn't in the
27/// right place, or creating a new one.
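/// For example (illustrative), if a suitable "ptrtoint i8* %v to i64" already
/// exists but is not at IP, a fresh cast is created at IP, the old cast's
/// uses are redirected to the new one, and the old cast is left in place
/// (with its operand cleared) in case it is being used as an insert point.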
28Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty,
29                                       Instruction::CastOps Op,
30                                       BasicBlock::iterator IP) {
31  // Check to see if there is already a cast!
32  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
33       UI != E; ++UI) {
34    User *U = *UI;
35    if (U->getType() == Ty)
36      if (CastInst *CI = dyn_cast<CastInst>(U))
37        if (CI->getOpcode() == Op) {
38          // If the cast isn't where we want it, fix it.
39          if (BasicBlock::iterator(CI) != IP) {
40            // Create a new cast, and leave the old cast in place in case
41            // it is being used as an insert point. Clear its operand
42            // so that it doesn't hold anything live.
43            Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP);
44            NewCI->takeName(CI);
45            CI->replaceAllUsesWith(NewCI);
46            CI->setOperand(0, UndefValue::get(V->getType()));
47            rememberInstruction(NewCI);
48            return NewCI;
49          }
50          rememberInstruction(CI);
51          return CI;
52        }
53  }
54
55  // Create a new cast.
56  Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP);
57  rememberInstruction(I);
58  return I;
59}
60
61/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
62/// which must be possible with a noop cast, doing what we can to share
63/// the casts.
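/// For example (illustrative), an i64 value is converted to i8* with inttoptr
/// and an i8* value to i64 with ptrtoint; a bitcast to the value's own type
/// is elided entirely, and a ptrtoint of an inttoptr (or vice versa) of the
/// same bit width folds back to the original operand.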
64Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
65  Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
66  assert((Op == Instruction::BitCast ||
67          Op == Instruction::PtrToInt ||
68          Op == Instruction::IntToPtr) &&
69         "InsertNoopCastOfTo cannot perform non-noop casts!");
70  assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
71         "InsertNoopCastOfTo cannot change sizes!");
72
73  // Short-circuit unnecessary bitcasts.
74  if (Op == Instruction::BitCast && V->getType() == Ty)
75    return V;
76
77  // Short-circuit unnecessary inttoptr<->ptrtoint casts.
78  if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
79      SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
80    if (CastInst *CI = dyn_cast<CastInst>(V))
81      if ((CI->getOpcode() == Instruction::PtrToInt ||
82           CI->getOpcode() == Instruction::IntToPtr) &&
83          SE.getTypeSizeInBits(CI->getType()) ==
84          SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
85        return CI->getOperand(0);
86    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
87      if ((CE->getOpcode() == Instruction::PtrToInt ||
88           CE->getOpcode() == Instruction::IntToPtr) &&
89          SE.getTypeSizeInBits(CE->getType()) ==
90          SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
91        return CE->getOperand(0);
92  }
93
94  // Fold a cast of a constant.
95  if (Constant *C = dyn_cast<Constant>(V))
96    return ConstantExpr::getCast(Op, C, Ty);
97
98  // Cast the argument at the beginning of the entry block, after
99  // any bitcasts of other arguments.
100  if (Argument *A = dyn_cast<Argument>(V)) {
101    BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
102    while ((isa<BitCastInst>(IP) &&
103            isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
104            cast<BitCastInst>(IP)->getOperand(0) != A) ||
105           isa<DbgInfoIntrinsic>(IP))
106      ++IP;
107    return ReuseOrCreateCast(A, Ty, Op, IP);
108  }
109
110  // V is an instruction; insert the cast immediately after it.
111  Instruction *I = cast<Instruction>(V);
112  BasicBlock::iterator IP = I; ++IP;
113  if (InvokeInst *II = dyn_cast<InvokeInst>(I))
114    IP = II->getNormalDest()->begin();
115  while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP)) ++IP;
116  return ReuseOrCreateCast(I, Ty, Op, IP);
117}
118
119/// InsertBinop - Insert the specified binary operator, doing a small amount
120/// of work to avoid inserting an obviously redundant operation.
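/// For example (illustrative), if "%t = add i64 %a, %b" already appears among
/// the handful of instructions just before the insertion point, a request for
/// an Add of %a and %b returns %t rather than emitting a duplicate add.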
121Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
122                                 Value *LHS, Value *RHS) {
123  // Fold a binop with constant operands.
124  if (Constant *CLHS = dyn_cast<Constant>(LHS))
125    if (Constant *CRHS = dyn_cast<Constant>(RHS))
126      return ConstantExpr::get(Opcode, CLHS, CRHS);
127
128  // Do a quick scan to see if we have this binop nearby.  If so, reuse it.
129  unsigned ScanLimit = 6;
130  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
131  // Scanning starts from the last instruction before the insertion point.
132  BasicBlock::iterator IP = Builder.GetInsertPoint();
133  if (IP != BlockBegin) {
134    --IP;
135    for (; ScanLimit; --IP, --ScanLimit) {
136      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
137      // generated code.
138      if (isa<DbgInfoIntrinsic>(IP))
139        ScanLimit++;
140      if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
141          IP->getOperand(1) == RHS)
142        return IP;
143      if (IP == BlockBegin) break;
144    }
145  }
146
147  // Save the original insertion point so we can restore it when we're done.
148  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
149  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
150
151  // Move the insertion point out of as many loops as we can.
152  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
153    if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
154    BasicBlock *Preheader = L->getLoopPreheader();
155    if (!Preheader) break;
156
157    // Ok, move up a level.
158    Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
159  }
160
161  // If we haven't found this binop, insert it.
162  Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp");
163  rememberInstruction(BO);
164
165  // Restore the original insert point.
166  if (SaveInsertBB)
167    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
168
169  return BO;
170}
171
172/// FactorOutConstant - Test if S is divisible by Factor, using signed
173/// division. If so, update S with Factor divided out and return true.
174/// S need not be evenly divisible if a reasonable remainder can be
175/// computed.
176/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
177/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
178/// check to see if the divide was folded.
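/// For example (illustrative), with Factor = 4: S = {12,+,8}<L> becomes
/// {3,+,2}<L> with no remainder, and S = 14 becomes 3 with 2 added to
/// Remainder; a constant whose quotient would be 0 is rejected at this scale
/// and left for a smaller one.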
179static bool FactorOutConstant(const SCEV *&S,
180                              const SCEV *&Remainder,
181                              const SCEV *Factor,
182                              ScalarEvolution &SE,
183                              const TargetData *TD) {
184  // Everything is divisible by one.
185  if (Factor->isOne())
186    return true;
187
188  // x/x == 1.
189  if (S == Factor) {
190    S = SE.getConstant(S->getType(), 1);
191    return true;
192  }
193
194  // For a Constant, check for a multiple of the given factor.
195  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
196    // 0/x == 0.
197    if (C->isZero())
198      return true;
199    // Check for divisibility.
200    if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
201      ConstantInt *CI =
202        ConstantInt::get(SE.getContext(),
203                         C->getValue()->getValue().sdiv(
204                                                   FC->getValue()->getValue()));
205      // If the quotient is zero and the remainder is non-zero, reject
206      // the value at this scale. It will be considered for subsequent
207      // smaller scales.
208      if (!CI->isZero()) {
209        const SCEV *Div = SE.getConstant(CI);
210        S = Div;
211        Remainder =
212          SE.getAddExpr(Remainder,
213                        SE.getConstant(C->getValue()->getValue().srem(
214                                                  FC->getValue()->getValue())));
215        return true;
216      }
217    }
218  }
219
220  // In a Mul, check if there is a constant operand which is a multiple
221  // of the given factor.
222  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
223    if (TD) {
224      // With TargetData, the size is known. Check if there is a constant
225      // operand which is a multiple of the given factor. If so, we can
226      // factor it.
227      const SCEVConstant *FC = cast<SCEVConstant>(Factor);
228      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
229        if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
230          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
231          NewMulOps[0] =
232            SE.getConstant(C->getValue()->getValue().sdiv(
233                                                   FC->getValue()->getValue()));
234          S = SE.getMulExpr(NewMulOps);
235          return true;
236        }
237    } else {
238      // Without TargetData, check if Factor can be factored out of any of the
239      // Mul's operands. If so, we can just remove it.
240      for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
241        const SCEV *SOp = M->getOperand(i);
242        const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
243        if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) &&
244            Remainder->isZero()) {
245          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
246          NewMulOps[i] = SOp;
247          S = SE.getMulExpr(NewMulOps);
248          return true;
249        }
250      }
251    }
252  }
253
254  // In an AddRec, check if both start and step are divisible.
255  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
256    const SCEV *Step = A->getStepRecurrence(SE);
257    const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
258    if (!FactorOutConstant(Step, StepRem, Factor, SE, TD))
259      return false;
260    if (!StepRem->isZero())
261      return false;
262    const SCEV *Start = A->getStart();
263    if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
264      return false;
265    // FIXME: can use A->getNoWrapFlags(FlagNW)
266    S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap);
267    return true;
268  }
269
270  return false;
271}
272
273/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
274/// is the number of SCEVAddRecExprs present, which are kept at the end of
275/// the list.
276///
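/// For example (illustrative, with made-up values), Ops = [%a, 2, 3,
/// {0,+,%s}<L>] has its non-addrec part folded to (5 + %a), giving
/// Ops = [5, %a, {0,+,%s}<L>] with the addrec kept at the end.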
277static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
278                                const Type *Ty,
279                                ScalarEvolution &SE) {
280  unsigned NumAddRecs = 0;
281  for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
282    ++NumAddRecs;
283  // Group Ops into non-addrecs and addrecs.
284  SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
285  SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
286  // Let ScalarEvolution sort and simplify the non-addrecs list.
287  const SCEV *Sum = NoAddRecs.empty() ?
288                    SE.getConstant(Ty, 0) :
289                    SE.getAddExpr(NoAddRecs);
290  // If it returned an add, use the operands. Otherwise it simplified
291  // the sum into a single value, so just use that.
292  Ops.clear();
293  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
294    Ops.append(Add->op_begin(), Add->op_end());
295  else if (!Sum->isZero())
296    Ops.push_back(Sum);
297  // Then append the addrecs.
298  Ops.append(AddRecs.begin(), AddRecs.end());
299}
300
301/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
302/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
303/// This helps expose more opportunities for folding parts of the expressions
304/// into GEP indices.
305///
306static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
307                         const Type *Ty,
308                         ScalarEvolution &SE) {
309  // Find the addrecs.
310  SmallVector<const SCEV *, 8> AddRecs;
311  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
312    while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
313      const SCEV *Start = A->getStart();
314      if (Start->isZero()) break;
315      const SCEV *Zero = SE.getConstant(Ty, 0);
316      AddRecs.push_back(SE.getAddRecExpr(Zero,
317                                         A->getStepRecurrence(SE),
318                                         A->getLoop(),
319                                         // FIXME: A->getNoWrapFlags(FlagNW)
320                                         SCEV::FlagAnyWrap));
321      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
322        Ops[i] = Zero;
323        Ops.append(Add->op_begin(), Add->op_end());
324        e += Add->getNumOperands();
325      } else {
326        Ops[i] = Start;
327      }
328    }
329  if (!AddRecs.empty()) {
330    // Add the addrecs onto the end of the list.
331    Ops.append(AddRecs.begin(), AddRecs.end());
332    // Resort the operand list, moving any constants to the front.
333    SimplifyAddOperands(Ops, Ty, SE);
334  }
335}
336
337/// expandAddToGEP - Expand an addition expression with a pointer type into
338/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
339/// BasicAliasAnalysis and other passes analyze the result. See the rules
340/// for getelementptr vs. inttoptr in
341/// http://llvm.org/docs/LangRef.html#pointeraliasing
342/// for details.
343///
344/// Design note: The correctness of using getelementptr here depends on
345/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
346/// they may introduce pointer arithmetic which may not be safely converted
347/// into getelementptr.
348///
349/// Design note: It might seem desirable for this function to be more
350/// loop-aware. If some of the indices are loop-invariant while others
351/// aren't, it might seem desirable to emit multiple GEPs, keeping the
352/// loop-invariant portions of the overall computation outside the loop.
353/// However, there are a few reasons this is not done here. Hoisting simple
354/// arithmetic is a low-level optimization that often isn't very
355/// important until late in the optimization process. In fact, passes
356/// like InstructionCombining will combine GEPs, even if it means
357/// pushing loop-invariant computation down into loops, so even if the
358/// GEPs were split here, the work would quickly be undone. The
359/// LoopStrengthReduction pass, which is usually run quite late (and
360/// after the last InstructionCombining pass), takes care of hoisting
361/// loop-invariant portions of expressions, after considering what
362/// can be folded using target addressing modes.
363///
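/// For example (illustrative), expanding (%p + 4*%i) where %p has type i32*
/// can produce "getelementptr i32* %p, i64 %i" once the element size (4) has
/// been factored out of the offset; if nothing factors cleanly, the base is
/// cast to i8* and a raw byte-offset "uglygep" is emitted instead.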
364Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
365                                    const SCEV *const *op_end,
366                                    const PointerType *PTy,
367                                    const Type *Ty,
368                                    Value *V) {
369  const Type *ElTy = PTy->getElementType();
370  SmallVector<Value *, 4> GepIndices;
371  SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
372  bool AnyNonZeroIndices = false;
373
374  // Split AddRecs up into parts as either of the parts may be usable
375  // without the other.
376  SplitAddRecs(Ops, Ty, SE);
377
378  // Descend down the pointer's type and attempt to convert the other
379  // operands into GEP indices, at each level. The first index in a GEP
380  // indexes into the array implied by the pointer operand; the rest of
381  // the indices index into the element or field type selected by the
382  // preceding index.
383  for (;;) {
384    // If the scale size is not 0, attempt to factor out a scale for
385    // array indexing.
386    SmallVector<const SCEV *, 8> ScaledOps;
387    if (ElTy->isSized()) {
388      const SCEV *ElSize = SE.getSizeOfExpr(ElTy);
389      if (!ElSize->isZero()) {
390        SmallVector<const SCEV *, 8> NewOps;
391        for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
392          const SCEV *Op = Ops[i];
393          const SCEV *Remainder = SE.getConstant(Ty, 0);
394          if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) {
395            // Op now has ElSize factored out.
396            ScaledOps.push_back(Op);
397            if (!Remainder->isZero())
398              NewOps.push_back(Remainder);
399            AnyNonZeroIndices = true;
400          } else {
401            // The operand was not divisible, so add it to the list of operands
402            // we'll scan next iteration.
403            NewOps.push_back(Ops[i]);
404          }
405        }
406        // If we made any changes, update Ops.
407        if (!ScaledOps.empty()) {
408          Ops = NewOps;
409          SimplifyAddOperands(Ops, Ty, SE);
410        }
411      }
412    }
413
414    // Record the scaled array index for this level of the type. If
415    // we didn't find any operands that could be factored, tentatively
416    // assume that element zero was selected (since the zero offset
417    // would obviously be folded away).
418    Value *Scaled = ScaledOps.empty() ?
419                    Constant::getNullValue(Ty) :
420                    expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
421    GepIndices.push_back(Scaled);
422
423    // Collect struct field index operands.
424    while (const StructType *STy = dyn_cast<StructType>(ElTy)) {
425      bool FoundFieldNo = false;
426      // An empty struct has no fields.
427      if (STy->getNumElements() == 0) break;
428      if (SE.TD) {
429        // With TargetData, field offsets are known. See if a constant offset
430        // falls within any of the struct fields.
431        if (Ops.empty()) break;
432        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
433          if (SE.getTypeSizeInBits(C->getType()) <= 64) {
434            const StructLayout &SL = *SE.TD->getStructLayout(STy);
435            uint64_t FullOffset = C->getValue()->getZExtValue();
436            if (FullOffset < SL.getSizeInBytes()) {
437              unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
438              GepIndices.push_back(
439                  ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
440              ElTy = STy->getTypeAtIndex(ElIdx);
441              Ops[0] =
442                SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
443              AnyNonZeroIndices = true;
444              FoundFieldNo = true;
445            }
446          }
447      } else {
448        // Without TargetData, just check for an offsetof expression of the
449        // appropriate struct type.
450        for (unsigned i = 0, e = Ops.size(); i != e; ++i)
451          if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
452            const Type *CTy;
453            Constant *FieldNo;
454            if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) {
455              GepIndices.push_back(FieldNo);
456              ElTy =
457                STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue());
458              Ops[i] = SE.getConstant(Ty, 0);
459              AnyNonZeroIndices = true;
460              FoundFieldNo = true;
461              break;
462            }
463          }
464      }
465      // If no struct field offsets were found, tentatively assume that
466      // field zero was selected (since the zero offset would obviously
467      // be folded away).
468      if (!FoundFieldNo) {
469        ElTy = STy->getTypeAtIndex(0u);
470        GepIndices.push_back(
471          Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
472      }
473    }
474
475    if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
476      ElTy = ATy->getElementType();
477    else
478      break;
479  }
480
481  // If none of the operands were convertible to proper GEP indices, cast
482  // the base to i8* and do an ugly getelementptr with that. It's still
483  // better than ptrtoint+arithmetic+inttoptr at least.
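  // For example (illustrative), a 7-byte offset from %p becomes a cast of %p
  // to i8* followed by "getelementptr i8* %p.i8, i64 7" rather than an
  // inttoptr of (ptrtoint %p + 7).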
484  if (!AnyNonZeroIndices) {
485    // Cast the base to i8*.
486    V = InsertNoopCastOfTo(V,
487       Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
488
489    // Expand the operands for a plain byte offset.
490    Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
491
492    // Fold a GEP with constant operands.
493    if (Constant *CLHS = dyn_cast<Constant>(V))
494      if (Constant *CRHS = dyn_cast<Constant>(Idx))
495        return ConstantExpr::getGetElementPtr(CLHS, &CRHS, 1);
496
497    // Do a quick scan to see if we have this GEP nearby.  If so, reuse it.
498    unsigned ScanLimit = 6;
499    BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
500    // Scanning starts from the last instruction before the insertion point.
501    BasicBlock::iterator IP = Builder.GetInsertPoint();
502    if (IP != BlockBegin) {
503      --IP;
504      for (; ScanLimit; --IP, --ScanLimit) {
505        // Don't count dbg.value against the ScanLimit, to avoid perturbing the
506        // generated code.
507        if (isa<DbgInfoIntrinsic>(IP))
508          ScanLimit++;
509        if (IP->getOpcode() == Instruction::GetElementPtr &&
510            IP->getOperand(0) == V && IP->getOperand(1) == Idx)
511          return IP;
512        if (IP == BlockBegin) break;
513      }
514    }
515
516    // Save the original insertion point so we can restore it when we're done.
517    BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
518    BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
519
520    // Move the insertion point out of as many loops as we can.
521    while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
522      if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
523      BasicBlock *Preheader = L->getLoopPreheader();
524      if (!Preheader) break;
525
526      // Ok, move up a level.
527      Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
528    }
529
530    // Emit a GEP.
531    Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
532    rememberInstruction(GEP);
533
534    // Restore the original insert point.
535    if (SaveInsertBB)
536      restoreInsertPoint(SaveInsertBB, SaveInsertPt);
537
538    return GEP;
539  }
540
541  // Save the original insertion point so we can restore it when we're done.
542  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
543  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
544
545  // Move the insertion point out of as many loops as we can.
546  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
547    if (!L->isLoopInvariant(V)) break;
548
549    bool AnyIndexNotLoopInvariant = false;
550    for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
551         E = GepIndices.end(); I != E; ++I)
552      if (!L->isLoopInvariant(*I)) {
553        AnyIndexNotLoopInvariant = true;
554        break;
555      }
556    if (AnyIndexNotLoopInvariant)
557      break;
558
559    BasicBlock *Preheader = L->getLoopPreheader();
560    if (!Preheader) break;
561
562    // Ok, move up a level.
563    Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
564  }
565
566  // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
567  // because ScalarEvolution may have changed the address arithmetic to
568  // compute a value which is beyond the end of the allocated object.
569  Value *Casted = V;
570  if (V->getType() != PTy)
571    Casted = InsertNoopCastOfTo(Casted, PTy);
572  Value *GEP = Builder.CreateGEP(Casted,
573                                 GepIndices.begin(),
574                                 GepIndices.end(),
575                                 "scevgep");
576  Ops.push_back(SE.getUnknown(GEP));
577  rememberInstruction(GEP);
578
579  // Restore the original insert point.
580  if (SaveInsertBB)
581    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
582
583  return expand(SE.getAddExpr(Ops));
584}
585
586/// isNonConstantNegative - Return true if the specified scev is negated, but
587/// not a constant.
588static bool isNonConstantNegative(const SCEV *F) {
589  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(F);
590  if (!Mul) return false;
591
592  // If there is a constant factor, it will be first.
593  const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
594  if (!SC) return false;
595
596  // Return true if the value is negative, this matches things like (-42 * V).
597  return SC->getValue()->getValue().isNegative();
598}
599
600/// PickMostRelevantLoop - Given two loops, pick the one that's most relevant
601/// for SCEV expansion. If one is nested inside the other, pick the inner
602/// (more deeply nested) one. If they are siblings, pick the later one.
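/// For example (illustrative), if A contains B then B (the inner loop) is
/// picked, and if A and B are siblings with A's header dominating B's, then B
/// (the later loop) is picked.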
603static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
604                                        DominatorTree &DT) {
605  if (!A) return B;
606  if (!B) return A;
607  if (A->contains(B)) return B;
608  if (B->contains(A)) return A;
609  if (DT.dominates(A->getHeader(), B->getHeader())) return B;
610  if (DT.dominates(B->getHeader(), A->getHeader())) return A;
611  return A; // Arbitrarily break the tie.
612}
613
614/// getRelevantLoop - Get the most relevant loop associated with the given
615/// expression, according to PickMostRelevantLoop.
616const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
617  // Test whether we've already computed the most relevant loop for this SCEV.
618  std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
619    RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
620  if (!Pair.second)
621    return Pair.first->second;
622
623  if (isa<SCEVConstant>(S))
624    // A constant has no relevant loops.
625    return 0;
626  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
627    if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
628      return Pair.first->second = SE.LI->getLoopFor(I->getParent());
629    // A non-instruction has no relevant loops.
630    return 0;
631  }
632  if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
633    const Loop *L = 0;
634    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
635      L = AR->getLoop();
636    for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
637         I != E; ++I)
638      L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
639    return RelevantLoops[N] = L;
640  }
641  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
642    const Loop *Result = getRelevantLoop(C->getOperand());
643    return RelevantLoops[C] = Result;
644  }
645  if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
646    const Loop *Result =
647      PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
648                           getRelevantLoop(D->getRHS()),
649                           *SE.DT);
650    return RelevantLoops[D] = Result;
651  }
652  llvm_unreachable("Unexpected SCEV type!");
653  return 0;
654}
655
656namespace {
657
658/// LoopCompare - Compare loops by PickMostRelevantLoop.
659class LoopCompare {
660  DominatorTree &DT;
661public:
662  explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
663
664  bool operator()(std::pair<const Loop *, const SCEV *> LHS,
665                  std::pair<const Loop *, const SCEV *> RHS) const {
666    // Keep pointer operands sorted at the end.
667    if (LHS.second->getType()->isPointerTy() !=
668        RHS.second->getType()->isPointerTy())
669      return LHS.second->getType()->isPointerTy();
670
671    // Compare loops with PickMostRelevantLoop.
672    if (LHS.first != RHS.first)
673      return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
674
675    // If one operand is a non-constant negative and the other is not,
676    // put the non-constant negative on the right so that a sub can
677    // be used instead of a negate and add.
678    if (isNonConstantNegative(LHS.second)) {
679      if (!isNonConstantNegative(RHS.second))
680        return false;
681    } else if (isNonConstantNegative(RHS.second))
682      return true;
683
684    // Otherwise they are equivalent according to this comparison.
685    return false;
686  }
687};
688
689}
690
691Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
692  const Type *Ty = SE.getEffectiveSCEVType(S->getType());
693
694  // Collect all the add operands in a loop, along with their associated loops.
695  // Iterate in reverse so that constants are emitted last, all else equal, and
696  // so that pointer operands are inserted first, which the code below relies on
697  // to form more involved GEPs.
698  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
699  for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
700       E(S->op_begin()); I != E; ++I)
701    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
702
703  // Sort by loop. Use a stable sort so that constants follow non-constants and
704  // pointer operands precede non-pointer operands.
705  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
706
707  // Emit instructions to add all the operands. Hoist as much as possible
708  // out of loops, and form meaningful getelementptrs where possible.
709  Value *Sum = 0;
710  for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
711       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
712    const Loop *CurLoop = I->first;
713    const SCEV *Op = I->second;
714    if (!Sum) {
715      // This is the first operand. Just expand it.
716      Sum = expand(Op);
717      ++I;
718    } else if (const PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
719      // The running sum expression is a pointer. Try to form a getelementptr
720      // at this level with that as the base.
721      SmallVector<const SCEV *, 4> NewOps;
722      for (; I != E && I->first == CurLoop; ++I) {
723        // If the operand is a SCEVUnknown not wrapping an instruction, peek
724        // through it, to enable more of it to be folded into the GEP.
725        const SCEV *X = I->second;
726        if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
727          if (!isa<Instruction>(U->getValue()))
728            X = SE.getSCEV(U->getValue());
729        NewOps.push_back(X);
730      }
731      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
732    } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
733      // The running sum is an integer, and there's a pointer at this level.
734      // Try to form a getelementptr. If the running sum is an instruction,
735      // use a SCEVUnknown to avoid re-analyzing it.
736      SmallVector<const SCEV *, 4> NewOps;
737      NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
738                                               SE.getSCEV(Sum));
739      for (++I; I != E && I->first == CurLoop; ++I)
740        NewOps.push_back(I->second);
741      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
742    } else if (isNonConstantNegative(Op)) {
743      // Instead of doing a negate and add, just do a subtract.
744      Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
745      Sum = InsertNoopCastOfTo(Sum, Ty);
746      Sum = InsertBinop(Instruction::Sub, Sum, W);
747      ++I;
748    } else {
749      // A simple add.
750      Value *W = expandCodeFor(Op, Ty);
751      Sum = InsertNoopCastOfTo(Sum, Ty);
752      // Canonicalize a constant to the RHS.
753      if (isa<Constant>(Sum)) std::swap(Sum, W);
754      Sum = InsertBinop(Instruction::Add, Sum, W);
755      ++I;
756    }
757  }
758
759  return Sum;
760}
761
762Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
763  const Type *Ty = SE.getEffectiveSCEVType(S->getType());
764
765  // Collect all the mul operands in a loop, along with their associated loops.
766  // Iterate in reverse so that constants are emitted last, all else equal.
767  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
768  for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
769       E(S->op_begin()); I != E; ++I)
770    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
771
772  // Sort by loop. Use a stable sort so that constants follow non-constants.
773  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
774
775  // Emit instructions to mul all the operands. Hoist as much as possible
776  // out of loops.
777  Value *Prod = 0;
778  for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
779       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
780    const SCEV *Op = I->second;
781    if (!Prod) {
782      // This is the first operand. Just expand it.
783      Prod = expand(Op);
784      ++I;
785    } else if (Op->isAllOnesValue()) {
786      // Instead of doing a multiply by negative one, just do a negate.
787      Prod = InsertNoopCastOfTo(Prod, Ty);
788      Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
789      ++I;
790    } else {
791      // A simple mul.
792      Value *W = expandCodeFor(Op, Ty);
793      Prod = InsertNoopCastOfTo(Prod, Ty);
794      // Canonicalize a constant to the RHS.
795      if (isa<Constant>(Prod)) std::swap(Prod, W);
796      Prod = InsertBinop(Instruction::Mul, Prod, W);
797      ++I;
798    }
799  }
800
801  return Prod;
802}
803
804Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
805  const Type *Ty = SE.getEffectiveSCEVType(S->getType());
806
807  Value *LHS = expandCodeFor(S->getLHS(), Ty);
808  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
809    const APInt &RHS = SC->getValue()->getValue();
810    if (RHS.isPowerOf2())
811      return InsertBinop(Instruction::LShr, LHS,
812                         ConstantInt::get(Ty, RHS.logBase2()));
813  }
814
815  Value *RHS = expandCodeFor(S->getRHS(), Ty);
816  return InsertBinop(Instruction::UDiv, LHS, RHS);
817}
818
819/// Move parts of Base into Rest to leave Base with the minimal
820/// expression that provides a pointer operand suitable for a
821/// GEP expansion.
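/// For example (illustrative), Base = {(%p + 16),+,8}<L> with Rest = 0 is
/// rewritten to Base = %p and Rest = (16 + {0,+,8}<L>), leaving a bare
/// pointer value suitable as a GEP base.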
822static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
823                              ScalarEvolution &SE) {
824  while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
825    Base = A->getStart();
826    Rest = SE.getAddExpr(Rest,
827                         SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
828                                          A->getStepRecurrence(SE),
829                                          A->getLoop(),
830                                          // FIXME: A->getNoWrapFlags(FlagNW)
831                                          SCEV::FlagAnyWrap));
832  }
833  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
834    Base = A->getOperand(A->getNumOperands()-1);
835    SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
836    NewAddOps.back() = Rest;
837    Rest = SE.getAddExpr(NewAddOps);
838    ExposePointerBase(Base, Rest, SE);
839  }
840}
841
842/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
843/// the base addrec, which is the addrec without any non-loop-dominating
844/// values, and return the PHI.
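/// The result is a header PHI plus an increment in each latch predecessor,
/// e.g. (illustrative, with made-up operand names):
///   %lsr.iv      = phi i64 [ %start, %preheader ], [ %lsr.iv.next, %latch ]
///   %lsr.iv.next = add i64 %lsr.iv, %step
/// Pointer-typed addrecs use a GEP for the increment instead of an add or sub.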
845PHINode *
846SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
847                                        const Loop *L,
848                                        const Type *ExpandTy,
849                                        const Type *IntTy) {
850  // Reuse a previously-inserted PHI, if present.
851  for (BasicBlock::iterator I = L->getHeader()->begin();
852       PHINode *PN = dyn_cast<PHINode>(I); ++I)
853    if (SE.isSCEVable(PN->getType()) &&
854        (SE.getEffectiveSCEVType(PN->getType()) ==
855         SE.getEffectiveSCEVType(Normalized->getType())) &&
856        SE.getSCEV(PN) == Normalized)
857      if (BasicBlock *LatchBlock = L->getLoopLatch()) {
858        Instruction *IncV =
859          cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
860
861        // Determine if this is a well-behaved chain of instructions leading
862        // back to the PHI. It probably will be, for example if we're scanning
863        // an inner loop that LSR has already visited, but it isn't guaranteed
864        // to be.
865        do {
866          if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
867              (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) {
868            IncV = 0;
869            break;
870          }
871          // If any of the operands don't dominate the insert position, bail.
872          // Addrec operands are always loop-invariant, so this can only happen
873          // if there are instructions which haven't been hoisted.
874          for (User::op_iterator OI = IncV->op_begin()+1,
875               OE = IncV->op_end(); OI != OE; ++OI)
876            if (Instruction *OInst = dyn_cast<Instruction>(OI))
877              if (!SE.DT->dominates(OInst, IVIncInsertPos)) {
878                IncV = 0;
879                break;
880              }
881          if (!IncV)
882            break;
883          // Advance to the next instruction.
884          IncV = dyn_cast<Instruction>(IncV->getOperand(0));
885          if (!IncV)
886            break;
887          if (IncV->mayHaveSideEffects()) {
888            IncV = 0;
889            break;
890          }
891        } while (IncV != PN);
892
893        if (IncV) {
894          // Ok, the add recurrence looks usable.
895          // Remember this PHI, even in post-inc mode.
896          InsertedValues.insert(PN);
897          // Remember the increment.
898          IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
899          rememberInstruction(IncV);
900          if (L == IVIncInsertLoop)
901            do {
902              if (SE.DT->dominates(IncV, IVIncInsertPos))
903                break;
904              // Make sure the increment is where we want it. But don't move it
905              // down past a potential existing post-inc user.
906              IncV->moveBefore(IVIncInsertPos);
907              IVIncInsertPos = IncV;
908              IncV = cast<Instruction>(IncV->getOperand(0));
909            } while (IncV != PN);
910          return PN;
911        }
912      }
913
914  // Save the original insertion point so we can restore it when we're done.
915  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
916  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
917
918  // Expand code for the start value.
919  Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
920                                L->getHeader()->begin());
921
922  // Expand code for the step value. Insert instructions right before the
923  // terminator corresponding to the back-edge. Do this before creating the PHI
924  // so that PHI reuse code doesn't see an incomplete PHI. If the stride is
925  // negative, insert a sub instead of an add for the increment (unless it's a
926  // constant, because subtracts of constants are canonicalized to adds).
927  const SCEV *Step = Normalized->getStepRecurrence(SE);
928  bool isPointer = ExpandTy->isPointerTy();
929  bool isNegative = !isPointer && isNonConstantNegative(Step);
930  if (isNegative)
931    Step = SE.getNegativeSCEV(Step);
932  Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
933
934  // Create the PHI.
935  BasicBlock *Header = L->getHeader();
936  Builder.SetInsertPoint(Header, Header->begin());
937  pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
938  PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), "lsr.iv");
939  rememberInstruction(PN);
940
941  // Create the step instructions and populate the PHI.
942  for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
943    BasicBlock *Pred = *HPI;
944
945    // Add a start value.
946    if (!L->contains(Pred)) {
947      PN->addIncoming(StartV, Pred);
948      continue;
949    }
950
951    // Create a step value and add it to the PHI. If IVIncInsertLoop is
952    // non-null and equal to the addrec's loop, insert the instructions
953    // at IVIncInsertPos.
954    Instruction *InsertPos = L == IVIncInsertLoop ?
955      IVIncInsertPos : Pred->getTerminator();
956    Builder.SetInsertPoint(InsertPos->getParent(), InsertPos);
957    Value *IncV;
958    // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
959    if (isPointer) {
960      const PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
961      // If the step isn't constant, don't use an implicitly scaled GEP, because
962      // that would require a multiply inside the loop.
963      if (!isa<ConstantInt>(StepV))
964        GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
965                                    GEPPtrTy->getAddressSpace());
966      const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
967      IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
968      if (IncV->getType() != PN->getType()) {
969        IncV = Builder.CreateBitCast(IncV, PN->getType(), "tmp");
970        rememberInstruction(IncV);
971      }
972    } else {
973      IncV = isNegative ?
974        Builder.CreateSub(PN, StepV, "lsr.iv.next") :
975        Builder.CreateAdd(PN, StepV, "lsr.iv.next");
976      rememberInstruction(IncV);
977    }
978    PN->addIncoming(IncV, Pred);
979  }
980
981  // Restore the original insert point.
982  if (SaveInsertBB)
983    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
984
985  // Remember this PHI, even in post-inc mode.
986  InsertedValues.insert(PN);
987
988  return PN;
989}
990
991Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
992  const Type *STy = S->getType();
993  const Type *IntTy = SE.getEffectiveSCEVType(STy);
994  const Loop *L = S->getLoop();
995
996  // Determine a normalized form of this expression, which is the expression
997  // before any post-inc adjustment is made.
998  const SCEVAddRecExpr *Normalized = S;
999  if (PostIncLoops.count(L)) {
1000    PostIncLoopSet Loops;
1001    Loops.insert(L);
1002    Normalized =
1003      cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
1004                                                  Loops, SE, *SE.DT));
1005  }
1006
1007  // Strip off any non-loop-dominating component from the addrec start.
1008  const SCEV *Start = Normalized->getStart();
1009  const SCEV *PostLoopOffset = 0;
1010  if (!SE.properlyDominates(Start, L->getHeader())) {
1011    PostLoopOffset = Start;
1012    Start = SE.getConstant(Normalized->getType(), 0);
1013    Normalized = cast<SCEVAddRecExpr>(
1014      SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
1015                       Normalized->getLoop(),
1016                       // FIXME: Normalized->getNoWrapFlags(FlagNW)
1017                       SCEV::FlagAnyWrap));
1018  }
1019
1020  // Strip off any non-loop-dominating component from the addrec step.
1021  const SCEV *Step = Normalized->getStepRecurrence(SE);
1022  const SCEV *PostLoopScale = 0;
1023  if (!SE.dominates(Step, L->getHeader())) {
1024    PostLoopScale = Step;
1025    Step = SE.getConstant(Normalized->getType(), 1);
1026    Normalized =
1027      cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step,
1028                                            Normalized->getLoop(),
1029                                            // FIXME: Normalized
1030                                            // ->getNoWrapFlags(FlagNW)
1031                                            SCEV::FlagAnyWrap));
1032  }
1033
1034  // Expand the core addrec. If we need post-loop scaling, force it to
1035  // expand to an integer type to avoid the need for additional casting.
1036  const Type *ExpandTy = PostLoopScale ? IntTy : STy;
1037  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
1038
1039  // Accommodate post-inc mode, if necessary.
1040  Value *Result;
1041  if (!PostIncLoops.count(L))
1042    Result = PN;
1043  else {
1044    // In PostInc mode, use the post-incremented value.
1045    BasicBlock *LatchBlock = L->getLoopLatch();
1046    assert(LatchBlock && "PostInc mode requires a unique loop latch!");
1047    Result = PN->getIncomingValueForBlock(LatchBlock);
1048  }
1049
1050  // Re-apply any non-loop-dominating scale.
1051  if (PostLoopScale) {
1052    Result = InsertNoopCastOfTo(Result, IntTy);
1053    Result = Builder.CreateMul(Result,
1054                               expandCodeFor(PostLoopScale, IntTy));
1055    rememberInstruction(Result);
1056  }
1057
1058  // Re-apply any non-loop-dominating offset.
1059  if (PostLoopOffset) {
1060    if (const PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
1061      const SCEV *const OffsetArray[1] = { PostLoopOffset };
1062      Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
1063    } else {
1064      Result = InsertNoopCastOfTo(Result, IntTy);
1065      Result = Builder.CreateAdd(Result,
1066                                 expandCodeFor(PostLoopOffset, IntTy));
1067      rememberInstruction(Result);
1068    }
1069  }
1070
1071  return Result;
1072}
1073
1074Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
1075  if (!CanonicalMode) return expandAddRecExprLiterally(S);
1076
1077  const Type *Ty = SE.getEffectiveSCEVType(S->getType());
1078  const Loop *L = S->getLoop();
1079
1080  // First check for an existing canonical IV in a suitable type.
1081  PHINode *CanonicalIV = 0;
1082  if (PHINode *PN = L->getCanonicalInductionVariable())
1083    if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
1084      CanonicalIV = PN;
1085
1086  // Rewrite an AddRec in terms of the canonical induction variable, if
1087  // its type is narrower.
1088  if (CanonicalIV &&
1089      SE.getTypeSizeInBits(CanonicalIV->getType()) >
1090      SE.getTypeSizeInBits(Ty)) {
1091    SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
1092    for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
1093      NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
1094    Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
1095                                       // FIXME: S->getNoWrapFlags(FlagNW)
1096                                       SCEV::FlagAnyWrap));
1097    BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
1098    BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
1099    BasicBlock::iterator NewInsertPt =
1100      llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
1101    while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt))
1102      ++NewInsertPt;
1103    V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
1104                      NewInsertPt);
1105    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
1106    return V;
1107  }
1108
1109  // {X,+,F} --> X + {0,+,F}
1110  if (!S->getStart()->isZero()) {
1111    SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
1112    NewOps[0] = SE.getConstant(Ty, 0);
1113    // FIXME: can use S->getNoWrapFlags()
1114    const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap);
1115
1116    // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
1117    // comments on expandAddToGEP for details.
1118    const SCEV *Base = S->getStart();
1119    const SCEV *RestArray[1] = { Rest };
1120    // Dig into the expression to find the pointer base for a GEP.
1121    ExposePointerBase(Base, RestArray[0], SE);
1122    // If we found a pointer, expand the AddRec with a GEP.
1123    if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
1124      // Make sure the Base isn't something exotic, such as a multiplied
1125      // or divided pointer value. In those cases, the result type isn't
1126      // actually a pointer type.
1127      if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
1128        Value *StartV = expand(Base);
1129        assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
1130        return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
1131      }
1132    }
1133
1134    // Just do a normal add. Pre-expand the operands to suppress folding.
1135    return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
1136                                SE.getUnknown(expand(Rest))));
1137  }
1138
1139  // If we don't yet have a canonical IV, create one.
1140  if (!CanonicalIV) {
1141    // Create and insert the PHI node for the induction variable in the
1142    // specified loop.
1143    BasicBlock *Header = L->getHeader();
1144    pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
1145    CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
1146                                  Header->begin());
1147    rememberInstruction(CanonicalIV);
1148
1149    Constant *One = ConstantInt::get(Ty, 1);
1150    for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1151      BasicBlock *HP = *HPI;
1152      if (L->contains(HP)) {
1153        // Insert a unit add instruction right before the terminator
1154        // corresponding to the back-edge.
1155        Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
1156                                                     "indvar.next",
1157                                                     HP->getTerminator());
1158        rememberInstruction(Add);
1159        CanonicalIV->addIncoming(Add, HP);
1160      } else {
1161        CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
1162      }
1163    }
1164  }
1165
1166  // {0,+,1} --> Insert a canonical induction variable into the loop!
1167  if (S->isAffine() && S->getOperand(1)->isOne()) {
1168    assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
1169           "IVs with types different from the canonical IV should "
1170           "already have been handled!");
1171    return CanonicalIV;
1172  }
1173
1174  // {0,+,F} --> {0,+,1} * F
1175
1176  // If this is a simple linear addrec, emit it now as a special case.
1177  if (S->isAffine())    // {0,+,F} --> i*F
1178    return
1179      expand(SE.getTruncateOrNoop(
1180        SE.getMulExpr(SE.getUnknown(CanonicalIV),
1181                      SE.getNoopOrAnyExtend(S->getOperand(1),
1182                                            CanonicalIV->getType())),
1183        Ty));
1184
1185  // If this is a chain of recurrences, turn it into a closed form, using the
1186  // folders, then expandCodeFor the closed form.  This allows the folders to
1187  // simplify the expression without having to build a bunch of special code
1188  // into this folder.
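  // For example (illustrative), the quadratic recurrence {0,+,1,+,1} evaluated
  // at iteration %indvar folds to (%indvar + %indvar*(%indvar - 1)/2), i.e.
  // %indvar*(%indvar + 1)/2, which is then expanded as ordinary arithmetic.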
1189  const SCEV *IH = SE.getUnknown(CanonicalIV);   // Get I as a "symbolic" SCEV.
1190
1191  // Promote S up to the canonical IV type, if the cast is foldable.
1192  const SCEV *NewS = S;
1193  const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
1194  if (isa<SCEVAddRecExpr>(Ext))
1195    NewS = Ext;
1196
1197  const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
1198  //cerr << "Evaluated: " << *this << "\n     to: " << *V << "\n";
1199
1200  // Truncate the result down to the original type, if needed.
1201  const SCEV *T = SE.getTruncateOrNoop(V, Ty);
1202  return expand(T);
1203}
1204
1205Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
1206  const Type *Ty = SE.getEffectiveSCEVType(S->getType());
1207  Value *V = expandCodeFor(S->getOperand(),
1208                           SE.getEffectiveSCEVType(S->getOperand()->getType()));
1209  Value *I = Builder.CreateTrunc(V, Ty, "tmp");
1210  rememberInstruction(I);
1211  return I;
1212}
1213
1214Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
1215  const Type *Ty = SE.getEffectiveSCEVType(S->getType());
1216  Value *V = expandCodeFor(S->getOperand(),
1217                           SE.getEffectiveSCEVType(S->getOperand()->getType()));
1218  Value *I = Builder.CreateZExt(V, Ty, "tmp");
1219  rememberInstruction(I);
1220  return I;
1221}
1222
1223Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
1224  const Type *Ty = SE.getEffectiveSCEVType(S->getType());
1225  Value *V = expandCodeFor(S->getOperand(),
1226                           SE.getEffectiveSCEVType(S->getOperand()->getType()));
1227  Value *I = Builder.CreateSExt(V, Ty, "tmp");
1228  rememberInstruction(I);
1229  return I;
1230}
1231
1232Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
1233  Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
1234  const Type *Ty = LHS->getType();
1235  for (int i = S->getNumOperands()-2; i >= 0; --i) {
1236    // In the case of mixed integer and pointer types, do the
1237    // rest of the comparisons as integer.
1238    if (S->getOperand(i)->getType() != Ty) {
1239      Ty = SE.getEffectiveSCEVType(Ty);
1240      LHS = InsertNoopCastOfTo(LHS, Ty);
1241    }
1242    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
1243    Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp");
1244    rememberInstruction(ICmp);
1245    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
1246    rememberInstruction(Sel);
1247    LHS = Sel;
1248  }
1249  // In the case of mixed integer and pointer types, cast the
1250  // final result back to the pointer type.
1251  if (LHS->getType() != S->getType())
1252    LHS = InsertNoopCastOfTo(LHS, S->getType());
1253  return LHS;
1254}
1255
1256Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
1257  Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
1258  const Type *Ty = LHS->getType();
1259  for (int i = S->getNumOperands()-2; i >= 0; --i) {
1260    // In the case of mixed integer and pointer types, do the
1261    // rest of the comparisons as integer.
1262    if (S->getOperand(i)->getType() != Ty) {
1263      Ty = SE.getEffectiveSCEVType(Ty);
1264      LHS = InsertNoopCastOfTo(LHS, Ty);
1265    }
1266    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
1267    Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp");
1268    rememberInstruction(ICmp);
1269    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
1270    rememberInstruction(Sel);
1271    LHS = Sel;
1272  }
1273  // In the case of mixed integer and pointer types, cast the
1274  // final result back to the pointer type.
1275  if (LHS->getType() != S->getType())
1276    LHS = InsertNoopCastOfTo(LHS, S->getType());
1277  return LHS;
1278}
1279
1280Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty,
1281                                   Instruction *I) {
1282  BasicBlock::iterator IP = I;
1283  while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP))
1284    ++IP;
1285  Builder.SetInsertPoint(IP->getParent(), IP);
1286  return expandCodeFor(SH, Ty);
1287}
1288
1289Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) {
1290  // Expand the code for this SCEV.
1291  Value *V = expand(SH);
1292  if (Ty) {
1293    assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1294           "non-trivial casts should be done with the SCEVs directly!");
1295    V = InsertNoopCastOfTo(V, Ty);
1296  }
1297  return V;
1298}
1299
1300Value *SCEVExpander::expand(const SCEV *S) {
1301  // Compute an insertion point for this SCEV object. Hoist the instructions
1302  // as far out in the loop nest as possible.
1303  Instruction *InsertPt = Builder.GetInsertPoint();
1304  for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ;
1305       L = L->getParentLoop())
1306    if (SE.isLoopInvariant(S, L)) {
1307      if (!L) break;
1308      if (BasicBlock *Preheader = L->getLoopPreheader())
1309        InsertPt = Preheader->getTerminator();
1310    } else {
1311      // If the SCEV is computable at this level, insert it into the header
1312      // after the PHIs (and after any other instructions that we've inserted
1313      // there) so that it is guaranteed to dominate any user inside the loop.
1314      if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
1315        InsertPt = L->getHeader()->getFirstNonPHI();
1316      while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
1317        InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
1318      break;
1319    }
1320
1321  // Check to see if we already expanded this here.
1322  std::map<std::pair<const SCEV *, Instruction *>,
1323           AssertingVH<Value> >::iterator I =
1324    InsertedExpressions.find(std::make_pair(S, InsertPt));
1325  if (I != InsertedExpressions.end())
1326    return I->second;
1327
1328  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
1329  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
1330  Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
1331
1332  // Expand the expression into instructions.
1333  Value *V = visit(S);
1334
1335  // Remember the expanded value for this SCEV at this location.
1336  if (PostIncLoops.empty())
1337    InsertedExpressions[std::make_pair(S, InsertPt)] = V;
1338
1339  restoreInsertPoint(SaveInsertBB, SaveInsertPt);
1340  return V;
1341}
1342
1343void SCEVExpander::rememberInstruction(Value *I) {
1344  if (!PostIncLoops.empty())
1345    InsertedPostIncValues.insert(I);
1346  else
1347    InsertedValues.insert(I);
1348
1349  // If we just claimed an existing instruction and that instruction had
1350  // been the insert point, adjust the insert point forward so that
1351  // subsequently inserted code will be dominated.
1352  if (Builder.GetInsertPoint() == I) {
1353    BasicBlock::iterator It = cast<Instruction>(I);
1354    do { ++It; } while (isInsertedInstruction(It) ||
1355                        isa<DbgInfoIntrinsic>(It));
1356    Builder.SetInsertPoint(Builder.GetInsertBlock(), It);
1357  }
1358}
1359
1360void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
1361  // If we acquired more instructions since the old insert point was saved,
1362  // advance past them.
1363  while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I;
1364
1365  Builder.SetInsertPoint(BB, I);
1366}
1367
1368/// getOrInsertCanonicalInductionVariable - This method returns the
1369/// canonical induction variable of the specified type for the specified
1370/// loop (inserting one if there is none).  A canonical induction variable
1371/// starts at zero and steps by one on each iteration.
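/// The emitted IR has the shape (illustrative):
///   %indvar      = phi i32 [ 0, %preheader ], [ %indvar.next, %backedge ]
///   %indvar.next = add i32 %indvar, 1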
1372PHINode *
1373SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
1374                                                    const Type *Ty) {
1375  assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
1376
1377  // Build a SCEV for {0,+,1}<L>.
1378  // Conservatively use FlagAnyWrap for now.
1379  const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
1380                                   SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
1381
1382  // Emit code for it.
1383  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
1384  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
1385  PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
1386  if (SaveInsertBB)
1387    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
1388
1389  return V;
1390}
1391