InstructionCombining.cpp revision f3d1b5dd68b8c9fe15158ce330a8b1949269e3df
//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// InstructionCombining - Combine instructions to form fewer, simple
// instructions.  This pass does not modify the CFG.  This pass is where
// algebraic simplification happens.
//
// This pass combines things like:
//    %Y = add i32 %X, 1
//    %Z = add i32 %Y, 1
// into:
//    %Z = add i32 %X, 2
//
// This is a simple worklist driven algorithm.
//
// This pass guarantees that the following canonicalizations are performed on
// the program:
//    1. If a binary operator has a constant operand, it is moved to the RHS
//    2. Bitwise operators with constant operands are always grouped so that
//       shifts are performed first, then or's, then and's, then xor's.
//    3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
//    4. All cmp instructions on boolean values are replaced with logical ops
//    5. add X, X is represented as (X*2) => (X << 1)
//    6. Multiplies with a power-of-two constant argument are transformed into
//       shifts.
//   ... etc.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Scalar.h"
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <climits>
using namespace llvm;
using namespace llvm::PatternMatch;

STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumDeadStore, "Number of dead stores eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");


char InstCombiner::ID = 0;
static RegisterPass<InstCombiner>
X("instcombine", "Combine redundant instructions");

void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addPreservedID(LCSSAID);
  AU.setPreservesCFG();
}


// isOnlyUse - Return true if this instruction will be deleted if we stop using
// it.
static bool isOnlyUse(Value *V) {
  return V->hasOneUse() || isa<Constant>(V);
}

// getPromotedType - Return the specified type promoted as it would be to pass
// through a va_arg area...
static const Type *getPromotedType(const Type *Ty) {
  if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}

/// ShouldChangeType - Return true if it is desirable to convert a computation
/// from 'From' to 'To'.  We don't want to convert from a legal to an illegal
/// type for example, or from a smaller to a larger illegal type.
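/// For example, on a target where i32 and i64 are the only legal integer
/// types, i32 -> i64 and i160 -> i64 are desirable, but i32 -> i160 and
/// i64 -> i160 are not.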
bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
  assert(isa<IntegerType>(From) && isa<IntegerType>(To));

  // If we don't have TD, we don't know if the source/dest are legal.
  if (!TD) return false;

  unsigned FromWidth = From->getPrimitiveSizeInBits();
  unsigned ToWidth = To->getPrimitiveSizeInBits();
  bool FromLegal = TD->isLegalInteger(FromWidth);
  bool ToLegal = TD->isLegalInteger(ToWidth);

  // If this is a legal integer from type, and the result would be an illegal
  // type, don't do the transformation.
  if (FromLegal && !ToLegal)
    return false;

  // Otherwise, if both are illegal, do not increase the size of the result. We
  // do allow things like i160 -> i64, but not i64 -> i160.
  if (!FromLegal && !ToLegal && ToWidth > FromWidth)
    return false;

  return true;
}

/// getBitCastOperand - If the specified operand is a CastInst, a constant
/// expression bitcast, or a GetElementPtrInst with all zero indices, return the
/// operand value, otherwise return null.
static Value *getBitCastOperand(Value *V) {
  if (Operator *O = dyn_cast<Operator>(V)) {
    if (O->getOpcode() == Instruction::BitCast)
      return O->getOperand(0);
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
      if (GEP->hasAllZeroIndices())
        return GEP->getPointerOperand();
  }
  return 0;
}



// SimplifyCommutative - This performs a few simplifications for commutative
// operators:
//
//  1. Order operands such that they are listed from right (least complex) to
//     left (most complex).  This puts constants before unary operators before
//     binary operators.
//
//  2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2))
//  3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
//
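//  For example, transform 2 rewrites (add (add X, 1), 2) into (add X, 3), and
//  transform 3 rewrites (add (add V1, 1), (add V2, 2)) into
//  (add (add V1, V2), 3).
//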
bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
  bool Changed = false;
  if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1)))
    Changed = !I.swapOperands();

  if (!I.isAssociative()) return Changed;
  Instruction::BinaryOps Opcode = I.getOpcode();
  if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
    if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
      if (isa<Constant>(I.getOperand(1))) {
        Constant *Folded = ConstantExpr::get(I.getOpcode(),
                                             cast<Constant>(I.getOperand(1)),
                                             cast<Constant>(Op->getOperand(1)));
        I.setOperand(0, Op->getOperand(0));
        I.setOperand(1, Folded);
        return true;
      } else if (BinaryOperator *Op1=dyn_cast<BinaryOperator>(I.getOperand(1)))
        if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) &&
            isOnlyUse(Op) && isOnlyUse(Op1)) {
          Constant *C1 = cast<Constant>(Op->getOperand(1));
          Constant *C2 = cast<Constant>(Op1->getOperand(1));

          // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
          Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
          Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
                                                    Op1->getOperand(0),
                                                    Op1->getName(), &I);
          Worklist.Add(New);
          I.setOperand(0, New);
          I.setOperand(1, Folded);
          return true;
        }
    }
  return Changed;
}

// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
// if the LHS is a constant zero (which is the 'negate' form).
//
Value *InstCombiner::dyn_castNegVal(Value *V) const {
  if (BinaryOperator::isNeg(V))
    return BinaryOperator::getNegArgument(V);

  // Constants can be considered to be negated values if they can be folded.
  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
    return ConstantExpr::getNeg(C);

  if (ConstantVector *C = dyn_cast<ConstantVector>(V))
    if (C->getType()->getElementType()->isInteger())
      return ConstantExpr::getNeg(C);

  return 0;
}

// dyn_castFNegVal - Given an 'fsub' instruction, return the RHS of the
// instruction if the LHS is a constant negative zero (which is the 'negate'
// form).
//
static inline Value *dyn_castFNegVal(Value *V) {
  if (BinaryOperator::isFNeg(V))
    return BinaryOperator::getFNegArgument(V);

  // Constants can be considered to be negated values if they can be folded.
  if (ConstantFP *C = dyn_cast<ConstantFP>(V))
    return ConstantExpr::getFNeg(C);

  if (ConstantVector *C = dyn_cast<ConstantVector>(V))
    if (C->getType()->getElementType()->isFloatingPoint())
      return ConstantExpr::getFNeg(C);

  return 0;
}

/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms,
/// returning the kind and providing the out parameter results if we
/// successfully match.
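/// For example, (icmp sgt X, Y) ? X : Y matches as SPF_SMAX, and
/// (icmp ult X, Y) ? Y : X matches as SPF_UMAX.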
static SelectPatternFlavor
MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
  SelectInst *SI = dyn_cast<SelectInst>(V);
  if (SI == 0) return SPF_UNKNOWN;

  ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
  if (ICI == 0) return SPF_UNKNOWN;

  LHS = ICI->getOperand(0);
  RHS = ICI->getOperand(1);

  // (icmp X, Y) ? X : Y
  if (SI->getTrueValue() == ICI->getOperand(0) &&
      SI->getFalseValue() == ICI->getOperand(1)) {
    switch (ICI->getPredicate()) {
    default: return SPF_UNKNOWN; // Equality.
    case ICmpInst::ICMP_UGT:
    case ICmpInst::ICMP_UGE: return SPF_UMAX;
    case ICmpInst::ICMP_SGT:
    case ICmpInst::ICMP_SGE: return SPF_SMAX;
    case ICmpInst::ICMP_ULT:
    case ICmpInst::ICMP_ULE: return SPF_UMIN;
    case ICmpInst::ICMP_SLT:
    case ICmpInst::ICMP_SLE: return SPF_SMIN;
    }
  }

  // (icmp X, Y) ? Y : X
  if (SI->getTrueValue() == ICI->getOperand(1) &&
      SI->getFalseValue() == ICI->getOperand(0)) {
    switch (ICI->getPredicate()) {
      default: return SPF_UNKNOWN; // Equality.
      case ICmpInst::ICMP_UGT:
      case ICmpInst::ICMP_UGE: return SPF_UMIN;
      case ICmpInst::ICMP_SGT:
      case ICmpInst::ICMP_SGE: return SPF_SMIN;
      case ICmpInst::ICMP_ULT:
      case ICmpInst::ICMP_ULE: return SPF_UMAX;
      case ICmpInst::ICMP_SLT:
      case ICmpInst::ICMP_SLE: return SPF_SMAX;
    }
  }

  // TODO: (X > 4) ? X : 5   -->  (X >= 5) ? X : 5  -->  MAX(X, 5)

  return SPF_UNKNOWN;
}

/// isFreeToInvert - Return true if the specified value is free to invert (apply
/// ~ to).  This happens in cases where the ~ can be eliminated.
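/// For example, inverting a single-use compare just inverts its predicate, so
/// ~(icmp eq X, Y) can become (icmp ne X, Y) without adding an instruction.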
static inline bool isFreeToInvert(Value *V) {
  // ~(~(X)) -> X.
  if (BinaryOperator::isNot(V))
    return true;

  // Constants can be considered to be not'ed values.
  if (isa<ConstantInt>(V))
    return true;

  // Compares can be inverted if they have a single use.
  if (CmpInst *CI = dyn_cast<CmpInst>(V))
    return CI->hasOneUse();

  return false;
}

static inline Value *dyn_castNotVal(Value *V) {
  // If this is not(not(x)) don't return that this is a not: we want the two
  // not's to be folded first.
  if (BinaryOperator::isNot(V)) {
    Value *Operand = BinaryOperator::getNotArgument(V);
    if (!isFreeToInvert(Operand))
      return Operand;
  }

  // Constants can be considered to be not'ed values...
  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
    return ConstantInt::get(C->getType(), ~C->getValue());
  return 0;
}



// dyn_castFoldableMul - If this value is a multiply that can be folded into
// other computations (because it has a constant operand), return the
// non-constant operand of the multiply, and set CST to point to the multiplier.
// Otherwise, return null.
//
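// For example, (mul X, 4) returns X with CST = 4, and (shl X, 3) is treated
// as (mul X, 8), returning X with CST = 8.
//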
static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
  if (V->hasOneUse() && V->getType()->isInteger())
    if (Instruction *I = dyn_cast<Instruction>(V)) {
      if (I->getOpcode() == Instruction::Mul)
        if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
          return I->getOperand(0);
      if (I->getOpcode() == Instruction::Shl)
        if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) {
          // The multiplier is really 1 << CST.
          uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
          uint32_t CSTVal = CST->getLimitedValue(BitWidth);
          CST = ConstantInt::get(V->getType()->getContext(),
                                 APInt(BitWidth, 1).shl(CSTVal));
          return I->getOperand(0);
        }
    }
  return 0;
}

/// AddOne - Add one to a ConstantInt.
static Constant *AddOne(Constant *C) {
  return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
}
/// SubOne - Subtract one from a ConstantInt.
static Constant *SubOne(ConstantInt *C) {
  return ConstantExpr::getSub(C,  ConstantInt::get(C->getType(), 1));
}
/// MultiplyOverflows - True if the multiply cannot be expressed in an int
/// this size.
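/// For example, with i8 operands, 16 * 16 = 256 does not fit in 8 bits
/// whether the multiply is treated as signed or unsigned.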
static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
  uint32_t W = C1->getBitWidth();
  APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
  if (sign) {
    LHSExt.sext(W * 2);
    RHSExt.sext(W * 2);
  } else {
    LHSExt.zext(W * 2);
    RHSExt.zext(W * 2);
  }

  APInt MulExt = LHSExt * RHSExt;

  if (!sign)
    return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));

  APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
  APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
  return MulExt.slt(Min) || MulExt.sgt(Max);
}



/// AssociativeOpt - Perform an optimization on an associative operator.  This
/// function is designed to check a chain of associative operators for a
/// potential to apply a certain optimization.  Since the optimization may be
/// applicable if the expression was reassociated, this checks the chain, then
/// reassociates the expression as necessary to expose the optimization
/// opportunity.  This makes use of a special Functor, which must define
/// 'shouldApply' and 'apply' methods.
///
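/// For example, with the AddRHS functor below, ((C + X) + X) is reassociated
/// to (C + (X + X)) so that the inner add can be rewritten as (shl X, 1).
///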
template<typename Functor>
static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
  unsigned Opcode = Root.getOpcode();
  Value *LHS = Root.getOperand(0);

  // Quick check, see if the immediate LHS matches...
  if (F.shouldApply(LHS))
    return F.apply(Root);

  // Otherwise, if the LHS is not of the same opcode as the root, return.
  Instruction *LHSI = dyn_cast<Instruction>(LHS);
  while (LHSI && LHSI->getOpcode() == Opcode && LHSI->hasOneUse()) {
    // Should we apply this transform to the RHS?
    bool ShouldApply = F.shouldApply(LHSI->getOperand(1));

    // If not to the RHS, check to see if we should apply to the LHS...
    if (!ShouldApply && F.shouldApply(LHSI->getOperand(0))) {
      cast<BinaryOperator>(LHSI)->swapOperands();   // Make the LHS the RHS
      ShouldApply = true;
    }

    // If the functor wants to apply the optimization to the RHS of LHSI,
    // reassociate the expression from ((? op A) op B) to (? op (A op B))
    if (ShouldApply) {
      // Now all of the instructions are in the current basic block, go ahead
      // and perform the reassociation.
      Instruction *TmpLHSI = cast<Instruction>(Root.getOperand(0));

      // First move the selected RHS to the LHS of the root...
      Root.setOperand(0, LHSI->getOperand(1));

      // Make what used to be the LHS of the root be the user of the root...
      Value *ExtraOperand = TmpLHSI->getOperand(1);
      if (&Root == TmpLHSI) {
        Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType()));
        return 0;
      }
      Root.replaceAllUsesWith(TmpLHSI);          // Users now use TmpLHSI
      TmpLHSI->setOperand(1, &Root);             // TmpLHSI now uses the root
      BasicBlock::iterator ARI = &Root; ++ARI;
      TmpLHSI->moveBefore(ARI);                  // Move TmpLHSI to after Root
      ARI = Root;

      // Now propagate the ExtraOperand down the chain of instructions until we
      // get to LHSI.
      while (TmpLHSI != LHSI) {
        Instruction *NextLHSI = cast<Instruction>(TmpLHSI->getOperand(0));
        // Move the instruction to immediately before the chain we are
        // constructing to avoid breaking dominance properties.
        NextLHSI->moveBefore(ARI);
        ARI = NextLHSI;

        Value *NextOp = NextLHSI->getOperand(1);
        NextLHSI->setOperand(1, ExtraOperand);
        TmpLHSI = NextLHSI;
        ExtraOperand = NextOp;
      }

      // Now that the instructions are reassociated, have the functor perform
      // the transformation...
      return F.apply(Root);
    }

    LHSI = dyn_cast<Instruction>(LHSI->getOperand(0));
  }
  return 0;
}

namespace {

// AddRHS - Implements: X + X --> X << 1
struct AddRHS {
  Value *RHS;
  explicit AddRHS(Value *rhs) : RHS(rhs) {}
  bool shouldApply(Value *LHS) const { return LHS == RHS; }
  Instruction *apply(BinaryOperator &Add) const {
    return BinaryOperator::CreateShl(Add.getOperand(0),
                                     ConstantInt::get(Add.getType(), 1));
  }
};

// AddMaskingAnd - Implements (A & C1)+(B & C2) --> (A & C1)|(B & C2)
//                 iff C1&C2 == 0
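// For example, (A & 0xFF00) + (B & 0x00FF) --> (A & 0xFF00) | (B & 0x00FF):
// the masks share no bits, so the add can never carry.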
struct AddMaskingAnd {
  Constant *C2;
  explicit AddMaskingAnd(Constant *c) : C2(c) {}
  bool shouldApply(Value *LHS) const {
    ConstantInt *C1;
    return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) &&
           ConstantExpr::getAnd(C1, C2)->isNullValue();
  }
  Instruction *apply(BinaryOperator &Add) const {
    return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1));
  }
};

}

static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
                                             InstCombiner *IC) {
  if (CastInst *CI = dyn_cast<CastInst>(&I))
    return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());

  // Figure out if the constant is the left or the right argument.
  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));

  if (Constant *SOC = dyn_cast<Constant>(SO)) {
    if (ConstIsRHS)
      return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
    return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
  }

  Value *Op0 = SO, *Op1 = ConstOperand;
  if (!ConstIsRHS)
    std::swap(Op0, Op1);

  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
    return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
                                    SO->getName()+".op");
  if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
    return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
                                   SO->getName()+".cmp");
  if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
    return IC->Builder->CreateFCmp(CI->getPredicate(), Op0, Op1,
                                   SO->getName()+".cmp");
  llvm_unreachable("Unknown binary instruction type!");
}

// FoldOpIntoSelect - Given an instruction with a select as one operand and a
// constant as the other operand, try to fold the binary operator into the
// select arguments.  This also works for Cast instructions, which obviously do
// not have a second operand.
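// For example, (add (select C, 1, X), 4) becomes (select C, 5, (add X, 4)):
// the constant arm folds away entirely and the other arm absorbs the add.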
Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
  // Don't modify shared select instructions.
  if (!SI->hasOneUse()) return 0;
  Value *TV = SI->getOperand(1);
  Value *FV = SI->getOperand(2);

  if (isa<Constant>(TV) || isa<Constant>(FV)) {
    // Bool selects with constant operands can be folded to logical ops.
    if (SI->getType() == Type::getInt1Ty(SI->getContext())) return 0;

    Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
    Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);

    return SelectInst::Create(SI->getCondition(), SelectTrueVal,
                              SelectFalseVal);
  }
  return 0;
}


/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
/// has a PHI node as operand #0, see if we can fold the instruction into the
/// PHI (which is only possible if all operands to the PHI are constants).
///
/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
/// that would normally be unprofitable because they strongly encourage jump
/// threading.
Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
                                         bool AllowAggressive) {
  // Note: the aggressive mode described above is currently forced off here.
  AllowAggressive = false;
  PHINode *PN = cast<PHINode>(I.getOperand(0));
  unsigned NumPHIValues = PN->getNumIncomingValues();
  if (NumPHIValues == 0 ||
      // We normally only transform phis with a single use, unless we're trying
      // hard to make jump threading happen.
      (!PN->hasOneUse() && !AllowAggressive))
    return 0;


  // Check to see if all of the operands of the PHI are simple constants
  // (constantint/constantfp/undef).  If there is one non-constant value,
  // remember the BB it is in.  If there is more than one or if *it* is a PHI,
  // bail out.  We don't do arbitrary constant expressions here because moving
  // their computation can be expensive without a cost model.
  BasicBlock *NonConstBB = 0;
  for (unsigned i = 0; i != NumPHIValues; ++i)
    if (!isa<Constant>(PN->getIncomingValue(i)) ||
        isa<ConstantExpr>(PN->getIncomingValue(i))) {
      if (NonConstBB) return 0;  // More than one non-const value.
      if (isa<PHINode>(PN->getIncomingValue(i))) return 0;  // Itself a phi.
      NonConstBB = PN->getIncomingBlock(i);

      // If the incoming non-constant value is in I's block, we have an infinite
      // loop.
      if (NonConstBB == I.getParent())
        return 0;
    }

  // If there is exactly one non-constant value, we can insert a copy of the
  // operation in that block.  However, if this is a critical edge, we would be
  // inserting the computation on some other paths (e.g. inside a loop).  Only
  // do this if the pred block is unconditionally branching into the phi block.
  if (NonConstBB != 0 && !AllowAggressive) {
    BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
    if (!BI || !BI->isUnconditional()) return 0;
  }

  // Okay, we can do the transformation: create the new PHI node.
  PHINode *NewPN = PHINode::Create(I.getType(), "");
  NewPN->reserveOperandSpace(PN->getNumOperands()/2);
  InsertNewInstBefore(NewPN, *PN);
  NewPN->takeName(PN);

  // Next, add all of the operands to the PHI.
  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
    // We only currently try to fold the condition of a select when it is a phi,
    // not the true/false values.
    Value *TrueV = SI->getTrueValue();
    Value *FalseV = SI->getFalseValue();
    BasicBlock *PhiTransBB = PN->getParent();
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      BasicBlock *ThisBB = PN->getIncomingBlock(i);
      Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
      Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
      Value *InV = 0;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
                                 FalseVInPred,
                                 "phitmp", NonConstBB->getTerminator());
        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, ThisBB);
    }
  } else if (I.getNumOperands() == 2) {
    Constant *C = cast<Constant>(I.getOperand(1));
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV = 0;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        if (CmpInst *CI = dyn_cast<CmpInst>(&I))
          InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
        else
          InV = ConstantExpr::get(I.getOpcode(), InC, C);
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
          InV = BinaryOperator::Create(BO->getOpcode(),
                                       PN->getIncomingValue(i), C, "phitmp",
                                       NonConstBB->getTerminator());
        else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
          InV = CmpInst::Create(CI->getOpcode(),
                                CI->getPredicate(),
                                PN->getIncomingValue(i), C, "phitmp",
                                NonConstBB->getTerminator());
        else
          llvm_unreachable("Unknown binop!");

        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  } else {
    CastInst *CI = cast<CastInst>(&I);
    const Type *RetTy = CI->getType();
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
                               I.getType(), "phitmp",
                               NonConstBB->getTerminator());
        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  }
  return ReplaceInstUsesWith(I, NewPN);
}


/// WillNotOverflowSignedAdd - Return true if we can prove that:
///    (sext (add LHS, RHS))  === (add (sext LHS), (sext RHS))
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
  // There are different heuristics we can use for this.  Here are some simple
  // ones.

  // Add has the property that adding any two 2's complement numbers can only
  // have one carry bit which can change a sign.  As such, if LHS and RHS each
  // have at least two sign bits, we know that the addition of the two values
  // will sign extend fine.
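  // For example, two i8 values with two sign bits each lie in [-64, 63], so
  // their sum lies in [-128, 126] and still fits in i8.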
  if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
    return true;


  // If one of the operands only has one non-zero bit, and if the other operand
  // has a known-zero bit in a more significant place than it (not including the
  // sign bit) the ripple may go up to and fill the zero, but won't change the
  // sign.  For example, (X & ~4) + 1.

  // TODO: Implement.

  return false;
}


Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
  bool Changed = SimplifyCommutative(I);
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
                                 I.hasNoUnsignedWrap(), TD))
    return ReplaceInstUsesWith(I, V);


  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
    if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
      // X + (signbit) --> X ^ signbit
      const APInt& Val = CI->getValue();
      uint32_t BitWidth = Val.getBitWidth();
      if (Val == APInt::getSignBit(BitWidth))
        return BinaryOperator::CreateXor(LHS, RHS);

      // See if SimplifyDemandedBits can simplify this.  This handles stuff like
      // (X & 254)+1 -> (X&254)|1
      if (SimplifyDemandedInstructionBits(I))
        return &I;

      // zext(bool) + C -> bool ? C + 1 : C
      if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
        if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
          return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
    }

    if (isa<PHINode>(LHS))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;

    ConstantInt *XorRHS = 0;
    Value *XorLHS = 0;
    if (isa<ConstantInt>(RHSC) &&
        match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
      uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
      const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();

      uint32_t Size = TySizeBits / 2;
      APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1));
      APInt CFF80Val(-C0080Val);
      do {
        if (TySizeBits > Size) {
          // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
          // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
          if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) ||
              (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) {
            // This is a sign extend if the top bits are known zero.
            if (!MaskedValueIsZero(XorLHS,
                   APInt::getHighBitsSet(TySizeBits, TySizeBits - Size)))
              Size = 0;  // Not a sign ext, but can't be any others either.
            break;
          }
        }
        Size >>= 1;
        C0080Val = APIntOps::lshr(C0080Val, Size);
        CFF80Val = APIntOps::ashr(CFF80Val, Size);
      } while (Size >= 1);

      // FIXME: This shouldn't be necessary. When the backends can handle types
      // with funny bit widths then this switch statement should be removed. It
      // is just here to get the size of the "middle" type back up to something
      // that the back ends can handle.
      const Type *MiddleType = 0;
      switch (Size) {
        default: break;
        case 32:
        case 16:
        case  8: MiddleType = IntegerType::get(I.getContext(), Size); break;
      }
      if (MiddleType) {
        Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext");
        return new SExtInst(NewTrunc, I.getType(), I.getName());
      }
    }
  }

  if (I.getType() == Type::getInt1Ty(I.getContext()))
    return BinaryOperator::CreateXor(LHS, RHS);

  // X + X --> X << 1
  if (I.getType()->isInteger()) {
    if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS)))
      return Result;

    if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
      if (RHSI->getOpcode() == Instruction::Sub)
        if (LHS == RHSI->getOperand(1))                   // A + (B - A) --> B
          return ReplaceInstUsesWith(I, RHSI->getOperand(0));
    }
    if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
      if (LHSI->getOpcode() == Instruction::Sub)
        if (RHS == LHSI->getOperand(1))                   // (B - A) + A --> B
          return ReplaceInstUsesWith(I, LHSI->getOperand(0));
    }
  }

  // -A + B  -->  B - A
  // -A + -B  -->  -(A + B)
  if (Value *LHSV = dyn_castNegVal(LHS)) {
    if (LHS->getType()->isIntOrIntVector()) {
      if (Value *RHSV = dyn_castNegVal(RHS)) {
        Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
        return BinaryOperator::CreateNeg(NewAdd);
      }
    }

    return BinaryOperator::CreateSub(RHS, LHSV);
  }

  // A + -B  -->  A - B
  if (!isa<Constant>(RHS))
    if (Value *V = dyn_castNegVal(RHS))
      return BinaryOperator::CreateSub(LHS, V);


  ConstantInt *C2;
  if (Value *X = dyn_castFoldableMul(LHS, C2)) {
    if (X == RHS)   // X*C + X --> X * (C+1)
      return BinaryOperator::CreateMul(RHS, AddOne(C2));

    // X*C1 + X*C2 --> X * (C1+C2)
    ConstantInt *C1;
    if (X == dyn_castFoldableMul(RHS, C1))
      return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
  }

  // X + X*C --> X * (C+1)
  if (dyn_castFoldableMul(RHS, C2) == LHS)
    return BinaryOperator::CreateMul(LHS, AddOne(C2));

  // X + ~X --> -1   since   ~X = -X-1
  if (dyn_castNotVal(LHS) == RHS ||
      dyn_castNotVal(RHS) == LHS)
    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));


  // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0
  if (match(RHS, m_And(m_Value(), m_ConstantInt(C2))))
    if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2)))
      return R;

  // A+B --> A|B iff A and B have no bits set in common.
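  // For example, (X & 0xF0) + (Y & 0x0F) --> (X & 0xF0) | (Y & 0x0F); with no
  // overlapping bits there can be no carry, so add and or agree.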
  if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
    APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
    APInt LHSKnownOne(IT->getBitWidth(), 0);
    APInt LHSKnownZero(IT->getBitWidth(), 0);
    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
    if (LHSKnownZero != 0) {
      APInt RHSKnownOne(IT->getBitWidth(), 0);
      APInt RHSKnownZero(IT->getBitWidth(), 0);
      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);

      // No bits in common -> bitwise or.
      if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
        return BinaryOperator::CreateOr(LHS, RHS);
    }
  }

  // W*X + Y*Z --> W * (X+Z)  iff W == Y
  if (I.getType()->isIntOrIntVector()) {
    Value *W, *X, *Y, *Z;
    if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
        match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
      if (W != Y) {
        if (W == Z) {
          std::swap(Y, Z);
        } else if (Y == X) {
          std::swap(W, X);
        } else if (X == Z) {
          std::swap(Y, Z);
          std::swap(W, X);
        }
      }

      if (W == Y) {
        Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
        return BinaryOperator::CreateMul(W, NewAdd);
      }
    }
  }

  if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
    Value *X = 0;
    if (match(LHS, m_Not(m_Value(X))))    // ~X + C --> (C-1) - X
      return BinaryOperator::CreateSub(SubOne(CRHS), X);

    // (X & FF00) + xx00  -> (X+xx00) & FF00
    if (LHS->hasOneUse() &&
        match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
      Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
      if (Anded == CRHS) {
        // See if all bits from the first bit set in the Add RHS up are included
        // in the mask.  First, get the rightmost bit.
        const APInt& AddRHSV = CRHS->getValue();

        // Form a mask of all bits from the lowest bit added through the top.
        APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));

        // See if the and mask includes all of these bits.
        APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());

        if (AddRHSHighBits == AddRHSHighBitsAnd) {
          // Okay, the xform is safe.  Insert the new add pronto.
          Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
          return BinaryOperator::CreateAnd(NewAdd, C2);
        }
      }
    }

    // Try to fold constant add into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;
  }

  // add (select X 0 (sub n A)) A  -->  select X A n
  {
    SelectInst *SI = dyn_cast<SelectInst>(LHS);
    Value *A = RHS;
    if (!SI) {
      SI = dyn_cast<SelectInst>(RHS);
      A = LHS;
    }
    if (SI && SI->hasOneUse()) {
      Value *TV = SI->getTrueValue();
      Value *FV = SI->getFalseValue();
      Value *N;

      // Can we fold the add into the argument of the select?
      // We check both true and false select arguments for a matching subtract.
      if (match(FV, m_Zero()) &&
          match(TV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the true select value.
        return SelectInst::Create(SI->getCondition(), N, A);
      if (match(TV, m_Zero()) &&
          match(FV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the false select value.
        return SelectInst::Create(SI->getCondition(), A, N);
    }
  }

  // Check for (add (sext x), y), see if we can merge this into an
  // integer add followed by a sext.
  if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
    // (add (sext x), cst) --> (sext (add x, cst'))
    if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
      Constant *CI =
        ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
      if (LHSConv->hasOneUse() &&
          ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
        // Insert the new, smaller add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              CI, "addconv");
        return new SExtInst(NewAdd, I.getType());
      }
    }

    // (add (sext x), (sext y)) --> (sext (add int x, y))
    if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
      // Only do this if x/y have the same type, if at least one of them has a
      // single use (so we don't increase the number of sexts), and if the
      // integer add will not overflow.
      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                   RHSConv->getOperand(0))) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              RHSConv->getOperand(0), "addconv");
        return new SExtInst(NewAdd, I.getType());
      }
    }
  }

  return Changed ? &I : 0;
}

Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
  bool Changed = SimplifyCommutative(I);
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
    // X + -0.0 --> X
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
      if (CFP->isExactlyValue(ConstantFP::getNegativeZero
                              (I.getType())->getValueAPF()))
        return ReplaceInstUsesWith(I, LHS);
    }

    if (isa<PHINode>(LHS))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;
  }

  // -A + B  -->  B - A
  // -A + -B  -->  -(A + B)
  if (Value *LHSV = dyn_castFNegVal(LHS))
    return BinaryOperator::CreateFSub(RHS, LHSV);

  // A + -B  -->  A - B
  if (!isa<Constant>(RHS))
    if (Value *V = dyn_castFNegVal(RHS))
      return BinaryOperator::CreateFSub(LHS, V);

  // Check for X+0.0.  Simplify it to X if we know X is not -0.0.
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
      return ReplaceInstUsesWith(I, LHS);

  // Check for (add double (sitofp x), y), see if we can merge this into an
  // integer add followed by a promotion.
  if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
    // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
    // ... if the constant fits in the integer value.  This is useful for things
    // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
    // requires a constant pool load, and generally allows the add to be better
    // instcombined.
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
      Constant *CI =
      ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
      if (LHSConv->hasOneUse() &&
          ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              CI, "addconv");
        return new SIToFPInst(NewAdd, I.getType());
      }
    }

    // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
    if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
      // Only do this if x/y have the same type, if at least one of them has a
      // single use (so we don't increase the number of int->fp conversions),
      // and if the integer add will not overflow.
      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                   RHSConv->getOperand(0))) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              RHSConv->getOperand(0),"addconv");
        return new SIToFPInst(NewAdd, I.getType());
      }
    }
  }

  return Changed ? &I : 0;
}


/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
/// code necessary to compute the offset from the base pointer (without adding
/// in the base pointer).  Return the result as a signed integer of intptr size.
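/// For example, on a target where i32 is 4 bytes wide, the offset for
/// (gep [10 x i32]* %A, i64 0, i64 %i) is emitted as (mul %i, 4), after the
/// index has been cast to intptr size.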
Value *InstCombiner::EmitGEPOffset(User *GEP) {
  TargetData &TD = *getTargetData();
  gep_type_iterator GTI = gep_type_begin(GEP);
  const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
  Value *Result = Constant::getNullValue(IntPtrTy);

  // Build a mask for high order bits.
  unsigned IntPtrWidth = TD.getPointerSizeInBits();
  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);

  for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
       ++i, ++GTI) {
    Value *Op = *i;
    uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
    if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
      if (OpC->isZero()) continue;

      // Handle a struct index, which adds its field offset to the pointer.
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());

        Result = Builder->CreateAdd(Result,
                                    ConstantInt::get(IntPtrTy, Size),
                                    GEP->getName()+".offs");
        continue;
      }

      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
      Constant *OC =
              ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
      Scale = ConstantExpr::getMul(OC, Scale);
      // Emit an add instruction.
      Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
      continue;
    }
    // Convert to correct type.
    if (Op->getType() != IntPtrTy)
      Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
    if (Size != 1) {
      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
      // We'll let instcombine(mul) convert this to a shl if possible.
      Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
    }

    // Emit an add instruction.
    Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
  }
  return Result;
}




/// Optimize pointer differences in the same array into a size.  Consider:
///  &A[10] - &A[0]: we should compile this to "10".  LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
///
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
                                               const Type *Ty) {
  assert(TD && "Must have target data info for this");

  // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
  // this.
  bool Swapped;
  GetElementPtrInst *GEP = 0;
  ConstantExpr *CstGEP = 0;

  // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo".
  // For now we require one side to be the base pointer "A" or a constant
  // expression derived from it.
  if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) {
    // (gep X, ...) - X
    if (LHSGEP->getOperand(0) == RHS) {
      GEP = LHSGEP;
      Swapped = false;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) {
      // (gep X, ...) - (ce_gep X, ...)
      if (CE->getOpcode() == Instruction::GetElementPtr &&
          LHSGEP->getOperand(0) == CE->getOperand(0)) {
        CstGEP = CE;
        GEP = LHSGEP;
        Swapped = false;
      }
    }
  }

  if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) {
    // X - (gep X, ...)
    if (RHSGEP->getOperand(0) == LHS) {
      GEP = RHSGEP;
      Swapped = true;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) {
      // (ce_gep X, ...) - (gep X, ...)
      if (CE->getOpcode() == Instruction::GetElementPtr &&
          RHSGEP->getOperand(0) == CE->getOperand(0)) {
        CstGEP = CE;
        GEP = RHSGEP;
        Swapped = true;
      }
    }
  }

  if (GEP == 0)
    return 0;

  // Emit the offset of the GEP as an intptr_t.
  Value *Result = EmitGEPOffset(GEP);

  // If we had a constant expression GEP on the other side offsetting the
  // pointer, subtract it from the offset we have.
  if (CstGEP) {
    Value *CstOffset = EmitGEPOffset(CstGEP);
    Result = Builder->CreateSub(Result, CstOffset);
  }


  // If we have p - gep(p, ...)  then we have to negate the result.
  if (Swapped)
    Result = Builder->CreateNeg(Result, "diff.neg");

  return Builder->CreateIntCast(Result, Ty, true);
}


Instruction *InstCombiner::visitSub(BinaryOperator &I) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  if (Op0 == Op1)                        // sub X, X  -> 0
    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));

  // If this is a 'B = x-(-A)', change to B = x+A.  This preserves NSW/NUW.
  if (Value *V = dyn_castNegVal(Op1)) {
    BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
    Res->setHasNoSignedWrap(I.hasNoSignedWrap());
    Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
    return Res;
  }

  if (isa<UndefValue>(Op0))
    return ReplaceInstUsesWith(I, Op0);    // undef - X -> undef
  if (isa<UndefValue>(Op1))
    return ReplaceInstUsesWith(I, Op1);    // X - undef -> undef
  if (I.getType() == Type::getInt1Ty(I.getContext()))
    return BinaryOperator::CreateXor(Op0, Op1);

  if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
    // Replace (-1 - A) with (~A).
    if (C->isAllOnesValue())
      return BinaryOperator::CreateNot(Op1);

    // C - ~X == X + (1+C)
    Value *X = 0;
    if (match(Op1, m_Not(m_Value(X))))
      return BinaryOperator::CreateAdd(X, AddOne(C));

    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (C->isZero()) {
      if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
        if (SI->getOpcode() == Instruction::LShr) {
          if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
            // Check to see if we are shifting out everything but the sign bit.
            if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
                SI->getType()->getPrimitiveSizeInBits()-1) {
              // Ok, the transformation is safe.  Insert AShr.
              return BinaryOperator::Create(Instruction::AShr,
                                          SI->getOperand(0), CU, SI->getName());
            }
          }
        } else if (SI->getOpcode() == Instruction::AShr) {
          if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
            // Check to see if we are shifting out everything but the sign bit.
            if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
                SI->getType()->getPrimitiveSizeInBits()-1) {
              // Ok, the transformation is safe.  Insert LShr.
              return BinaryOperator::CreateLShr(
                                          SI->getOperand(0), CU, SI->getName());
            }
          }
        }
      }
    }

    // Try to fold constant sub into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

    // C - zext(bool) -> bool ? C - 1 : C
    if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
      if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
        return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
  }

  if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
    if (Op1I->getOpcode() == Instruction::Add) {
      if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
        return BinaryOperator::CreateNeg(Op1I->getOperand(1),
                                         I.getName());
      else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
        return BinaryOperator::CreateNeg(Op1I->getOperand(0),
                                         I.getName());
      else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
        if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
          // C1-(X+C2) --> (C1-C2)-X
          return BinaryOperator::CreateSub(
            ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));
      }
    }

    if (Op1I->hasOneUse()) {
      // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
      // is not used by anyone else...
      //
      if (Op1I->getOpcode() == Instruction::Sub) {
        // Swap the two operands of the subexpr...
        Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
        Op1I->setOperand(0, IIOp1);
        Op1I->setOperand(1, IIOp0);

        // Create the new top level add instruction...
        return BinaryOperator::CreateAdd(Op0, Op1);
      }

      // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
      //
      if (Op1I->getOpcode() == Instruction::And &&
          (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
        Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);

        Value *NewNot = Builder->CreateNot(OtherOp, "B.not");
        return BinaryOperator::CreateAnd(Op0, NewNot);
      }

      // 0 - (X sdiv C)  -> (X sdiv -C)
      if (Op1I->getOpcode() == Instruction::SDiv)
        if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
          if (CSI->isZero())
            if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
              return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
                                          ConstantExpr::getNeg(DivRHS));

      // X - X*C --> X * (1-C)
      ConstantInt *C2 = 0;
      if (dyn_castFoldableMul(Op1I, C2) == Op0) {
        Constant *CP1 =
          ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
                                             C2);
        return BinaryOperator::CreateMul(Op0, CP1);
      }
    }
  }

  if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
    if (Op0I->getOpcode() == Instruction::Add) {
      if (Op0I->getOperand(0) == Op1)             // (Y+X)-Y == X
        return ReplaceInstUsesWith(I, Op0I->getOperand(1));
      else if (Op0I->getOperand(1) == Op1)        // (X+Y)-Y == X
        return ReplaceInstUsesWith(I, Op0I->getOperand(0));
    } else if (Op0I->getOpcode() == Instruction::Sub) {
      if (Op0I->getOperand(0) == Op1)             // (X-Y)-X == -Y
        return BinaryOperator::CreateNeg(Op0I->getOperand(1),
                                         I.getName());
    }
  }

  ConstantInt *C1;
  if (Value *X = dyn_castFoldableMul(Op0, C1)) {
    if (X == Op1)  // X*C - X --> X * (C-1)
      return BinaryOperator::CreateMul(Op1, SubOne(C1));

    ConstantInt *C2;   // X*C1 - X*C2 -> X * (C1-C2)
    if (X == dyn_castFoldableMul(Op1, C2))
      return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
  }

  // Optimize pointer differences in the same array into a size.  Consider:
  //  &A[10] - &A[0]: we should compile this to "10".
  if (TD) {
    Value *LHSOp, *RHSOp;
    if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
        match(Op1, m_PtrToInt(m_Value(RHSOp))))
      if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
        return ReplaceInstUsesWith(I, Res);

    // trunc(p)-trunc(q) -> trunc(p-q)
    if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
        match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
      if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
        return ReplaceInstUsesWith(I, Res);
  }

  return 0;
}
1329
1330Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
1331  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1332
1333  // If this is a 'B = x-(-A)', change to B = x+A...
1334  if (Value *V = dyn_castFNegVal(Op1))
1335    return BinaryOperator::CreateFAdd(Op0, V);
1336
1337  if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
1338    if (Op1I->getOpcode() == Instruction::FAdd) {
1339      if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
1340        return BinaryOperator::CreateFNeg(Op1I->getOperand(1),
1341                                          I.getName());
1342      else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
1343        return BinaryOperator::CreateFNeg(Op1I->getOperand(0),
1344                                          I.getName());
1345    }
1346  }
1347
1348  return 0;
1349}
1350
1351Instruction *InstCombiner::visitMul(BinaryOperator &I) {
1352  bool Changed = SimplifyCommutative(I);
1353  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1354
1355  if (isa<UndefValue>(Op1))              // undef * X -> 0
1356    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
1357
1358  // Simplify mul instructions with a constant RHS.
1359  if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
1360    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) {
1361
1362      // ((X << C1)*C2) == (X * (C2 << C1))
1363      if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
1364        if (SI->getOpcode() == Instruction::Shl)
1365          if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
1366            return BinaryOperator::CreateMul(SI->getOperand(0),
1367                                        ConstantExpr::getShl(CI, ShOp));
1368
1369      if (CI->isZero())
1370        return ReplaceInstUsesWith(I, Op1C);  // X * 0  == 0
1371      if (CI->equalsInt(1))                  // X * 1  == X
1372        return ReplaceInstUsesWith(I, Op0);
1373      if (CI->isAllOnesValue())              // X * -1 == 0 - X
1374        return BinaryOperator::CreateNeg(Op0, I.getName());
1375
1376      const APInt& Val = cast<ConstantInt>(CI)->getValue();
1377      if (Val.isPowerOf2()) {          // Replace X*(2^C) with X << C
1378        return BinaryOperator::CreateShl(Op0,
1379                 ConstantInt::get(Op0->getType(), Val.logBase2()));
1380      }
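      // For example (illustrative IR; value names are arbitrary):
      //   %r = mul i32 %x, 8   becomes   %r = shl i32 %x, 3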
1381    } else if (isa<VectorType>(Op1C->getType())) {
1382      if (Op1C->isNullValue())
1383        return ReplaceInstUsesWith(I, Op1C);
1384
1385      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
1386        if (Op1V->isAllOnesValue())              // X * -1 == 0 - X
1387          return BinaryOperator::CreateNeg(Op0, I.getName());
1388
1389        // Vector X*splat(1) -> X in all defined cases.
1390        if (Constant *Splat = Op1V->getSplatValue()) {
1391          if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat))
1392            if (CI->equalsInt(1))
1393              return ReplaceInstUsesWith(I, Op0);
1394        }
1395      }
1396    }
1397
1398    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
1399      if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
1400          isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) {
1401        // Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
1402        Value *Mul = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp");
1403        Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1));
1404        return BinaryOperator::CreateAdd(Mul, C1C2);
1405
1406      }
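    // For example (illustrative): with C1 = 4 and C2 = 5, (X+4)*5 is
    // rewritten to X*5 + 20.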
1407
1408    // Try to fold constant mul into select arguments.
1409    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
1410      if (Instruction *R = FoldOpIntoSelect(I, SI))
1411        return R;
1412
1413    if (isa<PHINode>(Op0))
1414      if (Instruction *NV = FoldOpIntoPhi(I))
1415        return NV;
1416  }
1417
1418  if (Value *Op0v = dyn_castNegVal(Op0))     // -X * -Y = X*Y
1419    if (Value *Op1v = dyn_castNegVal(Op1))
1420      return BinaryOperator::CreateMul(Op0v, Op1v);
1421
1422  // (X / Y) *  Y = X - (X % Y)
1423  // (X / Y) * -Y = (X % Y) - X
1424  {
1425    Value *Op1C = Op1;
1426    BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
1427    if (!BO ||
1428        (BO->getOpcode() != Instruction::UDiv &&
1429         BO->getOpcode() != Instruction::SDiv)) {
1430      Op1C = Op0;
1431      BO = dyn_cast<BinaryOperator>(Op1);
1432    }
1433    Value *Neg = dyn_castNegVal(Op1C);
1434    if (BO && BO->hasOneUse() &&
1435        (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&
1436        (BO->getOpcode() == Instruction::UDiv ||
1437         BO->getOpcode() == Instruction::SDiv)) {
1438      Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
1439
1440      // If the division is exact, X % Y is zero.
1441      if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO))
1442        if (SDiv->isExact()) {
1443          if (Op1BO == Op1C)
1444            return ReplaceInstUsesWith(I, Op0BO);
1445          return BinaryOperator::CreateNeg(Op0BO);
1446        }
1447
1448      Value *Rem;
1449      if (BO->getOpcode() == Instruction::UDiv)
1450        Rem = Builder->CreateURem(Op0BO, Op1BO);
1451      else
1452        Rem = Builder->CreateSRem(Op0BO, Op1BO);
1453      Rem->takeName(BO);
1454
1455      if (Op1BO == Op1C)
1456        return BinaryOperator::CreateSub(Op0BO, Rem);
1457      return BinaryOperator::CreateSub(Rem, Op0BO);
1458    }
1459  }
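  // For example (illustrative IR; value names are arbitrary):
  //   %d = udiv i32 %x, %y
  //   %r = mul i32 %d, %y
  // becomes
  //   %rem = urem i32 %x, %y
  //   %r = sub i32 %x, %rem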
1460
1461  // i1 mul -> i1 and.
1462  if (I.getType() == Type::getInt1Ty(I.getContext()))
1463    return BinaryOperator::CreateAnd(Op0, Op1);
1464
1465  // X*(1 << Y) --> X << Y
1466  // (1 << Y)*X --> X << Y
1467  {
1468    Value *Y;
1469    if (match(Op0, m_Shl(m_One(), m_Value(Y))))
1470      return BinaryOperator::CreateShl(Op1, Y);
1471    if (match(Op1, m_Shl(m_One(), m_Value(Y))))
1472      return BinaryOperator::CreateShl(Op0, Y);
1473  }
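  // For example (illustrative IR; value names are arbitrary):
  //   %p = shl i32 1, %y
  //   %r = mul i32 %x, %p
  // becomes %r = shl i32 %x, %y.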
1474
1475  // If one of the operands of the multiply is known to be zero or one (e.g. a
1476  // cast from a boolean value), this is a 'masking' multiply:
1477  //   X * Y (where Y is 0 or 1) -> X & (0-Y)
1478  if (!isa<VectorType>(I.getType())) {
1479    // -2 is "-1 << 1" so it is all bits set except the low one.
1480    APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
1481
1482    Value *BoolCast = 0, *OtherOp = 0;
1483    if (MaskedValueIsZero(Op0, Negative2))
1484      BoolCast = Op0, OtherOp = Op1;
1485    else if (MaskedValueIsZero(Op1, Negative2))
1486      BoolCast = Op1, OtherOp = Op0;
1487
1488    if (BoolCast) {
1489      Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
1490                                    BoolCast, "tmp");
1491      return BinaryOperator::CreateAnd(V, OtherOp);
1492    }
1493  }
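  // For example (illustrative IR; value names are arbitrary): if
  //   %b = zext i1 %c to i32
  // then %r = mul i32 %b, %x becomes
  //   %n = sub i32 0, %b
  //   %r = and i32 %n, %x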
1494
1495  return Changed ? &I : 0;
1496}
1497
1498Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
1499  bool Changed = SimplifyCommutative(I);
1500  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1501
1502  // Simplify mul instructions with a constant RHS...
1503  if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
1504    if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {
1505      // "In IEEE floating point, x*1 is not equivalent to x for nans.  However,
1506      // ANSI says we can drop signals, so we can do this anyway." (from GCC)
1507      if (Op1F->isExactlyValue(1.0))
1508        return ReplaceInstUsesWith(I, Op0);  // Eliminate 'mul double %X, 1.0'
1509    } else if (isa<VectorType>(Op1C->getType())) {
1510      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
1511        // As above, vector X*splat(1.0) -> X in all defined cases.
1512        if (Constant *Splat = Op1V->getSplatValue()) {
1513          if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
1514            if (F->isExactlyValue(1.0))
1515              return ReplaceInstUsesWith(I, Op0);
1516        }
1517      }
1518    }
1519
1520    // Try to fold constant mul into select arguments.
1521    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
1522      if (Instruction *R = FoldOpIntoSelect(I, SI))
1523        return R;
1524
1525    if (isa<PHINode>(Op0))
1526      if (Instruction *NV = FoldOpIntoPhi(I))
1527        return NV;
1528  }
1529
1530  if (Value *Op0v = dyn_castFNegVal(Op0))     // -X * -Y = X*Y
1531    if (Value *Op1v = dyn_castFNegVal(Op1))
1532      return BinaryOperator::CreateFMul(Op0v, Op1v);
1533
1534  return Changed ? &I : 0;
1535}
1536
1537/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
1538/// instruction.
1539bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
1540  SelectInst *SI = cast<SelectInst>(I.getOperand(1));
1541
1542  // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
1543  int NonNullOperand = -1;
1544  if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
1545    if (ST->isNullValue())
1546      NonNullOperand = 2;
1547  // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
1548  if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
1549    if (ST->isNullValue())
1550      NonNullOperand = 1;
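  // For example (illustrative IR; value names are arbitrary):
  //   %s = select i1 %c, i32 0, i32 %y
  //   %d = udiv i32 %x, %s
  // becomes %d = udiv i32 %x, %y, since selecting the zero arm would make
  // the div/rem undefined anyway.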
1551
1552  if (NonNullOperand == -1)
1553    return false;
1554
1555  Value *SelectCond = SI->getOperand(0);
1556
1557  // Change the div/rem to use 'Y' instead of the select.
1558  I.setOperand(1, SI->getOperand(NonNullOperand));
1559
1560  // Okay, we know we can safely replace the operand of the div/rem with 'Y'.
1561  // However, the select, or the condition of the select, may have
1562  // multiple uses.  Based on our knowledge that the operand must be non-zero,
1563  // propagate the known value for the select into other uses of it, and
1564  // propagate a known value of the condition into its other users.
1565
1566  // If the select is now dead and the condition's only remaining use was the
1567  // select itself, there is nothing else to update; exit early.
1568  if (SI->use_empty() && SelectCond->hasOneUse())
1569    return true;
1570
1571  // Scan the current block backward, looking for other uses of SI.
1572  BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
1573
1574  while (BBI != BBFront) {
1575    --BBI;
1576    // If we found a call to a function, we can't assume it will return, so
1577    // information from below it cannot be propagated above it.
1578    if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
1579      break;
1580
1581    // Replace uses of the select or its condition with the known values.
1582    for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
1583         I != E; ++I) {
1584      if (*I == SI) {
1585        *I = SI->getOperand(NonNullOperand);
1586        Worklist.Add(BBI);
1587      } else if (*I == SelectCond) {
1588        *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) :
1589                                   ConstantInt::getFalse(BBI->getContext());
1590        Worklist.Add(BBI);
1591      }
1592    }
1593
1594    // Once we have scanned past the instruction, quit looking for it.
1595    if (&*BBI == SI)
1596      SI = 0;
1597    if (&*BBI == SelectCond)
1598      SelectCond = 0;
1599
1600    // If we ran out of things to eliminate, break out of the loop.
1601    if (SelectCond == 0 && SI == 0)
1602      break;
1603
1604  }
1605  return true;
1606}
1607
1608
1609/// This function implements the transforms on div instructions that work
1610/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is
1611/// used by the visitors to those instructions.
1612/// @brief Transforms common to all three div instructions
1613Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
1614  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1615
1616  // undef / X -> 0        for integer.
1617  // undef / X -> undef    for FP (the undef could be a snan).
1618  if (isa<UndefValue>(Op0)) {
1619    if (Op0->getType()->isFPOrFPVector())
1620      return ReplaceInstUsesWith(I, Op0);
1621    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
1622  }
1623
1624  // X / undef -> undef
1625  if (isa<UndefValue>(Op1))
1626    return ReplaceInstUsesWith(I, Op1);
1627
1628  return 0;
1629}
1630
1631/// This function implements the transforms common to both integer division
1632/// instructions (udiv and sdiv). It is called by the visitors to those integer
1633/// division instructions.
1634/// @brief Common integer divide transforms
1635Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
1636  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1637
1638  // (sdiv X, X) --> 1     (udiv X, X) --> 1
1639  if (Op0 == Op1) {
1640    if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
1641      Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
1642      std::vector<Constant*> Elts(Ty->getNumElements(), CI);
1643      return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
1644    }
1645
1646    Constant *CI = ConstantInt::get(I.getType(), 1);
1647    return ReplaceInstUsesWith(I, CI);
1648  }
1649
1650  if (Instruction *Common = commonDivTransforms(I))
1651    return Common;
1652
1653  // Handle cases involving: [su]div X, (select Cond, Y, Z)
1654  // This does not apply for fdiv.
1655  if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
1656    return &I;
1657
1658  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
1659    // div X, 1 == X
1660    if (RHS->equalsInt(1))
1661      return ReplaceInstUsesWith(I, Op0);
1662
1663    // (X / C1) / C2  -> X / (C1*C2)
1664    if (Instruction *LHS = dyn_cast<Instruction>(Op0))
1665      if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
1666        if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
1667          if (MultiplyOverflows(RHS, LHSRHS,
1668                                I.getOpcode()==Instruction::SDiv))
1669            return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
1670          else
1671            return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
1672                                      ConstantExpr::getMul(RHS, LHSRHS));
1673        }
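    // For example (illustrative): (%x udiv 4) udiv 8 becomes %x udiv 32;
    // if C1*C2 overflows the type, the whole expression folds to zero.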
1674
1675    if (!RHS->isZero()) { // avoid X udiv 0
1676      if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
1677        if (Instruction *R = FoldOpIntoSelect(I, SI))
1678          return R;
1679      if (isa<PHINode>(Op0))
1680        if (Instruction *NV = FoldOpIntoPhi(I))
1681          return NV;
1682    }
1683  }
1684
1685  // 0 / X == 0; we don't need to preserve faults!
1686  if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))
1687    if (LHS->equalsInt(0))
1688      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
1689
1690  // Division by zero is undefined, so an i1 divisor must be one: X/1 == X.
1691  if (I.getType() == Type::getInt1Ty(I.getContext()))
1692    return ReplaceInstUsesWith(I, Op0);
1693
1694  if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
1695    if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue()))
1696      // div X, 1 == X
1697      if (X->isOne())
1698        return ReplaceInstUsesWith(I, Op0);
1699  }
1700
1701  return 0;
1702}
1703
1704Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
1705  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1706
1707  // Handle the integer div common cases
1708  if (Instruction *Common = commonIDivTransforms(I))
1709    return Common;
1710
1711  if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
1712    // X udiv 2^C -> X >> C
1713    // Check to see if this is an unsigned division by an exact power of 2;
1714    // if so, convert to a right shift.
1715    if (C->getValue().isPowerOf2())  // 0 not included in isPowerOf2
1716      return BinaryOperator::CreateLShr(Op0,
1717            ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
1718
1719    // X udiv C, where C >= signbit
1720    if (C->getValue().isNegative()) {
1721      Value *IC = Builder->CreateICmpULT(Op0, C);
1722      return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
1723                                ConstantInt::get(I.getType(), 1));
1724    }
1725  }
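    // For example (illustrative IR): for i8, %r = udiv i8 %x, -128 becomes
    //   %c = icmp ult i8 %x, -128
    //   %r = select i1 %c, i8 0, i8 1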
1726
1727  // X udiv (C1 << N), where C1 is "1<<C2"  -->  X >> (N+C2)
1728  if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) {
1729    if (RHSI->getOpcode() == Instruction::Shl &&
1730        isa<ConstantInt>(RHSI->getOperand(0))) {
1731      const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue();
1732      if (C1.isPowerOf2()) {
1733        Value *N = RHSI->getOperand(1);
1734        const Type *NTy = N->getType();
1735        if (uint32_t C2 = C1.logBase2())
1736          N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp");
1737        return BinaryOperator::CreateLShr(Op0, N);
1738      }
1739    }
1740  }
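  // For example (illustrative): with C1 = 4 (so C2 = 2),
  //   %d = udiv i32 %x, (4 << %n)
  // becomes %d = lshr i32 %x, (%n + 2).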
1741
1742  // udiv X, (select Cond, C1, C2) --> select Cond, (X >> log2(C1)), (X >> log2(C2))
1743  // where C1 and C2 are powers of two.
1744  if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
1745    if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
1746      if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2)))  {
1747        const APInt &TVA = STO->getValue(), &FVA = SFO->getValue();
1748        if (TVA.isPowerOf2() && FVA.isPowerOf2()) {
1749          // Compute the shift amounts
1750          uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
1751          // Construct the "on true" case of the select
1752          Constant *TC = ConstantInt::get(Op0->getType(), TSA);
1753          Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t");
1754
1755          // Construct the "on false" case of the select
1756          Constant *FC = ConstantInt::get(Op0->getType(), FSA);
1757          Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f");
1758
1759          // Construct the select instruction and return it.
1760          return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName());
1761        }
1762      }
1763  return 0;
1764}
1765
1766Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
1767  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1768
1769  // Handle the integer div common cases
1770  if (Instruction *Common = commonIDivTransforms(I))
1771    return Common;
1772
1773  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
1774    // sdiv X, -1 == -X
1775    if (RHS->isAllOnesValue())
1776      return BinaryOperator::CreateNeg(Op0);
1777
1778    // sdiv exact X, C  -->  ashr X, log2(C), when C is a positive power of 2
1779    if (cast<SDivOperator>(&I)->isExact() &&
1780        RHS->getValue().isNonNegative() &&
1781        RHS->getValue().isPowerOf2()) {
1782      Value *ShAmt = llvm::ConstantInt::get(RHS->getType(),
1783                                            RHS->getValue().exactLogBase2());
1784      return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName());
1785    }
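    // For example (illustrative): sdiv exact i32 %x, 8 becomes
    // ashr i32 %x, 3.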
1786
1787    // -X/C  -->  X/-C  provided the negation doesn't overflow.
1788    if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
1789      if (isa<Constant>(Sub->getOperand(0)) &&
1790          cast<Constant>(Sub->getOperand(0))->isNullValue() &&
1791          Sub->hasNoSignedWrap())
1792        return BinaryOperator::CreateSDiv(Sub->getOperand(1),
1793                                          ConstantExpr::getNeg(RHS));
1794  }
1795
1796  // If the sign bits of both operands are zero (i.e. we can prove they are
1797  // unsigned inputs), turn this into a udiv.
1798  if (I.getType()->isInteger()) {
1799    APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
1800    if (MaskedValueIsZero(Op0, Mask)) {
1801      if (MaskedValueIsZero(Op1, Mask)) {
1802        // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
1803        return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
1804      }
1805      ConstantInt *ShiftedInt;
1806      if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) &&
1807          ShiftedInt->getValue().isPowerOf2()) {
1808        // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
1809        // Safe because the only negative value (1 << Y) can take on is
1810        // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
1811        // the sign bit set.
1812        return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
1813      }
1814    }
1815  }
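  // For example (illustrative): if %x and %y are both zext'd from i16 to
  // i32, their sign bits are known zero, and sdiv i32 %x, %y becomes
  // udiv i32 %x, %y.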
1816
1817  return 0;
1818}
1819
1820Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
1821  return commonDivTransforms(I);
1822}
1823
1824/// This function implements the transforms on rem instructions that work
1825/// regardless of the kind of rem instruction it is (urem, srem, or frem). It
1826/// is used by the visitors to those instructions.
1827/// @brief Transforms common to all three rem instructions
1828Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
1829  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1830
1831  if (isa<UndefValue>(Op0)) {             // undef % X -> 0 for integer
1832    if (I.getType()->isFPOrFPVector())
1833      return ReplaceInstUsesWith(I, Op0);  // undef % X -> undef for FP (could be SNaN)
1834    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
1835  }
1836  if (isa<UndefValue>(Op1))
1837    return ReplaceInstUsesWith(I, Op1);  // X % undef -> undef
1838
1839  // Handle cases involving: rem X, (select Cond, Y, Z)
1840  if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
1841    return &I;
1842
1843  return 0;
1844}
1845
1846/// This function implements the transforms common to both integer remainder
1847/// instructions (urem and srem). It is called by the visitors to those integer
1848/// remainder instructions.
1849/// @brief Common integer remainder transforms
1850Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
1851  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1852
1853  if (Instruction *common = commonRemTransforms(I))
1854    return common;
1855
1856  // 0 % X == 0 for integer; we don't need to preserve faults!
1857  if (Constant *LHS = dyn_cast<Constant>(Op0))
1858    if (LHS->isNullValue())
1859      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
1860
1861  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
1862    // X % 0 == undef; we don't need to preserve faults!
1863    if (RHS->equalsInt(0))
1864      return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
1865
1866    if (RHS->equalsInt(1))  // X % 1 == 0
1867      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
1868
1869    if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
1870      if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
1871        if (Instruction *R = FoldOpIntoSelect(I, SI))
1872          return R;
1873      } else if (isa<PHINode>(Op0I)) {
1874        if (Instruction *NV = FoldOpIntoPhi(I))
1875          return NV;
1876      }
1877
1878      // See if we can fold away this rem instruction.
1879      if (SimplifyDemandedInstructionBits(I))
1880        return &I;
1881    }
1882  }
1883
1884  return 0;
1885}
1886
1887Instruction *InstCombiner::visitURem(BinaryOperator &I) {
1888  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1889
1890  if (Instruction *common = commonIRemTransforms(I))
1891    return common;
1892
1893  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
1894    // X urem 2^k -> X & (2^k - 1)
1895    // Check to see if this is an unsigned remainder by an exact power of 2;
1896    // if so, convert to a bitwise and.
1897    if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
1898      if (C->getValue().isPowerOf2())
1899        return BinaryOperator::CreateAnd(Op0, SubOne(C));
1900  }
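  // For example (illustrative): urem i32 %x, 32 becomes and i32 %x, 31.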
1901
1902  if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
1903    // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
1904    if (RHSI->getOpcode() == Instruction::Shl &&
1905        isa<ConstantInt>(RHSI->getOperand(0))) {
1906      if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
1907        Constant *N1 = Constant::getAllOnesValue(I.getType());
1908        Value *Add = Builder->CreateAdd(RHSI, N1, "tmp");
1909        return BinaryOperator::CreateAnd(Op0, Add);
1910      }
1911    }
1912  }
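  // For example (illustrative IR; value names are arbitrary):
  //   %m = shl i32 8, %n
  //   %r = urem i32 %x, %m
  // becomes
  //   %mask = add i32 %m, -1
  //   %r = and i32 %x, %mask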
1913
1914  // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (X & (2^C1-1)), (X & (2^C2-1))
1915  // where both select arms are powers of two.
1916  if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
1917    if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
1918      if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
1919        // STO == 0 and SFO == 0 handled above.
1920        if ((STO->getValue().isPowerOf2()) &&
1921            (SFO->getValue().isPowerOf2())) {
1922          Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO),
1923                                              SI->getName()+".t");
1924          Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO),
1925                                               SI->getName()+".f");
1926          return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
1927        }
1928      }
1929  }
1930
1931  return 0;
1932}
1933
1934Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
1935  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1936
1937  // Handle the integer rem common cases
1938  if (Instruction *Common = commonIRemTransforms(I))
1939    return Common;
1940
1941  if (Value *RHSNeg = dyn_castNegVal(Op1))
1942    if (!isa<Constant>(RHSNeg) ||
1943        (isa<ConstantInt>(RHSNeg) &&
1944         cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {
1945      // X % -Y -> X % Y
1946      Worklist.AddValue(I.getOperand(1));
1947      I.setOperand(1, RHSNeg);
1948      return &I;
1949    }
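  // For example (illustrative): srem i32 %x, -8 becomes srem i32 %x, 8,
  // since the remainder's sign follows the dividend, not the divisor.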
1950
1951  // If the sign bits of both operands are zero (i.e. we can prove they are
1952  // unsigned inputs), turn this into a urem.
1953  if (I.getType()->isInteger()) {
1954    APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
1955    if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
1956      // X srem Y -> X urem Y, iff X and Y don't have sign bit set
1957      return BinaryOperator::CreateURem(Op0, Op1, I.getName());
1958    }
1959  }
1960
1961  // If it's a constant vector, flip any negative values positive.
1962  if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) {
1963    unsigned VWidth = RHSV->getNumOperands();
1964
1965    bool hasNegative = false;
1966    for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
1967      if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
1968        if (RHS->getValue().isNegative())
1969          hasNegative = true;
1970
1971    if (hasNegative) {
1972      std::vector<Constant *> Elts(VWidth);
1973      for (unsigned i = 0; i != VWidth; ++i) {
1974        if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
1975          if (RHS->getValue().isNegative())
1976            Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
1977          else
1978            Elts[i] = RHS;
1979        }
1980      }
1981
1982      Constant *NewRHSV = ConstantVector::get(Elts);
1983      if (NewRHSV != RHSV) {
1984        Worklist.AddValue(I.getOperand(1));
1985        I.setOperand(1, NewRHSV);
1986        return &I;
1987      }
1988    }
1989  }
1990
1991  return 0;
1992}
1993
1994Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
1995  return commonRemTransforms(I);
1996}
1997
1998// isOneBitSet - Return true if there is exactly one bit set in the specified
1999// constant.
2000static bool isOneBitSet(const ConstantInt *CI) {
2001  return CI->getValue().isPowerOf2();
2002}
2003
2004 /// getICmpCode - Encode an icmp predicate into a three bit mask.  These bits
2005/// are carefully arranged to allow folding of expressions such as:
2006///
2007///      (A < B) | (A > B) --> (A != B)
2008///
2009/// Note that this is only valid if the first and second predicates have the
2010 /// same sign.  It is illegal to do: (A u< B) | (A s> B)
2011///
2012/// Three bits are used to represent the condition, as follows:
2013///   0  A > B
2014///   1  A == B
2015///   2  A < B
2016///
2017/// <=>  Value  Definition
2018/// 000     0   Always false
2019/// 001     1   A >  B
2020/// 010     2   A == B
2021/// 011     3   A >= B
2022/// 100     4   A <  B
2023/// 101     5   A != B
2024/// 110     6   A <= B
2025/// 111     7   Always true
2026///
2027static unsigned getICmpCode(const ICmpInst *ICI) {
2028  switch (ICI->getPredicate()) {
2029    // False -> 0
2030  case ICmpInst::ICMP_UGT: return 1;  // 001
2031  case ICmpInst::ICMP_SGT: return 1;  // 001
2032  case ICmpInst::ICMP_EQ:  return 2;  // 010
2033  case ICmpInst::ICMP_UGE: return 3;  // 011
2034  case ICmpInst::ICMP_SGE: return 3;  // 011
2035  case ICmpInst::ICMP_ULT: return 4;  // 100
2036  case ICmpInst::ICMP_SLT: return 4;  // 100
2037  case ICmpInst::ICMP_NE:  return 5;  // 101
2038  case ICmpInst::ICMP_ULE: return 6;  // 110
2039  case ICmpInst::ICMP_SLE: return 6;  // 110
2040    // True -> 7
2041  default:
2042    llvm_unreachable("Invalid ICmp predicate!");
2043    return 0;
2044  }
2045}
2046
2047 /// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes an fcmp
2048/// predicate into a three bit mask. It also returns whether it is an ordered
2049/// predicate by reference.
2050static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
2051  isOrdered = false;
2052  switch (CC) {
2053  case FCmpInst::FCMP_ORD: isOrdered = true; return 0;  // 000
2054  case FCmpInst::FCMP_UNO:                   return 0;  // 000
2055  case FCmpInst::FCMP_OGT: isOrdered = true; return 1;  // 001
2056  case FCmpInst::FCMP_UGT:                   return 1;  // 001
2057  case FCmpInst::FCMP_OEQ: isOrdered = true; return 2;  // 010
2058  case FCmpInst::FCMP_UEQ:                   return 2;  // 010
2059  case FCmpInst::FCMP_OGE: isOrdered = true; return 3;  // 011
2060  case FCmpInst::FCMP_UGE:                   return 3;  // 011
2061  case FCmpInst::FCMP_OLT: isOrdered = true; return 4;  // 100
2062  case FCmpInst::FCMP_ULT:                   return 4;  // 100
2063  case FCmpInst::FCMP_ONE: isOrdered = true; return 5;  // 101
2064  case FCmpInst::FCMP_UNE:                   return 5;  // 101
2065  case FCmpInst::FCMP_OLE: isOrdered = true; return 6;  // 110
2066  case FCmpInst::FCMP_ULE:                   return 6;  // 110
2067    // True -> 7
2068  default:
2069    // Not expecting FCMP_FALSE or FCMP_TRUE.
2070    llvm_unreachable("Unexpected FCmp predicate!");
2071    return 0;
2072  }
2073}
2074
2075/// getICmpValue - This is the complement of getICmpCode, which turns an
2076/// opcode and two operands into either a constant true or false, or a brand
2077/// new ICmp instruction. The sign is passed in to determine which kind
2078/// of predicate to use in the new icmp instruction.
2079static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) {
2080  switch (code) {
2081  default: llvm_unreachable("Illegal ICmp code!");
2082  case  0: return ConstantInt::getFalse(LHS->getContext());
2083  case  1:
2084    if (sign)
2085      return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
2086    else
2087      return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS);
2088  case  2: return new ICmpInst(ICmpInst::ICMP_EQ,  LHS, RHS);
2089  case  3:
2090    if (sign)
2091      return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS);
2092    else
2093      return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS);
2094  case  4:
2095    if (sign)
2096      return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS);
2097    else
2098      return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS);
2099  case  5: return new ICmpInst(ICmpInst::ICMP_NE,  LHS, RHS);
2100  case  6:
2101    if (sign)
2102      return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
2103    else
2104      return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
2105  case  7: return ConstantInt::getTrue(LHS->getContext());
2106  }
2107}
2108
2109 /// getFCmpValue - This is the complement of getFCmpCode: it turns a predicate
2110 /// code and two operands into either a constant true or a new FCmp instruction.
2111 /// isordered is passed in to determine which kind of predicate to use.
2112static Value *getFCmpValue(bool isordered, unsigned code,
2113                           Value *LHS, Value *RHS) {
2114  switch (code) {
2115  default: llvm_unreachable("Illegal FCmp code!");
2116  case  0:
2117    if (isordered)
2118      return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS);
2119    else
2120      return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS);
2121  case  1:
2122    if (isordered)
2123      return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS);
2124    else
2125      return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS);
2126  case  2:
2127    if (isordered)
2128      return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS);
2129    else
2130      return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS);
2131  case  3:
2132    if (isordered)
2133      return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS);
2134    else
2135      return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS);
2136  case  4:
2137    if (isordered)
2138      return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS);
2139    else
2140      return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS);
2141  case  5:
2142    if (isordered)
2143      return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS);
2144    else
2145      return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS);
2146  case  6:
2147    if (isordered)
2148      return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS);
2149    else
2150      return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS);
2151  case  7: return ConstantInt::getTrue(LHS->getContext());
2152  }
2153}
2154
2155/// PredicatesFoldable - Return true if both predicates match sign or if at
2156/// least one of them is an equality comparison (which is signless).
2157static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
2158  return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
2159         (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
2160         (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
2161}
2162
2163namespace {
2164 // FoldICmpLogical - Implements (icmp1 A, B) LogicOp (icmp2 A, B) --> (icmp3 A, B)
2165struct FoldICmpLogical {
2166  InstCombiner &IC;
2167  Value *LHS, *RHS;
2168  ICmpInst::Predicate pred;
2169  FoldICmpLogical(InstCombiner &ic, ICmpInst *ICI)
2170    : IC(ic), LHS(ICI->getOperand(0)), RHS(ICI->getOperand(1)),
2171      pred(ICI->getPredicate()) {}
2172  bool shouldApply(Value *V) const {
2173    if (ICmpInst *ICI = dyn_cast<ICmpInst>(V))
2174      if (PredicatesFoldable(pred, ICI->getPredicate()))
2175        return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) ||
2176                (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS));
2177    return false;
2178  }
2179  Instruction *apply(Instruction &Log) const {
2180    ICmpInst *ICI = cast<ICmpInst>(Log.getOperand(0));
2181    if (ICI->getOperand(0) != LHS) {
2182      assert(ICI->getOperand(1) == LHS);
2183      ICI->swapOperands();  // Swap the LHS and RHS of the ICmp
2184    }
2185
2186    ICmpInst *RHSICI = cast<ICmpInst>(Log.getOperand(1));
2187    unsigned LHSCode = getICmpCode(ICI);
2188    unsigned RHSCode = getICmpCode(RHSICI);
2189    unsigned Code;
2190    switch (Log.getOpcode()) {
2191    case Instruction::And: Code = LHSCode & RHSCode; break;
2192    case Instruction::Or:  Code = LHSCode | RHSCode; break;
2193    case Instruction::Xor: Code = LHSCode ^ RHSCode; break;
2194    default: llvm_unreachable("Illegal logical opcode!"); return 0;
2195    }
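    // For example (illustrative): for (A u< B) & (A != B), LHSCode is 4
    // (100) and RHSCode is 5 (101), so Code is 4, which decodes back to
    // the single comparison A u< B.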
2196
2197    bool isSigned = RHSICI->isSigned() || ICI->isSigned();
2198    Value *RV = getICmpValue(isSigned, Code, LHS, RHS);
2199    if (Instruction *I = dyn_cast<Instruction>(RV))
2200      return I;
2201    // Otherwise, it's a constant boolean value...
2202    return IC.ReplaceInstUsesWith(Log, RV);
2203  }
2204};
2205} // end anonymous namespace
2206
2207 // OptAndOp - This handles expressions of the form ((val OP C1) & C2), where
2208// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'.  Op is
2209// guaranteed to be a binary operator.
2210Instruction *InstCombiner::OptAndOp(Instruction *Op,
2211                                    ConstantInt *OpRHS,
2212                                    ConstantInt *AndRHS,
2213                                    BinaryOperator &TheAnd) {
2214  Value *X = Op->getOperand(0);
2215  Constant *Together = 0;
2216  if (!Op->isShift())
2217    Together = ConstantExpr::getAnd(AndRHS, OpRHS);
2218
2219  switch (Op->getOpcode()) {
2220  case Instruction::Xor:
2221    if (Op->hasOneUse()) {
2222      // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
2223      Value *And = Builder->CreateAnd(X, AndRHS);
2224      And->takeName(Op);
2225      return BinaryOperator::CreateXor(And, Together);
2226    }
2227    break;
2228  case Instruction::Or:
2229    if (Together == AndRHS) // (X | C) & C --> C
2230      return ReplaceInstUsesWith(TheAnd, AndRHS);
2231
2232    if (Op->hasOneUse() && Together != OpRHS) {
2233      // (X | C1) & C2 --> (X | (C1&C2)) & C2
2234      Value *Or = Builder->CreateOr(X, Together);
2235      Or->takeName(Op);
2236      return BinaryOperator::CreateAnd(Or, AndRHS);
2237    }
2238    break;
2239  case Instruction::Add:
2240    if (Op->hasOneUse()) {
2241      // Adding one to a single-bit bit-field should be turned into an XOR
2242      // of the bit.  First thing to check is to see if this AND is with a
2243      // single bit constant.
2244      const APInt& AndRHSV = cast<ConstantInt>(AndRHS)->getValue();
2245
2246      // If there is only one bit set...
2247      if (isOneBitSet(cast<ConstantInt>(AndRHS))) {
2248        // Ok, at this point, we know that we are masking the result of the
2249        // ADD down to exactly one bit.  If the constant we are adding has
2250        // no bits set below this bit, then we can eliminate the ADD.
2251        const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();
2252
2253        // Check to see if any bits below the one bit set in AndRHSV are set.
2254        if ((AddRHS & (AndRHSV-1)) == 0) {
2255          // If not, the only thing that can affect the output of the AND is
2256          // the bit specified by AndRHSV.  If that bit is set, the effect of
2257          // the XOR is to toggle the bit.  If it is clear, then the ADD has
2258          // no effect.
2259          if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
2260            TheAnd.setOperand(0, X);
2261            return &TheAnd;
2262          } else {
2263            // Pull the XOR out of the AND.
2264            Value *NewAnd = Builder->CreateAnd(X, AndRHS);
2265            NewAnd->takeName(Op);
2266            return BinaryOperator::CreateXor(NewAnd, AndRHS);
2267          }
2268        }
2269      }
2270    }
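    // For example (illustrative): in (X + 4) & 4 the added constant has no
    // bits below the mask bit, so it becomes (X & 4) ^ 4.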
2271    break;
2272
2273  case Instruction::Shl: {
2274    // We know that the AND will not produce any of the bits shifted in, so if
2275    // the anded constant includes them, clear them now!
2276    //
2277    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
2278    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
2279    APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
2280    ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
2281                                       AndRHS->getValue() & ShlMask);
2282
2283    if (CI->getValue() == ShlMask) {
2284      // Masking out bits that the shift already masks.
2285      return ReplaceInstUsesWith(TheAnd, Op);   // No need for the and.
2286    } else if (CI != AndRHS) {                  // Reducing bits set in and.
2287      TheAnd.setOperand(1, CI);
2288      return &TheAnd;
2289    }
2290    break;
2291  }
2292  case Instruction::LShr: {
2293    // We know that the AND will not produce any of the bits shifted in, so if
2294    // the anded constant includes them, clear them now!  This only applies to
2295    // unsigned shifts, because a signed shr may bring in set bits!
2296    //
2297    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
2298    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
2299    APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
2300    ConstantInt *CI = ConstantInt::get(Op->getContext(),
2301                                       AndRHS->getValue() & ShrMask);
2302
2303    if (CI->getValue() == ShrMask) {
2304      // Masking out bits that the shift already masks.
2305      return ReplaceInstUsesWith(TheAnd, Op);
2306    } else if (CI != AndRHS) {
2307      TheAnd.setOperand(1, CI);  // Reduce bits set in and cst.
2308      return &TheAnd;
2309    }
2310    break;
2311  }
2312  case Instruction::AShr:
2313    // Signed shr.
2314    // See if this is shifting in some sign extension, then masking it out
2315    // with an and.
2316    if (Op->hasOneUse()) {
2317      uint32_t BitWidth = AndRHS->getType()->getBitWidth();
2318      uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
2319      APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
2320      Constant *C = ConstantInt::get(Op->getContext(),
2321                                     AndRHS->getValue() & ShrMask);
2322      if (C == AndRHS) {          // Masking out bits shifted in.
2323        // (Val ashr C1) & C2 -> (Val lshr C1) & C2
2324        // Make the argument unsigned.
2325        Value *ShVal = Op->getOperand(0);
2326        ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
2327        return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
2328      }
2329    }
2330    break;
2331  }
2332  return 0;
2333}
2334
2335
2336/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
2337 /// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more efficient
2338 /// (V-Lo) <u Hi-Lo.  This method expects that Lo <= Hi.  isSigned indicates
2339 /// whether to treat V, Lo, and Hi as signed or not.  IB is the location to
2340/// insert new instructions.
2341Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
2342                                           bool isSigned, bool Inside,
2343                                           Instruction &IB) {
2344  assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
2345            ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
2346         "Lo is not <= Hi in range emission code!");
2347
2348  if (Inside) {
2349    if (Lo == Hi)  // Trivially false.
2350      return new ICmpInst(ICmpInst::ICMP_NE, V, V);
2351
2352    // V >= Min && V < Hi --> V < Hi
2353    if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
2354      ICmpInst::Predicate pred = (isSigned ?
2355        ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
2356      return new ICmpInst(pred, V, Hi);
2357    }
2358
2359    // Emit V-Lo <u Hi-Lo
2360    Constant *NegLo = ConstantExpr::getNeg(Lo);
2361    Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
2362    Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
2363    return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
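    // For example (illustrative): for 5 <= V < 10 this emits
    //   %off = add i32 %V, -5
    //   %cmp = icmp ult i32 %off, 5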
2364  }
2365
2366  if (Lo == Hi)  // Trivially true.
2367    return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
2368
2369  // V < Min || V >= Hi -> V > Hi-1
2370  Hi = SubOne(cast<ConstantInt>(Hi));
2371  if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
2372    ICmpInst::Predicate pred = (isSigned ?
2373        ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
2374    return new ICmpInst(pred, V, Hi);
2375  }
2376
2377  // Emit V-Lo >u Hi-1-Lo
2378  // Note that Hi has already had one subtracted from it, above.
2379  ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
2380  Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
2381  Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
2382  return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
2383}
2384
2385// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
2386// any number of 0s on either side.  The 1s are allowed to wrap from LSB to
2387// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.  0x0F0F0000 is
2388// not, since all 1s are not contiguous.
2389static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
2390  const APInt& V = Val->getValue();
2391  uint32_t BitWidth = Val->getType()->getBitWidth();
2392  if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
2393
2394  // look for the first zero bit after the run of ones
2395  MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
2396  // look for the first non-zero bit
2397  ME = V.getActiveBits();
2398  return true;
2399}
2400
2401/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
2402/// where isSub determines whether the operator is a sub.  If we can fold one of
2403/// the following xforms:
2404///
2405/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
2406/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
2407/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
2408///
2409/// return (A +/- B).
2410///
2411Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
2412                                        ConstantInt *Mask, bool isSub,
2413                                        Instruction &I) {
2414  Instruction *LHSI = dyn_cast<Instruction>(LHS);
2415  if (!LHSI || LHSI->getNumOperands() != 2 ||
2416      !isa<ConstantInt>(LHSI->getOperand(1))) return 0;
2417
2418  ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
2419
2420  switch (LHSI->getOpcode()) {
2421  default: return 0;
2422  case Instruction::And:
2423    if (ConstantExpr::getAnd(N, Mask) == Mask) {
2424      // If the AndRHS is a power of two minus one (0+1+), this is simple.
2425      if ((Mask->getValue().countLeadingZeros() +
2426           Mask->getValue().countPopulation()) ==
2427          Mask->getValue().getBitWidth())
2428        break;
2429
2430      // Otherwise, if Mask is 0+1+0+, and if B is known to be zero in the low
2431      // 0+ part, we don't need any explicit masks to take those bits out of A.
2432      // If that is all N does, ignore it.
2433      uint32_t MB = 0, ME = 0;
2434      if (isRunOfOnes(Mask, MB, ME)) {  // begin/end bit of run, inclusive
2435        uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
2436        APInt LowMask(APInt::getLowBitsSet(BitWidth, MB-1));
2437        if (MaskedValueIsZero(RHS, LowMask))
2438          break;
2439      }
2440    }
2441    return 0;
2442  case Instruction::Or:
2443  case Instruction::Xor:
2444    // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
2445    if ((Mask->getValue().countLeadingZeros() +
2446         Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
2447        && ConstantExpr::getAnd(N, Mask)->isNullValue())
2448      break;
2449    return 0;
2450  }
2451
2452  if (isSub)
2453    return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
2454  return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
2455}
2456
2457/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
2458Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
2459                                          ICmpInst *LHS, ICmpInst *RHS) {
2460  Value *Val, *Val2;
2461  ConstantInt *LHSCst, *RHSCst;
2462  ICmpInst::Predicate LHSCC, RHSCC;
2463
2464  // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
2465  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val),
2466                         m_ConstantInt(LHSCst))) ||
2467      !match(RHS, m_ICmp(RHSCC, m_Value(Val2),
2468                         m_ConstantInt(RHSCst))))
2469    return 0;
2470
2471  if (LHSCst == RHSCst && LHSCC == RHSCC) {
2472    // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
2473    // where C is a power of 2
2474    if (LHSCC == ICmpInst::ICMP_ULT &&
2475        LHSCst->getValue().isPowerOf2()) {
2476      Value *NewOr = Builder->CreateOr(Val, Val2);
2477      return new ICmpInst(LHSCC, NewOr, LHSCst);
2478    }
2479
2480    // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
2481    if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
2482      Value *NewOr = Builder->CreateOr(Val, Val2);
2483      return new ICmpInst(LHSCC, NewOr, LHSCst);
2484    }
2485  }
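  // For example (illustrative): (icmp ult i32 %a, 8) & (icmp ult i32 %b, 8)
  // becomes icmp ult (or i32 %a, %b), 8: the or is u< 8 exactly when both
  // inputs are.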
2486
2487  // From here on, we only handle:
2488  //    (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
2489  if (Val != Val2) return 0;
2490
2491  // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
2492  if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
2493      RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
2494      LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
2495      RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
2496    return 0;
2497
2498  // We can't fold (ugt x, C) & (sgt x, C2).
2499  if (!PredicatesFoldable(LHSCC, RHSCC))
2500    return 0;
2501
2502  // Ensure that the larger constant is on the RHS.
2503  bool ShouldSwap;
2504  if (CmpInst::isSigned(LHSCC) ||
2505      (ICmpInst::isEquality(LHSCC) &&
2506       CmpInst::isSigned(RHSCC)))
2507    ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
2508  else
2509    ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
2510
2511  if (ShouldSwap) {
2512    std::swap(LHS, RHS);
2513    std::swap(LHSCst, RHSCst);
2514    std::swap(LHSCC, RHSCC);
2515  }
2516
2517  // At this point, we know we have two icmp instructions
2518  // comparing a value against two constants and and'ing the result
2519  // together.  Because of the above check, we know that we only have
2520  // icmp eq, icmp ne, icmp [su]lt, and icmp [su]gt here.  We also know
2521  // (from the checks above) that the two constants are
2522  // not equal and that the larger constant is on the RHS.
2523  assert(LHSCst != RHSCst && "Compares not folded above?");
2524
2525  switch (LHSCC) {
2526  default: llvm_unreachable("Unknown integer condition code!");
2527  case ICmpInst::ICMP_EQ:
2528    switch (RHSCC) {
2529    default: llvm_unreachable("Unknown integer condition code!");
2530    case ICmpInst::ICMP_EQ:         // (X == 13 & X == 15) -> false
2531    case ICmpInst::ICMP_UGT:        // (X == 13 & X >  15) -> false
2532    case ICmpInst::ICMP_SGT:        // (X == 13 & X >  15) -> false
2533      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
2534    case ICmpInst::ICMP_NE:         // (X == 13 & X != 15) -> X == 13
2535    case ICmpInst::ICMP_ULT:        // (X == 13 & X <  15) -> X == 13
2536    case ICmpInst::ICMP_SLT:        // (X == 13 & X <  15) -> X == 13
2537      return ReplaceInstUsesWith(I, LHS);
2538    }
2539  case ICmpInst::ICMP_NE:
2540    switch (RHSCC) {
2541    default: llvm_unreachable("Unknown integer condition code!");
2542    case ICmpInst::ICMP_ULT:
2543      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
2544        return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);
2545      break;                        // (X != 13 & X u< 15) -> no change
2546    case ICmpInst::ICMP_SLT:
2547      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
2548        return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);
2549      break;                        // (X != 13 & X s< 15) -> no change
2550    case ICmpInst::ICMP_EQ:         // (X != 13 & X == 15) -> X == 15
2551    case ICmpInst::ICMP_UGT:        // (X != 13 & X u> 15) -> X u> 15
2552    case ICmpInst::ICMP_SGT:        // (X != 13 & X s> 15) -> X s> 15
2553      return ReplaceInstUsesWith(I, RHS);
2554    case ICmpInst::ICMP_NE:
2555      if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
2556        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
2557        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
2558        return new ICmpInst(ICmpInst::ICMP_UGT, Add,
2559                            ConstantInt::get(Add->getType(), 1));
2560      }
2561      break;                        // (X != 13 & X != 15) -> no change
2562    }
2563    break;
2564  case ICmpInst::ICMP_ULT:
2565    switch (RHSCC) {
2566    default: llvm_unreachable("Unknown integer condition code!");
2567    case ICmpInst::ICMP_EQ:         // (X u< 13 & X == 15) -> false
2568    case ICmpInst::ICMP_UGT:        // (X u< 13 & X u> 15) -> false
2569      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
2570    case ICmpInst::ICMP_SGT:        // (X u< 13 & X s> 15) -> no change
2571      break;
2572    case ICmpInst::ICMP_NE:         // (X u< 13 & X != 15) -> X u< 13
2573    case ICmpInst::ICMP_ULT:        // (X u< 13 & X u< 15) -> X u< 13
2574      return ReplaceInstUsesWith(I, LHS);
2575    case ICmpInst::ICMP_SLT:        // (X u< 13 & X s< 15) -> no change
2576      break;
2577    }
2578    break;
2579  case ICmpInst::ICMP_SLT:
2580    switch (RHSCC) {
2581    default: llvm_unreachable("Unknown integer condition code!");
2582    case ICmpInst::ICMP_EQ:         // (X s< 13 & X == 15) -> false
2583    case ICmpInst::ICMP_SGT:        // (X s< 13 & X s> 15) -> false
2584      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
2585    case ICmpInst::ICMP_UGT:        // (X s< 13 & X u> 15) -> no change
2586      break;
2587    case ICmpInst::ICMP_NE:         // (X s< 13 & X != 15) -> X < 13
2588    case ICmpInst::ICMP_SLT:        // (X s< 13 & X s< 15) -> X < 13
2589      return ReplaceInstUsesWith(I, LHS);
2590    case ICmpInst::ICMP_ULT:        // (X s< 13 & X u< 15) -> no change
2591      break;
2592    }
2593    break;
2594  case ICmpInst::ICMP_UGT:
2595    switch (RHSCC) {
2596    default: llvm_unreachable("Unknown integer condition code!");
2597    case ICmpInst::ICMP_EQ:         // (X u> 13 & X == 15) -> X == 15
2598    case ICmpInst::ICMP_UGT:        // (X u> 13 & X u> 15) -> X u> 15
2599      return ReplaceInstUsesWith(I, RHS);
2600    case ICmpInst::ICMP_SGT:        // (X u> 13 & X s> 15) -> no change
2601      break;
2602    case ICmpInst::ICMP_NE:
2603      if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
2604        return new ICmpInst(LHSCC, Val, RHSCst);
2605      break;                        // (X u> 13 & X != 15) -> no change
2606    case ICmpInst::ICMP_ULT:        // (X u> 13 & X u< 15) -> (X-14) <u 1
2607      return InsertRangeTest(Val, AddOne(LHSCst),
2608                             RHSCst, false, true, I);
2609    case ICmpInst::ICMP_SLT:        // (X u> 13 & X s< 15) -> no change
2610      break;
2611    }
2612    break;
2613  case ICmpInst::ICMP_SGT:
2614    switch (RHSCC) {
2615    default: llvm_unreachable("Unknown integer condition code!");
2616    case ICmpInst::ICMP_EQ:         // (X s> 13 & X == 15) -> X == 15
2617    case ICmpInst::ICMP_SGT:        // (X s> 13 & X s> 15) -> X s> 15
2618      return ReplaceInstUsesWith(I, RHS);
2619    case ICmpInst::ICMP_UGT:        // (X s> 13 & X u> 15) -> no change
2620      break;
2621    case ICmpInst::ICMP_NE:
2622      if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
2623        return new ICmpInst(LHSCC, Val, RHSCst);
2624      break;                        // (X s> 13 & X != 15) -> no change
2625    case ICmpInst::ICMP_SLT:        // (X s> 13 & X s< 15) -> (X-14) s< 1
2626      return InsertRangeTest(Val, AddOne(LHSCst),
2627                             RHSCst, true, true, I);
2628    case ICmpInst::ICMP_ULT:        // (X s> 13 & X u< 15) -> no change
2629      break;
2630    }
2631    break;
2632  }
2633
2634  return 0;
2635}
2636
2637Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
2638                                          FCmpInst *RHS) {
2639
2640  if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
2641      RHS->getPredicate() == FCmpInst::FCMP_ORD) {
2642    // (fcmp ord x, c) & (fcmp ord y, c)  -> (fcmp ord x, y)
2643    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
2644      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
2645        // If either of the constants are nans, then the whole thing returns
2646        // false.
2647        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
2648          return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
2649        return new FCmpInst(FCmpInst::FCMP_ORD,
2650                            LHS->getOperand(0), RHS->getOperand(0));
2651      }
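    // For example (illustrative): (fcmp ord double %x, 1.0) & (fcmp ord
    // double %y, 2.0) becomes fcmp ord double %x, %y, since neither
    // constant is a NaN.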
2652
2653    // Handle vector zeros.  This occurs because the canonical form of
2654    // "fcmp ord x,x" is "fcmp ord x, 0".
2655    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
2656        isa<ConstantAggregateZero>(RHS->getOperand(1)))
2657      return new FCmpInst(FCmpInst::FCMP_ORD,
2658                          LHS->getOperand(0), RHS->getOperand(0));
2659    return 0;
2660  }
2661
2662  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
2663  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
2664  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
2665
2666
2667  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
2668    // Swap RHS operands to match LHS.
2669    Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
2670    std::swap(Op1LHS, Op1RHS);
2671  }
2672
2673  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
2674    // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
2675    if (Op0CC == Op1CC)
2676      return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
2677
2678    if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE)
2679      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
2680    if (Op0CC == FCmpInst::FCMP_TRUE)
2681      return ReplaceInstUsesWith(I, RHS);
2682    if (Op1CC == FCmpInst::FCMP_TRUE)
2683      return ReplaceInstUsesWith(I, LHS);
2684
2685    bool Op0Ordered;
2686    bool Op1Ordered;
2687    unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
2688    unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
2689    if (Op1Pred == 0) {
2690      std::swap(LHS, RHS);
2691      std::swap(Op0Pred, Op1Pred);
2692      std::swap(Op0Ordered, Op1Ordered);
2693    }
2694    if (Op0Pred == 0) {
2695      // uno && ueq -> uno && (uno || eq) -> ueq
2696      // ord && olt -> ord && (ord && lt) -> olt
2697      if (Op0Ordered == Op1Ordered)
2698        return ReplaceInstUsesWith(I, RHS);
2699
2700      // uno && oeq -> uno && (ord && eq) -> false
2701      // uno && ord -> false
2702      if (!Op0Ordered)
2703        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
2704      // ord && ueq -> ord && (uno || eq) -> oeq
2705      return cast<Instruction>(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS));
2706    }
2707  }
2708
2709  return 0;
2710}
2711
2712
2713Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
2714  bool Changed = SimplifyCommutative(I);
2715  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2716
2717  if (Value *V = SimplifyAndInst(Op0, Op1, TD))
2718    return ReplaceInstUsesWith(I, V);
2719
2720  // See if we can simplify any instructions used by the instruction whose sole
2721  // purpose is to compute bits we don't care about.
2722  if (SimplifyDemandedInstructionBits(I))
2723    return &I;
2724
2725  if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
2726    const APInt &AndRHSMask = AndRHS->getValue();
2727    APInt NotAndRHS(~AndRHSMask);
2728
2729    // Optimize a variety of ((val OP C1) & C2) combinations...
2730    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
2731      Value *Op0LHS = Op0I->getOperand(0);
2732      Value *Op0RHS = Op0I->getOperand(1);
2733      switch (Op0I->getOpcode()) {
2734      default: break;
2735      case Instruction::Xor:
2736      case Instruction::Or:
2737        // If the mask is only needed on one incoming arm, push it up.
2738        if (!Op0I->hasOneUse()) break;
2739
2740        if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
2741          // Not masking anything out for the LHS, move to RHS.
2742          Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
2743                                             Op0RHS->getName()+".masked");
2744          return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
2745        }
2746        if (!isa<Constant>(Op0RHS) &&
2747            MaskedValueIsZero(Op0RHS, NotAndRHS)) {
2748          // Not masking anything out for the RHS, move to LHS.
2749          Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
2750                                             Op0LHS->getName()+".masked");
2751          return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
2752        }
2753
2754        break;
2755      case Instruction::Add:
2756        // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
2757        // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
2758        // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
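        // Worked example (illustrative): ((A & 0xF) + B) & 7 --> (A + B) & 7,
        // since 0xF & 7 == 7 and carries only propagate upward.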
2759        if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
2760          return BinaryOperator::CreateAnd(V, AndRHS);
2761        if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
2762          return BinaryOperator::CreateAnd(V, AndRHS);  // Add commutes
2763        break;
2764
2765      case Instruction::Sub:
2766        // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
2767        // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
2768        // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
2769        if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
2770          return BinaryOperator::CreateAnd(V, AndRHS);
2771
2772        // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
2773        // has 1's for all bits that the subtraction with A might affect.
2774        if (Op0I->hasOneUse()) {
2775          uint32_t BitWidth = AndRHSMask.getBitWidth();
2776          uint32_t Zeros = AndRHSMask.countLeadingZeros();
2777          APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
2778
2779          ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
2780          if (!(A && A->isZero()) &&               // avoid infinite recursion.
2781              MaskedValueIsZero(Op0LHS, Mask)) {
2782            Value *NewNeg = Builder->CreateNeg(Op0RHS);
2783            return BinaryOperator::CreateAnd(NewNeg, AndRHS);
2784          }
2785        }
2786        break;
2787
2788      case Instruction::Shl:
2789      case Instruction::LShr:
2790        // (1 << x) & 1 --> zext(x == 0)
2791        // (1 >> x) & 1 --> zext(x == 0)
2792        if (AndRHSMask == 1 && Op0LHS == AndRHS) {
2793          Value *NewICmp =
2794            Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
2795          return new ZExtInst(NewICmp, I.getType());
2796        }
2797        break;
2798      }
2799
2800      if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
2801        if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
2802          return Res;
2803    } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) {
2804      // If this is an integer truncation or an integer bitcast, and if the
2805      // source is an 'and' or 'or' with an immediate, transform it.  This
2806      // frequently occurs for bitfield accesses.
2807      if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
2808        if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
2809            CastOp->getNumOperands() == 2)
2810          if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){
2811            if (CastOp->getOpcode() == Instruction::And) {
2812              // Change: and (cast (and X, C1) to T), C2
2813              // into  : and (cast X to T), trunc_or_bitcast(C1)&C2
2814              // This will fold the two constants together, which may allow
2815              // other simplifications.
2816              Value *NewCast = Builder->CreateTruncOrBitCast(
2817                CastOp->getOperand(0), I.getType(),
2818                CastOp->getName()+".shrunk");
2819              // trunc_or_bitcast(C1)&C2
2820              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
2821              C3 = ConstantExpr::getAnd(C3, AndRHS);
2822              return BinaryOperator::CreateAnd(NewCast, C3);
2823            } else if (CastOp->getOpcode() == Instruction::Or) {
2824              // Change: and (cast (or X, C1) to T), C2
2825              // into  : trunc(C1)&C2 iff trunc(C1)&C2 == C2
2826              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
2827              if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
2828                // trunc(C1)&C2
2829                return ReplaceInstUsesWith(I, AndRHS);
2830            }
2831          }
2832      }
2833    }
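    // Worked example (illustrative, hypothetical constants):
    //    and (trunc (and i16 %X, 510) to i8), 7
    // becomes
    //    and (trunc i16 %X to i8), 6     ; trunc(510) & 7 == 0xFE & 7 == 6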
2834
2835    // Try to fold constant and into select arguments.
2836    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
2837      if (Instruction *R = FoldOpIntoSelect(I, SI))
2838        return R;
2839    if (isa<PHINode>(Op0))
2840      if (Instruction *NV = FoldOpIntoPhi(I))
2841        return NV;
2842  }
2843
2844
2845  // (~A & ~B) == (~(A | B)) - De Morgan's Law
2846  if (Value *Op0NotVal = dyn_castNotVal(Op0))
2847    if (Value *Op1NotVal = dyn_castNotVal(Op1))
2848      if (Op0->hasOneUse() && Op1->hasOneUse()) {
2849        Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
2850                                      I.getName()+".demorgan");
2851        return BinaryOperator::CreateNot(Or);
2852      }
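  // E.g. (a sketch, assuming both 'not's are single-use):
  //    %na = xor i32 %a, -1
  //    %nb = xor i32 %b, -1
  //    %r  = and i32 %na, %nb
  // becomes:
  //    %or = or i32 %a, %b
  //    %r  = xor i32 %or, -1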
2853
2854  {
2855    Value *A = 0, *B = 0, *C = 0, *D = 0;
2856    // (A|B) & ~(A&B) -> A^B
2857    if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
2858        match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
2859        ((A == C && B == D) || (A == D && B == C)))
2860      return BinaryOperator::CreateXor(A, B);
2861
2862    // ~(A&B) & (A|B) -> A^B
2863    if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
2864        match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
2865        ((A == C && B == D) || (A == D && B == C)))
2866      return BinaryOperator::CreateXor(A, B);
2867
2868    if (Op0->hasOneUse() &&
2869        match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
2870      if (A == Op1) {                                // (A^B)&A -> A&(A^B)
2871        I.swapOperands();     // Simplify below
2872        std::swap(Op0, Op1);
2873      } else if (B == Op1) {                         // (A^B)&B -> B&(B^A)
2874        cast<BinaryOperator>(Op0)->swapOperands();
2875        I.swapOperands();     // Simplify below
2876        std::swap(Op0, Op1);
2877      }
2878    }
2879
2880    if (Op1->hasOneUse() &&
2881        match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
2882      if (B == Op0) {                                // B&(A^B) -> B&(B^A)
2883        cast<BinaryOperator>(Op1)->swapOperands();
2884        std::swap(A, B);
2885      }
2886      if (A == Op0)                                // A&(A^B) -> A & ~B
2887        return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
2888    }
2889
2890    // (A&((~A)|B)) -> A&B
2891    if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
2892        match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
2893      return BinaryOperator::CreateAnd(A, Op1);
2894    if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
2895        match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
2896      return BinaryOperator::CreateAnd(A, Op0);
2897  }
2898
2899  if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) {
2900    // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
2901    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
2902      return R;
2903
2904    if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
2905      if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
2906        return Res;
2907  }
2908
2909  // fold (and (cast A), (cast B)) -> (cast (and A, B))
2910  if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
2911    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
2912      if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
2913        const Type *SrcTy = Op0C->getOperand(0)->getType();
2914        if (SrcTy == Op1C->getOperand(0)->getType() &&
2915            SrcTy->isIntOrIntVector() &&
2916            // Only do this if the casts both really cause code to be generated.
2917            ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
2918                              I.getType()) &&
2919            ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
2920                              I.getType())) {
2921          Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0),
2922                                            Op1C->getOperand(0), I.getName());
2923          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
2924        }
2925      }
2926
2927  // (X >> Z) & (Y >> Z)  -> (X&Y) >> Z  for all shifts.
2928  if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
2929    if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
2930      if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
2931          SI0->getOperand(1) == SI1->getOperand(1) &&
2932          (SI0->hasOneUse() || SI1->hasOneUse())) {
2933        Value *NewOp =
2934          Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
2935                             SI0->getName());
2936        return BinaryOperator::Create(SI1->getOpcode(), NewOp,
2937                                      SI1->getOperand(1));
2938      }
2939  }
2940
2941  // If and'ing two fcmps, try to combine them into one.
2942  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
2943    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
2944      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
2945        return Res;
2946  }
2947
2948  return Changed ? &I : 0;
2949}
2950
2951/// CollectBSwapParts - Analyze the specified subexpression and see if it is
2952/// capable of providing pieces of a bswap.  The subexpression provides pieces
2953/// of a bswap if it is proven that each of the non-zero bytes in the output of
2954/// the expression came from the corresponding "byte swapped" byte in some other
2955/// value.  For example, if the current subexpression is "(shl i32 %X, 24)" then
2956/// we know that the expression deposits the low byte of %X into the high byte
2957/// of the bswap result and that all other bytes are zero.  When this
2958/// expression is accepted, the high byte of ByteValues is set to %X to
2959/// indicate a correct match.
2960///
2961/// This function returns true if the match failed and false if it succeeded.
2962/// On entry to the function the "OverallLeftShift" is a signed integer value
2963/// indicating the number of bytes that the subexpression is later shifted.  For
2964/// example, if the expression is later right shifted by 16 bits, the
2965/// OverallLeftShift value would be -2 on entry.  This is used to specify which
2966/// byte of ByteValues is actually being set.
2967///
2968/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
2969/// byte is masked to zero by a user.  For example, in (X & 255), X will be
2970/// processed with a bytemask of 1.  Because bytemask is 32-bits, this limits
2971/// this function to working on up to 32-byte (256 bit) values.  ByteMask is
2972/// always in the local (OverallLeftShift) coordinate space.
2973///
2974static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
2975                              SmallVector<Value*, 8> &ByteValues) {
2976  if (Instruction *I = dyn_cast<Instruction>(V)) {
2977    // If this is an or instruction, it may be an inner node of the bswap.
2978    if (I->getOpcode() == Instruction::Or) {
2979      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
2980                               ByteValues) ||
2981             CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
2982                               ByteValues);
2983    }
2984
2985    // If this is a logical shift by a constant multiple of 8, recurse with
2986    // OverallLeftShift and ByteMask adjusted.
2987    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
2988      unsigned ShAmt =
2989        cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
2990      // Ensure the shift amount is defined and of a byte value.
2991      if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
2992        return true;
2993
2994      unsigned ByteShift = ShAmt >> 3;
2995      if (I->getOpcode() == Instruction::Shl) {
2996        // X << 2 -> collect(X, +2)
2997        OverallLeftShift += ByteShift;
2998        ByteMask >>= ByteShift;
2999      } else {
3000        // X >>u 2 -> collect(X, -2)
3001        OverallLeftShift -= ByteShift;
3002        ByteMask <<= ByteShift;
3003        ByteMask &= (~0U >> (32-ByteValues.size()));
3004      }
3005
3006      if (OverallLeftShift >= (int)ByteValues.size()) return true;
3007      if (OverallLeftShift <= -(int)ByteValues.size()) return true;
3008
3009      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
3010                               ByteValues);
3011    }
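    // E.g. (illustrative): visiting (shl i32 %x, 8) with OverallLeftShift == 0
    // recurses into %x with OverallLeftShift == +1 and ByteMask >> 1.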
3012
3013    // If this is a logical 'and' with a mask that clears bytes, clear the
3014    // corresponding bytes in ByteMask.
3015    if (I->getOpcode() == Instruction::And &&
3016        isa<ConstantInt>(I->getOperand(1))) {
3017      // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
3018      unsigned NumBytes = ByteValues.size();
3019      APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
3020      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
3021
3022      for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
3023        // If this byte is masked out by a later operation, we don't care what
3024        // the and mask is.
3025        if ((ByteMask & (1 << i)) == 0)
3026          continue;
3027
3028        // If the AndMask is all zeros for this byte, clear the bit.
3029        APInt MaskB = AndMask & Byte;
3030        if (MaskB == 0) {
3031          ByteMask &= ~(1U << i);
3032          continue;
3033        }
3034
3035        // If the AndMask is not all ones for this byte, it's not a bytezap.
3036        if (MaskB != Byte)
3037          return true;
3038
3039        // Otherwise, this byte is kept.
3040      }
3041
3042      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
3043                               ByteValues);
3044    }
3045  }
3046
3047  // Okay, we got to something that isn't a shift, 'or' or 'and'.  This must be
3048  // the input value to the bswap.  Some observations: 1) if more than one byte
3049  // is demanded from this input, then it could not be successfully assembled
3050  // into a byteswap.  At least one of the two bytes would not be aligned with
3051  // its ultimate destination.
3052  if (!isPowerOf2_32(ByteMask)) return true;
3053  unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
3054
3055  // 2) The input and ultimate destinations must line up: if byte 3 of an i32
3056  // is demanded, it needs to go into byte 0 of the result.  This means that the
3057  // byte needs to be shifted until it lands in the right byte bucket.  The
3058  // shift amount depends on the position: if the byte is coming from the high
3059  // part of the value (e.g. byte 3) then it must be shifted right.  If from the
3060  // low part, it must be shifted left.
3061  unsigned DestByteNo = InputByteNo + OverallLeftShift;
3062  // Whichever half the byte comes from, it must land in the mirror-image
3063  // byte position of the result.
3064  if (ByteValues.size()-1-DestByteNo != InputByteNo)
3065    return true;
3069
3070  // If the destination byte value is already defined, the values are or'd
3071  // together, which isn't a bswap (unless it's an or of the same bits).
3072  if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
3073    return true;
3074  ByteValues[DestByteNo] = V;
3075  return false;
3076}
3077
3078/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
3079/// If so, insert the new bswap intrinsic and return it.
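//
// For illustration, a 16-bit idiom this is meant to catch (a sketch; %x is a
// placeholder value):
//    %hi = shl i16 %x, 8
//    %lo = lshr i16 %x, 8
//    %r  = or i16 %hi, %lo
// becomes:
//    %r = call i16 @llvm.bswap.i16(i16 %x)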
3080Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
3081  const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
3082  if (!ITy || ITy->getBitWidth() % 16 ||
3083      // ByteMask only allows up to 32-byte values.
3084      ITy->getBitWidth() > 32*8)
3085    return 0;   // Can only bswap pairs of bytes.  Can't do vectors.
3086
3087  /// ByteValues - For each byte of the result, we keep track of which value
3088  /// defines each byte.
3089  SmallVector<Value*, 8> ByteValues;
3090  ByteValues.resize(ITy->getBitWidth()/8);
3091
3092  // Try to find all the pieces corresponding to the bswap.
3093  uint32_t ByteMask = ~0U >> (32-ByteValues.size());
3094  if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
3095    return 0;
3096
3097  // Check to see if all of the bytes come from the same value.
3098  Value *V = ByteValues[0];
3099  if (V == 0) return 0;  // Didn't find a byte?  Must be zero.
3100
3101  // Check to make sure that all of the bytes come from the same value.
3102  for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
3103    if (ByteValues[i] != V)
3104      return 0;
3105  const Type *Tys[] = { ITy };
3106  Module *M = I.getParent()->getParent()->getParent();
3107  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
3108  return CallInst::Create(F, V);
3109}
3110
3111/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D).  Check
3112/// if A is (cond?-1:0) and either B or D is ~(cond?-1:0) or (cond?0:-1), then
3113/// we can simplify this expression to "cond ? C : D" (or "cond ? C : B").
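// For illustration (a sketch; %B and %C are placeholder values):
//    %A  = select i1 %cond, i32 -1, i32 0
//    %D  = select i1 %cond, i32 0, i32 -1
//    %t1 = and i32 %A, %C
//    %t2 = and i32 %B, %D
//    %r  = or i32 %t1, %t2
// simplifies to:
//    %r = select i1 %cond, i32 %C, i32 %B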
3114static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
3115                                         Value *C, Value *D) {
3116  // If A is not a select of -1/0, this cannot match.
3117  Value *Cond = 0;
3118  if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond))))
3119    return 0;
3120
3121  // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
3122  if (match(D, m_SelectCst<0, -1>(m_Specific(Cond))))
3123    return SelectInst::Create(Cond, C, B);
3124  if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
3125    return SelectInst::Create(Cond, C, B);
3126  // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
3127  if (match(B, m_SelectCst<0, -1>(m_Specific(Cond))))
3128    return SelectInst::Create(Cond, C, D);
3129  if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
3130    return SelectInst::Create(Cond, C, D);
3131  return 0;
3132}
3133
3134/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
3135Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
3136                                         ICmpInst *LHS, ICmpInst *RHS) {
3137  Value *Val, *Val2;
3138  ConstantInt *LHSCst, *RHSCst;
3139  ICmpInst::Predicate LHSCC, RHSCC;
3140
3141  // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
3142  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) ||
3143      !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst))))
3144    return 0;
3145
3146
3147  // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
3148  if (LHSCst == RHSCst && LHSCC == RHSCC &&
3149      LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
3150    Value *NewOr = Builder->CreateOr(Val, Val2);
3151    return new ICmpInst(LHSCC, NewOr, LHSCst);
3152  }
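  // E.g. (illustrative):
  //    (icmp ne i32 %a, 0) | (icmp ne i32 %b, 0)
  // becomes:
  //    icmp ne i32 (or i32 %a, %b), 0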
3153
3154  // From here on, we only handle:
3155  //    (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
3156  if (Val != Val2) return 0;
3157
3158  // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
3159  if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
3160      RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
3161      LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
3162      RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
3163    return 0;
3164
3165  // We can't fold (ugt x, C) | (sgt x, C2).
3166  if (!PredicatesFoldable(LHSCC, RHSCC))
3167    return 0;
3168
3169  // Ensure that the larger constant is on the RHS.
3170  bool ShouldSwap;
3171  if (CmpInst::isSigned(LHSCC) ||
3172      (ICmpInst::isEquality(LHSCC) &&
3173       CmpInst::isSigned(RHSCC)))
3174    ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
3175  else
3176    ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
3177
3178  if (ShouldSwap) {
3179    std::swap(LHS, RHS);
3180    std::swap(LHSCst, RHSCst);
3181    std::swap(LHSCC, RHSCC);
3182  }
3183
3184  // At this point, we know we have two icmp instructions
3185  // comparing a value against two constants and or'ing the result
3186  // together.  Because of the above check, we know that we only have
3187  // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here.  We also know (from the
3188  // FoldICmpLogical check above) that the two constants are not
3189  // equal.
3190  assert(LHSCst != RHSCst && "Compares not folded above?");
3191
3192  switch (LHSCC) {
3193  default: llvm_unreachable("Unknown integer condition code!");
3194  case ICmpInst::ICMP_EQ:
3195    switch (RHSCC) {
3196    default: llvm_unreachable("Unknown integer condition code!");
3197    case ICmpInst::ICMP_EQ:
3198      if (LHSCst == SubOne(RHSCst)) {
3199        // (X == 13 | X == 14) -> X-13 <u 2
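        // Worked check (illustrative): X == 13 gives X-13 == 0 and X == 14
        // gives X-13 == 1, so together they are exactly X-13 <u 2.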
3200        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
3201        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
3202        AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
3203        return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
3204      }
3205      break;                         // (X == 13 | X == 15) -> no change
3206    case ICmpInst::ICMP_UGT:         // (X == 13 | X u> 14) -> no change
3207    case ICmpInst::ICMP_SGT:         // (X == 13 | X s> 14) -> no change
3208      break;
3209    case ICmpInst::ICMP_NE:          // (X == 13 | X != 15) -> X != 15
3210    case ICmpInst::ICMP_ULT:         // (X == 13 | X u< 15) -> X u< 15
3211    case ICmpInst::ICMP_SLT:         // (X == 13 | X s< 15) -> X s< 15
3212      return ReplaceInstUsesWith(I, RHS);
3213    }
3214    break;
3215  case ICmpInst::ICMP_NE:
3216    switch (RHSCC) {
3217    default: llvm_unreachable("Unknown integer condition code!");
3218    case ICmpInst::ICMP_EQ:          // (X != 13 | X == 15) -> X != 13
3219    case ICmpInst::ICMP_UGT:         // (X != 13 | X u> 15) -> X != 13
3220    case ICmpInst::ICMP_SGT:         // (X != 13 | X s> 15) -> X != 13
3221      return ReplaceInstUsesWith(I, LHS);
3222    case ICmpInst::ICMP_NE:          // (X != 13 | X != 15) -> true
3223    case ICmpInst::ICMP_ULT:         // (X != 13 | X u< 15) -> true
3224    case ICmpInst::ICMP_SLT:         // (X != 13 | X s< 15) -> true
3225      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
3226    }
3227    break;
3228  case ICmpInst::ICMP_ULT:
3229    switch (RHSCC) {
3230    default: llvm_unreachable("Unknown integer condition code!");
3231    case ICmpInst::ICMP_EQ:         // (X u< 13 | X == 14) -> no change
3232      break;
3233    case ICmpInst::ICMP_UGT:        // (X u< 13 | X u> 15) -> (X-13) u> 2
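      // Worked check (illustrative): X u< 13 or X u> 15 excludes exactly
      // {13, 14, 15}, i.e. it is (X - 13) u> 2.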
3234      // If RHSCst is UMAXINT, 'X u> UMAXINT' is always false.  Not handling
3235      // this here would make AddOne(RHSCst) wrap around.
3236      if (RHSCst->isMaxValue(false))
3237        return ReplaceInstUsesWith(I, LHS);
3238      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
3239                             false, false, I);
3240    case ICmpInst::ICMP_SGT:        // (X u< 13 | X s> 15) -> no change
3241      break;
3242    case ICmpInst::ICMP_NE:         // (X u< 13 | X != 15) -> X != 15
3243    case ICmpInst::ICMP_ULT:        // (X u< 13 | X u< 15) -> X u< 15
3244      return ReplaceInstUsesWith(I, RHS);
3245    case ICmpInst::ICMP_SLT:        // (X u< 13 | X s< 15) -> no change
3246      break;
3247    }
3248    break;
3249  case ICmpInst::ICMP_SLT:
3250    switch (RHSCC) {
3251    default: llvm_unreachable("Unknown integer condition code!");
3252    case ICmpInst::ICMP_EQ:         // (X s< 13 | X == 14) -> no change
3253      break;
3254    case ICmpInst::ICMP_SGT:        // (X s< 13 | X s> 15) -> (X-13) s> 2
3255      // If RHSCst is SMAXINT, 'X s> SMAXINT' is always false.  Not handling
3256      // this here would make AddOne(RHSCst) wrap around.
3257      if (RHSCst->isMaxValue(true))
3258        return ReplaceInstUsesWith(I, LHS);
3259      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
3260                             true, false, I);
3261    case ICmpInst::ICMP_UGT:        // (X s< 13 | X u> 15) -> no change
3262      break;
3263    case ICmpInst::ICMP_NE:         // (X s< 13 | X != 15) -> X != 15
3264    case ICmpInst::ICMP_SLT:        // (X s< 13 | X s< 15) -> X s< 15
3265      return ReplaceInstUsesWith(I, RHS);
3266    case ICmpInst::ICMP_ULT:        // (X s< 13 | X u< 15) -> no change
3267      break;
3268    }
3269    break;
3270  case ICmpInst::ICMP_UGT:
3271    switch (RHSCC) {
3272    default: llvm_unreachable("Unknown integer condition code!");
3273    case ICmpInst::ICMP_EQ:         // (X u> 13 | X == 15) -> X u> 13
3274    case ICmpInst::ICMP_UGT:        // (X u> 13 | X u> 15) -> X u> 13
3275      return ReplaceInstUsesWith(I, LHS);
3276    case ICmpInst::ICMP_SGT:        // (X u> 13 | X s> 15) -> no change
3277      break;
3278    case ICmpInst::ICMP_NE:         // (X u> 13 | X != 15) -> true
3279    case ICmpInst::ICMP_ULT:        // (X u> 13 | X u< 15) -> true
3280      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
3281    case ICmpInst::ICMP_SLT:        // (X u> 13 | X s< 15) -> no change
3282      break;
3283    }
3284    break;
3285  case ICmpInst::ICMP_SGT:
3286    switch (RHSCC) {
3287    default: llvm_unreachable("Unknown integer condition code!");
3288    case ICmpInst::ICMP_EQ:         // (X s> 13 | X == 15) -> X s> 13
3289    case ICmpInst::ICMP_SGT:        // (X s> 13 | X s> 15) -> X s> 13
3290      return ReplaceInstUsesWith(I, LHS);
3291    case ICmpInst::ICMP_UGT:        // (X s> 13 | X u> 15) -> no change
3292      break;
3293    case ICmpInst::ICMP_NE:         // (X s> 13 | X != 15) -> true
3294    case ICmpInst::ICMP_SLT:        // (X s> 13 | X s< 15) -> true
3295      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
3296    case ICmpInst::ICMP_ULT:        // (X s> 13 | X u< 15) -> no change
3297      break;
3298    }
3299    break;
3300  }
3301  return 0;
3302}
3303
3304Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
3305                                         FCmpInst *RHS) {
3306  if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
3307      RHS->getPredicate() == FCmpInst::FCMP_UNO &&
3308      LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
3309    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
3310      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
3311        // If either of the constants are nans, then the whole thing returns
3312        // true.
3313        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
3314          return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
3315
3316        // Otherwise, no need to compare the two constants, compare the
3317        // rest.
3318        return new FCmpInst(FCmpInst::FCMP_UNO,
3319                            LHS->getOperand(0), RHS->getOperand(0));
3320      }
3321
3322    // Handle vector zeros.  This occurs because the canonical form of
3323    // "fcmp uno x,x" is "fcmp uno x, 0".
3324    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
3325        isa<ConstantAggregateZero>(RHS->getOperand(1)))
3326      return new FCmpInst(FCmpInst::FCMP_UNO,
3327                          LHS->getOperand(0), RHS->getOperand(0));
3328
3329    return 0;
3330  }
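  // For illustration (a sketch):
  //    %a = fcmp uno float %x, 1.0
  //    %b = fcmp uno float %y, 2.0
  //    %r = or i1 %a, %b
  // becomes:
  //    %r = fcmp uno float %x, %y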
3331
3332  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
3333  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
3334  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
3335
3336  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
3337    // Swap RHS operands to match LHS.
3338    Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
3339    std::swap(Op1LHS, Op1RHS);
3340  }
3341  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
3342    // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
3343    if (Op0CC == Op1CC)
3344      return new FCmpInst((FCmpInst::Predicate)Op0CC,
3345                          Op0LHS, Op0RHS);
3346    if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
3347      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
3348    if (Op0CC == FCmpInst::FCMP_FALSE)
3349      return ReplaceInstUsesWith(I, RHS);
3350    if (Op1CC == FCmpInst::FCMP_FALSE)
3351      return ReplaceInstUsesWith(I, LHS);
3352    bool Op0Ordered;
3353    bool Op1Ordered;
3354    unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
3355    unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
3356    if (Op0Ordered == Op1Ordered) {
3357      // If both are ordered or unordered, return a new fcmp with
3358      // or'ed predicates.
3359      Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS);
3360      if (Instruction *I = dyn_cast<Instruction>(RV))
3361        return I;
3362      // Otherwise, it's a constant boolean value...
3363      return ReplaceInstUsesWith(I, RV);
3364    }
3365  }
3366  return 0;
3367}
3368
3369/// FoldOrWithConstants - This helper function folds:
3370///
3371///     ((A | B) & C1) | (B & C2)
3372///
3373/// into:
3374///
3375///     (A & C1) | B
3376///
3377/// when the XOR of the two constants is "all ones" (-1).
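// Worked example (illustrative) with C1 = 1 and C2 = -2 (1 ^ -2 == -1):
//    ((A | B) & 1) | (B & -2)  -->  (A & 1) | B
// since each bit of B is kept by exactly one of the two sides.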
3378Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
3379                                               Value *A, Value *B, Value *C) {
3380  ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
3381  if (!CI1) return 0;
3382
3383  Value *V1 = 0;
3384  ConstantInt *CI2 = 0;
3385  if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;
3386
3387  APInt Xor = CI1->getValue() ^ CI2->getValue();
3388  if (!Xor.isAllOnesValue()) return 0;
3389
3390  if (V1 == A || V1 == B) {
3391    Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
3392    return BinaryOperator::CreateOr(NewOp, V1);
3393  }
3394
3395  return 0;
3396}
3397
3398Instruction *InstCombiner::visitOr(BinaryOperator &I) {
3399  bool Changed = SimplifyCommutative(I);
3400  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
3401
3402  if (Value *V = SimplifyOrInst(Op0, Op1, TD))
3403    return ReplaceInstUsesWith(I, V);
3404
3405
3406  // See if we can simplify any instructions used by the instruction whose sole
3407  // purpose is to compute bits we don't care about.
3408  if (SimplifyDemandedInstructionBits(I))
3409    return &I;
3410
3411  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
3412    ConstantInt *C1 = 0; Value *X = 0;
3413    // (X & C1) | C2 --> (X | C2) & (C1|C2)
3414    if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
3415        isOnlyUse(Op0)) {
3416      Value *Or = Builder->CreateOr(X, RHS);
3417      Or->takeName(Op0);
3418      return BinaryOperator::CreateAnd(Or,
3419                         ConstantInt::get(I.getContext(),
3420                                          RHS->getValue() | C1->getValue()));
3421    }
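    // E.g. (illustrative): (X & 0xF0) | 0x0F  -->  (X | 0x0F) & 0xFF.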
3422
3423    // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
3424    if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
3425        isOnlyUse(Op0)) {
3426      Value *Or = Builder->CreateOr(X, RHS);
3427      Or->takeName(Op0);
3428      return BinaryOperator::CreateXor(Or,
3429                 ConstantInt::get(I.getContext(),
3430                                  C1->getValue() & ~RHS->getValue()));
3431    }
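    // E.g. (illustrative): (X ^ 0x33) | 0x0F  -->  (X | 0x0F) ^ 0x30.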
3432
3433    // Try to fold constant and into select arguments.
3434    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
3435      if (Instruction *R = FoldOpIntoSelect(I, SI))
3436        return R;
3437    if (isa<PHINode>(Op0))
3438      if (Instruction *NV = FoldOpIntoPhi(I))
3439        return NV;
3440  }
3441
3442  Value *A = 0, *B = 0;
3443  ConstantInt *C1 = 0, *C2 = 0;
3444
3445  // (A | B) | C  and  A | (B | C)                  -> bswap if possible.
3446  // (A >> B) | (C << D)  and  (A << B) | (C >> D)  -> bswap if possible.
3447  if (match(Op0, m_Or(m_Value(), m_Value())) ||
3448      match(Op1, m_Or(m_Value(), m_Value())) ||
3449      (match(Op0, m_Shift(m_Value(), m_Value())) &&
3450       match(Op1, m_Shift(m_Value(), m_Value())))) {
3451    if (Instruction *BSwap = MatchBSwap(I))
3452      return BSwap;
3453  }
3454
3455  // (X^C)|Y -> (X|Y)^C iff Y&C == 0
3456  if (Op0->hasOneUse() &&
3457      match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
3458      MaskedValueIsZero(Op1, C1->getValue())) {
3459    Value *NOr = Builder->CreateOr(A, Op1);
3460    NOr->takeName(Op0);
3461    return BinaryOperator::CreateXor(NOr, C1);
3462  }
3463
3464  // Y|(X^C) -> (X|Y)^C iff Y&C == 0
3465  if (Op1->hasOneUse() &&
3466      match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
3467      MaskedValueIsZero(Op0, C1->getValue())) {
3468    Value *NOr = Builder->CreateOr(A, Op0);
3469    NOr->takeName(Op1);
3470    return BinaryOperator::CreateXor(NOr, C1);
3471  }
3472
3473  // (A & C)|(B & D)
3474  Value *C = 0, *D = 0;
3475  if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
3476      match(Op1, m_And(m_Value(B), m_Value(D)))) {
3477    Value *V1 = 0, *V2 = 0, *V3 = 0;
3478    C1 = dyn_cast<ConstantInt>(C);
3479    C2 = dyn_cast<ConstantInt>(D);
3480    if (C1 && C2) {  // (A & C1)|(B & C2)
3481      // If we have: ((V + N) & C1) | (V & C2)
3482      // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
3483      // replace with V+N.
3484      if (C1->getValue() == ~C2->getValue()) {
3485        if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
3486            match(A, m_Add(m_Value(V1), m_Value(V2)))) {
3487          // Add commutes, try both ways.
3488          if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
3489            return ReplaceInstUsesWith(I, A);
3490          if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
3491            return ReplaceInstUsesWith(I, A);
3492        }
3493        // Or commutes, try both ways.
3494        if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
3495            match(B, m_Add(m_Value(V1), m_Value(V2)))) {
3496          // Add commutes, try both ways.
3497          if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
3498            return ReplaceInstUsesWith(I, B);
3499          if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
3500            return ReplaceInstUsesWith(I, B);
3501        }
3502      }
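      // Worked example (illustrative, i16): with C1 = 0xFF00 and C2 = 0x00FF,
      //   ((V + 256) & 0xFF00) | (V & 0x00FF) --> V + 256,
      // since adding a multiple of 256 cannot change the low byte.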
3503
3504      // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
3505      // iff (C1&C2) == 0 and (N&~C1) == 0
3506      if ((C1->getValue() & C2->getValue()) == 0) {
3507        if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
3508            ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) ||  // (V|N)
3509             (V2 == B && MaskedValueIsZero(V1, ~C1->getValue()))))   // (N|V)
3510          return BinaryOperator::CreateAnd(A,
3511                               ConstantInt::get(A->getContext(),
3512                                                C1->getValue()|C2->getValue()));
3513        // Or commutes, try both ways.
3514        if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
3515            ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) ||  // (V|N)
3516             (V2 == A && MaskedValueIsZero(V1, ~C2->getValue()))))   // (N|V)
3517          return BinaryOperator::CreateAnd(B,
3518                               ConstantInt::get(B->getContext(),
3519                                                C1->getValue()|C2->getValue()));
3520      }
3521    }
3522
3523    // Check to see if we have any common things being and'ed.  If so, find the
3524    // terms for V1 & (V2|V3).
3525    if (isOnlyUse(Op0) || isOnlyUse(Op1)) {
3526      V1 = 0;
3527      if (A == B)      // (A & C)|(A & D) == A & (C|D)
3528        V1 = A, V2 = C, V3 = D;
3529      else if (A == D) // (A & C)|(B & A) == A & (B|C)
3530        V1 = A, V2 = B, V3 = C;
3531      else if (C == B) // (A & C)|(C & D) == C & (A|D)
3532        V1 = C, V2 = A, V3 = D;
3533      else if (C == D) // (A & C)|(B & C) == C & (A|B)
3534        V1 = C, V2 = A, V3 = B;
3535
3536      if (V1) {
3537        Value *Or = Builder->CreateOr(V2, V3, "tmp");
3538        return BinaryOperator::CreateAnd(V1, Or);
3539      }
3540    }
3541
3542    // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) ->  C0 ? A : B, and commuted variants
3543    if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
3544      return Match;
3545    if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
3546      return Match;
3547    if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D))
3548      return Match;
3549    if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C))
3550      return Match;
3551
3552    // ((A&~B)|(~A&B)) -> A^B
3553    if ((match(C, m_Not(m_Specific(D))) &&
3554         match(B, m_Not(m_Specific(A)))))
3555      return BinaryOperator::CreateXor(A, D);
3556    // ((~B&A)|(~A&B)) -> A^B
3557    if ((match(A, m_Not(m_Specific(D))) &&
3558         match(B, m_Not(m_Specific(C)))))
3559      return BinaryOperator::CreateXor(C, D);
3560    // ((A&~B)|(B&~A)) -> A^B
3561    if ((match(C, m_Not(m_Specific(B))) &&
3562         match(D, m_Not(m_Specific(A)))))
3563      return BinaryOperator::CreateXor(A, B);
3564    // ((~B&A)|(B&~A)) -> A^B
3565    if ((match(A, m_Not(m_Specific(B))) &&
3566         match(D, m_Not(m_Specific(C)))))
3567      return BinaryOperator::CreateXor(C, B);
3568  }
3569
3570  // (X >> Z) | (Y >> Z)  -> (X|Y) >> Z  for all shifts.
3571  if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
3572    if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
3573      if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
3574          SI0->getOperand(1) == SI1->getOperand(1) &&
3575          (SI0->hasOneUse() || SI1->hasOneUse())) {
3576        Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
3577                                         SI0->getName());
3578        return BinaryOperator::Create(SI1->getOpcode(), NewOp,
3579                                      SI1->getOperand(1));
3580      }
3581  }
3582
3583  // ((A|B)&1)|(B&-2) -> (A&1) | B
3584  if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
3585      match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
3586    Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C);
3587    if (Ret) return Ret;
3588  }
3589  // (B&-2)|((A|B)&1) -> (A&1) | B
3590  if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
3591      match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
3592    Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C);
3593    if (Ret) return Ret;
3594  }
3595
3596  // (~A | ~B) == (~(A & B)) - De Morgan's Law
3597  if (Value *Op0NotVal = dyn_castNotVal(Op0))
3598    if (Value *Op1NotVal = dyn_castNotVal(Op1))
3599      if (Op0->hasOneUse() && Op1->hasOneUse()) {
3600        Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
3601                                        I.getName()+".demorgan");
3602        return BinaryOperator::CreateNot(And);
3603      }
3604
3605  // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
3606  if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) {
3607    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
3608      return R;
3609
3610    if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
3611      if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
3612        return Res;
3613  }
3614
3615  // fold (or (cast A), (cast B)) -> (cast (or A, B))
3616  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
3617    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
3618      if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
3619        if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
3620            !isa<ICmpInst>(Op1C->getOperand(0))) {
3621          const Type *SrcTy = Op0C->getOperand(0)->getType();
3622          if (SrcTy == Op1C->getOperand(0)->getType() &&
3623              SrcTy->isIntOrIntVector() &&
3624              // Only do this if the casts both really cause code to be
3625              // generated.
3626              ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
3627                                I.getType()) &&
3628              ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
3629                                I.getType())) {
3630            Value *NewOp = Builder->CreateOr(Op0C->getOperand(0),
3631                                             Op1C->getOperand(0), I.getName());
3632            return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
3633          }
3634        }
3635      }
3636  }
3637
3638
3639  // (fcmp uno x, c) | (fcmp uno y, c)  -> (fcmp uno x, y)
3640  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
3641    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
3642      if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
3643        return Res;
3644  }
3645
3646  return Changed ? &I : 0;
3647}
3648
3649namespace {
3650
3651// XorSelf - Implements: X ^ X --> 0
3652struct XorSelf {
3653  Value *RHS;
3654  XorSelf(Value *rhs) : RHS(rhs) {}
3655  bool shouldApply(Value *LHS) const { return LHS == RHS; }
3656  Instruction *apply(BinaryOperator &Xor) const {
3657    return &Xor;
3658  }
3659};
3660
3661}
3662
3663Instruction *InstCombiner::visitXor(BinaryOperator &I) {
3664  bool Changed = SimplifyCommutative(I);
3665  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
3666
3667  if (isa<UndefValue>(Op1)) {
3668    if (isa<UndefValue>(Op0))
3669      // Handle undef ^ undef -> 0 special case. This is a common
3670      // idiom (misuse).
3671      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
3672    return ReplaceInstUsesWith(I, Op1);  // X ^ undef -> undef
3673  }
3674
3675  // xor X, X = 0, even if X is nested in a sequence of Xor's.
3676  if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
3677    assert(Result == &I && "AssociativeOpt didn't work?"); (void)Result;
3678    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
3679  }
3680
3681  // See if we can simplify any instructions used by the instruction whose sole
3682  // purpose is to compute bits we don't care about.
3683  if (SimplifyDemandedInstructionBits(I))
3684    return &I;
3685  if (isa<VectorType>(I.getType()))
3686    if (isa<ConstantAggregateZero>(Op1))
3687      return ReplaceInstUsesWith(I, Op0);  // X ^ <0,0> -> X
3688
3689  // Is this a ~ operation?
3690  if (Value *NotOp = dyn_castNotVal(&I)) {
3691    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
3692      if (Op0I->getOpcode() == Instruction::And ||
3693          Op0I->getOpcode() == Instruction::Or) {
3694        // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
3695        // ~(~X | Y) === (X & ~Y) - De Morgan's Law
3696        if (dyn_castNotVal(Op0I->getOperand(1)))
3697          Op0I->swapOperands();
3698        if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
3699          Value *NotY =
3700            Builder->CreateNot(Op0I->getOperand(1),
3701                               Op0I->getOperand(1)->getName()+".not");
3702          if (Op0I->getOpcode() == Instruction::And)
3703            return BinaryOperator::CreateOr(Op0NotVal, NotY);
3704          return BinaryOperator::CreateAnd(Op0NotVal, NotY);
3705        }
3706
3707        // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
3708        // ~(X | Y) === (~X & ~Y) - De Morgan's Law
3709        if (isFreeToInvert(Op0I->getOperand(0)) &&
3710            isFreeToInvert(Op0I->getOperand(1))) {
3711          Value *NotX =
3712            Builder->CreateNot(Op0I->getOperand(0), "notlhs");
3713          Value *NotY =
3714            Builder->CreateNot(Op0I->getOperand(1), "notrhs");
3715          if (Op0I->getOpcode() == Instruction::And)
3716            return BinaryOperator::CreateOr(NotX, NotY);
3717          return BinaryOperator::CreateAnd(NotX, NotY);
3718        }
3719      }
3720    }
3721  }
3722
3723
3724  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
3725    if (RHS->isOne() && Op0->hasOneUse()) {
3726      // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
3727      if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
3728        return new ICmpInst(ICI->getInversePredicate(),
3729                            ICI->getOperand(0), ICI->getOperand(1));
3730
3731      if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0))
3732        return new FCmpInst(FCI->getInversePredicate(),
3733                            FCI->getOperand(0), FCI->getOperand(1));
3734    }
3735
3736    // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
3737    if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
3738      if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
3739        if (CI->hasOneUse() && Op0C->hasOneUse()) {
3740          Instruction::CastOps Opcode = Op0C->getOpcode();
3741          if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
3742              (RHS == ConstantExpr::getCast(Opcode,
3743                                           ConstantInt::getTrue(I.getContext()),
3744                                            Op0C->getDestTy()))) {
3745            CI->setPredicate(CI->getInversePredicate());
3746            return CastInst::Create(Opcode, CI, Op0C->getType());
3747          }
3748        }
3749      }
3750    }
3751
3752    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
3753      // ~(c-X) == X-c-1 == X+(-c-1)
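      // Worked check (illustrative): ~(5 - X) == X - 5 - 1 == X + (-6).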
3754      if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
3755        if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
3756          Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
3757          Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
3758                                      ConstantInt::get(I.getType(), 1));
3759          return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
3760        }
3761
3762      if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
3763        if (Op0I->getOpcode() == Instruction::Add) {
3764          // ~(X-c) --> (-c-1)-X
3765          if (RHS->isAllOnesValue()) {
3766            Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
3767            return BinaryOperator::CreateSub(
3768                           ConstantExpr::getSub(NegOp0CI,
3769                                      ConstantInt::get(I.getType(), 1)),
3770                                      Op0I->getOperand(0));
3771          } else if (RHS->getValue().isSignBit()) {
3772            // (X + C) ^ signbit -> (X + C + signbit)
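            // (XOR'ing in the sign bit equals adding it, since the carry out
            // of the top bit is discarded.)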
3773            Constant *C = ConstantInt::get(I.getContext(),
3774                                           RHS->getValue() + Op0CI->getValue());
3775            return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
3776
3777          }
3778        } else if (Op0I->getOpcode() == Instruction::Or) {
3779          // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
3780          if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
3781            Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
3782            // Anything in both C1 and C2 is known to be zero, remove it from
3783            // NewRHS.
3784            Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
3785            NewRHS = ConstantExpr::getAnd(NewRHS,
3786                                       ConstantExpr::getNot(CommonBits));
3787            Worklist.Add(Op0I);
3788            I.setOperand(0, Op0I->getOperand(0));
3789            I.setOperand(1, NewRHS);
3790            return &I;
3791          }
3792        }
3793      }
3794    }
3795
3796    // Try to fold constant and into select arguments.
3797    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
3798      if (Instruction *R = FoldOpIntoSelect(I, SI))
3799        return R;
3800    if (isa<PHINode>(Op0))
3801      if (Instruction *NV = FoldOpIntoPhi(I))
3802        return NV;
3803  }
3804
3805  if (Value *X = dyn_castNotVal(Op0))   // ~A ^ A == -1
3806    if (X == Op1)
3807      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
3808
3809  if (Value *X = dyn_castNotVal(Op1))   // A ^ ~A == -1
3810    if (X == Op0)
3811      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
3812
3813
3814  BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
3815  if (Op1I) {
3816    Value *A, *B;
3817    if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
3818      if (A == Op0) {              // B^(B|A) == (A|B)^B
3819        Op1I->swapOperands();
3820        I.swapOperands();
3821        std::swap(Op0, Op1);
3822      } else if (B == Op0) {       // B^(A|B) == (A|B)^B
3823        I.swapOperands();     // Simplified below.
3824        std::swap(Op0, Op1);
3825      }
3826    } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) {
3827      return ReplaceInstUsesWith(I, B);                      // A^(A^B) == B
3828    } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
3829      return ReplaceInstUsesWith(I, A);                      // A^(B^A) == B
3830    } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
3831               Op1I->hasOneUse()){
3832      if (A == Op0) {                                      // A^(A&B) -> A^(B&A)
3833        Op1I->swapOperands();
3834        std::swap(A, B);
3835      }
3836      if (B == Op0) {                                      // A^(B&A) -> (B&A)^A
3837        I.swapOperands();     // Simplified below.
3838        std::swap(Op0, Op1);
3839      }
3840    }
3841  }
3842
3843  BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
3844  if (Op0I) {
3845    Value *A, *B;
3846    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
3847        Op0I->hasOneUse()) {
3848      if (A == Op1)                                  // (B|A)^B == (A|B)^B
3849        std::swap(A, B);
3850      if (B == Op1)                                  // (A|B)^B == A & ~B
3851        return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
3852    } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
3853      return ReplaceInstUsesWith(I, B);                      // (A^B)^A == B
3854    } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
3855      return ReplaceInstUsesWith(I, A);                      // (B^A)^A == B
3856    } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
3857               Op0I->hasOneUse()){
3858      if (A == Op1)                                        // (A&B)^A -> (B&A)^A
3859        std::swap(A, B);
3860      if (B == Op1 &&                                      // (B&A)^A == ~B & A
3861          !isa<ConstantInt>(Op1)) {  // Canonical form is (B&C)^C
3862        return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
3863      }
3864    }
3865  }
3866
3867  // (X >> Z) ^ (Y >> Z)  -> (X^Y) >> Z  for all shifts.
3868  if (Op0I && Op1I && Op0I->isShift() &&
3869      Op0I->getOpcode() == Op1I->getOpcode() &&
3870      Op0I->getOperand(1) == Op1I->getOperand(1) &&
3871      (Op0I->hasOneUse() || Op1I->hasOneUse())) {
3872    Value *NewOp =
3873      Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
3874                         Op0I->getName());
3875    return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
3876                                  Op1I->getOperand(1));
3877  }
3878
3879  if (Op0I && Op1I) {
3880    Value *A, *B, *C, *D;
3881    // (A & B)^(A | B) -> A ^ B
3882    if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
3883        match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
3884      if ((A == C && B == D) || (A == D && B == C))
3885        return BinaryOperator::CreateXor(A, B);
3886    }
3887    // (A | B)^(A & B) -> A ^ B
3888    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
3889        match(Op1I, m_And(m_Value(C), m_Value(D)))) {
3890      if ((A == C && B == D) || (A == D && B == C))
3891        return BinaryOperator::CreateXor(A, B);
3892    }
3893
3894    // (A & B)^(C & D)
3895    if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
3896        match(Op0I, m_And(m_Value(A), m_Value(B))) &&
3897        match(Op1I, m_And(m_Value(C), m_Value(D)))) {
3898      // (X & Y)^(X & Z) -> (Y^Z) & X
3899      Value *X = 0, *Y = 0, *Z = 0;
3900      if (A == C)
3901        X = A, Y = B, Z = D;
3902      else if (A == D)
3903        X = A, Y = B, Z = C;
3904      else if (B == C)
3905        X = B, Y = A, Z = D;
3906      else if (B == D)
3907        X = B, Y = A, Z = C;
3908
3909      if (X) {
3910        Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
3911        return BinaryOperator::CreateAnd(NewOp, X);
3912      }
3913    }
3914  }
3915
3916  // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
3917  if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
3918    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
3919      return R;
3920
3921  // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
3922  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
3923    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
3924      if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
3925        const Type *SrcTy = Op0C->getOperand(0)->getType();
3926        if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
3927            // Only do this if the casts both really cause code to be generated.
3928            ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
3929                              I.getType()) &&
3930            ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
3931                              I.getType())) {
3932          Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
3933                                            Op1C->getOperand(0), I.getName());
3934          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
3935        }
3936      }
3937  }
3938
3939  return Changed ? &I : 0;
3940}
3941
3942
3943Instruction *InstCombiner::visitShl(BinaryOperator &I) {
3944  return commonShiftTransforms(I);
3945}
3946
3947Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
3948  return commonShiftTransforms(I);
3949}
3950
3951Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
3952  if (Instruction *R = commonShiftTransforms(I))
3953    return R;
3954
3955  Value *Op0 = I.getOperand(0);
3956
3957  // ashr int -1, X = -1   (for any arithmetic right shift of ~0)
3958  if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
3959    if (CSI->isAllOnesValue())
3960      return ReplaceInstUsesWith(I, CSI);
3961
3962  // See if we can turn a signed shr into an unsigned shr.
3963  if (MaskedValueIsZero(Op0,
3964                        APInt::getSignBit(I.getType()->getScalarSizeInBits())))
3965    return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
3966
3967  // Arithmetic shifting an all-sign-bit value is a no-op.
3968  unsigned NumSignBits = ComputeNumSignBits(Op0);
3969  if (NumSignBits == Op0->getType()->getScalarSizeInBits())
3970    return ReplaceInstUsesWith(I, Op0);
3971
3972  return 0;
3973}
3974
3975Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
3976  assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
3977  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
3978
3979  // shl X, 0 == X and shr X, 0 == X
3980  // shl 0, X == 0 and shr 0, X == 0
3981  if (Op1 == Constant::getNullValue(Op1->getType()) ||
3982      Op0 == Constant::getNullValue(Op0->getType()))
3983    return ReplaceInstUsesWith(I, Op0);
3984
3985  if (isa<UndefValue>(Op0)) {
3986    if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
3987      return ReplaceInstUsesWith(I, Op0);
3988    else                                    // undef << X -> 0, undef >>u X -> 0
3989      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
3990  }
3991  if (isa<UndefValue>(Op1)) {
3992    if (I.getOpcode() == Instruction::AShr)  // X >>s undef -> X
3993      return ReplaceInstUsesWith(I, Op0);
3994    else                                     // X << undef, X >>u undef -> 0
3995      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
3996  }
3997
3998  // See if we can fold away this shift.
3999  if (SimplifyDemandedInstructionBits(I))
4000    return &I;
4001
4002  // Try to fold a constant shifted value into the select's arms.
4003  if (isa<Constant>(Op0))
4004    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
4005      if (Instruction *R = FoldOpIntoSelect(I, SI))
4006        return R;
4007
4008  if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
4009    if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
4010      return Res;
4011  return 0;
4012}
4013
4014Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
4015                                               BinaryOperator &I) {
4016  bool isLeftShift = I.getOpcode() == Instruction::Shl;
4017
4018  // Cache the number of bits in the type being shifted; shift amounts are
4019  // checked against it below.
4020  uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
4021
4022  // shl i32 X, 32 = 0 and lshr i8 Y, 9 = 0; for an oversized ashr we clamp
4023  // the shift amount to TypeBits-1 instead of eliminating the shift.
4024  //
4025  if (Op1->uge(TypeBits)) {
4026    if (I.getOpcode() != Instruction::AShr)
4027      return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
4028    else {
4029      I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
4030      return &I;
4031    }
4032  }
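  // For example (illustrative): shl i32 %x, 34 ==> 0 and lshr i32 %x, 34 ==> 0,
  // but ashr i32 %x, 34 ==> ashr i32 %x, 31 (the sign is preserved).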
4033
4034  // ((X*C1) << C2) == (X * (C1 << C2))
4035  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
4036    if (BO->getOpcode() == Instruction::Mul && isLeftShift)
4037      if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
4038        return BinaryOperator::CreateMul(BO->getOperand(0),
4039                                        ConstantExpr::getShl(BOOp, Op1));
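  // e.g. (illustrative): shl i32 (mul i32 %x, 3), 2 ==> mul i32 %x, 12.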
4040
4041  // Try to fold this shift by a constant into the select's arms.
4042  if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
4043    if (Instruction *R = FoldOpIntoSelect(I, SI))
4044      return R;
4045  if (isa<PHINode>(Op0))
4046    if (Instruction *NV = FoldOpIntoPhi(I))
4047      return NV;
4048
4049  // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
4050  if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
4051    Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
4052    // If 'shift2' is an ashr, we would have to get the sign bit into a funny
4053    // place.  Don't try to do this transformation in this case.  Also, we
4054    // require that the input operand is a shift-by-constant so that we have
4055    // confidence that the shifts will get folded together.  We could do this
4056    // xform in more cases, but it is unlikely to be profitable.
4057    if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
4058        isa<ConstantInt>(TrOp->getOperand(1))) {
4059      // Okay, we'll do this xform.  Make the shift of shift.
4060      Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
4061      // (shift2 (shift1 & 0x00FF), c2)
4062      Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
4063
4064      // For logical shifts, the truncation has the effect of making the high
4065      // part of the register be zeros.  Emulate this by inserting an AND to
4066      // clear the top bits as needed.  This 'and' will usually be zapped by
4067      // other xforms later if dead.
4068      unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
4069      unsigned DstSize = TI->getType()->getScalarSizeInBits();
4070      APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
4071
4072      // The mask we constructed says what the trunc would do if occurring
4073      // between the shifts.  We want to know the effect *after* the second
4074      // shift.  We know that it is a logical shift by a constant, so adjust the
4075      // mask as appropriate.
4076      if (I.getOpcode() == Instruction::Shl)
4077        MaskV <<= Op1->getZExtValue();
4078      else {
4079        assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
4080        MaskV = MaskV.lshr(Op1->getZExtValue());
4081      }
4082
4083      // shift1 & 0x00FF
4084      Value *And = Builder->CreateAnd(NSh,
4085                                      ConstantInt::get(I.getContext(), MaskV),
4086                                      TI->getName());
4087
4088      // Return the value truncated to the interesting size.
4089      return new TruncInst(And, I.getType());
4090    }
4091  }
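  // Worked example for the fold above (illustrative, i32 -> i16 with c1 = 4
  // and c2 = 2):
  //   %s = lshr i32 %x, 4
  //   %t = trunc i32 %s to i16
  //   %r = shl i16 %t, 2
  // becomes
  //   %s2 = shl i32 %s, 2              ; the two shifts fold together later
  //   %a  = and i32 %s2, 262140        ; 0x0003FFFC = (0xFFFF << 2)
  //   %r  = trunc i32 %a to i16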
4092
4093  if (Op0->hasOneUse()) {
4094    if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
4095      // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
4096      Value *V1, *V2;
4097      ConstantInt *CC;
4098      switch (Op0BO->getOpcode()) {
4099        default: break;
4100        case Instruction::Add:
4101        case Instruction::And:
4102        case Instruction::Or:
4103        case Instruction::Xor: {
4104          // These operators commute.
4105          // Turn (Y + (X >> C)) << C  ->  (X + (Y << C)) & (~0 << C)
4106          if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
4107              match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
4108                    m_Specific(Op1)))) {
4109            Value *YS =         // (Y << C)
4110              Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
4111            // (X + (Y << C))
4112            Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
4113                                            Op0BO->getOperand(1)->getName());
4114            uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
4115            return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
4116                       APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
4117          }
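          // e.g. (illustrative, C = 4):
          //   (%y + (%x >>u 4)) << 4  ==>  ((%y << 4) + %x) & -16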
4118
4119          // Turn (Y + ((X >> C) & CC)) << C  ->  ((X & (CC << C)) + (Y << C))
4120          Value *Op0BOOp1 = Op0BO->getOperand(1);
4121          if (isLeftShift && Op0BOOp1->hasOneUse() &&
4122              match(Op0BOOp1,
4123                    m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
4124                          m_ConstantInt(CC))) &&
4125              cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
4126            Value *YS =   // (Y << C)
4127              Builder->CreateShl(Op0BO->getOperand(0), Op1,
4128                                           Op0BO->getName());
4129            // X & (CC << C)
4130            Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
4131                                           V1->getName()+".mask");
4132            return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
4133          }
4134        }
4135
4136        // FALL THROUGH.
4137        case Instruction::Sub: {
4138          // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
4139          if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
4140              match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
4141                    m_Specific(Op1)))) {
4142            Value *YS =  // (Y << C)
4143              Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
4144            // (X + (Y << C))
4145            Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
4146                                            Op0BO->getOperand(0)->getName());
4147            uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
4148            return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
4149                       APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
4150          }
4151
4152          // Turn (((X >> C)&CC) + Y) << C  ->  (X + (Y << C)) & (CC << C)
4153          if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
4154              match(Op0BO->getOperand(0),
4155                    m_And(m_Shr(m_Value(V1), m_Value(V2)),
4156                          m_ConstantInt(CC))) && V2 == Op1 &&
4157              cast<BinaryOperator>(Op0BO->getOperand(0))
4158                  ->getOperand(0)->hasOneUse()) {
4159            Value *YS = // (Y << C)
4160              Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
4161            // X & (CC << C)
4162            Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
4163                                           V1->getName()+".mask");
4164
4165            return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
4166          }
4167
4168          break;
4169        }
4170      }
4171
4172
4173      // If the operand is a bitwise operator with a constant RHS, and the
4174      // shift is its only use, we can pull it out of the shift.
4175      if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
4176        bool isValid = true;     // Valid only for And, Or, Xor
4177        bool highBitSet = false; // Transform if high bit of constant set?
4178
4179        switch (Op0BO->getOpcode()) {
4180          default: isValid = false; break;   // Do not perform transform!
4181          case Instruction::Add:
4182            isValid = isLeftShift;
4183            break;
4184          case Instruction::Or:
4185          case Instruction::Xor:
4186            highBitSet = false;
4187            break;
4188          case Instruction::And:
4189            highBitSet = true;
4190            break;
4191        }
4192
4193        // If this is a signed shift right, and the high bit is modified
4194        // by the logical operation, do not perform the transformation.
4195        // The highBitSet boolean indicates the value of the high bit of
4196        // the constant which would cause it to be modified for this
4197        // operation.
4198        //
4199        if (isValid && I.getOpcode() == Instruction::AShr)
4200          isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
4201
4202        if (isValid) {
4203          Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
4204
4205          Value *NewShift =
4206            Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
4207          NewShift->takeName(Op0BO);
4208
4209          return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
4210                                        NewRHS);
4211        }
4212      }
4213    }
4214  }
4215
4216  // Find out if this is a shift of a shift by a constant.
4217  BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
4218  if (ShiftOp && !ShiftOp->isShift())
4219    ShiftOp = 0;
4220
4221  if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
4222    ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
4223    uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
4224    uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
4225    assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
4226    if (ShiftAmt1 == 0) return 0;  // Will be simplified in the future.
4227    Value *X = ShiftOp->getOperand(0);
4228
4229    uint32_t AmtSum = ShiftAmt1+ShiftAmt2;   // Fold into one big shift.
4230
4231    const IntegerType *Ty = cast<IntegerType>(I.getType());
4232
4233    // Check for (X << c1) << c2  and  (X >> c1) >> c2
4234    if (I.getOpcode() == ShiftOp->getOpcode()) {
4235      // If this is oversized composite shift, then unsigned shifts get 0, ashr
4236      // saturates.
4237      if (AmtSum >= TypeBits) {
4238        if (I.getOpcode() != Instruction::AShr)
4239          return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
4240        AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.
4241      }
4242
4243      return BinaryOperator::Create(I.getOpcode(), X,
4244                                    ConstantInt::get(Ty, AmtSum));
4245    }
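    // e.g. (illustrative): lshr i32 (lshr i32 %x, 3), 5 ==> lshr i32 %x, 8,
    // while shl i32 (shl i32 %x, 20), 20 ==> 0 because AmtSum >= 32.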
4246
4247    if (ShiftOp->getOpcode() == Instruction::LShr &&
4248        I.getOpcode() == Instruction::AShr) {
4249      if (AmtSum >= TypeBits)
4250        return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
4251
4252      // ((X >>u C1) >>s C2) -> (X >>u (C1+C2))  since C1 != 0.
4253      return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
4254    }
4255
4256    if (ShiftOp->getOpcode() == Instruction::AShr &&
4257        I.getOpcode() == Instruction::LShr) {
4258      // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
4259      if (AmtSum >= TypeBits)
4260        AmtSum = TypeBits-1;
4261
4262      Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));
4263
4264      APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
4265      return BinaryOperator::CreateAnd(Shift,
4266                                       ConstantInt::get(I.getContext(), Mask));
4267    }
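    // e.g. (illustrative): (%x >>s 3) >>u 5 ==> (%x >>s 8) & 0x07FFFFFF.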
4268
4269    // Okay, if we get here, one shift must be a left shift and the other
4270    // must be a right shift.  See if the amounts are equal.
4271    if (ShiftAmt1 == ShiftAmt2) {
4272      // If we have ((X >>? C) << C), turn this into X & (-1 << C).
4273      if (I.getOpcode() == Instruction::Shl) {
4274        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
4275        return BinaryOperator::CreateAnd(X,
4276                                         ConstantInt::get(I.getContext(),Mask));
4277      }
4278      // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
4279      if (I.getOpcode() == Instruction::LShr) {
4280        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
4281        return BinaryOperator::CreateAnd(X,
4282                                        ConstantInt::get(I.getContext(), Mask));
4283      }
4284      // We can simplify ((X << C) >>s C) into a trunc + sext.
4285      // NOTE: we could do this for any C, but that would make 'unusual' integer
4286      // types.  For now, just stick to ones well-supported by the code
4287      // generators.
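      // e.g. (illustrative, i32 with C = 16):
      //   ashr i32 (shl i32 %x, 16), 16 ==> sext (trunc i32 %x to i16) to i32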
4288      const Type *SExtType = 0;
4289      switch (Ty->getBitWidth() - ShiftAmt1) {
4290      case 1  :
4291      case 8  :
4292      case 16 :
4293      case 32 :
4294      case 64 :
4295      case 128:
4296        SExtType = IntegerType::get(I.getContext(),
4297                                    Ty->getBitWidth() - ShiftAmt1);
4298        break;
4299      default: break;
4300      }
4301      if (SExtType)
4302        return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty);
4303      // Otherwise, we can't handle it yet.
4304    } else if (ShiftAmt1 < ShiftAmt2) {
4305      uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
4306
4307      // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
4308      if (I.getOpcode() == Instruction::Shl) {
4309        assert(ShiftOp->getOpcode() == Instruction::LShr ||
4310               ShiftOp->getOpcode() == Instruction::AShr);
4311        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
4312
4313        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
4314        return BinaryOperator::CreateAnd(Shift,
4315                                         ConstantInt::get(I.getContext(),Mask));
4316      }
4317
4318      // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
4319      if (I.getOpcode() == Instruction::LShr) {
4320        assert(ShiftOp->getOpcode() == Instruction::Shl);
4321        Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
4322
4323        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
4324        return BinaryOperator::CreateAnd(Shift,
4325                                         ConstantInt::get(I.getContext(),Mask));
4326      }
4327
4328      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
4329    } else {
4330      assert(ShiftAmt2 < ShiftAmt1);
4331      uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
4332
4333      // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
4334      if (I.getOpcode() == Instruction::Shl) {
4335        assert(ShiftOp->getOpcode() == Instruction::LShr ||
4336               ShiftOp->getOpcode() == Instruction::AShr);
4337        Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
4338                                            ConstantInt::get(Ty, ShiftDiff));
4339
4340        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
4341        return BinaryOperator::CreateAnd(Shift,
4342                                         ConstantInt::get(I.getContext(),Mask));
4343      }
4344
4345      // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
4346      if (I.getOpcode() == Instruction::LShr) {
4347        assert(ShiftOp->getOpcode() == Instruction::Shl);
4348        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
4349
4350        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
4351        return BinaryOperator::CreateAnd(Shift,
4352                                         ConstantInt::get(I.getContext(),Mask));
4353      }
4354
4355      // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
4356    }
4357  }
4358  return 0;
4359}
4360
4361
4362
4363/// FindElementAtOffset - Given a type and a constant offset, determine whether
4364/// or not there is a sequence of GEP indices into the type that will land us at
4365/// the specified offset.  If so, fill them into NewIndices and return the
4366/// resultant element type, otherwise return null.
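///
/// For example (illustrative): indexing { i32, [4 x i8] } at Offset 6 (with
/// i32 being 4 bytes) yields the indices [0, 1, 2] and the element type i8.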
4367const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
4368                                          SmallVectorImpl<Value*> &NewIndices) {
4369  if (!TD) return 0;
4370  if (!Ty->isSized()) return 0;
4371
4372  // Start with the index over the outer type.  Note that the type size
4373  // might be zero (even if the offset isn't zero) if the indexed type
4374  // is something like [0 x {int, int}]
4375  const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
4376  int64_t FirstIdx = 0;
4377  if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
4378    FirstIdx = Offset/TySize;
4379    Offset -= FirstIdx*TySize;
4380
4381    // Handle hosts where % returns negative instead of values [0..TySize).
4382    if (Offset < 0) {
4383      --FirstIdx;
4384      Offset += TySize;
4385      assert(Offset >= 0);
4386    }
4387    assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
4388  }
4389
4390  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
4391
4392  // Index into the types.  If we fail, return null.
4393  while (Offset) {
4394    // Indexing into tail padding between struct/array elements.
4395    if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
4396      return 0;
4397
4398    if (const StructType *STy = dyn_cast<StructType>(Ty)) {
4399      const StructLayout *SL = TD->getStructLayout(STy);
4400      assert(Offset < (int64_t)SL->getSizeInBytes() &&
4401             "Offset must stay within the indexed type");
4402
4403      unsigned Elt = SL->getElementContainingOffset(Offset);
4404      NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
4405                                            Elt));
4406
4407      Offset -= SL->getElementOffset(Elt);
4408      Ty = STy->getElementType(Elt);
4409    } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
4410      uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
4411      assert(EltSize && "Cannot index into a zero-sized array");
4412      NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
4413      Offset %= EltSize;
4414      Ty = AT->getElementType();
4415    } else {
4416      // Otherwise, we can't index into the middle of this scalar type, bail.
4417      return 0;
4418    }
4419  }
4420
4421  return Ty;
4422}
4423
4424
4425/// GetSelectFoldableOperands - We want to turn code that looks like this:
4426///   %C = or %A, %B
4427///   %D = select %cond, %C, %A
4428/// into:
4429///   %C = select %cond, %B, 0
4430///   %D = or %A, %C
4431///
4432/// Assuming that the specified instruction is an operand to the select, return
4433/// a bitmask indicating which operands of this instruction are foldable if they
4434/// equal the other incoming value of the select.
4435///
4436static unsigned GetSelectFoldableOperands(Instruction *I) {
4437  switch (I->getOpcode()) {
4438  case Instruction::Add:
4439  case Instruction::Mul:
4440  case Instruction::And:
4441  case Instruction::Or:
4442  case Instruction::Xor:
4443    return 3;              // Can fold through either operand.
4444  case Instruction::Sub:   // Can only fold on the amount subtracted.
4445  case Instruction::Shl:   // Can only fold on the shift amount.
4446  case Instruction::LShr:
4447  case Instruction::AShr:
4448    return 1;
4449  default:
4450    return 0;              // Cannot fold
4451  }
4452}
4453
4454/// GetSelectFoldableConstant - For the same transformation as the previous
4455/// function, return the identity constant that goes into the select.
4456static Constant *GetSelectFoldableConstant(Instruction *I) {
4457  switch (I->getOpcode()) {
4458  default: llvm_unreachable("This cannot happen!");
4459  case Instruction::Add:
4460  case Instruction::Sub:
4461  case Instruction::Or:
4462  case Instruction::Xor:
4463  case Instruction::Shl:
4464  case Instruction::LShr:
4465  case Instruction::AShr:
4466    return Constant::getNullValue(I->getType());
4467  case Instruction::And:
4468    return Constant::getAllOnesValue(I->getType());
4469  case Instruction::Mul:
4470    return ConstantInt::get(I->getType(), 1);
4471  }
4472}
4473
4474/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI
4475/// have the same opcode and only one use each.  Try to simplify this.
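///
/// For example (illustrative):
///   select i1 %c, i32 (add i32 %x, %a), i32 (add i32 %x, %b)
/// becomes
///   %v = select i1 %c, i32 %a, i32 %b
///   %r = add i32 %x, %v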
4476Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
4477                                          Instruction *FI) {
4478  if (TI->getNumOperands() == 1) {
4479    // If both arms are casts from the same source type, merge them by
4480    // selecting between the cast inputs.
4481    if (TI->isCast()) {
4482      if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
4483        return 0;
4484    } else {
4485      return 0;  // unknown unary op.
4486    }
4487
4488    // Fold this by inserting a select from the input values.
4489    SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
4490                                          FI->getOperand(0), SI.getName()+".v");
4491    InsertNewInstBefore(NewSI, SI);
4492    return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
4493                            TI->getType());
4494  }
4495
4496  // Only handle binary operators here.
4497  if (!isa<BinaryOperator>(TI))
4498    return 0;
4499
4500  // Figure out if the operations have any operands in common.
4501  Value *MatchOp, *OtherOpT, *OtherOpF;
4502  bool MatchIsOpZero;
4503  if (TI->getOperand(0) == FI->getOperand(0)) {
4504    MatchOp  = TI->getOperand(0);
4505    OtherOpT = TI->getOperand(1);
4506    OtherOpF = FI->getOperand(1);
4507    MatchIsOpZero = true;
4508  } else if (TI->getOperand(1) == FI->getOperand(1)) {
4509    MatchOp  = TI->getOperand(1);
4510    OtherOpT = TI->getOperand(0);
4511    OtherOpF = FI->getOperand(0);
4512    MatchIsOpZero = false;
4513  } else if (!TI->isCommutative()) {
4514    return 0;
4515  } else if (TI->getOperand(0) == FI->getOperand(1)) {
4516    MatchOp  = TI->getOperand(0);
4517    OtherOpT = TI->getOperand(1);
4518    OtherOpF = FI->getOperand(0);
4519    MatchIsOpZero = true;
4520  } else if (TI->getOperand(1) == FI->getOperand(0)) {
4521    MatchOp  = TI->getOperand(1);
4522    OtherOpT = TI->getOperand(0);
4523    OtherOpF = FI->getOperand(1);
4524    MatchIsOpZero = true;
4525  } else {
4526    return 0;
4527  }
4528
4529  // If we reach here, they do have operands in common.
4530  SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT,
4531                                         OtherOpF, SI.getName()+".v");
4532  InsertNewInstBefore(NewSI, SI);
4533
4534  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
4535    if (MatchIsOpZero)
4536      return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
4537    else
4538      return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
4539  }
4540  llvm_unreachable("Shouldn't get here");
4541  return 0;
4542}
4543
4544static bool isSelect01(Constant *C1, Constant *C2) {
4545  ConstantInt *C1I = dyn_cast<ConstantInt>(C1);
4546  if (!C1I)
4547    return false;
4548  ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
4549  if (!C2I)
4550    return false;
4551  return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne());
4552}
4553
4554/// FoldSelectIntoOp - Try to fold the select into one of the operands to
4555/// facilitate further optimization.
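///
/// For example (illustrative), with %sum = add i32 %x, %y:
///   select i1 %c, i32 %sum, i32 %x
/// becomes
///   %s = select i1 %c, i32 %y, i32 0
///   %r = add i32 %x, %s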
4556Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
4557                                            Value *FalseVal) {
4558  // See the comment above GetSelectFoldableOperands for a description of the
4559  // transformation we are doing here.
4560  if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
4561    if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
4562        !isa<Constant>(FalseVal)) {
4563      if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
4564        unsigned OpToFold = 0;
4565        if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
4566          OpToFold = 1;
4567        } else  if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
4568          OpToFold = 2;
4569        }
4570
4571        if (OpToFold) {
4572          Constant *C = GetSelectFoldableConstant(TVI);
4573          Value *OOp = TVI->getOperand(2-OpToFold);
4574          // Avoid creating select between 2 constants unless it's selecting
4575          // between 0 and 1.
4576          if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
4577            Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C);
4578            InsertNewInstBefore(NewSel, SI);
4579            NewSel->takeName(TVI);
4580            if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
4581              return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
4582            llvm_unreachable("Unknown instruction!!");
4583          }
4584        }
4585      }
4586    }
4587  }
4588
4589  if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) {
4590    if (FVI->hasOneUse() && FVI->getNumOperands() == 2 &&
4591        !isa<Constant>(TrueVal)) {
4592      if (unsigned SFO = GetSelectFoldableOperands(FVI)) {
4593        unsigned OpToFold = 0;
4594        if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
4595          OpToFold = 1;
4596        } else  if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
4597          OpToFold = 2;
4598        }
4599
4600        if (OpToFold) {
4601          Constant *C = GetSelectFoldableConstant(FVI);
4602          Value *OOp = FVI->getOperand(2-OpToFold);
4603          // Avoid creating select between 2 constants unless it's selecting
4604          // between 0 and 1.
4605          if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
4606            Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp);
4607            InsertNewInstBefore(NewSel, SI);
4608            NewSel->takeName(FVI);
4609            if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
4610              return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
4611            llvm_unreachable("Unknown instruction!!");
4612          }
4613        }
4614      }
4615    }
4616  }
4617
4618  return 0;
4619}
4620
4621/// visitSelectInstWithICmp - Visit a SelectInst that has an
4622/// ICmpInst as its first operand.
4623///
4624Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
4625                                                   ICmpInst *ICI) {
4626  bool Changed = false;
4627  ICmpInst::Predicate Pred = ICI->getPredicate();
4628  Value *CmpLHS = ICI->getOperand(0);
4629  Value *CmpRHS = ICI->getOperand(1);
4630  Value *TrueVal = SI.getTrueValue();
4631  Value *FalseVal = SI.getFalseValue();
4632
4633  // Check cases where the comparison is with a constant that
4634  // can be adjusted to fit the min/max idiom. We may edit ICI in
4635  // place here, so make sure the select is the only user.
4636  if (ICI->hasOneUse())
4637    if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
4638      switch (Pred) {
4639      default: break;
4640      case ICmpInst::ICMP_ULT:
4641      case ICmpInst::ICMP_SLT: {
4642        // X < MIN ? T : F  -->  F
4643        if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
4644          return ReplaceInstUsesWith(SI, FalseVal);
4645        // X < C ? X : C-1  -->  X > C-1 ? C-1 : X
4646        Constant *AdjustedRHS = SubOne(CI);
4647        if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
4648            (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
4649          Pred = ICmpInst::getSwappedPredicate(Pred);
4650          CmpRHS = AdjustedRHS;
4651          std::swap(FalseVal, TrueVal);
4652          ICI->setPredicate(Pred);
4653          ICI->setOperand(1, CmpRHS);
4654          SI.setOperand(1, TrueVal);
4655          SI.setOperand(2, FalseVal);
4656          Changed = true;
4657        }
4658        break;
4659      }
4660      case ICmpInst::ICMP_UGT:
4661      case ICmpInst::ICMP_SGT: {
4662        // X > MAX ? T : F  -->  F
4663        if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
4664          return ReplaceInstUsesWith(SI, FalseVal);
4665        // X > C ? X : C+1  -->  X < C+1 ? C+1 : X
4666        Constant *AdjustedRHS = AddOne(CI);
4667        if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
4668            (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
4669          Pred = ICmpInst::getSwappedPredicate(Pred);
4670          CmpRHS = AdjustedRHS;
4671          std::swap(FalseVal, TrueVal);
4672          ICI->setPredicate(Pred);
4673          ICI->setOperand(1, CmpRHS);
4674          SI.setOperand(1, TrueVal);
4675          SI.setOperand(2, FalseVal);
4676          Changed = true;
4677        }
4678        break;
4679      }
4680      }
4681
4682      // (x <s 0) ? -1 : 0 -> ashr x, 31         (all ones if sign bit set)
4683      // (x >s -1) ? -1 : 0 -> not (ashr x, 31)  (all ones if sign bit clear)
4684      CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
4685      if (match(TrueVal, m_ConstantInt<-1>()) &&
4686          match(FalseVal, m_ConstantInt<0>()))
4687        Pred = ICI->getPredicate();
4688      else if (match(TrueVal, m_ConstantInt<0>()) &&
4689               match(FalseVal, m_ConstantInt<-1>()))
4690        Pred = CmpInst::getInversePredicate(ICI->getPredicate());
4691
4692      if (Pred != CmpInst::BAD_ICMP_PREDICATE) {
4693        // If we are selecting between all-ones and zero based on the sign
4694        // of the input, we can smear the sign bit across the result with an
4695        // arithmetic shift and avoid the comparison entirely.
4696        const APInt &Op1CV = CI->getValue();
4697
4698        // (x <s  0) ? -1 : 0 --> x >>s 31         all ones if signbit set.
4699        // (x >s -1) ? -1 : 0 --> (x >>s 31)^-1    all ones if signbit clear.
4700        if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) ||
4701            (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
4702          Value *In = ICI->getOperand(0);
4703          Value *Sh = ConstantInt::get(In->getType(),
4704                                       In->getType()->getScalarSizeInBits()-1);
4705          In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
4706                                                        In->getName()+".lobit"),
4707                                   *ICI);
4708          if (In->getType() != SI.getType())
4709            In = CastInst::CreateIntegerCast(In, SI.getType(),
4710                                             true/*SExt*/, "tmp", ICI);
4711
4712          if (Pred == ICmpInst::ICMP_SGT)
4713            In = InsertNewInstBefore(BinaryOperator::CreateNot(In,
4714                                       In->getName()+".not"), *ICI);
4715
4716          return ReplaceInstUsesWith(SI, In);
4717        }
4718      }
4719    }
4720
4721  if (CmpLHS == TrueVal && CmpRHS == FalseVal) {
4722    // Transform (X == Y) ? X : Y  -> Y
4723    if (Pred == ICmpInst::ICMP_EQ)
4724      return ReplaceInstUsesWith(SI, FalseVal);
4725    // Transform (X != Y) ? X : Y  -> X
4726    if (Pred == ICmpInst::ICMP_NE)
4727      return ReplaceInstUsesWith(SI, TrueVal);
4728    /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
4729
4730  } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) {
4731    // Transform (X == Y) ? Y : X  -> X
4732    if (Pred == ICmpInst::ICMP_EQ)
4733      return ReplaceInstUsesWith(SI, FalseVal);
4734    // Transform (X != Y) ? Y : X  -> Y
4735    if (Pred == ICmpInst::ICMP_NE)
4736      return ReplaceInstUsesWith(SI, TrueVal);
4737    /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
4738  }
4739  return Changed ? &SI : 0;
4740}
4741
4742
4743/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a
4744/// PHI node (but the two may be in different blocks).  See if the true/false
4745/// values (V) are live in all of the predecessor blocks of the PHI.  For
4746/// example, cases like this cannot be mapped:
4747///
4748///   X = phi [ C1, BB1], [C2, BB2]
4749///   Y = add
4750///   Z = select X, Y, 0
4751///
4752/// because Y is not live in BB1/BB2.
4753///
4754static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
4755                                                   const SelectInst &SI) {
4756  // If the value is a non-instruction value like a constant or argument, it
4757  // can always be mapped.
4758  const Instruction *I = dyn_cast<Instruction>(V);
4759  if (I == 0) return true;
4760
4761  // If V is a PHI node defined in the same block as the condition PHI, we can
4762  // map the arguments.
4763  const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
4764
4765  if (const PHINode *VP = dyn_cast<PHINode>(I))
4766    if (VP->getParent() == CondPHI->getParent())
4767      return true;
4768
4769  // Otherwise, if the PHI and select are defined in the same block and if V is
4770  // defined in a different block, then we can transform it.
4771  if (SI.getParent() == CondPHI->getParent() &&
4772      I->getParent() != CondPHI->getParent())
4773    return true;
4774
4775  // Otherwise we have a 'hard' case and we can't tell without doing more
4776  // detailed dominator based analysis, punt.
4777  return false;
4778}
4779
4780/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form:
4781///   SPF2(SPF1(A, B), C)
4782Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
4783                                        SelectPatternFlavor SPF1,
4784                                        Value *A, Value *B,
4785                                        Instruction &Outer,
4786                                        SelectPatternFlavor SPF2, Value *C) {
4787  if (C == A || C == B) {
4788    // MAX(MAX(A, B), B) -> MAX(A, B)
4789    // MIN(MIN(A, B), A) -> MIN(A, B)
4790    if (SPF1 == SPF2)
4791      return ReplaceInstUsesWith(Outer, Inner);
4792
4793    // MAX(MIN(A, B), A) -> A
4794    // MIN(MAX(A, B), A) -> A
4795    if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
4796        (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) ||
4797        (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) ||
4798        (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN))
4799      return ReplaceInstUsesWith(Outer, C);
4800  }
4801
4802  // TODO: MIN(MIN(A, 23), 97)
4803  return 0;
4804}
4805
4806
4807
4808
4809Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
4810  Value *CondVal = SI.getCondition();
4811  Value *TrueVal = SI.getTrueValue();
4812  Value *FalseVal = SI.getFalseValue();
4813
4814  // select true, X, Y  -> X
4815  // select false, X, Y -> Y
4816  if (ConstantInt *C = dyn_cast<ConstantInt>(CondVal))
4817    return ReplaceInstUsesWith(SI, C->getZExtValue() ? TrueVal : FalseVal);
4818
4819  // select C, X, X -> X
4820  if (TrueVal == FalseVal)
4821    return ReplaceInstUsesWith(SI, TrueVal);
4822
4823  if (isa<UndefValue>(TrueVal))   // select C, undef, X -> X
4824    return ReplaceInstUsesWith(SI, FalseVal);
4825  if (isa<UndefValue>(FalseVal))   // select C, X, undef -> X
4826    return ReplaceInstUsesWith(SI, TrueVal);
4827  if (isa<UndefValue>(CondVal)) {  // select undef, X, Y -> X or Y
4828    if (isa<Constant>(TrueVal))
4829      return ReplaceInstUsesWith(SI, TrueVal);
4830    else
4831      return ReplaceInstUsesWith(SI, FalseVal);
4832  }
4833
4834  if (SI.getType() == Type::getInt1Ty(SI.getContext())) {
4835    if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
4836      if (C->getZExtValue()) {
4837        // Change: A = select B, true, C --> A = or B, C
4838        return BinaryOperator::CreateOr(CondVal, FalseVal);
4839      } else {
4840        // Change: A = select B, false, C --> A = and !B, C
4841        Value *NotCond =
4842          InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
4843                                             "not."+CondVal->getName()), SI);
4844        return BinaryOperator::CreateAnd(NotCond, FalseVal);
4845      }
4846    } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
4847      if (!C->getZExtValue()) {
4848        // Change: A = select B, C, false --> A = and B, C
4849        return BinaryOperator::CreateAnd(CondVal, TrueVal);
4850      } else {
4851        // Change: A = select B, C, true --> A = or !B, C
4852        Value *NotCond =
4853          InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
4854                                             "not."+CondVal->getName()), SI);
4855        return BinaryOperator::CreateOr(NotCond, TrueVal);
4856      }
4857    }
4858
4859    // select a, b, a  -> a&b
4860    // select a, a, b  -> a|b
4861    if (CondVal == TrueVal)
4862      return BinaryOperator::CreateOr(CondVal, FalseVal);
4863    else if (CondVal == FalseVal)
4864      return BinaryOperator::CreateAnd(CondVal, TrueVal);
4865  }
4866
4867  // Selecting between two integer constants?
4868  if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
4869    if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
4870      // select C, 1, 0 -> zext C to int
4871      if (FalseValC->isZero() && TrueValC->getValue() == 1) {
4872        return CastInst::Create(Instruction::ZExt, CondVal, SI.getType());
4873      } else if (TrueValC->isZero() && FalseValC->getValue() == 1) {
4874        // select C, 0, 1 -> zext !C to int
4875        Value *NotCond =
4876          InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
4877                                               "not."+CondVal->getName()), SI);
4878        return CastInst::Create(Instruction::ZExt, NotCond, SI.getType());
4879      }
4880
4881      if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) {
4882        // If one of the constants is zero (we know they can't both be) and we
4883        // have an icmp instruction with zero, and we have an 'and' with the
4884        // non-constant value, eliminate this whole mess.  This corresponds to
4885        // cases like this: ((X & 8) ? 8 : 0), where 8 has a single bit set.
4886        if (TrueValC->isZero() || FalseValC->isZero())
4887          if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) &&
4888              cast<Constant>(IC->getOperand(1))->isNullValue())
4889            if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0)))
4890              if (ICA->getOpcode() == Instruction::And &&
4891                  isa<ConstantInt>(ICA->getOperand(1)) &&
4892                  (ICA->getOperand(1) == TrueValC ||
4893                   ICA->getOperand(1) == FalseValC) &&
4894                  isOneBitSet(cast<ConstantInt>(ICA->getOperand(1)))) {
4895                // Okay, now we know that everything is set up; we just don't
4896                // know whether we have an icmp_ne or icmp_eq and whether the
4897                // true or false val is the zero.
4898                bool ShouldNotVal = !TrueValC->isZero();
4899                ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
4900                Value *V = ICA;
4901                if (ShouldNotVal)
4902                  V = InsertNewInstBefore(BinaryOperator::Create(
4903                                  Instruction::Xor, V, ICA->getOperand(1)), SI);
4904                return ReplaceInstUsesWith(SI, V);
4905              }
4906      }
4907    }
4908
4909  // See if we are selecting two values based on a comparison of the two values.
4910  if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
4911    if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
4912      // Transform (X == Y) ? X : Y  -> Y
4913      if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
4914        // This is not safe in general for floating point:
4915        // consider X== -0, Y== +0.
4916        // It becomes safe if either operand is a nonzero constant.
4917        ConstantFP *CFPt, *CFPf;
4918        if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
4919              !CFPt->getValueAPF().isZero()) ||
4920            ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
4921             !CFPf->getValueAPF().isZero()))
4922          return ReplaceInstUsesWith(SI, FalseVal);
4923      }
4924      // Transform (X != Y) ? X : Y  -> X
4925      if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
4926        return ReplaceInstUsesWith(SI, TrueVal);
4927      // NOTE: if we wanted to, this is where to detect MIN/MAX
4928
4929    } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
4930      // Transform (X == Y) ? Y : X  -> X
4931      if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
4932        // This is not safe in general for floating point:
4933        // consider X== -0, Y== +0.
4934        // It becomes safe if either operand is a nonzero constant.
4935        ConstantFP *CFPt, *CFPf;
4936        if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
4937              !CFPt->getValueAPF().isZero()) ||
4938            ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
4939             !CFPf->getValueAPF().isZero()))
4940          return ReplaceInstUsesWith(SI, FalseVal);
4941      }
4942      // Transform (X != Y) ? Y : X  -> Y
4943      if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
4944        return ReplaceInstUsesWith(SI, TrueVal);
4945      // NOTE: if we wanted to, this is where to detect MIN/MAX
4946    }
4947    // NOTE: if we wanted to, this is where to detect ABS
4948  }
4949
4950  // See if we are selecting two values based on a comparison of the two values.
4951  if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal))
4952    if (Instruction *Result = visitSelectInstWithICmp(SI, ICI))
4953      return Result;
4954
4955  if (Instruction *TI = dyn_cast<Instruction>(TrueVal))
4956    if (Instruction *FI = dyn_cast<Instruction>(FalseVal))
4957      if (TI->hasOneUse() && FI->hasOneUse()) {
4958        Instruction *AddOp = 0, *SubOp = 0;
4959
4960        // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
4961        if (TI->getOpcode() == FI->getOpcode())
4962          if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
4963            return IV;
4964
4965        // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))).  This is
4966        // even legal for FP.
4967        if ((TI->getOpcode() == Instruction::Sub &&
4968             FI->getOpcode() == Instruction::Add) ||
4969            (TI->getOpcode() == Instruction::FSub &&
4970             FI->getOpcode() == Instruction::FAdd)) {
4971          AddOp = FI; SubOp = TI;
4972        } else if ((FI->getOpcode() == Instruction::Sub &&
4973                    TI->getOpcode() == Instruction::Add) ||
4974                   (FI->getOpcode() == Instruction::FSub &&
4975                    TI->getOpcode() == Instruction::FAdd)) {
4976          AddOp = TI; SubOp = FI;
4977        }
4978
4979        if (AddOp) {
4980          Value *OtherAddOp = 0;
4981          if (SubOp->getOperand(0) == AddOp->getOperand(0)) {
4982            OtherAddOp = AddOp->getOperand(1);
4983          } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) {
4984            OtherAddOp = AddOp->getOperand(0);
4985          }
4986
4987          if (OtherAddOp) {
4988            // So at this point we know we have (Y -> OtherAddOp):
4989            //        select C, (add X, Y), (sub X, Z)
4990            Value *NegVal;  // Compute -Z
4991            if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) {
4992              NegVal = ConstantExpr::getNeg(C);
4993            } else {
4994              NegVal = InsertNewInstBefore(
4995                    BinaryOperator::CreateNeg(SubOp->getOperand(1),
4996                                              "tmp"), SI);
4997            }
4998
4999            Value *NewTrueOp = OtherAddOp;
5000            Value *NewFalseOp = NegVal;
5001            if (AddOp != TI)
5002              std::swap(NewTrueOp, NewFalseOp);
5003            Instruction *NewSel =
5004              SelectInst::Create(CondVal, NewTrueOp,
5005                                 NewFalseOp, SI.getName() + ".p");
5006
5007            NewSel = InsertNewInstBefore(NewSel, SI);
5008            return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
5009          }
5010        }
5011      }
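  // Worked example of the add/sub fold above (illustrative):
  //   select i1 %c, i32 (add i32 %x, %y), i32 (sub i32 %x, %z)
  // becomes
  //   %p = select i1 %c, i32 %y, i32 (sub i32 0, %z)
  //   %r = add i32 %x, %p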
5012
5013  // See if we can fold the select into one of our operands.
5014  if (SI.getType()->isInteger()) {
5015    if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
5016      return FoldI;
5017
5018    // MAX(MAX(a, b), a) -> MAX(a, b)
5019    // MIN(MIN(a, b), a) -> MIN(a, b)
5020    // MAX(MIN(a, b), a) -> a
5021    // MIN(MAX(a, b), a) -> a
5022    Value *LHS, *RHS, *LHS2, *RHS2;
5023    if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
5024      if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
5025        if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
5026                                          SI, SPF, RHS))
5027          return R;
5028      if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
5029        if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
5030                                          SI, SPF, LHS))
5031          return R;
5032    }
5033
5034    // TODO.
5035    // ABS(-X) -> ABS(X)
5036    // ABS(ABS(X)) -> ABS(X)
5037  }
5038
5039  // See if we can fold the select into a phi node if the condition is a select.
5040  if (isa<PHINode>(SI.getCondition()))
5041    // The true/false values have to be live in the PHI predecessor's blocks.
5042    if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
5043        CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
5044      if (Instruction *NV = FoldOpIntoPhi(SI))
5045        return NV;
5046
5047  if (BinaryOperator::isNot(CondVal)) {
5048    SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
5049    SI.setOperand(1, FalseVal);
5050    SI.setOperand(2, TrueVal);
5051    return &SI;
5052  }
5053
5054  return 0;
5055}
5056
5057/// EnforceKnownAlignment - If the specified pointer points to an object that
5058/// we control, modify the object's alignment to PrefAlign. This isn't
5059/// often possible though. If alignment is important, a more reliable approach
5060/// is to simply align all global variables and allocation instructions to
5061/// their preferred alignment from the beginning.
5062///
5063static unsigned EnforceKnownAlignment(Value *V,
5064                                      unsigned Align, unsigned PrefAlign) {
5065
5066  User *U = dyn_cast<User>(V);
5067  if (!U) return Align;
5068
5069  switch (Operator::getOpcode(U)) {
5070  default: break;
5071  case Instruction::BitCast:
5072    return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
5073  case Instruction::GetElementPtr: {
5074    // If all indexes are zero, it is just the alignment of the base pointer.
5075    bool AllZeroOperands = true;
5076    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
5077      if (!isa<Constant>(*i) ||
5078          !cast<Constant>(*i)->isNullValue()) {
5079        AllZeroOperands = false;
5080        break;
5081      }
5082
5083    if (AllZeroOperands) {
5084      // Treat this like a bitcast.
5085      return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
5086    }
5087    break;
5088  }
5089  }
5090
5091  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
5092    // If there is a large requested alignment and we can, bump up the alignment
5093    // of the global.
5094    if (!GV->isDeclaration()) {
5095      if (GV->getAlignment() >= PrefAlign)
5096        Align = GV->getAlignment();
5097      else {
5098        GV->setAlignment(PrefAlign);
5099        Align = PrefAlign;
5100      }
5101    }
5102  } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
5103    // If there is a requested alignment and if this is an alloca, round up.
5104    if (AI->getAlignment() >= PrefAlign)
5105      Align = AI->getAlignment();
5106    else {
5107      AI->setAlignment(PrefAlign);
5108      Align = PrefAlign;
5109    }
5110  }
5111
5112  return Align;
5113}
5114
5115/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
5116/// we can determine, return it, otherwise return 0.  If PrefAlign is specified,
5117/// and it is more than the alignment of the ultimate object, see if we can
5118/// increase the alignment of the ultimate object, making this check succeed.
5119unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
5120                                                  unsigned PrefAlign) {
5121  unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) :
5122                      sizeof(PrefAlign) * CHAR_BIT;
5123  APInt Mask = APInt::getAllOnesValue(BitWidth);
5124  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
5125  ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
5126  unsigned TrailZ = KnownZero.countTrailingOnes();
5127  unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
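  // e.g. (illustrative): if ComputeMaskedBits proves the low four bits of V
  // are zero, TrailZ is 4 and the known alignment is 16.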
5128
5129  if (PrefAlign > Align)
5130    Align = EnforceKnownAlignment(V, Align, PrefAlign);
5131
5132  // Otherwise, the alignment we computed is already sufficient.
5133  return Align;
5134}
5135
5136Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
5137  unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
5138  unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
5139  unsigned MinAlign = std::min(DstAlign, SrcAlign);
5140  unsigned CopyAlign = MI->getAlignment();
5141
5142  if (CopyAlign < MinAlign) {
5143    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
5144                                             MinAlign, false));
5145    return MI;
5146  }
5147
5148  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
5149  // load/store.
5150  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
5151  if (MemOpLength == 0) return 0;
5152
5153  // Source and destination pointer types are always "i8*" for the intrinsic.
5154  // See if the size is something we can handle with a single primitive
5155  // load/store.
5155  // A single load+store correctly handles overlapping memory in the memmove
5156  // case.
5157  unsigned Size = MemOpLength->getZExtValue();
5158  if (Size == 0) return MI;  // Delete this mem transfer.
5159
5160  if (Size > 8 || (Size&(Size-1)))
5161    return 0;  // If not 1/2/4/8 bytes, exit.
5162
5163  // Use an integer load+store unless we can find something better.
5164  Type *NewPtrTy =
5165            PointerType::getUnqual(IntegerType::get(MI->getContext(), Size<<3));
5166
5167  // Memcpy forces the use of i8* for the source and destination.  That means
5168  // that if you're using memcpy to move one double around, you'll get a cast
5169  // from double* to i8*.  We'd much rather use a double load+store rather than
5170  // an i64 load+store, here because this improves the odds that the source or
5171  // dest address will be promotable.  See if we can find a better type than the
5172  // integer datatype.
5173  if (Value *Op = getBitCastOperand(MI->getOperand(1))) {
5174    const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType();
5175    if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
5176      // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
5177      // down through these levels if so.
5178      while (!SrcETy->isSingleValueType()) {
5179        if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
5180          if (STy->getNumElements() == 1)
5181            SrcETy = STy->getElementType(0);
5182          else
5183            break;
5184        } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
5185          if (ATy->getNumElements() == 1)
5186            SrcETy = ATy->getElementType();
5187          else
5188            break;
5189        } else
5190          break;
5191      }
5192
5193      if (SrcETy->isSingleValueType())
5194        NewPtrTy = PointerType::getUnqual(SrcETy);
5195    }
5196  }
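  // e.g. (illustrative): an 8-byte memcpy whose source is really a double*
  // becomes a double load+store rather than an i64 load+store:
  //   %v = load double* %src
  //   store double %v, double* %dst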
5197
5198
5199  // If the memcpy/memmove provides better alignment info than we can
5200  // infer, use it.
5201  SrcAlign = std::max(SrcAlign, CopyAlign);
5202  DstAlign = std::max(DstAlign, CopyAlign);
5203
5204  Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy);
5205  Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy);
5206  Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
5207  InsertNewInstBefore(L, *MI);
5208  InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
5209
5210  // Set the size of the copy to 0; it will be deleted on the next iteration.
5211  MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
5212  return MI;
5213}
5214
5215Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
5216  unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
5217  if (MI->getAlignment() < Alignment) {
5218    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
5219                                             Alignment, false));
5220    return MI;
5221  }
5222
5223  // Extract the length and alignment and fill if they are constant.
5224  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
5225  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
5226  if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(MI->getContext()))
5227    return 0;
5228  uint64_t Len = LenC->getZExtValue();
5229  Alignment = MI->getAlignment();
5230
5231  // If the length is zero, this is a no-op
5232  if (Len == 0) return MI; // memset(d,c,0,a) -> noop
5233
5234  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
5235  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
5236    const Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.
5237
5238    Value *Dest = MI->getDest();
5239    Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));
5240
5241    // For memset, an alignment of 0 means 1, but that is not true for store.
5242    if (Alignment == 0) Alignment = 1;
5243
5244    // Extract the fill value and store.
5245    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
5246    InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill),
5247                                      Dest, false, Alignment), *MI);
5248
5249    // Set the length of the memset to 0; it will be deleted on the next iteration.
5250    MI->setLength(Constant::getNullValue(LenC->getType()));
5251    return MI;
5252  }
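  // e.g. (illustrative): memset(%p, 1, 4) becomes a single i32 store:
  //   %c = bitcast i8* %p to i32*
  //   store i32 16843009, i32* %c     ; 16843009 = 0x01010101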
5253
5254  return 0;
5255}
5256
5257
5258/// visitCallInst - CallInst simplification.  This mostly only handles folding
5259/// of intrinsic instructions.  For normal calls, it allows visitCallSite to do
5260/// the heavy lifting.
5261///
5262Instruction *InstCombiner::visitCallInst(CallInst &CI) {
5263  if (isFreeCall(&CI))
5264    return visitFree(CI);
5265
5266  // If the caller function is nounwind, mark the call as nounwind, even if the
5267  // callee isn't.
5268  if (CI.getParent()->getParent()->doesNotThrow() &&
5269      !CI.doesNotThrow()) {
5270    CI.setDoesNotThrow();
5271    return &CI;
5272  }
5273
5274  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
5275  if (!II) return visitCallSite(&CI);
5276
5277  // Intrinsics cannot occur in an invoke, so handle them here instead of in
5278  // visitCallSite.
5279  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
5280    bool Changed = false;
5281
5282    // memmove/cpy/set of zero bytes is a noop.
5283    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
5284      if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
5285
5286      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
5287        if (CI->getZExtValue() == 1) {
5288          // Replace the instruction with just byte operations.  We would
5289          // transform other cases to loads/stores, but we don't know if
5290          // alignment is sufficient.
5291        }
5292    }
5293
5294    // If we have a memmove and the source operation is a constant global,
5295    // then the source and dest pointers can't alias, so we can change this
5296    // into a call to memcpy.
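    // (A write through the destination that overlapped a constant global
    // would be undefined behavior, so the two ranges cannot overlap and the
    // stronger memcpy semantics are safe.)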
5297    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
5298      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
5299        if (GVSrc->isConstant()) {
5300          Module *M = CI.getParent()->getParent()->getParent();
5301          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
5302          const Type *Tys[1];
5303          Tys[0] = CI.getOperand(3)->getType();
5304          CI.setOperand(0,
5305                        Intrinsic::getDeclaration(M, MemCpyID, Tys, 1));
5306          Changed = true;
5307        }
5308    }
5309
5310    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
5311      // memmove(x,x,size) -> noop.
5312      if (MTI->getSource() == MTI->getDest())
5313        return EraseInstFromFunction(CI);
5314    }
5315
5316    // If we can determine a pointer alignment that is bigger than currently
5317    // set, update the alignment.
5318    if (isa<MemTransferInst>(MI)) {
5319      if (Instruction *I = SimplifyMemTransfer(MI))
5320        return I;
5321    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
5322      if (Instruction *I = SimplifyMemSet(MSI))
5323        return I;
5324    }
5325
5326    if (Changed) return II;
5327  }
5328
5329  switch (II->getIntrinsicID()) {
5330  default: break;
5331  case Intrinsic::bswap:
5332    // bswap(bswap(x)) -> x
5333    if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
5334      if (Operand->getIntrinsicID() == Intrinsic::bswap)
5335        return ReplaceInstUsesWith(CI, Operand->getOperand(1));
5336
5337    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
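    // (Illustrative: for %x : i64 truncated to i32, C = 64 - 32 = 32, so the
    // result is trunc(lshr(%x, 32)), i.e. the high 32 bits of %x.)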
5338    if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) {
5339      if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
5340        if (Operand->getIntrinsicID() == Intrinsic::bswap) {
5341          unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
5342                       TI->getType()->getPrimitiveSizeInBits();
5343          Value *CV = ConstantInt::get(Operand->getType(), C);
5344          Value *V = Builder->CreateLShr(Operand->getOperand(1), CV);
5345          return new TruncInst(V, TI->getType());
5346        }
5347    }
5348
5349    break;
5350  case Intrinsic::powi:
5351    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) {
5352      // powi(x, 0) -> 1.0
5353      if (Power->isZero())
5354        return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
5355      // powi(x, 1) -> x
5356      if (Power->isOne())
5357        return ReplaceInstUsesWith(CI, II->getOperand(1));
5358      // powi(x, -1) -> 1/x
5359      if (Power->isAllOnesValue())
5360        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
5361                                          II->getOperand(1));
5362    }
5363    break;
5364
5365  case Intrinsic::uadd_with_overflow: {
5366    Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
5367    const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
5368    uint32_t BitWidth = IT->getBitWidth();
5369    APInt Mask = APInt::getSignBit(BitWidth);
5370    APInt LHSKnownZero(BitWidth, 0);
5371    APInt LHSKnownOne(BitWidth, 0);
5372    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
5373    bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
5374    bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
5375
5376    if (LHSKnownNegative || LHSKnownPositive) {
5377      APInt RHSKnownZero(BitWidth, 0);
5378      APInt RHSKnownOne(BitWidth, 0);
5379      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
5380      bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
5381      bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
5382      if (LHSKnownNegative && RHSKnownNegative) {
5383        // The sign bit is set in both cases: this MUST overflow.
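        // (Both addends are >= 2^(BitWidth-1), so their sum is >= 2^BitWidth
        // and the unsigned add must wrap.)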
5384        // Create a simple add instruction, and insert it into the struct.
5385        Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI);
5386        Worklist.Add(Add);
5387        Constant *V[] = {
5388          UndefValue::get(LHS->getType()),ConstantInt::getTrue(II->getContext())
5389        };
5390        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
5391        return InsertValueInst::Create(Struct, Add, 0);
5392      }
5393
5394      if (LHSKnownPositive && RHSKnownPositive) {
5395        // The sign bit is clear in both cases: this CANNOT overflow.
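        // (Both addends are < 2^(BitWidth-1), so their sum is < 2^BitWidth.)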
5396        // Create a simple add instruction, and insert it into the struct.
5397        Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI);
5398        Worklist.Add(Add);
5399        Constant *V[] = {
5400          UndefValue::get(LHS->getType()),
5401          ConstantInt::getFalse(II->getContext())
5402        };
5403        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
5404        return InsertValueInst::Create(Struct, Add, 0);
5405      }
5406    }
5407  }
5408  // FALL THROUGH from uadd into sadd
5409  case Intrinsic::sadd_with_overflow:
5410    // Canonicalize constants into the RHS.
5411    if (isa<Constant>(II->getOperand(1)) &&
5412        !isa<Constant>(II->getOperand(2))) {
5413      Value *LHS = II->getOperand(1);
5414      II->setOperand(1, II->getOperand(2));
5415      II->setOperand(2, LHS);
5416      return II;
5417    }
5418
5419    // X + undef -> undef
5420    if (isa<UndefValue>(II->getOperand(2)))
5421      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
5422
5423    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
5424      // X + 0 -> {X, false}
5425      if (RHS->isZero()) {
5426        Constant *V[] = {
5427          UndefValue::get(II->getOperand(1)->getType()),
5428          ConstantInt::getFalse(II->getContext())
5429        };
5430        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
5431        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
5432      }
5433    }
5434    break;
5435  case Intrinsic::usub_with_overflow:
5436  case Intrinsic::ssub_with_overflow:
5437    // undef - X -> undef
5438    // X - undef -> undef
5439    if (isa<UndefValue>(II->getOperand(1)) ||
5440        isa<UndefValue>(II->getOperand(2)))
5441      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
5442
5443    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
5444      // X - 0 -> {X, false}
5445      if (RHS->isZero()) {
5446        Constant *V[] = {
5447          UndefValue::get(II->getOperand(1)->getType()),
5448          ConstantInt::getFalse(II->getContext())
5449        };
5450        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
5451        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
5452      }
5453    }
5454    break;
5455  case Intrinsic::umul_with_overflow:
5456  case Intrinsic::smul_with_overflow:
5457    // Canonicalize constants into the RHS.
5458    if (isa<Constant>(II->getOperand(1)) &&
5459        !isa<Constant>(II->getOperand(2))) {
5460      Value *LHS = II->getOperand(1);
5461      II->setOperand(1, II->getOperand(2));
5462      II->setOperand(2, LHS);
5463      return II;
5464    }
5465
5466    // X * undef -> undef
5467    if (isa<UndefValue>(II->getOperand(2)))
5468      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
5469
5470    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) {
5471      // X*0 -> {0, false}
5472      if (RHSI->isZero())
5473        return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
5474
5475      // X * 1 -> {X, false}
5476      if (RHSI->equalsInt(1)) {
5477        Constant *V[] = {
5478          UndefValue::get(II->getOperand(1)->getType()),
5479          ConstantInt::getFalse(II->getContext())
5480        };
5481        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
5482        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
5483      }
5484    }
5485    break;
5486  case Intrinsic::ppc_altivec_lvx:
5487  case Intrinsic::ppc_altivec_lvxl:
5488  case Intrinsic::x86_sse_loadu_ps:
5489  case Intrinsic::x86_sse2_loadu_pd:
5490  case Intrinsic::x86_sse2_loadu_dq:
5491    // Turn PPC lvx     -> load if the pointer is known aligned.
5492    // Turn X86 loadups -> load if the pointer is known aligned.
5493    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
5494      Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
5495                                         PointerType::getUnqual(II->getType()));
5496      return new LoadInst(Ptr);
5497    }
5498    break;
5499  case Intrinsic::ppc_altivec_stvx:
5500  case Intrinsic::ppc_altivec_stvxl:
5501    // Turn stvx -> store if the pointer is known aligned.
5502    if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
5503      const Type *OpPtrTy =
5504        PointerType::getUnqual(II->getOperand(1)->getType());
5505      Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
5506      return new StoreInst(II->getOperand(1), Ptr);
5507    }
5508    break;
5509  case Intrinsic::x86_sse_storeu_ps:
5510  case Intrinsic::x86_sse2_storeu_pd:
5511  case Intrinsic::x86_sse2_storeu_dq:
5512    // Turn X86 storeu -> store if the pointer is known aligned.
5513    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
5514      const Type *OpPtrTy =
5515        PointerType::getUnqual(II->getOperand(2)->getType());
5516      Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
5517      return new StoreInst(II->getOperand(2), Ptr);
5518    }
5519    break;
5520
5521  case Intrinsic::x86_sse_cvttss2si: {
5522    // This intrinsic only demands the 0th element of its input vector.  If
5523    // we can simplify the input based on that, do so now.
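    // (Illustrative: if the operand is built by insertelement into lanes 1-3
    // of some vector, those inserts are dead for this conversion and the
    // simpler lane-0 value can be used instead.)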
5524    unsigned VWidth =
5525      cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
5526    APInt DemandedElts(VWidth, 1);
5527    APInt UndefElts(VWidth, 0);
5528    if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
5529                                              UndefElts)) {
5530      II->setOperand(1, V);
5531      return II;
5532    }
5533    break;
5534  }
5535
5536  case Intrinsic::ppc_altivec_vperm:
5537    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
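    // (Each constant mask byte selects one of the 32 concatenated input
    // bytes: indices 0-15 pick a byte of V1 and 16-31 pick a byte of V2.
    // For example, a mask element of 17 extracts byte 1 of V2.)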
5538    if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
5539      assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
5540
5541      // Check that all of the elements are integer constants or undefs.
5542      bool AllEltsOk = true;
5543      for (unsigned i = 0; i != 16; ++i) {
5544        if (!isa<ConstantInt>(Mask->getOperand(i)) &&
5545            !isa<UndefValue>(Mask->getOperand(i))) {
5546          AllEltsOk = false;
5547          break;
5548        }
5549      }
5550
5551      if (AllEltsOk) {
5552        // Cast the input vectors to byte vectors.
5553        Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
5554        Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
5555        Value *Result = UndefValue::get(Op0->getType());
5556
5557        // Only extract each element once.
5558        Value *ExtractedElts[32];
5559        memset(ExtractedElts, 0, sizeof(ExtractedElts));
5560
5561        for (unsigned i = 0; i != 16; ++i) {
5562          if (isa<UndefValue>(Mask->getOperand(i)))
5563            continue;
5564          unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
5565          Idx &= 31;  // Match the hardware behavior.
5566
5567          if (ExtractedElts[Idx] == 0) {
5568            ExtractedElts[Idx] =
5569              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
5570                  ConstantInt::get(Type::getInt32Ty(II->getContext()),
5571                                   Idx&15, false), "tmp");
5572          }
5573
5574          // Insert this value into the result vector.
5575          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
5576                         ConstantInt::get(Type::getInt32Ty(II->getContext()),
5577                                          i, false), "tmp");
5578        }
5579        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
5580      }
5581    }
5582    break;
5583
5584  case Intrinsic::stackrestore: {
5585    // If the save is right next to the restore, remove the restore.  This can
5586    // happen when variable allocas are DCE'd.
5587    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
5588      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
5589        BasicBlock::iterator BI = SS;
5590        if (&*++BI == II)
5591          return EraseInstFromFunction(CI);
5592      }
5593    }
5594
5595    // Scan down this block to see if there is another stack restore in the
5596    // same block without an intervening call/alloca.
5597    BasicBlock::iterator BI = II;
5598    TerminatorInst *TI = II->getParent()->getTerminator();
5599    bool CannotRemove = false;
5600    for (++BI; &*BI != TI; ++BI) {
5601      if (isa<AllocaInst>(BI) || isMalloc(BI)) {
5602        CannotRemove = true;
5603        break;
5604      }
5605      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
5606        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
5607          // If there is a stackrestore below this one, remove this one.
5608          if (II->getIntrinsicID() == Intrinsic::stackrestore)
5609            return EraseInstFromFunction(CI);
5610          // Otherwise, ignore the intrinsic.
5611        } else {
5612          // If we found a non-intrinsic call, we can't remove the stack
5613          // restore.
5614          CannotRemove = true;
5615          break;
5616        }
5617      }
5618    }
5619
5620    // If the stack restore is in a return/unwind block and if there are no
5621    // allocas or calls between the restore and the return, nuke the restore.
5622    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
5623      return EraseInstFromFunction(CI);
5624    break;
5625  }
5626  }
5627
5628  return visitCallSite(II);
5629}
5630
5631// InvokeInst simplification
5632//
5633Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
5634  return visitCallSite(&II);
5635}
5636
5637/// isSafeToEliminateVarargsCast - If this cast does not affect the value
5638/// passed through the varargs area, we can eliminate the use of the cast.
5639static bool isSafeToEliminateVarargsCast(const CallSite CS,
5640                                         const CastInst * const CI,
5641                                         const TargetData * const TD,
5642                                         const int ix) {
5643  if (!CI->isLosslessCast())
5644    return false;
5645
5646  // The size of ByVal arguments is derived from the type, so we
5647  // can't change to a type with a different size.  If the size were
5648  // passed explicitly we could avoid this check.
5649  if (!CS.paramHasAttr(ix, Attribute::ByVal))
5650    return true;
5651
5652  const Type* SrcTy =
5653            cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
5654  const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
5655  if (!SrcTy->isSized() || !DstTy->isSized())
5656    return false;
5657  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
5658    return false;
5659  return true;
5660}
5661
5662// visitCallSite - Improvements for call and invoke instructions.
5663//
5664Instruction *InstCombiner::visitCallSite(CallSite CS) {
5665  bool Changed = false;
5666
5667  // If the callee is a constexpr cast of a function, attempt to move the cast
5668  // to the arguments of the call/invoke.
5669  if (transformConstExprCastCall(CS)) return 0;
5670
5671  Value *Callee = CS.getCalledValue();
5672
5673  if (Function *CalleeF = dyn_cast<Function>(Callee))
5674    if (CalleeF->getCallingConv() != CS.getCallingConv()) {
5675      Instruction *OldCall = CS.getInstruction();
5676      // If the call and callee calling conventions don't match, this call must
5677      // be unreachable, as the call is undefined.
5678      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
5679                UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
5680                                  OldCall);
5681      // If OldCall does not return void then replaceAllUsesWith undef.
5682      // This allows value handles and custom metadata to adjust themselves.
5683      if (!OldCall->getType()->isVoidTy())
5684        OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
5685      if (isa<CallInst>(OldCall))   // Not worth removing an invoke here.
5686        return EraseInstFromFunction(*OldCall);
5687      return 0;
5688    }
5689
5690  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
5691    // This instruction is not reachable; just remove it.  We insert a store to
5692    // undef so that we know that this code is not reachable, despite the fact
5693    // that we can't modify the CFG here.
5694    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
5695               UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
5696                  CS.getInstruction());
5697
5698    // If CS does not return void then replaceAllUsesWith undef.
5699    // This allows value handles and custom metadata to adjust themselves.
5700    if (!CS.getInstruction()->getType()->isVoidTy())
5701      CS.getInstruction()->
5702        replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
5703
5704    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
5705      // Don't break the CFG, insert a dummy cond branch.
5706      BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
5707                         ConstantInt::getTrue(Callee->getContext()), II);
5708    }
5709    return EraseInstFromFunction(*CS.getInstruction());
5710  }
5711
5712  if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
5713    if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
5714      if (In->getIntrinsicID() == Intrinsic::init_trampoline)
5715        return transformCallThroughTrampoline(CS);
5716
5717  const PointerType *PTy = cast<PointerType>(Callee->getType());
5718  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
5719  if (FTy->isVarArg()) {
5720    int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
5721    // See if we can optimize any arguments passed through the varargs area of
5722    // the call.
5723    for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
5724           E = CS.arg_end(); I != E; ++I, ++ix) {
5725      CastInst *CI = dyn_cast<CastInst>(*I);
5726      if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
5727        *I = CI->getOperand(0);
5728        Changed = true;
5729      }
5730    }
5731  }
5732
5733  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
5734    // Inline asm calls cannot throw - mark them 'nounwind'.
5735    CS.setDoesNotThrow();
5736    Changed = true;
5737  }
5738
5739  return Changed ? CS.getInstruction() : 0;
5740}
5741
5742// transformConstExprCastCall - If the callee is a constexpr cast of a function,
5743// attempt to move the cast to the arguments of the call/invoke.
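// For example (illustrative IR), a call through a bitcast callee such as
//    %r = call i32 bitcast (i8 (i32)* @f to i32 (i32)*)(i32 %x)
// becomes a direct call to @f, with %x passed unchanged and the i8 result
// cast back to i32.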
5744//
5745bool InstCombiner::transformConstExprCastCall(CallSite CS) {
5746  if (!isa<ConstantExpr>(CS.getCalledValue())) return false;
5747  ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue());
5748  if (CE->getOpcode() != Instruction::BitCast ||
5749      !isa<Function>(CE->getOperand(0)))
5750    return false;
5751  Function *Callee = cast<Function>(CE->getOperand(0));
5752  Instruction *Caller = CS.getInstruction();
5753  const AttrListPtr &CallerPAL = CS.getAttributes();
5754
5755  // Okay, this is a cast from a function to a different type.  Unless doing so
5756  // would cause a type conversion of one of our arguments, change this call to
5757  // be a direct call with arguments cast to the appropriate types.
5758  //
5759  const FunctionType *FT = Callee->getFunctionType();
5760  const Type *OldRetTy = Caller->getType();
5761  const Type *NewRetTy = FT->getReturnType();
5762
5763  if (isa<StructType>(NewRetTy))
5764    return false; // TODO: Handle multiple return values.
5765
5766  // Check to see if we are changing the return type...
5767  if (OldRetTy != NewRetTy) {
5768    if (Callee->isDeclaration() &&
5769        // Conversion is ok if changing from one pointer type to another or from
5770        // a pointer to an integer of the same size.
5771        !((isa<PointerType>(OldRetTy) || !TD ||
5772           OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
5773          (isa<PointerType>(NewRetTy) || !TD ||
5774           NewRetTy == TD->getIntPtrType(Caller->getContext()))))
5775      return false;   // Cannot transform this return value.
5776
5777    if (!Caller->use_empty() &&
5778        // void -> non-void is handled specially
5779        !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
5780      return false;   // Cannot transform this return value.
5781
5782    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
5783      Attributes RAttrs = CallerPAL.getRetAttributes();
5784      if (RAttrs & Attribute::typeIncompatible(NewRetTy))
5785        return false;   // Attribute not compatible with transformed value.
5786    }
5787
5788    // If the callsite is an invoke instruction, and the return value is used by
5789    // a PHI node in a successor, we cannot change the return type of the call
5790    // because there is no place to put the cast instruction (without breaking
5791    // the critical edge).  Bail out in this case.
5792    if (!Caller->use_empty())
5793      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
5794        for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
5795             UI != E; ++UI)
5796          if (PHINode *PN = dyn_cast<PHINode>(*UI))
5797            if (PN->getParent() == II->getNormalDest() ||
5798                PN->getParent() == II->getUnwindDest())
5799              return false;
5800  }
5801
5802  unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
5803  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
5804
5805  CallSite::arg_iterator AI = CS.arg_begin();
5806  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
5807    const Type *ParamTy = FT->getParamType(i);
5808    const Type *ActTy = (*AI)->getType();
5809
5810    if (!CastInst::isCastable(ActTy, ParamTy))
5811      return false;   // Cannot transform this parameter value.
5812
5813    if (CallerPAL.getParamAttributes(i + 1)
5814        & Attribute::typeIncompatible(ParamTy))
5815      return false;   // Attribute not compatible with transformed value.
5816
5817    // Converting from one pointer type to another or between a pointer and an
5818    // integer of the same size is safe even if we do not have a body.
5819    bool isConvertible = ActTy == ParamTy ||
5820      (TD && ((isa<PointerType>(ParamTy) ||
5821      ParamTy == TD->getIntPtrType(Caller->getContext())) &&
5822              (isa<PointerType>(ActTy) ||
5823              ActTy == TD->getIntPtrType(Caller->getContext()))));
5824    if (Callee->isDeclaration() && !isConvertible) return false;
5825  }
5826
5827  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
5828      Callee->isDeclaration())
5829    return false;   // Do not delete arguments unless we have a function body.
5830
5831  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
5832      !CallerPAL.isEmpty())
5833    // In this case we have more arguments than the new function type, but we
5834    // won't be dropping them.  Check that these extra arguments have attributes
5835    // that are compatible with being a vararg call argument.
5836    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
5837      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
5838        break;
5839      Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
5840      if (PAttrs & Attribute::VarArgsIncompatible)
5841        return false;
5842    }
5843
5844  // Okay, we decided that this is a safe thing to do: go ahead and start
5845  // inserting cast instructions as necessary...
5846  std::vector<Value*> Args;
5847  Args.reserve(NumActualArgs);
5848  SmallVector<AttributeWithIndex, 8> attrVec;
5849  attrVec.reserve(NumCommonArgs);
5850
5851  // Get any return attributes.
5852  Attributes RAttrs = CallerPAL.getRetAttributes();
5853
5854  // If the return value is not being used, the type may not be compatible
5855  // with the existing attributes.  Wipe out any problematic attributes.
5856  RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
5857
5858  // Add the new return attributes.
5859  if (RAttrs)
5860    attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
5861
5862  AI = CS.arg_begin();
5863  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
5864    const Type *ParamTy = FT->getParamType(i);
5865    if ((*AI)->getType() == ParamTy) {
5866      Args.push_back(*AI);
5867    } else {
5868      Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
5869          false, ParamTy, false);
5870      Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
5871    }
5872
5873    // Add any parameter attributes.
5874    if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
5875      attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
5876  }
5877
5878  // If the function takes more arguments than the call was taking, add them
5879  // now.
5880  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
5881    Args.push_back(Constant::getNullValue(FT->getParamType(i)));
5882
5883  // If we are removing arguments to the function, emit an obnoxious warning.
5884  if (FT->getNumParams() < NumActualArgs) {
5885    if (!FT->isVarArg()) {
5886      errs() << "WARNING: While resolving call to function '"
5887             << Callee->getName() << "' arguments were dropped!\n";
5888    } else {
5889      // Add all of the arguments in their promoted form to the arg list.
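      // (Illustrative: an i16 extra argument is widened to i32 here,
      // mirroring the C default argument promotions for varargs.)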
5890      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
5891        const Type *PTy = getPromotedType((*AI)->getType());
5892        if (PTy != (*AI)->getType()) {
5893          // Must promote to pass through va_arg area!
5894          Instruction::CastOps opcode =
5895            CastInst::getCastOpcode(*AI, false, PTy, false);
5896          Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
5897        } else {
5898          Args.push_back(*AI);
5899        }
5900
5901        // Add any parameter attributes.
5902        if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
5903          attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
5904      }
5905    }
5906  }
5907
5908  if (Attributes FnAttrs =  CallerPAL.getFnAttributes())
5909    attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
5910
5911  if (NewRetTy->isVoidTy())
5912    Caller->setName("");   // Void type should not have a name.
5913
5914  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
5915                                                     attrVec.end());
5916
5917  Instruction *NC;
5918  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
5919    NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
5920                            Args.begin(), Args.end(),
5921                            Caller->getName(), Caller);
5922    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
5923    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
5924  } else {
5925    NC = CallInst::Create(Callee, Args.begin(), Args.end(),
5926                          Caller->getName(), Caller);
5927    CallInst *CI = cast<CallInst>(Caller);
5928    if (CI->isTailCall())
5929      cast<CallInst>(NC)->setTailCall();
5930    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
5931    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
5932  }
5933
5934  // Insert a cast of the return type as necessary.
5935  Value *NV = NC;
5936  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
5937    if (!NV->getType()->isVoidTy()) {
5938      Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
5939                                                            OldRetTy, false);
5940      NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
5941
5942      // If this is an invoke instruction, we should insert it after the first
5943      // non-phi instruction in the normal successor block.
5944      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
5945        BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
5946        InsertNewInstBefore(NC, *I);
5947      } else {
5948        // Otherwise, it's a call; just insert the cast right after the call instruction.
5949        InsertNewInstBefore(NC, *Caller);
5950      }
5951      Worklist.AddUsersToWorkList(*Caller);
5952    } else {
5953      NV = UndefValue::get(Caller->getType());
5954    }
5955  }
5956
5957
5958  if (!Caller->use_empty())
5959    Caller->replaceAllUsesWith(NV);
5960
5961  EraseInstFromFunction(*Caller);
5962  return true;
5963}
5964
5965// transformCallThroughTrampoline - Turn a call to a function created by the
5966// init_trampoline intrinsic into a direct call to the underlying function.
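// For example (illustrative), a call f(a, b) through a trampoline built for
// g(nest ptr, a, b) becomes a direct call g(chain, a, b), where chain is the
// static chain value that was recorded by init_trampoline.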
5967//
5968Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
5969  Value *Callee = CS.getCalledValue();
5970  const PointerType *PTy = cast<PointerType>(Callee->getType());
5971  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
5972  const AttrListPtr &Attrs = CS.getAttributes();
5973
5974  // If the call already has the 'nest' attribute somewhere then give up -
5975  // otherwise 'nest' would occur twice after splicing in the chain.
5976  if (Attrs.hasAttrSomewhere(Attribute::Nest))
5977    return 0;
5978
5979  IntrinsicInst *Tramp =
5980    cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
5981
5982  Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
5983  const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
5984  const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
5985
5986  const AttrListPtr &NestAttrs = NestF->getAttributes();
5987  if (!NestAttrs.isEmpty()) {
5988    unsigned NestIdx = 1;
5989    const Type *NestTy = 0;
5990    Attributes NestAttr = Attribute::None;
5991
5992    // Look for a parameter marked with the 'nest' attribute.
5993    for (FunctionType::param_iterator I = NestFTy->param_begin(),
5994         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
5995      if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
5996        // Record the parameter type and any other attributes.
5997        NestTy = *I;
5998        NestAttr = NestAttrs.getParamAttributes(NestIdx);
5999        break;
6000      }
6001
6002    if (NestTy) {
6003      Instruction *Caller = CS.getInstruction();
6004      std::vector<Value*> NewArgs;
6005      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
6006
6007      SmallVector<AttributeWithIndex, 8> NewAttrs;
6008      NewAttrs.reserve(Attrs.getNumSlots() + 1);
6009
6010      // Insert the nest argument into the call argument list, which may
6011      // mean appending it.  Likewise for attributes.
6012
6013      // Add any result attributes.
6014      if (Attributes Attr = Attrs.getRetAttributes())
6015        NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
6016
6017      {
6018        unsigned Idx = 1;
6019        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
6020        do {
6021          if (Idx == NestIdx) {
6022            // Add the chain argument and attributes.
6023            Value *NestVal = Tramp->getOperand(3);
6024            if (NestVal->getType() != NestTy)
6025              NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
6026            NewArgs.push_back(NestVal);
6027            NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
6028          }
6029
6030          if (I == E)
6031            break;
6032
6033          // Add the original argument and attributes.
6034          NewArgs.push_back(*I);
6035          if (Attributes Attr = Attrs.getParamAttributes(Idx))
6036            NewAttrs.push_back
6037              (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
6038
6039          ++Idx, ++I;
6040        } while (1);
6041      }
6042
6043      // Add any function attributes.
6044      if (Attributes Attr = Attrs.getFnAttributes())
6045        NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
6046
6047      // The trampoline may have been bitcast to a bogus type (FTy).
6048      // Handle this by synthesizing a new function type, equal to FTy
6049      // with the chain parameter inserted.
6050
6051      std::vector<const Type*> NewTypes;
6052      NewTypes.reserve(FTy->getNumParams()+1);
6053
6054      // Insert the chain's type into the list of parameter types, which may
6055      // mean appending it.
6056      {
6057        unsigned Idx = 1;
6058        FunctionType::param_iterator I = FTy->param_begin(),
6059          E = FTy->param_end();
6060
6061        do {
6062          if (Idx == NestIdx)
6063            // Add the chain's type.
6064            NewTypes.push_back(NestTy);
6065
6066          if (I == E)
6067            break;
6068
6069          // Add the original type.
6070          NewTypes.push_back(*I);
6071
6072          ++Idx, ++I;
6073        } while (1);
6074      }
6075
6076      // Replace the trampoline call with a direct call.  Let the generic
6077      // code sort out any function type mismatches.
6078      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
6079                                                FTy->isVarArg());
6080      Constant *NewCallee =
6081        NestF->getType() == PointerType::getUnqual(NewFTy) ?
6082        NestF : ConstantExpr::getBitCast(NestF,
6083                                         PointerType::getUnqual(NewFTy));
6084      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
6085                                                   NewAttrs.end());
6086
6087      Instruction *NewCaller;
6088      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
6089        NewCaller = InvokeInst::Create(NewCallee,
6090                                       II->getNormalDest(), II->getUnwindDest(),
6091                                       NewArgs.begin(), NewArgs.end(),
6092                                       Caller->getName(), Caller);
6093        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
6094        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
6095      } else {
6096        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
6097                                     Caller->getName(), Caller);
6098        if (cast<CallInst>(Caller)->isTailCall())
6099          cast<CallInst>(NewCaller)->setTailCall();
6100        cast<CallInst>(NewCaller)->
6101          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
6102        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
6103      }
6104      if (!Caller->getType()->isVoidTy())
6105        Caller->replaceAllUsesWith(NewCaller);
6106      Caller->eraseFromParent();
6107      Worklist.Remove(Caller);
6108      return 0;
6109    }
6110  }
6111
6112  // Replace the trampoline call with a direct call.  Since there is no 'nest'
6113  // parameter, there is no need to adjust the argument list.  Let the generic
6114  // code sort out any function type mismatches.
6115  Constant *NewCallee =
6116    NestF->getType() == PTy ? NestF :
6117                              ConstantExpr::getBitCast(NestF, PTy);
6118  CS.setCalledFunction(NewCallee);
6119  return CS.getInstruction();
6120}
6121
6122/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
6123/// and if a/b/c and the adds all have a single use, turn this into a phi
6124/// and a single binop.
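/// For example (illustrative): phi [ add(a,b), add(a,c) ] becomes
///    %x = phi [ b, c ]
///    add a, %x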
6125Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
6126  Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
6127  assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst));
6128  unsigned Opc = FirstInst->getOpcode();
6129  Value *LHSVal = FirstInst->getOperand(0);
6130  Value *RHSVal = FirstInst->getOperand(1);
6131
6132  const Type *LHSType = LHSVal->getType();
6133  const Type *RHSType = RHSVal->getType();
6134
6135  // Scan to see if all operands are the same opcode, and all have one use.
6136  for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
6137    Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
6138    if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
6139        // Verify type of the LHS matches so we don't fold cmp's of different
6140        // types or GEP's with different index types.
6141        I->getOperand(0)->getType() != LHSType ||
6142        I->getOperand(1)->getType() != RHSType)
6143      return 0;
6144
6145    // If they are CmpInst instructions, check their predicates
6146    if (Opc == Instruction::ICmp || Opc == Instruction::FCmp)
6147      if (cast<CmpInst>(I)->getPredicate() !=
6148          cast<CmpInst>(FirstInst)->getPredicate())
6149        return 0;
6150
6151    // Keep track of which operand needs a phi node.
6152    if (I->getOperand(0) != LHSVal) LHSVal = 0;
6153    if (I->getOperand(1) != RHSVal) RHSVal = 0;
6154  }
6155
6156  // If both LHS and RHS would need a PHI, don't do this transformation,
6157  // because it would increase the number of PHIs entering the block,
6158  // which leads to higher register pressure. This is especially
6159  // bad when the PHIs are in the header of a loop.
6160  if (!LHSVal && !RHSVal)
6161    return 0;
6162
6163  // Otherwise, this is safe to transform!
6164
6165  Value *InLHS = FirstInst->getOperand(0);
6166  Value *InRHS = FirstInst->getOperand(1);
6167  PHINode *NewLHS = 0, *NewRHS = 0;
6168  if (LHSVal == 0) {
6169    NewLHS = PHINode::Create(LHSType,
6170                             FirstInst->getOperand(0)->getName() + ".pn");
6171    NewLHS->reserveOperandSpace(PN.getNumOperands()/2);
6172    NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
6173    InsertNewInstBefore(NewLHS, PN);
6174    LHSVal = NewLHS;
6175  }
6176
6177  if (RHSVal == 0) {
6178    NewRHS = PHINode::Create(RHSType,
6179                             FirstInst->getOperand(1)->getName() + ".pn");
6180    NewRHS->reserveOperandSpace(PN.getNumOperands()/2);
6181    NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
6182    InsertNewInstBefore(NewRHS, PN);
6183    RHSVal = NewRHS;
6184  }
6185
6186  // Add all operands to the new PHIs.
6187  if (NewLHS || NewRHS) {
6188    for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
6189      Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i));
6190      if (NewLHS) {
6191        Value *NewInLHS = InInst->getOperand(0);
6192        NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i));
6193      }
6194      if (NewRHS) {
6195        Value *NewInRHS = InInst->getOperand(1);
6196        NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i));
6197      }
6198    }
6199  }
6200
6201  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
6202    return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
6203  CmpInst *CIOp = cast<CmpInst>(FirstInst);
6204  return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
6205                         LHSVal, RHSVal);
6206}
6207
6208Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
6209  GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
6210
6211  SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
6212                                        FirstInst->op_end());
6213  // This is true if all GEP bases are allocas and if all indices into them are
6214  // constants.
6215  bool AllBasePointersAreAllocas = true;
6216
6217  // We don't want to replace this phi if the replacement would require
6218  // more than one phi, which leads to higher register pressure. This is
6219  // especially bad when the PHIs are in the header of a loop.
6220  bool NeededPhi = false;
6221
6222  // Scan to see if all operands are the same opcode, and all have one use.
6223  for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
6224    GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
6225    if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
6226      GEP->getNumOperands() != FirstInst->getNumOperands())
6227      return 0;
6228
6229    // Keep track of whether or not all GEPs are of alloca pointers.
6230    if (AllBasePointersAreAllocas &&
6231        (!isa<AllocaInst>(GEP->getOperand(0)) ||
6232         !GEP->hasAllConstantIndices()))
6233      AllBasePointersAreAllocas = false;
6234
6235    // Compare the operand lists.
6236    for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
6237      if (FirstInst->getOperand(op) == GEP->getOperand(op))
6238        continue;
6239
6240      // Don't merge two GEPs when two operands differ (introducing phi nodes)
6241      // if one of the PHIs has a constant for the index.  The index may be
6242      // substantially cheaper to compute for the constants, so making it a
6243      // variable index could pessimize the path.  This also handles the case
6244      // for struct indices, which must always be constant.
6245      if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
6246          isa<ConstantInt>(GEP->getOperand(op)))
6247        return 0;
6248
6249      if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
6250        return 0;
6251
6252      // If we already needed a PHI for an earlier operand, and another operand
6253      // also requires a PHI, we'd be introducing more PHIs than we're
6254      // eliminating, which increases register pressure on entry to the PHI's
6255      // block.
6256      if (NeededPhi)
6257        return 0;
6258
6259      FixedOperands[op] = 0;  // Needs a PHI.
6260      NeededPhi = true;
6261    }
6262  }
6263
6264  // If all of the base pointers of the PHI'd GEPs are from allocas, don't
6265  // bother doing this transformation.  At best, this will just save a bit of
6266  // offset calculation, but all the predecessors will have to materialize the
6267  // stack address into a register anyway.  We'd actually rather *clone* the
6268  // load up into the predecessors so that we have a load of a gep of an alloca,
6269  // which can usually all be folded into the load.
6270  if (AllBasePointersAreAllocas)
6271    return 0;
6272
6273  // Otherwise, this is safe to transform.  Insert PHI nodes for each operand
6274  // that is variable.
6275  SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
6276
6277  bool HasAnyPHIs = false;
6278  for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
6279    if (FixedOperands[i]) continue;  // operand doesn't need a phi.
6280    Value *FirstOp = FirstInst->getOperand(i);
6281    PHINode *NewPN = PHINode::Create(FirstOp->getType(),
6282                                     FirstOp->getName()+".pn");
6283    InsertNewInstBefore(NewPN, PN);
6284
6285    NewPN->reserveOperandSpace(e);
6286    NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
6287    OperandPhis[i] = NewPN;
6288    FixedOperands[i] = NewPN;
6289    HasAnyPHIs = true;
6290  }
6291
6292
6293  // Add all operands to the new PHIs.
6294  if (HasAnyPHIs) {
6295    for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
6296      GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
6297      BasicBlock *InBB = PN.getIncomingBlock(i);
6298
6299      for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
6300        if (PHINode *OpPhi = OperandPhis[op])
6301          OpPhi->addIncoming(InGEP->getOperand(op), InBB);
6302    }
6303  }
6304
6305  Value *Base = FixedOperands[0];
6306  return cast<GEPOperator>(FirstInst)->isInBounds() ?
6307    GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1,
6308                                      FixedOperands.end()) :
6309    GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
6310                              FixedOperands.end());
6311}
6312
6313
6314/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to
6315/// sink the load out of the block that defines it.  This means that it must be
6316/// obvious the value of the load is not changed from the point of the load to
6317/// the end of the block it is in.
6318///
6319/// Finally, it is safe, but not profitable, to sink a load targeting a
6320/// non-address-taken alloca.  Doing so will cause us to not promote the alloca
6321/// to a register.
6322static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
6323  BasicBlock::iterator BBI = L, E = L->getParent()->end();
6324
6325  for (++BBI; BBI != E; ++BBI)
6326    if (BBI->mayWriteToMemory())
6327      return false;
6328
6329  // Check for a non-address-taken alloca.  If the alloca isn't address taken,
6330  // sinking isn't profitable: it would block promoting the alloca to a register.
6331  if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
6332    bool isAddressTaken = false;
6333    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
6334         UI != E; ++UI) {
6335      if (isa<LoadInst>(UI)) continue;
6336      if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
6337        // If storing TO the alloca, then the address isn't taken.
6338        if (SI->getOperand(1) == AI) continue;
6339      }
6340      isAddressTaken = true;
6341      break;
6342    }
6343
6344    if (!isAddressTaken && AI->isStaticAlloca())
6345      return false;
6346  }
6347
6348  // If this load is a load from a GEP with a constant offset from an alloca,
6349  // then we don't want to sink it.  In its present form, it will be
6350  // load [constant stack offset].  Sinking it will cause us to have to
6351  // materialize the stack addresses in each predecessor in a register only to
6352  // do a shared load from register in the successor.
6353  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0)))
6354    if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
6355      if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
6356        return false;
6357
6358  return true;
6359}
6360
6361Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
6362  LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));
6363
6364  // When processing loads, we need to propagate two bits of information to the
6365  // sunk load: whether it is volatile, and what its alignment is.  We currently
6366  // don't sink loads when some have their alignment specified and some don't.
6367  // visitLoadInst will propagate an alignment onto the load when TD is around,
6368  // and if TD isn't around, we can't handle the mixed case.
6369  bool isVolatile = FirstLI->isVolatile();
6370  unsigned LoadAlignment = FirstLI->getAlignment();
6371
6372  // We can't sink the load if the loaded value could be modified between the
6373  // load and the PHI.
6374  if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
6375      !isSafeAndProfitableToSinkLoad(FirstLI))
6376    return 0;
6377
6378  // If the PHI is of volatile loads and the load block has multiple
6379  // successors, sinking it would remove a load of the volatile value from
6380  // the path through the other successor.
6381  if (isVolatile &&
6382      FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
6383    return 0;
6384
6385  // Check to see if all arguments are the same operation.
6386  for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
6387    LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
6388    if (!LI || !LI->hasOneUse())
6389      return 0;
6390
6391    // We can't sink the load if the loaded value could be modified between
6392    // the load and the PHI.
6393    if (LI->isVolatile() != isVolatile ||
6394        LI->getParent() != PN.getIncomingBlock(i) ||
6395        !isSafeAndProfitableToSinkLoad(LI))
6396      return 0;
6397
6398    // If some of the loads have an alignment specified but not all of them,
6399    // we can't do the transformation.
6400    if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
6401      return 0;
6402
6403    LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
6404
6405    // If the PHI is of volatile loads and the load block has multiple
6406    // successors, sinking it would remove a load of the volatile value from
6407    // the path through the other successor.
6408    if (isVolatile &&
6409        LI->getParent()->getTerminator()->getNumSuccessors() != 1)
6410      return 0;
6411  }
6412
6413  // Okay, they are all the same operation.  Create a new PHI node of the
6414  // correct type, and PHI together all of the LHS's of the instructions.
6415  PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
6416                                   PN.getName()+".in");
6417  NewPN->reserveOperandSpace(PN.getNumOperands()/2);
6418
6419  Value *InVal = FirstLI->getOperand(0);
6420  NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
6421
6422  // Add all operands to the new PHI.
6423  for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
6424    Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
6425    if (NewInVal != InVal)
6426      InVal = 0;
6427    NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
6428  }
6429
6430  Value *PhiVal;
6431  if (InVal) {
6432    // The new PHI unions all of the same values together.  This is really
6433    // common, so we handle it intelligently here for compile-time speed.
6434    PhiVal = InVal;
6435    delete NewPN;
6436  } else {
6437    InsertNewInstBefore(NewPN, PN);
6438    PhiVal = NewPN;
6439  }
6440
6441  // If this was a volatile load that we are merging, make sure to loop through
6442  // and mark all the input loads as non-volatile.  If we don't do this, we will
6443  // insert a new volatile load and the old ones will not be deletable.
6444  if (isVolatile)
6445    for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
6446      cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
6447
6448  return new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
6449}
6450
6451
6452
6453/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
6454/// operator and they all are only used by the PHI, PHI together their
6455/// inputs, and do the operation once, to the result of the PHI.
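/// For example (illustrative): phi [ trunc(A to i8), trunc(B to i8) ],
/// where both casts have the same source type, becomes
///    %p = phi [ A, B ]
///    trunc %p to i8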
6456Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
6457  Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
6458
6459  if (isa<GetElementPtrInst>(FirstInst))
6460    return FoldPHIArgGEPIntoPHI(PN);
6461  if (isa<LoadInst>(FirstInst))
6462    return FoldPHIArgLoadIntoPHI(PN);
6463
6464  // Scan the instruction, looking for input operations that can be folded away.
6465  // If all input operands to the phi are the same instruction (e.g. a cast from
6466  // the same type or "+42") we can pull the operation through the PHI, reducing
6467  // code size and simplifying code.
6468  Constant *ConstantOp = 0;
6469  const Type *CastSrcTy = 0;
6470
6471  if (isa<CastInst>(FirstInst)) {
6472    CastSrcTy = FirstInst->getOperand(0)->getType();
6473
6474    // Be careful about transforming integer PHIs.  We don't want to pessimize
6475    // the code by turning an i32 into an i1293.
6476    if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) {
6477      if (!ShouldChangeType(PN.getType(), CastSrcTy))
6478        return 0;
6479    }
6480  } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
6481    // Can fold binop, compare or shift here if the RHS is a constant,
6482    // otherwise call FoldPHIArgBinOpIntoPHI.
6483    ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
6484    if (ConstantOp == 0)
6485      return FoldPHIArgBinOpIntoPHI(PN);
6486  } else {
6487    return 0;  // Cannot fold this operation.
6488  }
6489
6490  // Check to see if all arguments are the same operation.
6491  for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
6492    Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
6493    if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
6494      return 0;
6495    if (CastSrcTy) {
6496      if (I->getOperand(0)->getType() != CastSrcTy)
6497        return 0;  // Cast operation must match.
6498    } else if (I->getOperand(1) != ConstantOp) {
6499      return 0;
6500    }
6501  }
6502
6503  // Okay, they are all the same operation.  Create a new PHI node of the
6504  // correct type, and PHI together all of the LHS's of the instructions.
6505  PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
6506                                   PN.getName()+".in");
6507  NewPN->reserveOperandSpace(PN.getNumOperands()/2);
6508
6509  Value *InVal = FirstInst->getOperand(0);
6510  NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
6511
6512  // Add all operands to the new PHI.
6513  for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
6514    Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
6515    if (NewInVal != InVal)
6516      InVal = 0;
6517    NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
6518  }
6519
6520  Value *PhiVal;
6521  if (InVal) {
6522    // The new PHI unions all of the same values together.  This is really
6523    // common, so we handle it intelligently here for compile-time speed.
6524    PhiVal = InVal;
6525    delete NewPN;
6526  } else {
6527    InsertNewInstBefore(NewPN, PN);
6528    PhiVal = NewPN;
6529  }
6530
6531  // Insert and return the new operation.
6532  if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst))
6533    return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
6534
6535  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
6536    return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
6537
6538  CmpInst *CIOp = cast<CmpInst>(FirstInst);
6539  return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
6540                         PhiVal, ConstantOp);
6541}
6542
6543/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
6544/// that is dead.
6545static bool DeadPHICycle(PHINode *PN,
6546                         SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) {
6547  if (PN->use_empty()) return true;
6548  if (!PN->hasOneUse()) return false;
6549
6550  // Remember this node, and if we find the cycle, return.
6551  if (!PotentiallyDeadPHIs.insert(PN))
6552    return true;
6553
6554  // Don't scan crazily complex things.
6555  if (PotentiallyDeadPHIs.size() == 16)
6556    return false;
6557
6558  if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
6559    return DeadPHICycle(PU, PotentiallyDeadPHIs);
6560
6561  return false;
6562}
6563
6564/// PHIsEqualValue - Return true if this phi node is always equal to
6565/// NonPhiInVal.  This happens with mutually cyclic phi nodes like:
6566///   z = some value; x = phi (y, z); y = phi (x, z)
6567static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
6568                           SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
6569  // See if we already saw this PHI node.
6570  if (!ValueEqualPHIs.insert(PN))
6571    return true;
6572
6573  // Don't scan crazily complex things.
6574  if (ValueEqualPHIs.size() == 16)
6575    return false;
6576
6577  // Scan the operands to see if they are either phi nodes or are equal to
6578  // the value.
6579  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
6580    Value *Op = PN->getIncomingValue(i);
6581    if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
6582      if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
6583        return false;
6584    } else if (Op != NonPhiInVal)
6585      return false;
6586  }
6587
6588  return true;
6589}
6590
6591
6592namespace {
6593struct PHIUsageRecord {
6594  unsigned PHIId;     // The ID # of the PHI (something deterministic to sort on)
6595  unsigned Shift;     // The amount shifted.
6596  Instruction *Inst;  // The trunc instruction.
6597
6598  PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
6599    : PHIId(pn), Shift(Sh), Inst(User) {}
6600
6601  bool operator<(const PHIUsageRecord &RHS) const {
6602    if (PHIId < RHS.PHIId) return true;
6603    if (PHIId > RHS.PHIId) return false;
6604    if (Shift < RHS.Shift) return true;
6605    if (Shift > RHS.Shift) return false;
6606    return Inst->getType()->getPrimitiveSizeInBits() <
6607           RHS.Inst->getType()->getPrimitiveSizeInBits();
6608  }
6609};
6610
6611struct LoweredPHIRecord {
6612  PHINode *PN;        // The PHI that was lowered.
6613  unsigned Shift;     // The amount shifted.
6614  unsigned Width;     // The width extracted.
6615
6616  LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
6617    : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
6618
6619  // Ctor form used by DenseMap.
6620  LoweredPHIRecord(PHINode *pn, unsigned Sh)
6621    : PN(pn), Shift(Sh), Width(0) {}
6622};
6623}
6624
6625namespace llvm {
6626  template<>
6627  struct DenseMapInfo<LoweredPHIRecord> {
6628    static inline LoweredPHIRecord getEmptyKey() {
6629      return LoweredPHIRecord(0, 0);
6630    }
6631    static inline LoweredPHIRecord getTombstoneKey() {
6632      return LoweredPHIRecord(0, 1);
6633    }
6634    static unsigned getHashValue(const LoweredPHIRecord &Val) {
6635      return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
6636             (Val.Width>>3);
6637    }
6638    static bool isEqual(const LoweredPHIRecord &LHS,
6639                        const LoweredPHIRecord &RHS) {
6640      return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
6641             LHS.Width == RHS.Width;
6642    }
6643  };
6644  template <>
6645  struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
6646}
6647
6648
6649/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
6650/// illegal type: see if it is only used by trunc or trunc(lshr) operations.  If
6651/// so, we split the PHI into the various pieces being extracted.  This sort of
6652/// thing is introduced when SROA promotes an aggregate to large integer values.
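/// For example (an illustrative sketch):
///   %t = phi i64 [ %a, %BB1 ], [ %b, %BB2 ]
///   %lo = trunc i64 %t to i32
///   %s = lshr i64 %t, 32
///   %hi = trunc i64 %s to i32
/// is rewritten as two i32 PHIs whose incoming values are computed with
/// lshr+trunc of %a and %b in the predecessor blocks.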
6653///
6654/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
6655/// inttoptr.  We should produce new PHIs in the right type.
6656///
6657Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
6658  // PHIUsers - Keep track of all of the truncated values extracted from a set
6659  // of PHIs, along with their offset.  These are the things we want to rewrite.
6660  SmallVector<PHIUsageRecord, 16> PHIUsers;
6661
6662  // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
6663  // nodes which are extracted from.  PHIsInspected is a set we use to avoid
6664  // revisiting PHIs; PHIsToSlice is an ordered list of the PHIs whose uses
6665  // we still need to check (to ensure they are all extracts).
6666  SmallVector<PHINode*, 8> PHIsToSlice;
6667  SmallPtrSet<PHINode*, 8> PHIsInspected;
6668
6669  PHIsToSlice.push_back(&FirstPhi);
6670  PHIsInspected.insert(&FirstPhi);
6671
6672  for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
6673    PHINode *PN = PHIsToSlice[PHIId];
6674
6675    // Scan the input list of the PHI.  If any input is an invoke, and if the
6676    // input is defined in the predecessor, then we won't be able to split the
6677    // critical edge that is required to insert a truncate.  Because of this,
6678    // we have to bail out.
6679    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
6680      InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
6681      if (II == 0) continue;
6682      if (II->getParent() != PN->getIncomingBlock(i))
6683        continue;
6684
6685      // If we have an invoke, and it's directly in the predecessor, then we have
6686      // a critical edge where we need to put the truncate.  Since we can't
6687      // split the edge in instcombine, we have to bail out.
6688      return 0;
6689    }
6690
6691
6692    for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
6693         UI != E; ++UI) {
6694      Instruction *User = cast<Instruction>(*UI);
6695
6696      // If the user is a PHI, inspect its uses recursively.
6697      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
6698        if (PHIsInspected.insert(UserPN))
6699          PHIsToSlice.push_back(UserPN);
6700        continue;
6701      }
6702
6703      // Truncates are always ok.
6704      if (isa<TruncInst>(User)) {
6705        PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
6706        continue;
6707      }
6708
6709      // Otherwise it must be an lshr by a constant, used only by one trunc.
6710      if (User->getOpcode() != Instruction::LShr ||
6711          !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
6712          !isa<ConstantInt>(User->getOperand(1)))
6713        return 0;
6714
6715      unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
6716      PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
6717    }
6718  }
6719
6720  // If we have no users, they must all be self uses; just nuke the PHI.
6721  if (PHIUsers.empty())
6722    return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
6723
6724  // If this phi node is transformable, create new PHIs for all the pieces
6725  // extracted out of it.  First, sort the users by their offset and size.
6726  array_pod_sort(PHIUsers.begin(), PHIUsers.end());
6727
6728  DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
6729        for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
6730          errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n';
6731        );
6732
6733  // PredValues - This is a temporary used when rewriting PHI nodes.  It is
6734  // hoisted out here to avoid construction/destruction thrashing.
6735  DenseMap<BasicBlock*, Value*> PredValues;
6736
6737  // ExtractedVals - Each new PHI we introduce is saved here so we don't
6738  // introduce redundant PHIs.
6739  DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
6740
6741  for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
6742    unsigned PHIId = PHIUsers[UserI].PHIId;
6743    PHINode *PN = PHIsToSlice[PHIId];
6744    unsigned Offset = PHIUsers[UserI].Shift;
6745    const Type *Ty = PHIUsers[UserI].Inst->getType();
6746
6747    PHINode *EltPHI;
6748
6749    // If we've already lowered a user like this, reuse the previously lowered
6750    // value.
6751    if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
6752
6753      // Otherwise, create the new PHI node for this user.
6754      EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN);
6755      assert(EltPHI->getType() != PN->getType() &&
6756             "Truncate didn't shrink phi?");
6757
6758      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
6759        BasicBlock *Pred = PN->getIncomingBlock(i);
6760        Value *&PredVal = PredValues[Pred];
6761
6762        // If we already have a value for this predecessor, reuse it.
6763        if (PredVal) {
6764          EltPHI->addIncoming(PredVal, Pred);
6765          continue;
6766        }
6767
6768        // Handle the PHI self-reuse case.
6769        Value *InVal = PN->getIncomingValue(i);
6770        if (InVal == PN) {
6771          PredVal = EltPHI;
6772          EltPHI->addIncoming(PredVal, Pred);
6773          continue;
6774        }
6775
6776        if (PHINode *InPHI = dyn_cast<PHINode>(InVal)) {
6777          // If the incoming value was a PHI, and if it was one of the PHIs we
6778          // already rewrote, just use the lowered value.
6779          if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
6780            PredVal = Res;
6781            EltPHI->addIncoming(PredVal, Pred);
6782            continue;
6783          }
6784        }
6785
6786        // Otherwise, do an extract in the predecessor.
6787        Builder->SetInsertPoint(Pred, Pred->getTerminator());
6788        Value *Res = InVal;
6789        if (Offset)
6790          Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
6791                                                          Offset), "extract");
6792        Res = Builder->CreateTrunc(Res, Ty, "extract.t");
6793        PredVal = Res;
6794        EltPHI->addIncoming(Res, Pred);
6795
6796        // If the incoming value was a PHI, and if it was one of the PHIs we are
6797        // rewriting, we will ultimately delete the code we inserted.  This
6798        // means we need to revisit that PHI to make sure we extract out the
6799        // needed piece.
6800        if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
6801          if (PHIsInspected.count(OldInVal)) {
6802            unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
6803                                          OldInVal)-PHIsToSlice.begin();
6804            PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
6805                                              cast<Instruction>(Res)));
6806            ++UserE;
6807          }
6808      }
6809      PredValues.clear();
6810
6811      DEBUG(errs() << "  Made element PHI for offset " << Offset << ": "
6812                   << *EltPHI << '\n');
6813      ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
6814    }
6815
6816    // Replace the use of this piece with the PHI node.
6817    ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
6818  }
6819
6820  // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
6821  // with undefs.
6822  Value *Undef = UndefValue::get(FirstPhi.getType());
6823  for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
6824    ReplaceInstUsesWith(*PHIsToSlice[i], Undef);
6825  return ReplaceInstUsesWith(FirstPhi, Undef);
6826}
6827
6828// PHINode simplification
6829//
6830Instruction *InstCombiner::visitPHINode(PHINode &PN) {
6831  // If LCSSA is around, don't mess with Phi nodes
6832  if (MustPreserveLCSSA) return 0;
6833
6834  if (Value *V = PN.hasConstantValue())
6835    return ReplaceInstUsesWith(PN, V);
6836
6837  // If all PHI operands are the same operation, pull them through the PHI,
6838  // reducing code size.
6839  if (isa<Instruction>(PN.getIncomingValue(0)) &&
6840      isa<Instruction>(PN.getIncomingValue(1)) &&
6841      cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
6842      cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
6843      // FIXME: The hasOneUse check will fail for PHIs that use the value more
6844      // than once (e.g. in several of their own incoming operands).
6845      PN.getIncomingValue(0)->hasOneUse())
6846    if (Instruction *Result = FoldPHIArgOpIntoPHI(PN))
6847      return Result;
6848
6849  // If this is a trivial cycle in the PHI node graph, remove it.  Basically, if
6850  // this PHI only has a single use (a PHI), and if that PHI only has one use (a
6851  // PHI)... break the cycle.
6852  if (PN.hasOneUse()) {
6853    Instruction *PHIUser = cast<Instruction>(PN.use_back());
6854    if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
6855      SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
6856      PotentiallyDeadPHIs.insert(&PN);
6857      if (DeadPHICycle(PU, PotentiallyDeadPHIs))
6858        return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
6859    }
6860
6861    // If this phi has a single use, and if that use just computes a value for
6862    // the next iteration of a loop, delete the phi.  This occurs with unused
6863    // induction variables, e.g. "for (int j = 0; ; ++j);".  Detecting this
6864    // common case here is good because the only other things that catch this
6865    // are induction variable analysis (sometimes) and ADCE, which is only run
6866    // late.
6867    if (PHIUser->hasOneUse() &&
6868        (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
6869        PHIUser->use_back() == &PN) {
6870      return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
6871    }
6872  }
6873
6874  // We sometimes end up with phi cycles that non-obviously end up being the
6875  // same value, for example:
6876  //   z = some value; x = phi (y, z); y = phi (x, z)
6877  // where the phi nodes don't necessarily need to be in the same block.  Do a
6878  // quick check to see if the PHI node only contains a single non-phi value, if
6879  // so, scan to see if the phi cycle is actually equal to that value.
6880  {
6881    unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues();
6882    // Scan for the first non-phi operand.
6883    while (InValNo != NumOperandVals &&
6884           isa<PHINode>(PN.getIncomingValue(InValNo)))
6885      ++InValNo;
6886
6887    if (InValNo != NumOperandVals) {
6888      Value *NonPhiInVal = PN.getOperand(InValNo);
6889
6890      // Scan the rest of the operands to see if there are any conflicts; if so,
6891      // there is no need to recursively scan other phis.
6892      for (++InValNo; InValNo != NumOperandVals; ++InValNo) {
6893        Value *OpVal = PN.getIncomingValue(InValNo);
6894        if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
6895          break;
6896      }
6897
6898      // If we scanned over all operands, then we have one unique value plus
6899      // phi values.  Scan PHI nodes to see if they all merge into each other or
6900      // the value.
6901      if (InValNo == NumOperandVals) {
6902        SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
6903        if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
6904          return ReplaceInstUsesWith(PN, NonPhiInVal);
6905      }
6906    }
6907  }
6908
6909  // If there are multiple PHIs, sort their operands so that they all list
6910  // the blocks in the same order. This will help identical PHIs be eliminated
6911  // by other passes. Other passes shouldn't depend on this for correctness
6912  // however.
6913  PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
6914  if (&PN != FirstPN)
6915    for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) {
6916      BasicBlock *BBA = PN.getIncomingBlock(i);
6917      BasicBlock *BBB = FirstPN->getIncomingBlock(i);
6918      if (BBA != BBB) {
6919        Value *VA = PN.getIncomingValue(i);
6920        unsigned j = PN.getBasicBlockIndex(BBB);
6921        Value *VB = PN.getIncomingValue(j);
6922        PN.setIncomingBlock(i, BBB);
6923        PN.setIncomingValue(i, VB);
6924        PN.setIncomingBlock(j, BBA);
6925        PN.setIncomingValue(j, VA);
6926        // NOTE: Instcombine normally would want us to "return &PN" if we
6927        // modified any of the operands of an instruction.  However, since we
6928        // aren't adding or removing uses (just rearranging them) we don't do
6929        // this in this case.
6930      }
6931    }
6932
6933  // If this is an integer PHI and we know that it has an illegal type, see if
6934  // it is only used by trunc or trunc(lshr) operations.  If so, we split the
6935  // PHI into the various pieces being extracted.  This sort of thing is
6936  // introduced when SROA promotes an aggregate to a single large integer type.
6937  if (isa<IntegerType>(PN.getType()) && TD &&
6938      !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
6939    if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
6940      return Res;
6941
6942  return 0;
6943}
6944
6945Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
6946  SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
6947
6948  if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
6949    return ReplaceInstUsesWith(GEP, V);
6950
6951  Value *PtrOp = GEP.getOperand(0);
6952
6953  if (isa<UndefValue>(GEP.getOperand(0)))
6954    return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
6955
6956  // Eliminate unneeded casts for indices.
6957  if (TD) {
6958    bool MadeChange = false;
6959    unsigned PtrSize = TD->getPointerSizeInBits();
6960
6961    gep_type_iterator GTI = gep_type_begin(GEP);
6962    for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
6963         I != E; ++I, ++GTI) {
6964      if (!isa<SequentialType>(*GTI)) continue;
6965
6966      // If we are using a wider index than needed for this platform, shrink it
6967      // to what we need.  If narrower, sign-extend it to what we need.  This
6968      // explicit cast can make subsequent optimizations more obvious.
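      // E.g. (illustrative, assuming 64-bit pointers): an i32 index is
      // rewritten as "sext i32 %i to i64" feeding the gep, so later folds
      // see the index in the pointer-width type.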
6969      unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
6970      if (OpBits == PtrSize)
6971        continue;
6972
6973      *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
6974      MadeChange = true;
6975    }
6976    if (MadeChange) return &GEP;
6977  }
6978
6979  // Combine Indices - If the source pointer to this getelementptr instruction
6980  // is a getelementptr instruction, combine the indices of the two
6981  // getelementptr instructions into a single instruction.
6982  //
6983  if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
6984    // Note that if our source is itself a gep chain, we wait for that
6985    // chain to be resolved before we perform this transformation.  This
6986    // avoids us creating a TON of code in some cases.
6987    //
6988    if (GetElementPtrInst *SrcGEP =
6989          dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
6990      if (SrcGEP->getNumOperands() == 2)
6991        return 0;   // Wait until our source is folded to completion.
6992
6993    SmallVector<Value*, 8> Indices;
6994
6995    // Find out whether the last index in the source GEP is a sequential idx.
6996    bool EndsWithSequential = false;
6997    for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
6998         I != E; ++I)
6999      EndsWithSequential = !isa<StructType>(*I);
7000
7001    // Can we combine the two pointer arithmetic offsets?
7002    if (EndsWithSequential) {
7003      // Replace: gep (gep %P, long B), long A, ...
7004      // With:    T = long A+B; gep %P, T, ...
7005      //
7006      Value *Sum;
7007      Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
7008      Value *GO1 = GEP.getOperand(1);
7009      if (SO1 == Constant::getNullValue(SO1->getType())) {
7010        Sum = GO1;
7011      } else if (GO1 == Constant::getNullValue(GO1->getType())) {
7012        Sum = SO1;
7013      } else {
7014        // If they aren't the same type, then the input hasn't been processed
7015        // by the loop above yet (which canonicalizes sequential index types to
7016        // intptr_t).  Just avoid transforming this until the input has been
7017        // normalized.
7018        if (SO1->getType() != GO1->getType())
7019          return 0;
7020        Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
7021      }
7022
7023      // Update the GEP in place if possible.
7024      if (Src->getNumOperands() == 2) {
7025        GEP.setOperand(0, Src->getOperand(0));
7026        GEP.setOperand(1, Sum);
7027        return &GEP;
7028      }
7029      Indices.append(Src->op_begin()+1, Src->op_end()-1);
7030      Indices.push_back(Sum);
7031      Indices.append(GEP.op_begin()+2, GEP.op_end());
7032    } else if (isa<Constant>(*GEP.idx_begin()) &&
7033               cast<Constant>(*GEP.idx_begin())->isNullValue() &&
7034               Src->getNumOperands() != 1) {
7035      // Otherwise we can do the fold if the first index of the GEP is a zero
7036      Indices.append(Src->op_begin()+1, Src->op_end());
7037      Indices.append(GEP.idx_begin()+1, GEP.idx_end());
7038    }
7039
7040    if (!Indices.empty())
7041      return (cast<GEPOperator>(&GEP)->isInBounds() &&
7042              Src->isInBounds()) ?
7043        GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
7044                                          Indices.end(), GEP.getName()) :
7045        GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
7046                                  Indices.end(), GEP.getName());
7047  }
7048
7049  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
7050  if (Value *X = getBitCastOperand(PtrOp)) {
7051    assert(isa<PointerType>(X->getType()) && "Must be cast from pointer");
7052
7053    // If the input bitcast is actually "bitcast(bitcast(x))", then we don't
7054    // want to change the gep until the bitcasts are eliminated.
7055    if (getBitCastOperand(X)) {
7056      Worklist.AddValue(PtrOp);
7057      return 0;
7058    }
7059
7060    bool HasZeroPointerIndex = false;
7061    if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
7062      HasZeroPointerIndex = C->isZero();
7063
7064    // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
7065    // into     : GEP [10 x i8]* X, i32 0, ...
7066    //
7067    // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
7068    //           into     : GEP i8* X, ...
7069    //
7070    // This occurs when the program declares an array extern like "int X[];"
7071    if (HasZeroPointerIndex) {
7072      const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
7073      const PointerType *XTy = cast<PointerType>(X->getType());
7074      if (const ArrayType *CATy =
7075          dyn_cast<ArrayType>(CPTy->getElementType())) {
7076        // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
7077        if (CATy->getElementType() == XTy->getElementType()) {
7078          // -> GEP i8* X, ...
7079          SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end());
7080          return cast<GEPOperator>(&GEP)->isInBounds() ?
7081            GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(),
7082                                              GEP.getName()) :
7083            GetElementPtrInst::Create(X, Indices.begin(), Indices.end(),
7084                                      GEP.getName());
7085        }
7086
7087        if (const ArrayType *XATy = dyn_cast<ArrayType>(XTy->getElementType())){
7088          // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
7089          if (CATy->getElementType() == XATy->getElementType()) {
7090            // -> GEP [10 x i8]* X, i32 0, ...
7091            // At this point, we know that the cast source type is a pointer
7092            // to an array of the same type as the destination pointer
7093            // array.  Because the array type is never stepped over (there
7094            // is a leading zero) we can fold the cast into this GEP.
7095            GEP.setOperand(0, X);
7096            return &GEP;
7097          }
7098        }
7099      }
7100    } else if (GEP.getNumOperands() == 2) {
7101      // Transform things like:
7102      // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
7103      // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
7104      const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType();
7105      const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
7106      if (TD && isa<ArrayType>(SrcElTy) &&
7107          TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
7108          TD->getTypeAllocSize(ResElTy)) {
7109        Value *Idx[2];
7110        Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
7111        Idx[1] = GEP.getOperand(1);
7112        Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
7113          Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
7114          Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
7115        // V and GEP are both pointer types --> BitCast
7116        return new BitCastInst(NewGEP, GEP.getType());
7117      }
7118
7119      // Transform things like:
7120      // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
7121      //   (where tmp = 8*tmp2) into:
7122      // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
7123
7124      if (TD && isa<ArrayType>(SrcElTy) &&
7125          ResElTy == Type::getInt8Ty(GEP.getContext())) {
7126        uint64_t ArrayEltSize =
7127            TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
7128
7129        // Check to see if "tmp" is a scale by a multiple of ArrayEltSize.  We
7130        // allow either a mul, shift, or constant here.
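        // E.g. (illustrative): with [100 x double], ArrayEltSize is 8, so
        // "%tmp = shl i32 %tmp2, 3" gives Scale == 8 and NewIdx == %tmp2.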
7131        Value *NewIdx = 0;
7132        ConstantInt *Scale = 0;
7133        if (ArrayEltSize == 1) {
7134          NewIdx = GEP.getOperand(1);
7135          Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
7136        } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
7137          NewIdx = ConstantInt::get(CI->getType(), 1);
7138          Scale = CI;
7139        } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
7140          if (Inst->getOpcode() == Instruction::Shl &&
7141              isa<ConstantInt>(Inst->getOperand(1))) {
7142            ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
7143            uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
7144            Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
7145                                     1ULL << ShAmtVal);
7146            NewIdx = Inst->getOperand(0);
7147          } else if (Inst->getOpcode() == Instruction::Mul &&
7148                     isa<ConstantInt>(Inst->getOperand(1))) {
7149            Scale = cast<ConstantInt>(Inst->getOperand(1));
7150            NewIdx = Inst->getOperand(0);
7151          }
7152        }
7153
7154        // If the index will be exactly the right offset once the scale is taken
7155        // out, perform the transformation.  Note that we don't know whether Scale
7156        // is signed or not, so we use the unsigned versions of the division and
7157        // modulo operations after making sure Scale doesn't have the sign bit set.
7158        if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
7159            Scale->getZExtValue() % ArrayEltSize == 0) {
7160          Scale = ConstantInt::get(Scale->getType(),
7161                                   Scale->getZExtValue() / ArrayEltSize);
7162          if (Scale->getZExtValue() != 1) {
7163            Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
7164                                                       false /*ZExt*/);
7165            NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
7166          }
7167
7168          // Insert the new GEP instruction.
7169          Value *Idx[2];
7170          Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
7171          Idx[1] = NewIdx;
7172          Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
7173            Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
7174            Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
7175          // The NewGEP must be pointer typed, so must the old one -> BitCast
7176          return new BitCastInst(NewGEP, GEP.getType());
7177        }
7178      }
7179    }
7180  }
7181
7182  /// See if we can simplify:
7183  ///   X = bitcast A* to B*
7184  ///   Y = gep X, <...constant indices...>
7185  /// into a gep of the original struct.  This is important for SROA and alias
7186  /// analysis of unions.  If "A" is also a bitcast, wait for A/X to be merged.
7187  if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
7188    if (TD &&
7189        !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
7190      // Determine how much the GEP moves the pointer.  We are guaranteed to get
7191      // a constant back from EmitGEPOffset.
7192      ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP));
7193      int64_t Offset = OffsetV->getSExtValue();
7194
7195      // If this GEP instruction doesn't move the pointer, just replace the GEP
7196      // with a bitcast of the real input to the dest type.
7197      if (Offset == 0) {
7198        // If the bitcast is of an allocation, and the allocation will be
7199        // converted to match the type of the cast, don't touch this.
7200        if (isa<AllocaInst>(BCI->getOperand(0)) ||
7201            isMalloc(BCI->getOperand(0))) {
7202          // See if the bitcast simplifies, if so, don't nuke this GEP yet.
7203          if (Instruction *I = visitBitCast(*BCI)) {
7204            if (I != BCI) {
7205              I->takeName(BCI);
7206              BCI->getParent()->getInstList().insert(BCI, I);
7207              ReplaceInstUsesWith(*BCI, I);
7208            }
7209            return &GEP;
7210          }
7211        }
7212        return new BitCastInst(BCI->getOperand(0), GEP.getType());
7213      }
7214
7215      // Otherwise, if the offset is non-zero, we need to find out if there is a
7216      // field at Offset in 'A's type.  If so, we can pull the cast through the
7217      // GEP.
7218      SmallVector<Value*, 8> NewIndices;
7219      const Type *InTy =
7220        cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
7221      if (FindElementAtOffset(InTy, Offset, NewIndices)) {
7222        Value *NGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
7223          Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
7224                                     NewIndices.end()) :
7225          Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
7226                             NewIndices.end());
7227
7228        if (NGEP->getType() == GEP.getType())
7229          return ReplaceInstUsesWith(GEP, NGEP);
7230        NGEP->takeName(&GEP);
7231        return new BitCastInst(NGEP, GEP.getType());
7232      }
7233    }
7234  }
7235
7236  return 0;
7237}
7238
7239Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
7240  // Convert: alloca Ty, C - where C is a constant != 1 - into: alloca [C x Ty]
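  // E.g. (an illustrative sketch): "%p = alloca i32, i32 4" becomes
  // "%V = alloca [4 x i32]", and users of %p are pointed at
  // "getelementptr inbounds [4 x i32]* %V, i32 0, i32 0".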
7241  if (AI.isArrayAllocation()) {  // Check C != 1
7242    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
7243      const Type *NewTy =
7244        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
7245      assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
7246      AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
7247      New->setAlignment(AI.getAlignment());
7248
7249      // Scan to the end of the allocation instructions, to skip over a block of
7250      // allocas if possible; also skip interleaved debug info.
7251      //
7252      BasicBlock::iterator It = New;
7253      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
7254
7255      // Now that It is pointing to the first non-allocation-inst in the block,
7256      // insert our getelementptr instruction...
7257      //
7258      Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
7259      Value *Idx[2];
7260      Idx[0] = NullIdx;
7261      Idx[1] = NullIdx;
7262      Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
7263                                                   New->getName()+".sub", It);
7264
7265      // Now make everything use the getelementptr instead of the original
7266      // allocation.
7267      return ReplaceInstUsesWith(AI, V);
7268    } else if (isa<UndefValue>(AI.getArraySize())) {
7269      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
7270    }
7271  }
7272
7273  if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
7274    // If alloca'ing a zero byte object, replace the alloca with a null pointer.
7275    // Note that we only do this for allocas, because malloc should allocate
7276    // and return a unique pointer, even for a zero byte allocation.
7277    if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
7278      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
7279
7280    // If the alignment is 0 (unspecified), assign it the preferred alignment.
7281    if (AI.getAlignment() == 0)
7282      AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
7283  }
7284
7285  return 0;
7286}
7287
7288Instruction *InstCombiner::visitFree(Instruction &FI) {
7289  Value *Op = FI.getOperand(1);
7290
7291  // free undef -> unreachable.
7292  if (isa<UndefValue>(Op)) {
7293    // Insert a new store to null because we cannot modify the CFG here.
7294    new StoreInst(ConstantInt::getTrue(FI.getContext()),
7295           UndefValue::get(Type::getInt1PtrTy(FI.getContext())), &FI);
7296    return EraseInstFromFunction(FI);
7297  }
7298
7299  // If we have 'free null', delete the instruction.  This can happen in STL code
7300  // when lots of inlining happens.
7301  if (isa<ConstantPointerNull>(Op))
7302    return EraseInstFromFunction(FI);
7303
7304  // If we have a malloc call whose only use is a free call, delete both.
7305  if (isMalloc(Op)) {
7306    if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
7307      if (Op->hasOneUse() && CI->hasOneUse()) {
7308        EraseInstFromFunction(FI);
7309        EraseInstFromFunction(*CI);
7310        return EraseInstFromFunction(*cast<Instruction>(Op));
7311      }
7312    } else {
7313      // Op is a call to malloc
7314      if (Op->hasOneUse()) {
7315        EraseInstFromFunction(FI);
7316        return EraseInstFromFunction(*cast<Instruction>(Op));
7317      }
7318    }
7319  }
7320
7321  return 0;
7322}
7323
7324/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
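/// E.g. (illustrative): "load i32* (bitcast <2 x i16>* %p to i32*)" becomes
/// "load <2 x i16>* %p" followed by a "bitcast <2 x i16> ... to i32" of the
/// loaded value, since the two types have the same size.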
7325static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
7326                                        const TargetData *TD) {
7327  User *CI = cast<User>(LI.getOperand(0));
7328  Value *CastOp = CI->getOperand(0);
7329
7330  const PointerType *DestTy = cast<PointerType>(CI->getType());
7331  const Type *DestPTy = DestTy->getElementType();
7332  if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
7333
7334    // If the address spaces don't match, don't eliminate the cast.
7335    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
7336      return 0;
7337
7338    const Type *SrcPTy = SrcTy->getElementType();
7339
7340    if (DestPTy->isInteger() || isa<PointerType>(DestPTy) ||
7341         isa<VectorType>(DestPTy)) {
7342      // If the source is an array, the code below will not succeed.  Check to
7343      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
7344      // constants.
7345      if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
7346        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
7347          if (ASrcTy->getNumElements() != 0) {
7348            Value *Idxs[2];
7349            Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
7350            Idxs[1] = Idxs[0];
7351            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
7352            SrcTy = cast<PointerType>(CastOp->getType());
7353            SrcPTy = SrcTy->getElementType();
7354          }
7355
7356      if (IC.getTargetData() &&
7357          (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) ||
7358            isa<VectorType>(SrcPTy)) &&
7359          // Do not allow turning this into a load of an integer, which is then
7360          // cast to a pointer; this pessimizes pointer analysis a lot.
7361          (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) &&
7362          IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
7363               IC.getTargetData()->getTypeSizeInBits(DestPTy)) {
7364
7365        // Okay, we are casting from one integer or pointer type to another of
7366        // the same size.  Instead of casting the pointer before the load, cast
7367        // the result of the loaded value.
7368        Value *NewLoad =
7369          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
7370        // Now cast the result of the load.
7371        return new BitCastInst(NewLoad, LI.getType());
7372      }
7373    }
7374  }
7375  return 0;
7376}
7377
7378Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
7379  Value *Op = LI.getOperand(0);
7380
7381  // Attempt to improve the alignment.
7382  if (TD) {
7383    unsigned KnownAlign =
7384      GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
7385    if (KnownAlign >
7386        (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
7387                                  LI.getAlignment()))
7388      LI.setAlignment(KnownAlign);
7389  }
7390
7391  // load (cast X) --> cast (load X) iff safe.
7392  if (isa<CastInst>(Op))
7393    if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
7394      return Res;
7395
7396  // None of the following transforms are legal for volatile loads.
7397  if (LI.isVolatile()) return 0;
7398
7399  // Do really simple store-to-load forwarding and load CSE, to catch cases
7400  // where there are several consecutive memory accesses to the same location,
7401  // separated by a few arithmetic operations.
7402  BasicBlock::iterator BBI = &LI;
7403  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
7404    return ReplaceInstUsesWith(LI, AvailableVal);
7405
7406  // load(gep null, ...) -> unreachable
7407  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
7408    const Value *GEPI0 = GEPI->getOperand(0);
7409    // TODO: Consider a target hook for valid address spaces for this xform.
7410    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
7411      // Insert a new store to null instruction before the load to indicate
7412      // that this code is not reachable.  We do this instead of inserting
7413      // an unreachable instruction directly because we cannot modify the
7414      // CFG.
7415      new StoreInst(UndefValue::get(LI.getType()),
7416                    Constant::getNullValue(Op->getType()), &LI);
7417      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
7418    }
7419  }
7420
7421  // load null/undef -> unreachable
7422  // TODO: Consider a target hook for valid address spaces for this xform.
7423  if (isa<UndefValue>(Op) ||
7424      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
7425    // Insert a new store to null instruction before the load to indicate that
7426    // this code is not reachable.  We do this instead of inserting an
7427    // unreachable instruction directly because we cannot modify the CFG.
7428    new StoreInst(UndefValue::get(LI.getType()),
7429                  Constant::getNullValue(Op->getType()), &LI);
7430    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
7431  }
7432
7433  // Instcombine load (constantexpr_cast global) -> cast (load global)
7434  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
7435    if (CE->isCast())
7436      if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
7437        return Res;
7438
7439  if (Op->hasOneUse()) {
7440    // Change select and PHI nodes to select values instead of addresses: this
7441    // helps alias analysis out a lot, allows many other simplifications, and
7442    // exposes redundancy in the code.
7443    //
7444    // Note that we cannot do the transformation unless we know that the
7445    // introduced loads cannot trap!  Something like this is valid as long as
7446    // the condition is always false: load (select bool %C, int* null, int* %G),
7447    // but it would not be valid if we transformed it to load from null
7448    // unconditionally.
7449    //
7450    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
7451      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
7452      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) &&
7453          isSafeToLoadUnconditionally(SI->getOperand(2), SI)) {
7454        Value *V1 = Builder->CreateLoad(SI->getOperand(1),
7455                                        SI->getOperand(1)->getName()+".val");
7456        Value *V2 = Builder->CreateLoad(SI->getOperand(2),
7457                                        SI->getOperand(2)->getName()+".val");
7458        return SelectInst::Create(SI->getCondition(), V1, V2);
7459      }
7460
7461      // load (select (cond, null, P)) -> load P
7462      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
7463        if (C->isNullValue()) {
7464          LI.setOperand(0, SI->getOperand(2));
7465          return &LI;
7466        }
7467
7468      // load (select (cond, P, null)) -> load P
7469      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
7470        if (C->isNullValue()) {
7471          LI.setOperand(0, SI->getOperand(1));
7472          return &LI;
7473        }
7474    }
7475  }
7476  return 0;
7477}
7478
7479/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
7480/// when possible.  This makes it generally easy to do alias analysis and/or
7481/// SROA/mem2reg of the memory object.
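/// E.g. (an illustrative sketch, assuming 32-bit pointers):
///   store i32 %v, i32* (bitcast i8** %p to i32*)
/// becomes:
///   store (inttoptr i32 %v to i8*), i8** %p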
7482static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
7483  User *CI = cast<User>(SI.getOperand(1));
7484  Value *CastOp = CI->getOperand(0);
7485
7486  const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
7487  const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
7488  if (SrcTy == 0) return 0;
7489
7490  const Type *SrcPTy = SrcTy->getElementType();
7491
7492  if (!DestPTy->isInteger() && !isa<PointerType>(DestPTy))
7493    return 0;
7494
7495  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
7496  /// to its first element.  This allows us to handle things like:
7497  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
7498  /// on 32-bit hosts.
7499  SmallVector<Value*, 4> NewGEPIndices;
7500
7501  // If the source is an array, the code below will not succeed.  Check to
7502  // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
7503  // constants.
7504  if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) {
7505    // Index through pointer.
7506    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
7507    NewGEPIndices.push_back(Zero);
7508
7509    while (1) {
7510      if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) {
7511        if (!STy->getNumElements()) /* Struct can be empty {} */
7512          break;
7513        NewGEPIndices.push_back(Zero);
7514        SrcPTy = STy->getElementType(0);
7515      } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
7516        NewGEPIndices.push_back(Zero);
7517        SrcPTy = ATy->getElementType();
7518      } else {
7519        break;
7520      }
7521    }
7522
7523    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
7524  }
7525
7526  if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy))
7527    return 0;
7528
7529  // If the pointers point into different address spaces or if they point to
7530  // values with different sizes, we can't do the transformation.
7531  if (!IC.getTargetData() ||
7532      SrcTy->getAddressSpace() !=
7533        cast<PointerType>(CI->getType())->getAddressSpace() ||
7534      IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
7535      IC.getTargetData()->getTypeSizeInBits(DestPTy))
7536    return 0;
7537
7538  // Okay, we are casting from one integer or pointer type to another of
7539  // the same size.  Instead of casting the pointer before
7540  // the store, cast the value to be stored.
7541  Value *NewCast;
7542  Value *SIOp0 = SI.getOperand(0);
7543  Instruction::CastOps opcode = Instruction::BitCast;
7544  const Type* CastSrcTy = SIOp0->getType();
7545  const Type* CastDstTy = SrcPTy;
7546  if (isa<PointerType>(CastDstTy)) {
7547    if (CastSrcTy->isInteger())
7548      opcode = Instruction::IntToPtr;
7549  } else if (isa<IntegerType>(CastDstTy)) {
7550    if (isa<PointerType>(SIOp0->getType()))
7551      opcode = Instruction::PtrToInt;
7552  }
7553
7554  // If CastOp is a pointer to an aggregate and this is a store to its first
7555  // field, emit a GEP to index into that first field.
7556  if (!NewGEPIndices.empty())
7557    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
7558                                           NewGEPIndices.end());
7559
7560  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
7561                                   SIOp0->getName()+".c");
7562  return new StoreInst(NewCast, CastOp);
7563}
7564
7565/// equivalentAddressValues - Test if A and B will obviously have the same
7566/// value. This includes recognizing that %t0 and %t1 will have the same
7567/// value in code like this:
7568///   %t0 = getelementptr \@a, 0, 3
7569///   store i32 0, i32* %t0
7570///   %t1 = getelementptr \@a, 0, 3
7571///   %t2 = load i32* %t1
7572///
7573static bool equivalentAddressValues(Value *A, Value *B) {
7574  // Test if the values are trivially equivalent.
7575  if (A == B) return true;
7576
7577  // Test if the values come from identical arithmetic instructions.
7578  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
7579  // it's only used to compare two uses within the same basic block, which
7580  // means that they'll always either have the same value or one of them
7581  // will have an undefined value.
7582  if (isa<BinaryOperator>(A) ||
7583      isa<CastInst>(A) ||
7584      isa<PHINode>(A) ||
7585      isa<GetElementPtrInst>(A))
7586    if (Instruction *BI = dyn_cast<Instruction>(B))
7587      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
7588        return true;
7589
7590  // Otherwise they may not be equivalent.
7591  return false;
7592}
7593
7594// If this instruction has two uses, one of which is a llvm.dbg.declare,
7595// return the llvm.dbg.declare.
7596DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
7597  if (!V->hasNUses(2))
7598    return 0;
7599  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
7600       UI != E; ++UI) {
7601    if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI))
7602      return DI;
7603    if (isa<BitCastInst>(UI) && UI->hasOneUse()) {
7604      if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin()))
7605        return DI;
7606    }
7607  }
7608  return 0;
7609}
7610
7611Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
7612  Value *Val = SI.getOperand(0);
7613  Value *Ptr = SI.getOperand(1);
7614
7615  // If the RHS is an alloca with a single use, zapify the store, making the
7616  // alloca dead.
7617  // If the RHS is an alloca with two uses, the other one being a
7618  // llvm.dbg.declare, zapify the store and the declare, making the
7619  // alloca dead.  We must do this to prevent declares from affecting
7620  // codegen.
7621  if (!SI.isVolatile()) {
7622    if (Ptr->hasOneUse()) {
7623      if (isa<AllocaInst>(Ptr)) {
7624        EraseInstFromFunction(SI);
7625        ++NumCombined;
7626        return 0;
7627      }
7628      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
7629        if (isa<AllocaInst>(GEP->getOperand(0))) {
7630          if (GEP->getOperand(0)->hasOneUse()) {
7631            EraseInstFromFunction(SI);
7632            ++NumCombined;
7633            return 0;
7634          }
7635          if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
7636            EraseInstFromFunction(*DI);
7637            EraseInstFromFunction(SI);
7638            ++NumCombined;
7639            return 0;
7640          }
7641        }
7642      }
7643    }
7644    if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
7645      EraseInstFromFunction(*DI);
7646      EraseInstFromFunction(SI);
7647      ++NumCombined;
7648      return 0;
7649    }
7650  }
7651
7652  // Attempt to improve the alignment.
7653  if (TD) {
7654    unsigned KnownAlign =
7655      GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
7656    if (KnownAlign >
7657        (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
7658                                  SI.getAlignment()))
7659      SI.setAlignment(KnownAlign);
7660  }
7661
7662  // Do really simple DSE, to catch cases where there are several consecutive
7663  // stores to the same location, separated by a few arithmetic operations. This
7664  // situation often occurs with bitfield accesses.
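  // E.g. (illustrative): in "store i32 %a, i32* %p", a few arithmetic
  // instructions, then "store i32 %b, i32* %p", the first store is dead
  // and is removed here.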
7665  BasicBlock::iterator BBI = &SI;
7666  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
7667       --ScanInsts) {
7668    --BBI;
7669    // Don't count debug info directives, lest they affect codegen, and skip
7670    // over pointer-to-pointer bitcasts, which are NOPs.
7671    // It is necessary for correctness to skip those that feed into a
7672    // llvm.dbg.declare, as these are not present when debugging is off.
7673    if (isa<DbgInfoIntrinsic>(BBI) ||
7674        (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
7675      ScanInsts++;
7676      continue;
7677    }
7678
7679    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
7680      // Prev store isn't volatile, and stores to the same location?
7681      if (!PrevSI->isVolatile() &&
7682          equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1))) {
7683        ++NumDeadStore;
7684        ++BBI;
7685        EraseInstFromFunction(*PrevSI);
7686        continue;
7687      }
7688      break;
7689    }
7690
7691    // If this is a load, we have to stop.  However, if the loaded value is the
7692    // value we are about to store, and it is loaded from the same pointer,
7693    // then *this* store is dead (X = load P; store X -> P).
7694    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
7695      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
7696          !SI.isVolatile()) {
7697        EraseInstFromFunction(SI);
7698        ++NumCombined;
7699        return 0;
7700      }
7701      // Otherwise, this is a load from some other location.  Stores before it
7702      // may not be dead.
7703      break;
7704    }
7705
7706    // Don't skip over loads or things that can modify memory.
7707    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
7708      break;
7709  }
7710
7711
7712  if (SI.isVolatile()) return 0;  // Don't hack volatile stores.
7713
7714  // store X, null    -> turns into 'unreachable' in SimplifyCFG
7715  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
7716    if (!isa<UndefValue>(Val)) {
7717      SI.setOperand(0, UndefValue::get(Val->getType()));
7718      if (Instruction *U = dyn_cast<Instruction>(Val))
7719        Worklist.Add(U);  // Dropped a use.
7720      ++NumCombined;
7721    }
7722    return 0;  // Do not modify these!
7723  }
7724
7725  // store undef, Ptr -> noop
7726  if (isa<UndefValue>(Val)) {
7727    EraseInstFromFunction(SI);
7728    ++NumCombined;
7729    return 0;
7730  }
7731
7732  // If the pointer destination is a cast, see if we can fold the cast into the
7733  // source instead.
7734  if (isa<CastInst>(Ptr))
7735    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
7736      return Res;
7737  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
7738    if (CE->isCast())
7739      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
7740        return Res;
7741
7742
7743  // If this store is the last instruction in the basic block (possibly
7744  // excepting debug info instructions and the pointer bitcasts that feed
7745  // into them), and if the block ends with an unconditional branch, try
7746  // to move it to the successor block.
7747  BBI = &SI;
7748  do {
7749    ++BBI;
7750  } while (isa<DbgInfoIntrinsic>(BBI) ||
7751           (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType())));
7752  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
7753    if (BI->isUnconditional())
7754      if (SimplifyStoreAtEndOfBlock(SI))
7755        return 0;  // xform done!
7756
7757  return 0;
7758}
7759
7760/// SimplifyStoreAtEndOfBlock - Turn things like:
7761///   if () { *P = v1; } else { *P = v2 }
7762/// into a phi node with a store in the successor.
7763///
7764/// Simplify things like:
7765///   *P = v1; if () { *P = v2; }
7766/// into a phi node with a store in the successor.
7767///
7768bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
7769  BasicBlock *StoreBB = SI.getParent();
7770
7771  // Check to see if the successor block has exactly two incoming edges.  If
7772  // so, see if the other predecessor contains a store to the same location.
7773  // If so, insert a PHI node (if needed) and move the stores down.
7774  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
7775
7776  // Determine whether Dest has exactly two predecessors and, if so, compute
7777  // the other predecessor.
7778  pred_iterator PI = pred_begin(DestBB);
7779  BasicBlock *OtherBB = 0;
7780  if (*PI != StoreBB)
7781    OtherBB = *PI;
7782  ++PI;
7783  if (PI == pred_end(DestBB))
7784    return false;
7785
7786  if (*PI != StoreBB) {
7787    if (OtherBB)
7788      return false;
7789    OtherBB = *PI;
7790  }
7791  if (++PI != pred_end(DestBB))
7792    return false;
7793
7794  // Bail out if the relevant blocks aren't distinct (this can happen,
7795  // for example, if SI is in an infinite loop).
7796  if (StoreBB == DestBB || OtherBB == DestBB)
7797    return false;
7798
7799  // Verify that the other block ends in a branch and is not otherwise empty.
7800  BasicBlock::iterator BBI = OtherBB->getTerminator();
7801  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
7802  if (!OtherBr || BBI == OtherBB->begin())
7803    return false;
7804
7805  // If the other block ends in an unconditional branch, check for the 'if then
7806  // else' case: there is an instruction before the branch.
7807  StoreInst *OtherStore = 0;
7808  if (OtherBr->isUnconditional()) {
7809    --BBI;
7810    // Skip over debugging info.
7811    while (isa<DbgInfoIntrinsic>(BBI) ||
7812           (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
7813      if (BBI==OtherBB->begin())
7814        return false;
7815      --BBI;
7816    }
7817    // If this isn't a store, isn't a store to the same location, or if the
7818    // alignments differ, bail out.
7819    OtherStore = dyn_cast<StoreInst>(BBI);
7820    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
7821        OtherStore->getAlignment() != SI.getAlignment())
7822      return false;
7823  } else {
7824    // Otherwise, the other block ended with a conditional branch. If one of the
7825    // destinations is StoreBB, then we have the if/then case.
7826    if (OtherBr->getSuccessor(0) != StoreBB &&
7827        OtherBr->getSuccessor(1) != StoreBB)
7828      return false;
7829
7830    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
7831    // if/then triangle.  See if there is a store to the same ptr as SI that
7832    // lives in OtherBB.
7833    for (;; --BBI) {
7834      // Check to see if we find the matching store.
7835      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
7836        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
7837            OtherStore->getAlignment() != SI.getAlignment())
7838          return false;
7839        break;
7840      }
7841      // If we find something that may be using or overwriting the stored
7842      // value, or if we run out of instructions, we can't do the xform.
7843      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
7844          BBI == OtherBB->begin())
7845        return false;
7846    }
7847
7848    // In order to eliminate the store in OtherBr, we have to
7849    // make sure nothing reads or overwrites the stored value in
7850    // StoreBB.
7851    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
7852      // FIXME: This should really be AA driven.
7853      if (I->mayReadFromMemory() || I->mayWriteToMemory())
7854        return false;
7855    }
7856  }
7857
7858  // Insert a PHI node now if we need it.
7859  Value *MergedVal = OtherStore->getOperand(0);
7860  if (MergedVal != SI.getOperand(0)) {
7861    PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
7862    PN->reserveOperandSpace(2);
7863    PN->addIncoming(SI.getOperand(0), SI.getParent());
7864    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
7865    MergedVal = InsertNewInstBefore(PN, DestBB->front());
7866  }
7867
7868  // Advance to a place where it is safe to insert the new store and
7869  // insert it.
7870  BBI = DestBB->getFirstNonPHI();
7871  InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
7872                                    OtherStore->isVolatile(),
7873                                    SI.getAlignment()), *BBI);
7874
7875  // Nuke the old stores.
7876  EraseInstFromFunction(SI);
7877  EraseInstFromFunction(*OtherStore);
7878  ++NumCombined;
7879  return true;
7880}
7881
7882
7883Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
7884  // Change br (not X), label True, label False to: br X, label False, True
7885  Value *X = 0;
7886  BasicBlock *TrueDest;
7887  BasicBlock *FalseDest;
7888  if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
7889      !isa<Constant>(X)) {
7890    // Swap Destinations and condition...
7891    BI.setCondition(X);
7892    BI.setSuccessor(0, FalseDest);
7893    BI.setSuccessor(1, TrueDest);
7894    return &BI;
7895  }
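  // For example (hypothetical IR; m_Not matches 'xor %c, true'):
  //   %notc = xor i1 %c, true
  //   br i1 %notc, label %T, label %F
  // becomes
  //   br i1 %c, label %F, label %T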
7896
  // Canonicalize fcmp_one -> fcmp_ueq (similarly for ole/oge): invert the
  // predicate and swap the successors.
7898  FCmpInst::Predicate FPred; Value *Y;
7899  if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
7900                             TrueDest, FalseDest)) &&
7901      BI.getCondition()->hasOneUse())
7902    if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
7903        FPred == FCmpInst::FCMP_OGE) {
7904      FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
7905      Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
7906
7907      // Swap Destinations and condition.
7908      BI.setSuccessor(0, FalseDest);
7909      BI.setSuccessor(1, TrueDest);
7910      Worklist.Add(Cond);
7911      return &BI;
7912    }
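  // For example (hypothetical IR):
  //   br (fcmp one %a, %b), label %T, label %F
  // becomes, using the inverse predicate,
  //   br (fcmp ueq %a, %b), label %F, label %T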
7913
  // Canonicalize icmp_ne -> icmp_eq
7915  ICmpInst::Predicate IPred;
7916  if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
7917                      TrueDest, FalseDest)) &&
7918      BI.getCondition()->hasOneUse())
7919    if (IPred == ICmpInst::ICMP_NE  || IPred == ICmpInst::ICMP_ULE ||
7920        IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
7921        IPred == ICmpInst::ICMP_SGE) {
7922      ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
7923      Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
7924      // Swap Destinations and condition.
7925      BI.setSuccessor(0, FalseDest);
7926      BI.setSuccessor(1, TrueDest);
7927      Worklist.Add(Cond);
7928      return &BI;
7929    }
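  // For example (hypothetical IR):
  //   br (icmp ne %a, %b), label %T, label %F
  // becomes
  //   br (icmp eq %a, %b), label %F, label %T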
7930
7931  return 0;
7932}
7933
7934Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
7935  Value *Cond = SI.getCondition();
7936  if (Instruction *I = dyn_cast<Instruction>(Cond)) {
7937    if (I->getOpcode() == Instruction::Add)
7938      if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
7939        // change 'switch (X+4) case 1:' into 'switch (X) case -3'
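        // In IR terms (a hypothetical example):
        //   %t = add i32 %x, 4
        //   switch i32 %t, label %dflt [ i32 1, label %bb ]
        // becomes
        //   switch i32 %x, label %dflt [ i32 -3, label %bb ]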
7940        for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
7941          SI.setOperand(i,
7942                   ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
7943                                                AddRHS));
7944        SI.setOperand(0, I->getOperand(0));
7945        Worklist.Add(I);
7946        return &SI;
7947      }
7948  }
7949  return 0;
7950}
7951
7952Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
7953  Value *Agg = EV.getAggregateOperand();
7954
7955  if (!EV.hasIndices())
7956    return ReplaceInstUsesWith(EV, Agg);
7957
7958  if (Constant *C = dyn_cast<Constant>(Agg)) {
7959    if (isa<UndefValue>(C))
7960      return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
7961
7962    if (isa<ConstantAggregateZero>(C))
7963      return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
7964
7965    if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
7966      // Extract the element indexed by the first index out of the constant
7967      Value *V = C->getOperand(*EV.idx_begin());
7968      if (EV.getNumIndices() > 1)
7969        // Extract the remaining indices out of the constant indexed by the
7970        // first index
7971        return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
7972      else
7973        return ReplaceInstUsesWith(EV, V);
7974    }
7975    return 0; // Can't handle other constants
7976  }
7977  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
7978    // We're extracting from an insertvalue instruction, compare the indices
7979    const unsigned *exti, *exte, *insi, *inse;
7980    for (exti = EV.idx_begin(), insi = IV->idx_begin(),
7981         exte = EV.idx_end(), inse = IV->idx_end();
7982         exti != exte && insi != inse;
7983         ++exti, ++insi) {
7984      if (*insi != *exti)
7985        // The insert and extract both reference distinctly different elements.
7986        // This means the extract is not influenced by the insert, and we can
7987        // replace the aggregate operand of the extract with the aggregate
7988        // operand of the insert. i.e., replace
7989        // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
7990        // %E = extractvalue { i32, { i32 } } %I, 0
7991        // with
7992        // %E = extractvalue { i32, { i32 } } %A, 0
7993        return ExtractValueInst::Create(IV->getAggregateOperand(),
7994                                        EV.idx_begin(), EV.idx_end());
7995    }
7996    if (exti == exte && insi == inse)
7997      // Both iterators are at the end: Index lists are identical. Replace
7998      // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
7999      // %C = extractvalue { i32, { i32 } } %B, 1, 0
8000      // with "i32 42"
8001      return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
8002    if (exti == exte) {
8003      // The extract list is a prefix of the insert list. i.e. replace
8004      // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
8005      // %E = extractvalue { i32, { i32 } } %I, 1
8006      // with
8007      // %X = extractvalue { i32, { i32 } } %A, 1
8008      // %E = insertvalue { i32 } %X, i32 42, 0
8009      // by switching the order of the insert and extract (though the
8010      // insertvalue should be left in, since it may have other uses).
8011      Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
8012                                                 EV.idx_begin(), EV.idx_end());
8013      return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
8014                                     insi, inse);
8015    }
8016    if (insi == inse)
8017      // The insert list is a prefix of the extract list
8018      // We can simply remove the common indices from the extract and make it
8019      // operate on the inserted value instead of the insertvalue result.
8020      // i.e., replace
8021      // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
8022      // %E = extractvalue { i32, { i32 } } %I, 1, 0
8023      // with
8024      // %E extractvalue { i32 } { i32 42 }, 0
8025      return ExtractValueInst::Create(IV->getInsertedValueOperand(),
8026                                      exti, exte);
8027  }
8028  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
    // We're extracting from an intrinsic.  See if we're the only user, which
    // allows us to simplify multiple-result intrinsics to simpler things that
    // just produce one value.
8032    if (II->hasOneUse()) {
8033      // Check if we're grabbing the overflow bit or the result of a 'with
8034      // overflow' intrinsic.  If it's the latter we can remove the intrinsic
8035      // and replace it with a traditional binary instruction.
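      // For example (hypothetical IR), when the overflow bit is unused:
      //   %r = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
      //   %s = extractvalue {i32, i1} %r, 0
      // becomes
      //   %s = add i32 %a, %b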
8036      switch (II->getIntrinsicID()) {
8037      case Intrinsic::uadd_with_overflow:
8038      case Intrinsic::sadd_with_overflow:
8039        if (*EV.idx_begin() == 0) {  // Normal result.
8040          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
8041          II->replaceAllUsesWith(UndefValue::get(II->getType()));
8042          EraseInstFromFunction(*II);
8043          return BinaryOperator::CreateAdd(LHS, RHS);
8044        }
8045        break;
8046      case Intrinsic::usub_with_overflow:
8047      case Intrinsic::ssub_with_overflow:
8048        if (*EV.idx_begin() == 0) {  // Normal result.
8049          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
8050          II->replaceAllUsesWith(UndefValue::get(II->getType()));
8051          EraseInstFromFunction(*II);
8052          return BinaryOperator::CreateSub(LHS, RHS);
8053        }
8054        break;
8055      case Intrinsic::umul_with_overflow:
8056      case Intrinsic::smul_with_overflow:
8057        if (*EV.idx_begin() == 0) {  // Normal result.
8058          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
8059          II->replaceAllUsesWith(UndefValue::get(II->getType()));
8060          EraseInstFromFunction(*II);
8061          return BinaryOperator::CreateMul(LHS, RHS);
8062        }
8063        break;
8064      default:
8065        break;
8066      }
8067    }
8068  }
  // Can't simplify extracts from other values. Note that nested extracts are
  // already simplified implicitly by the above (extract (extract (insert))
  // will be translated into extract (insert (extract)) first and then just
  // the value inserted, if appropriate).
8073  return 0;
8074}
8075
8076/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
8077/// is to leave as a vector operation.
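/// For example (a hypothetical case), extracting one lane of a vector add
/// with a constant operand is cheap, because the extract of the constant
/// folds away:
///   %v = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
///   %e = extractelement <4 x i32> %v, i32 0
/// can become
///   %x0 = extractelement <4 x i32> %x, i32 0
///   %e  = add i32 %x0, 1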
8078static bool CheapToScalarize(Value *V, bool isConstant) {
8079  if (isa<ConstantAggregateZero>(V))
8080    return true;
8081  if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
8082    if (isConstant) return true;
8083    // If all elts are the same, we can extract.
8084    Constant *Op0 = C->getOperand(0);
8085    for (unsigned i = 1; i < C->getNumOperands(); ++i)
8086      if (C->getOperand(i) != Op0)
8087        return false;
8088    return true;
8089  }
8090  Instruction *I = dyn_cast<Instruction>(V);
8091  if (!I) return false;
8092
  // An insertelement gets simplified to the inserted element, or is deleted
  // if this is a constant-idx extractelement and it's a constant-idx insertelt.
8095  if (I->getOpcode() == Instruction::InsertElement && isConstant &&
8096      isa<ConstantInt>(I->getOperand(2)))
8097    return true;
8098  if (I->getOpcode() == Instruction::Load && I->hasOneUse())
8099    return true;
8100  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
8101    if (BO->hasOneUse() &&
8102        (CheapToScalarize(BO->getOperand(0), isConstant) ||
8103         CheapToScalarize(BO->getOperand(1), isConstant)))
8104      return true;
8105  if (CmpInst *CI = dyn_cast<CmpInst>(I))
8106    if (CI->hasOneUse() &&
8107        (CheapToScalarize(CI->getOperand(0), isConstant) ||
8108         CheapToScalarize(CI->getOperand(1), isConstant)))
8109      return true;
8110
8111  return false;
8112}
8113
8114/// Read and decode a shufflevector mask.
8115///
8116/// It turns undef elements into values that are larger than the number of
8117/// elements in the input.
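/// For example (hypothetical), for a <4 x i32> shuffle the mask
/// <i32 0, i32 5, i32 undef, i32 2> decodes to {0, 5, 8, 2}; the undef entry
/// becomes 2*NElts == 8, which is out of range for both inputs.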
8118static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
8119  unsigned NElts = SVI->getType()->getNumElements();
8120  if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
8121    return std::vector<unsigned>(NElts, 0);
8122  if (isa<UndefValue>(SVI->getOperand(2)))
8123    return std::vector<unsigned>(NElts, 2*NElts);
8124
8125  std::vector<unsigned> Result;
8126  const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
8127  for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
8128    if (isa<UndefValue>(*i))
      Result.push_back(NElts*2);  // undef -> out of range (2*NElts)
8130    else
8131      Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
8132  return Result;
8133}
8134
8135/// FindScalarElement - Given a vector and an element number, see if the scalar
8136/// value is already around as a register, for example if it were inserted then
8137/// extracted from the vector.
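/// For example (hypothetical IR), given
///   %v = insertelement <4 x float> %w, float %s, i32 1
/// FindScalarElement(%v, 1) returns %s, while FindScalarElement(%v, 0)
/// recurses into %w.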
8138static Value *FindScalarElement(Value *V, unsigned EltNo) {
8139  assert(isa<VectorType>(V->getType()) && "Not looking at a vector?");
8140  const VectorType *PTy = cast<VectorType>(V->getType());
8141  unsigned Width = PTy->getNumElements();
8142  if (EltNo >= Width)  // Out of range access.
8143    return UndefValue::get(PTy->getElementType());
8144
8145  if (isa<UndefValue>(V))
8146    return UndefValue::get(PTy->getElementType());
8147  else if (isa<ConstantAggregateZero>(V))
8148    return Constant::getNullValue(PTy->getElementType());
8149  else if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
8150    return CP->getOperand(EltNo);
8151  else if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
8152    // If this is an insert to a variable element, we don't know what it is.
8153    if (!isa<ConstantInt>(III->getOperand(2)))
8154      return 0;
8155    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
8156
8157    // If this is an insert to the element we are looking for, return the
8158    // inserted value.
8159    if (EltNo == IIElt)
8160      return III->getOperand(1);
8161
8162    // Otherwise, the insertelement doesn't modify the value, recurse on its
8163    // vector input.
8164    return FindScalarElement(III->getOperand(0), EltNo);
8165  } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
8166    unsigned LHSWidth =
8167      cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
8168    unsigned InEl = getShuffleMask(SVI)[EltNo];
8169    if (InEl < LHSWidth)
8170      return FindScalarElement(SVI->getOperand(0), InEl);
8171    else if (InEl < LHSWidth*2)
8172      return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
8173    else
8174      return UndefValue::get(PTy->getElementType());
8175  }
8176
8177  // Otherwise, we don't know.
8178  return 0;
8179}
8180
8181Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
8182  // If vector val is undef, replace extract with scalar undef.
8183  if (isa<UndefValue>(EI.getOperand(0)))
8184    return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
8185
8186  // If vector val is constant 0, replace extract with scalar 0.
8187  if (isa<ConstantAggregateZero>(EI.getOperand(0)))
8188    return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
8189
8190  if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
8191    // If vector val is constant with all elements the same, replace EI with
8192    // that element. When the elements are not identical, we cannot replace yet
8193    // (we do that below, but only when the index is constant).
8194    Constant *op0 = C->getOperand(0);
8195    for (unsigned i = 1; i != C->getNumOperands(); ++i)
8196      if (C->getOperand(i) != op0) {
8197        op0 = 0;
8198        break;
8199      }
8200    if (op0)
8201      return ReplaceInstUsesWith(EI, op0);
8202  }
8203
8204  // If extracting a specified index from the vector, see if we can recursively
8205  // find a previously computed scalar that was inserted into the vector.
8206  if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
8207    unsigned IndexVal = IdxC->getZExtValue();
8208    unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
8209
8210    // If this is extracting an invalid index, turn this into undef, to avoid
8211    // crashing the code below.
8212    if (IndexVal >= VectorWidth)
8213      return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
8214
8215    // This instruction only demands the single element from the input vector.
8216    // If the input vector has a single use, simplify it based on this use
8217    // property.
8218    if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
8219      APInt UndefElts(VectorWidth, 0);
      // Build the demanded mask with APInt::set; '1 << IndexVal' would
      // overflow the shift for vectors with more than 32 elements.
      APInt DemandedMask(VectorWidth, 0);
      DemandedMask.set(IndexVal);
8221      if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
8222                                                DemandedMask, UndefElts)) {
8223        EI.setOperand(0, V);
8224        return &EI;
8225      }
8226    }
8227
8228    if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
8229      return ReplaceInstUsesWith(EI, Elt);
8230
    // If this extractelement is directly using a bitcast from a vector of
8232    // the same number of elements, see if we can find the source element from
8233    // it.  In this case, we will end up needing to bitcast the scalars.
8234    if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
8235      if (const VectorType *VT =
8236              dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
8237        if (VT->getNumElements() == VectorWidth)
8238          if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
8239            return new BitCastInst(Elt, EI.getType());
8240    }
8241  }
8242
8243  if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
8244    // Push extractelement into predecessor operation if legal and
8245    // profitable to do so
8246    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
8247      if (I->hasOneUse() &&
8248          CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
8249        Value *newEI0 =
8250          Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
8251                                        EI.getName()+".lhs");
8252        Value *newEI1 =
8253          Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
8254                                        EI.getName()+".rhs");
8255        return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
8256      }
8257    } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
8258      // Extracting the inserted element?
8259      if (IE->getOperand(2) == EI.getOperand(1))
8260        return ReplaceInstUsesWith(EI, IE->getOperand(1));
      // If the inserted and extracted element indices are constants, they must
      // differ (the equal case was handled above), so we can extract from the
      // vector as it was before the insert.
8263      if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
8264        Worklist.AddValue(EI.getOperand(0));
8265        EI.setOperand(0, IE->getOperand(0));
8266        return &EI;
8267      }
8268    } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      // If this is extracting an element from a shufflevector, figure out
      // where it came from and extract from the appropriate input vector.
8271      if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
8272        unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
8273        Value *Src;
8274        unsigned LHSWidth =
8275          cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
8276
8277        if (SrcIdx < LHSWidth)
8278          Src = SVI->getOperand(0);
8279        else if (SrcIdx < LHSWidth*2) {
8280          SrcIdx -= LHSWidth;
8281          Src = SVI->getOperand(1);
8282        } else {
8283          return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
8284        }
8285        return ExtractElementInst::Create(Src,
8286                         ConstantInt::get(Type::getInt32Ty(EI.getContext()),
8287                                          SrcIdx, false));
8288      }
8289    }
8290    // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement)
8291  }
8292  return 0;
8293}
8294
8295/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
8296/// elements from either LHS or RHS, return the shuffle mask and true.
8297/// Otherwise, return false.
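/// For example (hypothetical IR), with LHS == %a and RHS == %b, the chain
///   %e = extractelement <2 x i32> %b, i32 0
///   %v = insertelement <2 x i32> %a, i32 %e, i32 1
/// yields Mask == <i32 0, i32 2> (element 0 from LHS, then element 0 from
/// RHS) and returns true.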
8298static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
8299                                         std::vector<Constant*> &Mask) {
8300  assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
8301         "Invalid CollectSingleShuffleElements");
8302  unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
8303
8304  if (isa<UndefValue>(V)) {
8305    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
8306    return true;
8307  }
8308
8309  if (V == LHS) {
8310    for (unsigned i = 0; i != NumElts; ++i)
8311      Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
8312    return true;
8313  }
8314
8315  if (V == RHS) {
8316    for (unsigned i = 0; i != NumElts; ++i)
8317      Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
8318                                      i+NumElts));
8319    return true;
8320  }
8321
8322  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
8323    // If this is an insert of an extract from some other vector, include it.
8324    Value *VecOp    = IEI->getOperand(0);
8325    Value *ScalarOp = IEI->getOperand(1);
8326    Value *IdxOp    = IEI->getOperand(2);
8327
8328    if (!isa<ConstantInt>(IdxOp))
8329      return false;
8330    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
8331
8332    if (isa<UndefValue>(ScalarOp)) {  // inserting undef into vector.
      // Okay, we can handle this if the vector we are inserting into is
      // transitively ok.
8335      if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
8336        // If so, update the mask to reflect the inserted undef.
8337        Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
8338        return true;
8339      }
8340    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
8341      if (isa<ConstantInt>(EI->getOperand(1)) &&
8342          EI->getOperand(0)->getType() == V->getType()) {
8343        unsigned ExtractedIdx =
8344          cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
8345
8346        // This must be extracting from either LHS or RHS.
8347        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // Okay, we can handle this if the vector we are inserting into is
          // transitively ok.
8350          if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
8351            // If so, update the mask to reflect the inserted value.
8352            if (EI->getOperand(0) == LHS) {
8353              Mask[InsertedIdx % NumElts] =
8354                 ConstantInt::get(Type::getInt32Ty(V->getContext()),
8355                                  ExtractedIdx);
8356            } else {
8357              assert(EI->getOperand(0) == RHS);
8358              Mask[InsertedIdx % NumElts] =
8359                ConstantInt::get(Type::getInt32Ty(V->getContext()),
8360                                 ExtractedIdx+NumElts);
8361
8362            }
8363            return true;
8364          }
8365        }
8366      }
8367    }
8368  }
8369  // TODO: Handle shufflevector here!
8370
8371  return false;
8372}
8373
8374/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
8375/// RHS of the shuffle instruction, if it is not null.  Return a shuffle mask
8376/// that computes V and the LHS value of the shuffle.
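/// For example (hypothetical IR), for
///   %e = extractelement <2 x i32> %x, i32 0
///   %v = insertelement <2 x i32> %y, i32 %e, i32 1
/// this sets RHS = %x, fills Mask with <i32 0, i32 2>, and returns %y;
/// i.e. %v is computed by shuffle(%y, %x, <0, 2>).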
8377static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
8378                                     Value *&RHS) {
8379  assert(isa<VectorType>(V->getType()) &&
8380         (RHS == 0 || V->getType() == RHS->getType()) &&
8381         "Invalid shuffle!");
8382  unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
8383
8384  if (isa<UndefValue>(V)) {
8385    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
8386    return V;
8387  } else if (isa<ConstantAggregateZero>(V)) {
8388    Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
8389    return V;
8390  } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
8391    // If this is an insert of an extract from some other vector, include it.
8392    Value *VecOp    = IEI->getOperand(0);
8393    Value *ScalarOp = IEI->getOperand(1);
8394    Value *IdxOp    = IEI->getOperand(2);
8395
8396    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
8397      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
8398          EI->getOperand(0)->getType() == V->getType()) {
8399        unsigned ExtractedIdx =
8400          cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
8401        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
8402
        // Either the vector extracted from or the vector inserted into must be
        // RHS; otherwise we'd end up with a shuffle of three inputs.
8405        if (EI->getOperand(0) == RHS || RHS == 0) {
8406          RHS = EI->getOperand(0);
8407          Value *V = CollectShuffleElements(VecOp, Mask, RHS);
8408          Mask[InsertedIdx % NumElts] =
8409            ConstantInt::get(Type::getInt32Ty(V->getContext()),
8410                             NumElts+ExtractedIdx);
8411          return V;
8412        }
8413
8414        if (VecOp == RHS) {
8415          Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
8416          // Everything but the extracted element is replaced with the RHS.
8417          for (unsigned i = 0; i != NumElts; ++i) {
8418            if (i != InsertedIdx)
8419              Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
8420                                         NumElts+i);
8421          }
8422          return V;
8423        }
8424
8425        // If this insertelement is a chain that comes from exactly these two
8426        // vectors, return the vector and the effective shuffle.
8427        if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
8428          return EI->getOperand(0);
8429      }
8430    }
8431  }
8432  // TODO: Handle shufflevector here!
8433
8434  // Otherwise, can't do anything fancy.  Return an identity vector.
8435  for (unsigned i = 0; i != NumElts; ++i)
8436    Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
8437  return V;
8438}
8439
8440Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
8441  Value *VecOp    = IE.getOperand(0);
8442  Value *ScalarOp = IE.getOperand(1);
8443  Value *IdxOp    = IE.getOperand(2);
8444
  // If we are inserting an undef value, or inserting into an undef position,
  // the result can simply be the original vector.
  if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
    return ReplaceInstUsesWith(IE, VecOp);
8448
8449  // If the inserted element was extracted from some other vector, and if the
8450  // indexes are constant, try to turn this into a shufflevector operation.
8451  if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
8452    if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
8453        EI->getOperand(0)->getType() == IE.getType()) {
8454      unsigned NumVectorElts = IE.getType()->getNumElements();
8455      unsigned ExtractedIdx =
8456        cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
8457      unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
8458
8459      if (ExtractedIdx >= NumVectorElts) // Out of range extract.
8460        return ReplaceInstUsesWith(IE, VecOp);
8461
8462      if (InsertedIdx >= NumVectorElts)  // Out of range insert.
8463        return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
8464
8465      // If we are extracting a value from a vector, then inserting it right
8466      // back into the same place, just use the input vector.
8467      if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
8468        return ReplaceInstUsesWith(IE, VecOp);
8469
8470      // If this insertelement isn't used by some other insertelement, turn it
      // (and any insertelements it points to) into one big shuffle.
8472      if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
8473        std::vector<Constant*> Mask;
8474        Value *RHS = 0;
8475        Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
8476        if (RHS == 0) RHS = UndefValue::get(LHS->getType());
8477        // We now have a shuffle of LHS, RHS, Mask.
8478        return new ShuffleVectorInst(LHS, RHS,
8479                                     ConstantVector::get(Mask));
8480      }
8481    }
8482  }
8483
8484  unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
8485  APInt UndefElts(VWidth, 0);
8486  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
8487  if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts))
8488    return &IE;
8489
8490  return 0;
8491}
8492
8493
8494Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
8495  Value *LHS = SVI.getOperand(0);
8496  Value *RHS = SVI.getOperand(1);
8497  std::vector<unsigned> Mask = getShuffleMask(&SVI);
8498
8499  bool MadeChange = false;
8500
8501  // Undefined shuffle mask -> undefined value.
8502  if (isa<UndefValue>(SVI.getOperand(2)))
8503    return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
8504
8505  unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
8506
8507  if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
8508    return 0;
8509
8510  APInt UndefElts(VWidth, 0);
8511  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
8512  if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
8513    LHS = SVI.getOperand(0);
8514    RHS = SVI.getOperand(1);
8515    MadeChange = true;
8516  }
8517
8518  // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')
8519  // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
8520  if (LHS == RHS || isa<UndefValue>(LHS)) {
8521    if (isa<UndefValue>(LHS) && LHS == RHS) {
8522      // shuffle(undef,undef,mask) -> undef.
8523      return ReplaceInstUsesWith(SVI, LHS);
8524    }
8525
8526    // Remap any references to RHS to use LHS.
8527    std::vector<Constant*> Elts;
8528    for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
8529      if (Mask[i] >= 2*e)
8530        Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
8531      else {
8532        if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
8533            (Mask[i] <  e && isa<UndefValue>(LHS))) {
8534          Mask[i] = 2*e;     // Turn into undef.
8535          Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
8536        } else {
8537          Mask[i] = Mask[i] % e;  // Force to LHS.
8538          Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
8539                                          Mask[i]));
8540        }
8541      }
8542    }
8543    SVI.setOperand(0, SVI.getOperand(1));
8544    SVI.setOperand(1, UndefValue::get(RHS->getType()));
8545    SVI.setOperand(2, ConstantVector::get(Elts));
8546    LHS = SVI.getOperand(0);
8547    RHS = SVI.getOperand(1);
8548    MadeChange = true;
8549  }
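  // For example (hypothetical): shuffle(%x, %x, <0, 3>) has now been rewritten
  // as shuffle(%x, undef, <0, 1>), which the identity check below eliminates.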
8550
  // Analyze the shuffle: is the LHS or the RHS an identity shuffle?
8552  bool isLHSID = true, isRHSID = true;
8553
8554  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
8555    if (Mask[i] >= e*2) continue;  // Ignore undef values.
8556    // Is this an identity shuffle of the LHS value?
8557    isLHSID &= (Mask[i] == i);
8558
8559    // Is this an identity shuffle of the RHS value?
8560    isRHSID &= (Mask[i]-e == i);
8561  }
8562
8563  // Eliminate identity shuffles.
8564  if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
8565  if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
8566
8567  // If the LHS is a shufflevector itself, see if we can combine it with this
8568  // one without producing an unusual shuffle.  Here we are really conservative:
8569  // we are absolutely afraid of producing a shuffle mask not in the input
8570  // program, because the code gen may not be smart enough to turn a merged
8571  // shuffle into two specific shuffles: it may produce worse code.  As such,
8572  // we only merge two shuffles if the result is one of the two input shuffle
8573  // masks.  In this case, merging the shuffles just removes one instruction,
8574  // which we know is safe.  This is good for things like turning:
8575  // (splat(splat)) -> splat.
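  // For example (hypothetical): if %s = shuffle(%v, undef, <0,0,0,0>), then
  // shuffle(%s, undef, <0,0,0,0>) computes NewMask == <0,0,0,0> == LHSMask,
  // so it is replaced here by shuffle(%v, undef, <0,0,0,0>).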
8576  if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
8577    if (isa<UndefValue>(RHS)) {
8578      std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
8579
8580      if (LHSMask.size() == Mask.size()) {
8581        std::vector<unsigned> NewMask;
8582        for (unsigned i = 0, e = Mask.size(); i != e; ++i)
8583          if (Mask[i] >= e)
8584            NewMask.push_back(2*e);
8585          else
8586            NewMask.push_back(LHSMask[Mask[i]]);
8587
8588        // If the result mask is equal to the src shuffle or this
8589        // shuffle mask, do the replacement.
8590        if (NewMask == LHSMask || NewMask == Mask) {
8591          unsigned LHSInNElts =
8592            cast<VectorType>(LHSSVI->getOperand(0)->getType())->
8593            getNumElements();
8594          std::vector<Constant*> Elts;
8595          for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
8596            if (NewMask[i] >= LHSInNElts*2) {
8597              Elts.push_back(UndefValue::get(
8598                                           Type::getInt32Ty(SVI.getContext())));
8599            } else {
8600              Elts.push_back(ConstantInt::get(
8601                                           Type::getInt32Ty(SVI.getContext()),
8602                                              NewMask[i]));
8603            }
8604          }
8605          return new ShuffleVectorInst(LHSSVI->getOperand(0),
8606                                       LHSSVI->getOperand(1),
8607                                       ConstantVector::get(Elts));
8608        }
8609      }
8610    }
8611  }
8612
8613  return MadeChange ? &SVI : 0;
8614}
8615
8616
8617
8618
8619/// TryToSinkInstruction - Try to move the specified instruction from its
8620/// current block into the beginning of DestBlock, which can only happen if it's
8621/// safe to move the instruction past all of the instructions between it and the
8622/// end of its block.
8623static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
8624  assert(I->hasOneUse() && "Invariants didn't hold!");
8625
  // Cannot move control-flow-involving instructions, volatile loads,
  // vaarg, etc.
8627  if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
8628    return false;
8629
8630  // Do not sink alloca instructions out of the entry block.
8631  if (isa<AllocaInst>(I) && I->getParent() ==
8632        &DestBlock->getParent()->getEntryBlock())
8633    return false;
8634
  // We can only sink load instructions if there is nothing between the load
  // and the end of the block that could change the value.
8637  if (I->mayReadFromMemory()) {
8638    for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
8639         Scan != E; ++Scan)
8640      if (Scan->mayWriteToMemory())
8641        return false;
8642  }
8643
8644  BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
8645
8646  CopyPrecedingStopPoint(I, InsertPos);
8647  I->moveBefore(InsertPos);
8648  ++NumSunkInst;
8649  return true;
8650}
8651
8652
8653/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
8654/// all reachable code to the worklist.
8655///
8656/// This has a couple of tricks to make the code faster and more powerful.  In
8657/// particular, we constant fold and DCE instructions as we go, to avoid adding
8658/// them to the worklist (this significantly speeds up instcombine on code where
8659/// many instructions are dead or constant).  Additionally, if we find a branch
8660/// whose condition is a known constant, we only visit the reachable successors.
8661///
8662static bool AddReachableCodeToWorklist(BasicBlock *BB,
8663                                       SmallPtrSet<BasicBlock*, 64> &Visited,
8664                                       InstCombiner &IC,
8665                                       const TargetData *TD) {
8666  bool MadeIRChange = false;
8667  SmallVector<BasicBlock*, 256> Worklist;
8668  Worklist.push_back(BB);
8669
8670  std::vector<Instruction*> InstrsForInstCombineWorklist;
8671  InstrsForInstCombineWorklist.reserve(128);
8672
8673  SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
8674
8675  while (!Worklist.empty()) {
8676    BB = Worklist.back();
8677    Worklist.pop_back();
8678
8679    // We have now visited this block!  If we've already been here, ignore it.
8680    if (!Visited.insert(BB)) continue;
8681
8682    for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
8683      Instruction *Inst = BBI++;
8684
8685      // DCE instruction if trivially dead.
8686      if (isInstructionTriviallyDead(Inst)) {
8687        ++NumDeadInst;
8688        DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
8689        Inst->eraseFromParent();
8690        continue;
8691      }
8692
8693      // ConstantProp instruction if trivially constant.
8694      if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
8695        if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
8696          DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
8697                       << *Inst << '\n');
8698          Inst->replaceAllUsesWith(C);
8699          ++NumConstProp;
8700          Inst->eraseFromParent();
8701          continue;
8702        }
8703
8704
8705
8706      if (TD) {
8707        // See if we can constant fold its operands.
8708        for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
8709             i != e; ++i) {
8710          ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
8711          if (CE == 0) continue;
8712
8713          // If we already folded this constant, don't try again.
8714          if (!FoldedConstants.insert(CE))
8715            continue;
8716
8717          Constant *NewC = ConstantFoldConstantExpression(CE, TD);
8718          if (NewC && NewC != CE) {
8719            *i = NewC;
8720            MadeIRChange = true;
8721          }
8722        }
8723      }
8724
8725
8726      InstrsForInstCombineWorklist.push_back(Inst);
8727    }
8728
8729    // Recursively visit successors.  If this is a branch or switch on a
8730    // constant, only visit the reachable successor.
8731    TerminatorInst *TI = BB->getTerminator();
8732    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
8733      if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
8734        bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
8735        BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
8736        Worklist.push_back(ReachableBB);
8737        continue;
8738      }
8739    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
8740      if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
        // See if this is an explicit destination.  ('continue' inside the
        // case loop would only continue that loop, so use a flag to skip
        // the default destination when a case matches.)
        bool FoundCase = false;
        for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
          if (SI->getCaseValue(i) == Cond) {
            Worklist.push_back(SI->getSuccessor(i));
            FoundCase = true;
            break;
          }

        // Otherwise it is the default destination.
        if (!FoundCase)
          Worklist.push_back(SI->getSuccessor(0));
        continue;
8752      }
8753    }
8754
8755    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
8756      Worklist.push_back(TI->getSuccessor(i));
8757  }
8758
8759  // Once we've found all of the instructions to add to instcombine's worklist,
8760  // add them in reverse order.  This way instcombine will visit from the top
8761  // of the function down.  This jives well with the way that it adds all uses
8762  // of instructions to the worklist after doing a transformation, thus avoiding
8763  // some N^2 behavior in pathological cases.
8764  IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
8765                              InstrsForInstCombineWorklist.size());
8766
8767  return MadeIRChange;
8768}
8769
8770bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
8771  MadeIRChange = false;
8772
8773  DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
8774        << F.getNameStr() << "\n");
8775
8776  {
8777    // Do a depth-first traversal of the function, populate the worklist with
8778    // the reachable instructions.  Ignore blocks that are not reachable.  Keep
8779    // track of which blocks we visit.
8780    SmallPtrSet<BasicBlock*, 64> Visited;
8781    MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
8782
8783    // Do a quick scan over the function.  If we find any blocks that are
8784    // unreachable, remove any instructions inside of them.  This prevents
8785    // the instcombine code from having to deal with some bad special cases.
8786    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
8787      if (!Visited.count(BB)) {
8788        Instruction *Term = BB->getTerminator();
8789        while (Term != BB->begin()) {   // Remove instrs bottom-up
8790          BasicBlock::iterator I = Term; --I;
8791
8792          DEBUG(errs() << "IC: DCE: " << *I << '\n');
8793          // A debug intrinsic shouldn't force another iteration if we weren't
8794          // going to do one without it.
8795          if (!isa<DbgInfoIntrinsic>(I)) {
8796            ++NumDeadInst;
8797            MadeIRChange = true;
8798          }
8799
          // If I is not of void type, replaceAllUsesWith undef.
          // This allows ValueHandles and custom metadata to adjust themselves.
8802          if (!I->getType()->isVoidTy())
8803            I->replaceAllUsesWith(UndefValue::get(I->getType()));
8804          I->eraseFromParent();
8805        }
8806      }
8807  }
8808
8809  while (!Worklist.isEmpty()) {
8810    Instruction *I = Worklist.RemoveOne();
8811    if (I == 0) continue;  // skip null values.
8812
8813    // Check to see if we can DCE the instruction.
8814    if (isInstructionTriviallyDead(I)) {
8815      DEBUG(errs() << "IC: DCE: " << *I << '\n');
8816      EraseInstFromFunction(*I);
8817      ++NumDeadInst;
8818      MadeIRChange = true;
8819      continue;
8820    }
8821
8822    // Instruction isn't dead, see if we can constant propagate it.
8823    if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
8824      if (Constant *C = ConstantFoldInstruction(I, TD)) {
8825        DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
8826
8827        // Add operands to the worklist.
8828        ReplaceInstUsesWith(*I, C);
8829        ++NumConstProp;
8830        EraseInstFromFunction(*I);
8831        MadeIRChange = true;
8832        continue;
8833      }
8834
8835    // See if we can trivially sink this instruction to a successor basic block.
8836    if (I->hasOneUse()) {
8837      BasicBlock *BB = I->getParent();
8838      Instruction *UserInst = cast<Instruction>(I->use_back());
8839      BasicBlock *UserParent;
8840
8841      // Get the block the use occurs in.
8842      if (PHINode *PN = dyn_cast<PHINode>(UserInst))
8843        UserParent = PN->getIncomingBlock(I->use_begin().getUse());
8844      else
8845        UserParent = UserInst->getParent();
8846
8847      if (UserParent != BB) {
8848        bool UserIsSuccessor = false;
8849        // See if the user is one of our successors.
8850        for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
8851          if (*SI == UserParent) {
8852            UserIsSuccessor = true;
8853            break;
8854          }
8855
8856        // If the user is one of our immediate successors, and if that successor
        // only has us as a predecessor (we'd have to split the critical edge
8858        // otherwise), we can keep going.
8859        if (UserIsSuccessor && UserParent->getSinglePredecessor())
8860          // Okay, the CFG is simple enough, try to sink this instruction.
8861          MadeIRChange |= TryToSinkInstruction(I, UserParent);
8862      }
8863    }
8864
8865    // Now that we have an instruction, try combining it to simplify it.
8866    Builder->SetInsertPoint(I->getParent(), I);
8867
8868#ifndef NDEBUG
8869    std::string OrigI;
8870#endif
8871    DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
8872    DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
8873
8874    if (Instruction *Result = visit(*I)) {
8875      ++NumCombined;
8876      // Should we replace the old instruction with a new one?
8877      if (Result != I) {
8878        DEBUG(errs() << "IC: Old = " << *I << '\n'
8879                     << "    New = " << *Result << '\n');
8880
8881        // Everything uses the new instruction now.
8882        I->replaceAllUsesWith(Result);
8883
8884        // Push the new instruction and any users onto the worklist.
8885        Worklist.Add(Result);
8886        Worklist.AddUsersToWorkList(*Result);
8887
8888        // Move the name to the new instruction first.
8889        Result->takeName(I);
8890
8891        // Insert the new instruction into the basic block...
8892        BasicBlock *InstParent = I->getParent();
8893        BasicBlock::iterator InsertPos = I;
8894
        if (!isa<PHINode>(Result))        // If the result isn't a PHI, don't
          while (isa<PHINode>(InsertPos)) // insert it in the middle of a
            ++InsertPos;                  // block of PHIs.
8898
8899        InstParent->getInstList().insert(InsertPos, Result);
8900
8901        EraseInstFromFunction(*I);
8902      } else {
8903#ifndef NDEBUG
8904        DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
8905                     << "    New = " << *I << '\n');
8906#endif
8907
        // If the instruction was modified, it's possible that it is now dead.
        // If so, remove it.
8910        if (isInstructionTriviallyDead(I)) {
8911          EraseInstFromFunction(*I);
8912        } else {
8913          Worklist.Add(I);
8914          Worklist.AddUsersToWorkList(*I);
8915        }
8916      }
8917      MadeIRChange = true;
8918    }
8919  }
8920
8921  Worklist.Zap();
8922  return MadeIRChange;
8923}
8924
8925
8926bool InstCombiner::runOnFunction(Function &F) {
8927  MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
8928  TD = getAnalysisIfAvailable<TargetData>();
8929
8930
  /// Builder - This is an IRBuilder that automatically adds newly created
  /// instructions to the worklist as they are created.
8933  IRBuilder<true, TargetFolder, InstCombineIRInserter>
8934    TheBuilder(F.getContext(), TargetFolder(TD),
8935               InstCombineIRInserter(Worklist));
8936  Builder = &TheBuilder;
8937
8938  bool EverMadeChange = false;
8939
8940  // Iterate while there is work to do.
8941  unsigned Iteration = 0;
8942  while (DoOneIteration(F, Iteration++))
8943    EverMadeChange = true;
8944
8945  Builder = 0;
8946  return EverMadeChange;
8947}
8948
8949FunctionPass *llvm::createInstructionCombiningPass() {
8950  return new InstCombiner();
8951}
8952