InstructionCombining.cpp revision 32c0cf5af933031f4e842bf44d08e96d76a70386
//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// InstructionCombining - Combine instructions to form fewer, simple
// instructions.  This pass does not modify the CFG.  This pass is where
// algebraic simplification happens.
//
// This pass combines things like:
//    %Y = add i32 %X, 1
//    %Z = add i32 %Y, 1
// into:
//    %Z = add i32 %X, 2
//
// This is a simple worklist driven algorithm.
//
// This pass guarantees that the following canonicalizations are performed on
// the program:
//    1. If a binary operator has a constant operand, it is moved to the RHS
//    2. Bitwise operators with constant operands are always grouped so that
//       shifts are performed first, then or's, then and's, then xor's.
//    3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
//    4. All cmp instructions on boolean values are replaced with logical ops
//    5. add X, X is represented as (X*2) => (X << 1)
//    6. Multiplies with a power-of-two constant argument are transformed into
//       shifts.
//   ... etc.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Scalar.h"
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <climits>
using namespace llvm;
using namespace llvm::PatternMatch;

STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");


char InstCombiner::ID = 0;
static RegisterPass<InstCombiner>
X("instcombine", "Combine redundant instructions");

void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addPreservedID(LCSSAID);
  AU.setPreservesCFG();
}


// isOnlyUse - Return true if this instruction will be deleted if we stop using
// it.
static bool isOnlyUse(Value *V) {
  return V->hasOneUse() || isa<Constant>(V);
}

// getPromotedType - Return the specified type promoted as it would be to pass
// through a va_arg area...
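// For example, i8 and i16 are promoted to i32 here, while i32 and wider
// integer types (and non-integer types) are returned unchanged.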
static const Type *getPromotedType(const Type *Ty) {
  if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}

/// ShouldChangeType - Return true if it is desirable to convert a computation
/// from 'From' to 'To'.  We don't want to convert from a legal to an illegal
/// type for example, or from a smaller to a larger illegal type.
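///
/// As an illustrative sketch (assuming a target whose only legal integer
/// types are i32 and i64): i160 -> i64 is desirable, since an illegal type
/// shrinks to a legal one, while i64 -> i160 and i160 -> i256 are not.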
bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
  assert(isa<IntegerType>(From) && isa<IntegerType>(To));

  // If we don't have TD, we don't know if the source/dest are legal.
  if (!TD) return false;

  unsigned FromWidth = From->getPrimitiveSizeInBits();
  unsigned ToWidth = To->getPrimitiveSizeInBits();
  bool FromLegal = TD->isLegalInteger(FromWidth);
  bool ToLegal = TD->isLegalInteger(ToWidth);

  // If this is a legal integer from type, and the result would be an illegal
  // type, don't do the transformation.
  if (FromLegal && !ToLegal)
    return false;

  // Otherwise, if both are illegal, do not increase the size of the result. We
  // do allow things like i160 -> i64, but not i64 -> i160.
  if (!FromLegal && !ToLegal && ToWidth > FromWidth)
    return false;

  return true;
}

/// getBitCastOperand - If the specified operand is a CastInst, a constant
/// expression bitcast, or a GetElementPtrInst with all zero indices, return the
/// operand value, otherwise return null.
static Value *getBitCastOperand(Value *V) {
  if (Operator *O = dyn_cast<Operator>(V)) {
    if (O->getOpcode() == Instruction::BitCast)
      return O->getOperand(0);
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
      if (GEP->hasAllZeroIndices())
        return GEP->getPointerOperand();
  }
  return 0;
}



// SimplifyCommutative - This performs a few simplifications for commutative
// operators:
//
//  1. Order operands such that they are listed from right (least complex) to
//     left (most complex).  This puts constants before unary operators before
//     binary operators.
//
//  2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2))
//  3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
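//
//  For example (an illustrative sketch with hypothetical values), rule 2
//  rewrites:
//      %T = add i32 %X, 10
//      %R = add i32 %T, 20
//  into the equivalent:
//      %R = add i32 %X, 30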
//
bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
  bool Changed = false;
  if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1)))
    Changed = !I.swapOperands();

  if (!I.isAssociative()) return Changed;
  Instruction::BinaryOps Opcode = I.getOpcode();
  if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
    if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
      if (isa<Constant>(I.getOperand(1))) {
        Constant *Folded = ConstantExpr::get(I.getOpcode(),
                                             cast<Constant>(I.getOperand(1)),
                                             cast<Constant>(Op->getOperand(1)));
        I.setOperand(0, Op->getOperand(0));
        I.setOperand(1, Folded);
        return true;
      } else if (BinaryOperator *Op1=dyn_cast<BinaryOperator>(I.getOperand(1)))
        if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) &&
            isOnlyUse(Op) && isOnlyUse(Op1)) {
          Constant *C1 = cast<Constant>(Op->getOperand(1));
          Constant *C2 = cast<Constant>(Op1->getOperand(1));

          // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
          Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
          Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
                                                    Op1->getOperand(0),
                                                    Op1->getName(), &I);
          Worklist.Add(New);
          I.setOperand(0, New);
          I.setOperand(1, Folded);
          return true;
        }
    }
  return Changed;
}

// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
// if the LHS is a constant zero (which is the 'negate' form).
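// For example (illustrative), given %N = sub i32 0, %X this returns %X, and
// given the constant i32 5 it returns the folded constant i32 -5.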
//
Value *InstCombiner::dyn_castNegVal(Value *V) const {
  if (BinaryOperator::isNeg(V))
    return BinaryOperator::getNegArgument(V);

  // Constants can be considered to be negated values if they can be folded.
  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
    return ConstantExpr::getNeg(C);

  if (ConstantVector *C = dyn_cast<ConstantVector>(V))
    if (C->getType()->getElementType()->isInteger())
      return ConstantExpr::getNeg(C);

  return 0;
}

// dyn_castFNegVal - Given an 'fsub' instruction, return the RHS of the
// instruction if the LHS is a constant negative zero (which is the 'negate'
// form).
//
Value *InstCombiner::dyn_castFNegVal(Value *V) const {
  if (BinaryOperator::isFNeg(V))
    return BinaryOperator::getFNegArgument(V);

  // Constants can be considered to be negated values if they can be folded.
  if (ConstantFP *C = dyn_cast<ConstantFP>(V))
    return ConstantExpr::getFNeg(C);

  if (ConstantVector *C = dyn_cast<ConstantVector>(V))
    if (C->getType()->getElementType()->isFloatingPoint())
      return ConstantExpr::getFNeg(C);

  return 0;
}

/// isFreeToInvert - Return true if the specified value is free to invert (apply
/// ~ to).  This happens in cases where the ~ can be eliminated.
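/// For example (illustrative), ~(~X) is free since the outer ~ just yields X,
/// and a single-use icmp is free since its predicate can be inverted in place.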
static inline bool isFreeToInvert(Value *V) {
  // ~(~(X)) -> X.
  if (BinaryOperator::isNot(V))
    return true;

  // Constants can be considered to be not'ed values.
  if (isa<ConstantInt>(V))
    return true;

  // Compares can be inverted if they have a single use.
  if (CmpInst *CI = dyn_cast<CmpInst>(V))
    return CI->hasOneUse();

  return false;
}

static inline Value *dyn_castNotVal(Value *V) {
  // If this is not(not(x)) don't return that this is a not: we want the two
  // not's to be folded first.
  if (BinaryOperator::isNot(V)) {
    Value *Operand = BinaryOperator::getNotArgument(V);
    if (!isFreeToInvert(Operand))
      return Operand;
  }

  // Constants can be considered to be not'ed values...
  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
    return ConstantInt::get(C->getType(), ~C->getValue());
  return 0;
}



// dyn_castFoldableMul - If this value is a multiply that can be folded into
// other computations (because it has a constant operand), return the
// non-constant operand of the multiply, and set CST to point to the multiplier.
// Otherwise, return null.
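// For example (illustrative), for %V = shl i32 %X, 3 this sets CST to 8 and
// returns %X, since shifting left by 3 multiplies by 1 << 3.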
//
static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
  if (V->hasOneUse() && V->getType()->isInteger())
    if (Instruction *I = dyn_cast<Instruction>(V)) {
      if (I->getOpcode() == Instruction::Mul)
        if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
          return I->getOperand(0);
      if (I->getOpcode() == Instruction::Shl)
        if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) {
          // The multiplier is really 1 << CST.
          uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
          uint32_t CSTVal = CST->getLimitedValue(BitWidth);
          CST = ConstantInt::get(V->getType()->getContext(),
                                 APInt(BitWidth, 1).shl(CSTVal));
          return I->getOperand(0);
        }
    }
  return 0;
}

/// AddOne - Add one to a ConstantInt.
static Constant *AddOne(Constant *C) {
  return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
}
/// SubOne - Subtract one from a ConstantInt.
static Constant *SubOne(ConstantInt *C) {
  return ConstantInt::get(C->getContext(), C->getValue()-1);
}


/// AssociativeOpt - Perform an optimization on an associative operator.  This
/// function is designed to check a chain of associative operators for a
/// potential to apply a certain optimization.  Since the optimization may be
/// applicable if the expression was reassociated, this checks the chain, then
/// reassociates the expression as necessary to expose the optimization
/// opportunity.  This makes use of a special Functor, which must define
/// 'shouldApply' and 'apply' methods.
///
template<typename Functor>
static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
  // Quick check, see if the immediate LHS matches...
  if (F.shouldApply(Root.getOperand(0)))
    return F.apply(Root);

  return 0;
}

static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
                                             InstCombiner *IC) {
  if (CastInst *CI = dyn_cast<CastInst>(&I))
    return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());

  // Figure out if the constant is the left or the right argument.
  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));

  if (Constant *SOC = dyn_cast<Constant>(SO)) {
    if (ConstIsRHS)
      return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
    return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
  }

  Value *Op0 = SO, *Op1 = ConstOperand;
  if (!ConstIsRHS)
    std::swap(Op0, Op1);

  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
    return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
                                    SO->getName()+".op");
  if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
    return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
                                   SO->getName()+".cmp");
  if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
    return IC->Builder->CreateFCmp(CI->getPredicate(), Op0, Op1,
                                   SO->getName()+".cmp");
  llvm_unreachable("Unknown binary instruction type!");
}

// FoldOpIntoSelect - Given an instruction with a select as one operand and a
// constant as the other operand, try to fold the binary operator into the
// select arguments.  This also works for Cast instructions, which obviously do
// not have a second operand.
Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
  // Don't modify shared select instructions
  if (!SI->hasOneUse()) return 0;
  Value *TV = SI->getOperand(1);
  Value *FV = SI->getOperand(2);

  if (isa<Constant>(TV) || isa<Constant>(FV)) {
    // Bool selects with constant operands can be folded to logical ops.
    if (SI->getType() == Type::getInt1Ty(SI->getContext())) return 0;

    Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
    Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);

    return SelectInst::Create(SI->getCondition(), SelectTrueVal,
                              SelectFalseVal);
  }
  return 0;
}


/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
/// has a PHI node as operand #0, see if we can fold the instruction into the
/// PHI (which is only possible if all operands to the PHI are constants).
///
/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
/// that would normally be unprofitable because they strongly encourage jump
/// threading.
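///
/// For example (an illustrative sketch), if %p = phi i32 [ 1, %BB0 ], [ 2, %BB1 ]
/// feeds %a = add i32 %p, 10, the add folds away into
/// %a = phi i32 [ 11, %BB0 ], [ 12, %BB1 ].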
Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
                                         bool AllowAggressive) {
  AllowAggressive = false;
  PHINode *PN = cast<PHINode>(I.getOperand(0));
  unsigned NumPHIValues = PN->getNumIncomingValues();
  if (NumPHIValues == 0 ||
      // We normally only transform phis with a single use, unless we're trying
      // hard to make jump threading happen.
      (!PN->hasOneUse() && !AllowAggressive))
    return 0;


  // Check to see if all of the operands of the PHI are simple constants
  // (constantint/constantfp/undef).  If there is one non-constant value,
  // remember the BB it is in.  If there is more than one or if *it* is a PHI,
  // bail out.  We don't do arbitrary constant expressions here because moving
  // their computation can be expensive without a cost model.
  BasicBlock *NonConstBB = 0;
  for (unsigned i = 0; i != NumPHIValues; ++i)
    if (!isa<Constant>(PN->getIncomingValue(i)) ||
        isa<ConstantExpr>(PN->getIncomingValue(i))) {
      if (NonConstBB) return 0;  // More than one non-const value.
      if (isa<PHINode>(PN->getIncomingValue(i))) return 0;  // Itself a phi.
      NonConstBB = PN->getIncomingBlock(i);

      // If the incoming non-constant value is in I's block, we would create an
      // infinite loop.
      if (NonConstBB == I.getParent())
        return 0;
    }

  // If there is exactly one non-constant value, we can insert a copy of the
  // operation in that block.  However, if this is a critical edge, we would be
  // inserting the computation on some other paths (e.g. inside a loop).  Only
  // do this if the pred block is unconditionally branching into the phi block.
  if (NonConstBB != 0 && !AllowAggressive) {
    BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
    if (!BI || !BI->isUnconditional()) return 0;
  }

  // Okay, we can do the transformation: create the new PHI node.
  PHINode *NewPN = PHINode::Create(I.getType(), "");
  NewPN->reserveOperandSpace(PN->getNumOperands()/2);
  InsertNewInstBefore(NewPN, *PN);
  NewPN->takeName(PN);

  // Next, add all of the operands to the PHI.
  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
    // We only currently try to fold the condition of a select when it is a phi,
    // not the true/false values.
    Value *TrueV = SI->getTrueValue();
    Value *FalseV = SI->getFalseValue();
    BasicBlock *PhiTransBB = PN->getParent();
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      BasicBlock *ThisBB = PN->getIncomingBlock(i);
      Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
      Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
      Value *InV = 0;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
                                 FalseVInPred,
                                 "phitmp", NonConstBB->getTerminator());
        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, ThisBB);
    }
  } else if (I.getNumOperands() == 2) {
    Constant *C = cast<Constant>(I.getOperand(1));
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV = 0;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        if (CmpInst *CI = dyn_cast<CmpInst>(&I))
          InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
        else
          InV = ConstantExpr::get(I.getOpcode(), InC, C);
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
          InV = BinaryOperator::Create(BO->getOpcode(),
                                       PN->getIncomingValue(i), C, "phitmp",
                                       NonConstBB->getTerminator());
        else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
          InV = CmpInst::Create(CI->getOpcode(),
                                CI->getPredicate(),
                                PN->getIncomingValue(i), C, "phitmp",
                                NonConstBB->getTerminator());
        else
          llvm_unreachable("Unknown binop!");

        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  } else {
    CastInst *CI = cast<CastInst>(&I);
    const Type *RetTy = CI->getType();
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
                               I.getType(), "phitmp",
                               NonConstBB->getTerminator());
        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  }
  return ReplaceInstUsesWith(I, NewPN);
}


/// WillNotOverflowSignedAdd - Return true if we can prove that:
///    (sext (add LHS, RHS))  === (add (sext LHS), (sext RHS))
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
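///
/// For example (illustrative), if LHS and RHS are both i32 values produced by
/// sign-extending an i8, each has at least 25 sign bits, so their sum can
/// neither change the sign bit nor carry out.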
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
  // There are different heuristics we can use for this.  Here are some simple
  // ones.

  // Add has the property that adding any two 2's complement numbers can only
  // have one carry bit which can change a sign.  As such, if LHS and RHS each
  // have at least two sign bits, we know that the addition of the two values
  // will sign extend fine.
  if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
    return true;


  // If one of the operands only has one non-zero bit, and if the other operand
  // has a known-zero bit in a more significant place than it (not including the
  // sign bit) the ripple may go up to and fill the zero, but won't change the
  // sign.  For example, (X & ~4) + 1.

  // TODO: Implement.

  return false;
}


Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
  bool Changed = SimplifyCommutative(I);
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
                                 I.hasNoUnsignedWrap(), TD))
    return ReplaceInstUsesWith(I, V);


  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
    if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
      // X + (signbit) --> X ^ signbit
      const APInt& Val = CI->getValue();
      uint32_t BitWidth = Val.getBitWidth();
      if (Val == APInt::getSignBit(BitWidth))
        return BinaryOperator::CreateXor(LHS, RHS);

      // See if SimplifyDemandedBits can simplify this.  This handles stuff like
      // (X & 254)+1 -> (X&254)|1
      if (SimplifyDemandedInstructionBits(I))
        return &I;

      // zext(bool) + C -> bool ? C + 1 : C
      if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
        if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
          return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
    }

    if (isa<PHINode>(LHS))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;

    ConstantInt *XorRHS = 0;
    Value *XorLHS = 0;
    if (isa<ConstantInt>(RHSC) &&
        match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
      uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
      const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();

      uint32_t Size = TySizeBits / 2;
      APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1));
      APInt CFF80Val(-C0080Val);
      do {
        if (TySizeBits > Size) {
          // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
          // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
          if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) ||
              (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) {
            // This is a sign extend if the top bits are known zero.
            if (!MaskedValueIsZero(XorLHS,
                   APInt::getHighBitsSet(TySizeBits, TySizeBits - Size)))
              Size = 0;  // Not a sign ext, but can't be any others either.
            break;
          }
        }
        Size >>= 1;
        C0080Val = APIntOps::lshr(C0080Val, Size);
        CFF80Val = APIntOps::ashr(CFF80Val, Size);
      } while (Size >= 1);

      // FIXME: This shouldn't be necessary. When the backends can handle types
      // with funny bit widths then this switch statement should be removed. It
      // is just here to get the size of the "middle" type back up to something
      // that the back ends can handle.
      const Type *MiddleType = 0;
      switch (Size) {
        default: break;
        case 32:
        case 16:
        case  8: MiddleType = IntegerType::get(I.getContext(), Size); break;
      }
      if (MiddleType) {
        Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext");
        return new SExtInst(NewTrunc, I.getType(), I.getName());
      }
    }
  }

  if (I.getType() == Type::getInt1Ty(I.getContext()))
    return BinaryOperator::CreateXor(LHS, RHS);

  if (I.getType()->isInteger()) {
    // X + X --> X << 1
    if (LHS == RHS)
      return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));

    if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
      if (RHSI->getOpcode() == Instruction::Sub)
        if (LHS == RHSI->getOperand(1))                   // A + (B - A) --> B
          return ReplaceInstUsesWith(I, RHSI->getOperand(0));
    }
    if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
      if (LHSI->getOpcode() == Instruction::Sub)
        if (RHS == LHSI->getOperand(1))                   // (B - A) + A --> B
          return ReplaceInstUsesWith(I, LHSI->getOperand(0));
    }
  }

  // -A + B  -->  B - A
  // -A + -B  -->  -(A + B)
  if (Value *LHSV = dyn_castNegVal(LHS)) {
    if (LHS->getType()->isIntOrIntVector()) {
      if (Value *RHSV = dyn_castNegVal(RHS)) {
        Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
        return BinaryOperator::CreateNeg(NewAdd);
      }
    }

    return BinaryOperator::CreateSub(RHS, LHSV);
  }

  // A + -B  -->  A - B
  if (!isa<Constant>(RHS))
    if (Value *V = dyn_castNegVal(RHS))
      return BinaryOperator::CreateSub(LHS, V);


  ConstantInt *C2;
  if (Value *X = dyn_castFoldableMul(LHS, C2)) {
    if (X == RHS)   // X*C + X --> X * (C+1)
      return BinaryOperator::CreateMul(RHS, AddOne(C2));

    // X*C1 + X*C2 --> X * (C1+C2)
    ConstantInt *C1;
    if (X == dyn_castFoldableMul(RHS, C1))
      return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
  }

  // X + X*C --> X * (C+1)
  if (dyn_castFoldableMul(RHS, C2) == LHS)
    return BinaryOperator::CreateMul(LHS, AddOne(C2));

  // X + ~X --> -1   since   ~X = -X-1
  if (dyn_castNotVal(LHS) == RHS ||
      dyn_castNotVal(RHS) == LHS)
    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));


  // A+B --> A|B iff A and B have no bits set in common.
  if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
    APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
    APInt LHSKnownOne(IT->getBitWidth(), 0);
    APInt LHSKnownZero(IT->getBitWidth(), 0);
    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
    if (LHSKnownZero != 0) {
      APInt RHSKnownOne(IT->getBitWidth(), 0);
      APInt RHSKnownZero(IT->getBitWidth(), 0);
      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);

      // No bits in common -> bitwise or.
      if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
        return BinaryOperator::CreateOr(LHS, RHS);
    }
  }

  // W*X + Y*Z --> W * (X+Z)  iff W == Y
  if (I.getType()->isIntOrIntVector()) {
    Value *W, *X, *Y, *Z;
    if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
        match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
      if (W != Y) {
        if (W == Z) {
          std::swap(Y, Z);
        } else if (Y == X) {
          std::swap(W, X);
        } else if (X == Z) {
          std::swap(Y, Z);
          std::swap(W, X);
        }
      }

      if (W == Y) {
        Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
        return BinaryOperator::CreateMul(W, NewAdd);
      }
    }
  }

  if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
    Value *X = 0;
    if (match(LHS, m_Not(m_Value(X))))    // ~X + C --> (C-1) - X
      return BinaryOperator::CreateSub(SubOne(CRHS), X);

    // (X & FF00) + xx00  -> (X+xx00) & FF00
    if (LHS->hasOneUse() &&
        match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
      Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
      if (Anded == CRHS) {
        // See if all bits from the first bit set in the Add RHS up are included
        // in the mask.  First, get the rightmost bit.
        const APInt &AddRHSV = CRHS->getValue();

        // Form a mask of all bits from the lowest bit added through the top.
        APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));

        // See if the and mask includes all of these bits.
        APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());

        if (AddRHSHighBits == AddRHSHighBitsAnd) {
          // Okay, the xform is safe.  Insert the new add pronto.
          Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
          return BinaryOperator::CreateAnd(NewAdd, C2);
        }
      }
    }

    // Try to fold constant add into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;
  }

  // add (select X 0 (sub n A)) A  -->  select X A n
  {
    SelectInst *SI = dyn_cast<SelectInst>(LHS);
    Value *A = RHS;
    if (!SI) {
      SI = dyn_cast<SelectInst>(RHS);
      A = LHS;
    }
    if (SI && SI->hasOneUse()) {
      Value *TV = SI->getTrueValue();
      Value *FV = SI->getFalseValue();
      Value *N;

      // Can we fold the add into the argument of the select?
      // We check both true and false select arguments for a matching subtract.
      if (match(FV, m_Zero()) &&
          match(TV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the true select value.
        return SelectInst::Create(SI->getCondition(), N, A);
      if (match(TV, m_Zero()) &&
          match(FV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the false select value.
        return SelectInst::Create(SI->getCondition(), A, N);
    }
  }

  // Check for (add (sext x), y), see if we can merge this into an
  // integer add followed by a sext.
  if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
    // (add (sext x), cst) --> (sext (add x, cst'))
    if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
      Constant *CI =
        ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
      if (LHSConv->hasOneUse() &&
          ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
        // Insert the new, smaller add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              CI, "addconv");
        return new SExtInst(NewAdd, I.getType());
      }
    }

    // (add (sext x), (sext y)) --> (sext (add int x, y))
    if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
      // Only do this if x/y have the same type, if at least one of them has a
      // single use (so we don't increase the number of sexts), and if the
      // integer add will not overflow.
      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                   RHSConv->getOperand(0))) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              RHSConv->getOperand(0), "addconv");
        return new SExtInst(NewAdd, I.getType());
      }
    }
  }

  return Changed ? &I : 0;
}

Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
  bool Changed = SimplifyCommutative(I);
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
    // X + -0.0 --> X
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
      if (CFP->isExactlyValue(ConstantFP::getNegativeZero
                              (I.getType())->getValueAPF()))
        return ReplaceInstUsesWith(I, LHS);
    }

    if (isa<PHINode>(LHS))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;
  }

  // -A + B  -->  B - A
  // -A + -B  -->  -(A + B)
  if (Value *LHSV = dyn_castFNegVal(LHS))
    return BinaryOperator::CreateFSub(RHS, LHSV);

  // A + -B  -->  A - B
  if (!isa<Constant>(RHS))
    if (Value *V = dyn_castFNegVal(RHS))
      return BinaryOperator::CreateFSub(LHS, V);

  // Check for X+0.0.  Simplify it to X if we know X is not -0.0.
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
      return ReplaceInstUsesWith(I, LHS);

  // Check for (add double (sitofp x), y), see if we can merge this into an
  // integer add followed by a promotion.
  if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
    // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
    // ... if the constant fits in the integer value.  This is useful for things
    // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
    // requires a constant pool load, and generally allows the add to be better
    // instcombined.
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
      Constant *CI =
      ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
      if (LHSConv->hasOneUse() &&
          ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              CI, "addconv");
        return new SIToFPInst(NewAdd, I.getType());
      }
    }

    // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
    if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
      // Only do this if x/y have the same type, if at least one of them has a
      // single use (so we don't increase the number of int->fp conversions),
      // and if the integer add will not overflow.
      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                   RHSConv->getOperand(0))) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              RHSConv->getOperand(0),"addconv");
        return new SIToFPInst(NewAdd, I.getType());
      }
    }
  }

  return Changed ? &I : 0;
}


/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
/// code necessary to compute the offset from the base pointer (without adding
/// in the base pointer).  Return the result as a signed integer of intptr size.
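///
/// For example (an illustrative sketch, assuming a 64-bit target), for
///    getelementptr [10 x i32]* %A, i64 0, i64 %i
/// this emits essentially (mul i64 %i, 4), the byte offset from %A.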
Value *InstCombiner::EmitGEPOffset(User *GEP) {
  TargetData &TD = *getTargetData();
  gep_type_iterator GTI = gep_type_begin(GEP);
  const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
  Value *Result = Constant::getNullValue(IntPtrTy);

  // Build a mask for high order bits.
  unsigned IntPtrWidth = TD.getPointerSizeInBits();
  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);

  for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
       ++i, ++GTI) {
    Value *Op = *i;
    uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
    if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
      if (OpC->isZero()) continue;

      // Handle a struct index, which adds its field offset to the pointer.
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());

        Result = Builder->CreateAdd(Result,
                                    ConstantInt::get(IntPtrTy, Size),
                                    GEP->getName()+".offs");
        continue;
      }

      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
      Constant *OC =
              ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
      Scale = ConstantExpr::getMul(OC, Scale);
      // Emit an add instruction.
      Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
      continue;
    }
    // Convert to correct type.
    if (Op->getType() != IntPtrTy)
      Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
    if (Size != 1) {
      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
      // We'll let instcombine(mul) convert this to a shl if possible.
      Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
    }

    // Emit an add instruction.
    Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
  }
  return Result;
}




/// Optimize pointer differences within the same array into a size.  Consider:
///  &A[10] - &A[0]: we should compile this to "10".  LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
///
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
                                               const Type *Ty) {
  assert(TD && "Must have target data info for this");

  // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
  // this.
  bool Swapped = false;
  GetElementPtrInst *GEP = 0;
  ConstantExpr *CstGEP = 0;

  // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo".
  // For now we require one side to be the base pointer "A" or a constant
  // expression derived from it.
  if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) {
    // (gep X, ...) - X
    if (LHSGEP->getOperand(0) == RHS) {
      GEP = LHSGEP;
      Swapped = false;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) {
      // (gep X, ...) - (ce_gep X, ...)
      if (CE->getOpcode() == Instruction::GetElementPtr &&
          LHSGEP->getOperand(0) == CE->getOperand(0)) {
        CstGEP = CE;
        GEP = LHSGEP;
        Swapped = false;
      }
    }
  }

  if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) {
    // X - (gep X, ...)
    if (RHSGEP->getOperand(0) == LHS) {
      GEP = RHSGEP;
      Swapped = true;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) {
      // (ce_gep X, ...) - (gep X, ...)
      if (CE->getOpcode() == Instruction::GetElementPtr &&
          RHSGEP->getOperand(0) == CE->getOperand(0)) {
        CstGEP = CE;
        GEP = RHSGEP;
        Swapped = true;
      }
    }
  }

  if (GEP == 0)
    return 0;

  // Emit the offset of the GEP as an intptr_t.
  Value *Result = EmitGEPOffset(GEP);

  // If we had a constant expression GEP on the other side offsetting the
  // pointer, subtract it from the offset we have.
  if (CstGEP) {
    Value *CstOffset = EmitGEPOffset(CstGEP);
    Result = Builder->CreateSub(Result, CstOffset);
  }


  // If we have p - gep(p, ...)  then we have to negate the result.
  if (Swapped)
    Result = Builder->CreateNeg(Result, "diff.neg");

  return Builder->CreateIntCast(Result, Ty, true);
}


Instruction *InstCombiner::visitSub(BinaryOperator &I) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  if (Op0 == Op1)                        // sub X, X  -> 0
    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));

  // If this is a 'B = x-(-A)', change to B = x+A.  This preserves NSW/NUW.
  if (Value *V = dyn_castNegVal(Op1)) {
    BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
    Res->setHasNoSignedWrap(I.hasNoSignedWrap());
    Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
    return Res;
  }

  if (isa<UndefValue>(Op0))
    return ReplaceInstUsesWith(I, Op0);    // undef - X -> undef
  if (isa<UndefValue>(Op1))
    return ReplaceInstUsesWith(I, Op1);    // X - undef -> undef
  if (I.getType() == Type::getInt1Ty(I.getContext()))
    return BinaryOperator::CreateXor(Op0, Op1);

  if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
    // Replace (-1 - A) with (~A).
    if (C->isAllOnesValue())
      return BinaryOperator::CreateNot(Op1);

    // C - ~X == X + (1+C)
    Value *X = 0;
    if (match(Op1, m_Not(m_Value(X))))
      return BinaryOperator::CreateAdd(X, AddOne(C));

    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (C->isZero()) {
      if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
        if (SI->getOpcode() == Instruction::LShr) {
          if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
            // Check to see if we are shifting out everything but the sign bit.
            if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
                SI->getType()->getPrimitiveSizeInBits()-1) {
              // Ok, the transformation is safe.  Insert AShr.
              return BinaryOperator::Create(Instruction::AShr,
                                          SI->getOperand(0), CU, SI->getName());
            }
          }
        } else if (SI->getOpcode() == Instruction::AShr) {
          if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
            // Check to see if we are shifting out everything but the sign bit.
            if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
                SI->getType()->getPrimitiveSizeInBits()-1) {
              // Ok, the transformation is safe.  Insert LShr.
              return BinaryOperator::CreateLShr(
                                          SI->getOperand(0), CU, SI->getName());
            }
          }
        }
      }
    }

    // Try to fold constant sub into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

    // C - zext(bool) -> bool ? C - 1 : C
    if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
      if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
        return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
  }

  if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
    if (Op1I->getOpcode() == Instruction::Add) {
      if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
        return BinaryOperator::CreateNeg(Op1I->getOperand(1),
                                         I.getName());
      else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
        return BinaryOperator::CreateNeg(Op1I->getOperand(0),
                                         I.getName());
      else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
        if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
          // C1-(X+C2) --> (C1-C2)-X
          return BinaryOperator::CreateSub(
            ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));
      }
    }

    if (Op1I->hasOneUse()) {
      // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
      // is not used by anyone else...
      //
      if (Op1I->getOpcode() == Instruction::Sub) {
        // Swap the two operands of the subexpr...
        Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
        Op1I->setOperand(0, IIOp1);
        Op1I->setOperand(1, IIOp0);

        // Create the new top level add instruction...
        return BinaryOperator::CreateAdd(Op0, Op1);
      }

      // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
      //
      if (Op1I->getOpcode() == Instruction::And &&
          (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
        Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);

        Value *NewNot = Builder->CreateNot(OtherOp, "B.not");
        return BinaryOperator::CreateAnd(Op0, NewNot);
      }

      // 0 - (X sdiv C)  -> (X sdiv -C)
      if (Op1I->getOpcode() == Instruction::SDiv)
        if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
          if (CSI->isZero())
            if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
              return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
                                          ConstantExpr::getNeg(DivRHS));

      // X - X*C --> X * (1-C)
      ConstantInt *C2 = 0;
      if (dyn_castFoldableMul(Op1I, C2) == Op0) {
        Constant *CP1 =
          ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
                                             C2);
        return BinaryOperator::CreateMul(Op0, CP1);
      }
    }
  }

  if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
    if (Op0I->getOpcode() == Instruction::Add) {
      if (Op0I->getOperand(0) == Op1)             // (Y+X)-Y == X
        return ReplaceInstUsesWith(I, Op0I->getOperand(1));
      else if (Op0I->getOperand(1) == Op1)        // (X+Y)-Y == X
        return ReplaceInstUsesWith(I, Op0I->getOperand(0));
    } else if (Op0I->getOpcode() == Instruction::Sub) {
      if (Op0I->getOperand(0) == Op1)             // (X-Y)-X == -Y
        return BinaryOperator::CreateNeg(Op0I->getOperand(1),
                                         I.getName());
    }
  }

  ConstantInt *C1;
  if (Value *X = dyn_castFoldableMul(Op0, C1)) {
    if (X == Op1)  // X*C - X --> X * (C-1)
      return BinaryOperator::CreateMul(Op1, SubOne(C1));

    ConstantInt *C2;   // X*C1 - X*C2 -> X * (C1-C2)
    if (X == dyn_castFoldableMul(Op1, C2))
      return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
  }
  // Optimize pointer differences within the same array into a size.  Consider:
  //  &A[10] - &A[0]: we should compile this to "10".
  if (TD) {
    Value *LHSOp, *RHSOp;
    if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
        match(Op1, m_PtrToInt(m_Value(RHSOp))))
      if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
        return ReplaceInstUsesWith(I, Res);

    // trunc(p)-trunc(q) -> trunc(p-q)
    if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
        match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
      if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
        return ReplaceInstUsesWith(I, Res);
  }

  return 0;
}

Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  // If this is a 'B = x-(-A)', change to B = x+A...
  if (Value *V = dyn_castFNegVal(Op1))
    return BinaryOperator::CreateFAdd(Op0, V);

  if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
    if (Op1I->getOpcode() == Instruction::FAdd) {
      if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
        return BinaryOperator::CreateFNeg(Op1I->getOperand(1),
                                          I.getName());
      else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
        return BinaryOperator::CreateFNeg(Op1I->getOperand(0),
                                          I.getName());
    }
  }

  return 0;
}

/// getICmpCode - Encode an icmp predicate into a three-bit mask.  These bits
/// are carefully arranged to allow folding of expressions such as:
///
///      (A < B) | (A > B) --> (A != B)
///
/// Note that this is only valid if the first and second predicates have the
/// same sign. It is illegal to do: (A u< B) | (A s> B)
///
/// Three bits are used to represent the condition, as follows:
///   0  A > B
///   1  A == B
///   2  A < B
///
/// <=>  Value  Definition
/// 000     0   Always false
/// 001     1   A >  B
/// 010     2   A == B
/// 011     3   A >= B
/// 100     4   A <  B
/// 101     5   A != B
/// 110     6   A <= B
/// 111     7   Always true
///
static unsigned getICmpCode(const ICmpInst *ICI) {
  switch (ICI->getPredicate()) {
    // False -> 0
  case ICmpInst::ICMP_UGT: return 1;  // 001
  case ICmpInst::ICMP_SGT: return 1;  // 001
  case ICmpInst::ICMP_EQ:  return 2;  // 010
  case ICmpInst::ICMP_UGE: return 3;  // 011
  case ICmpInst::ICMP_SGE: return 3;  // 011
  case ICmpInst::ICMP_ULT: return 4;  // 100
  case ICmpInst::ICMP_SLT: return 4;  // 100
  case ICmpInst::ICMP_NE:  return 5;  // 101
  case ICmpInst::ICMP_ULE: return 6;  // 110
  case ICmpInst::ICMP_SLE: return 6;  // 110
    // True -> 7
  default:
    llvm_unreachable("Invalid ICmp predicate!");
    return 0;
  }
}

/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes an fcmp
/// predicate into a three-bit mask. It also returns whether it is an ordered
/// predicate by reference.
static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
  isOrdered = false;
  switch (CC) {
  case FCmpInst::FCMP_ORD: isOrdered = true; return 0;  // 000
  case FCmpInst::FCMP_UNO:                   return 0;  // 000
  case FCmpInst::FCMP_OGT: isOrdered = true; return 1;  // 001
  case FCmpInst::FCMP_UGT:                   return 1;  // 001
  case FCmpInst::FCMP_OEQ: isOrdered = true; return 2;  // 010
  case FCmpInst::FCMP_UEQ:                   return 2;  // 010
  case FCmpInst::FCMP_OGE: isOrdered = true; return 3;  // 011
  case FCmpInst::FCMP_UGE:                   return 3;  // 011
  case FCmpInst::FCMP_OLT: isOrdered = true; return 4;  // 100
  case FCmpInst::FCMP_ULT:                   return 4;  // 100
  case FCmpInst::FCMP_ONE: isOrdered = true; return 5;  // 101
  case FCmpInst::FCMP_UNE:                   return 5;  // 101
  case FCmpInst::FCMP_OLE: isOrdered = true; return 6;  // 110
  case FCmpInst::FCMP_ULE:                   return 6;  // 110
    // True -> 7
  default:
    // Not expecting FCMP_FALSE or FCMP_TRUE here.
    llvm_unreachable("Unexpected FCmp predicate!");
    return 0;
  }
}

/// getICmpValue - This is the complement of getICmpCode, which turns an
/// opcode and two operands into either a constant true or false, or a brand
/// new ICmp instruction. The sign is passed in to determine which kind
/// of predicate to use in the new icmp instruction.
static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) {
  switch (code) {
  default: llvm_unreachable("Illegal ICmp code!");
  case  0: return ConstantInt::getFalse(LHS->getContext());
  case  1:
    if (sign)
      return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
    else
      return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS);
  case  2: return new ICmpInst(ICmpInst::ICMP_EQ,  LHS, RHS);
  case  3:
    if (sign)
      return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS);
    else
      return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS);
  case  4:
    if (sign)
      return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS);
    else
      return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS);
  case  5: return new ICmpInst(ICmpInst::ICMP_NE,  LHS, RHS);
  case  6:
    if (sign)
      return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
    else
      return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
  case  7: return ConstantInt::getTrue(LHS->getContext());
  }
}

/// getFCmpValue - This is the complement of getFCmpCode, which turns an
/// opcode and two operands into either a constant or a new FCmp instruction.
/// isordered is passed in to determine which kind of predicate to use in the
/// new fcmp instruction.
static Value *getFCmpValue(bool isordered, unsigned code,
                           Value *LHS, Value *RHS) {
  switch (code) {
  default: llvm_unreachable("Illegal FCmp code!");
  case  0:
    if (isordered)
      return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS);
    else
      return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS);
  case  1:
    if (isordered)
      return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS);
    else
      return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS);
  case  2:
    if (isordered)
      return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS);
    else
      return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS);
  case  3:
    if (isordered)
      return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS);
    else
      return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS);
  case  4:
    if (isordered)
      return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS);
    else
      return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS);
  case  5:
    if (isordered)
      return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS);
    else
      return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS);
  case  6:
    if (isordered)
      return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS);
    else
      return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS);
  case  7: return ConstantInt::getTrue(LHS->getContext());
  }
}

/// PredicatesFoldable - Return true if both predicates match sign or if at
/// least one of them is an equality comparison (which is signless).
static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
  return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
         (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
         (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
}

namespace {
// FoldICmpLogical - Implements (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
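// For example (illustrative), (A < B) & (A != B) combines codes 100 & 101 =
// 100, which decodes back to (A < B).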
1343struct FoldICmpLogical {
1344  InstCombiner &IC;
1345  Value *LHS, *RHS;
1346  ICmpInst::Predicate pred;
1347  FoldICmpLogical(InstCombiner &ic, ICmpInst *ICI)
1348    : IC(ic), LHS(ICI->getOperand(0)), RHS(ICI->getOperand(1)),
1349      pred(ICI->getPredicate()) {}
1350  bool shouldApply(Value *V) const {
1351    if (ICmpInst *ICI = dyn_cast<ICmpInst>(V))
1352      if (PredicatesFoldable(pred, ICI->getPredicate()))
1353        return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) ||
1354                (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS));
1355    return false;
1356  }
1357  Instruction *apply(Instruction &Log) const {
1358    ICmpInst *ICI = cast<ICmpInst>(Log.getOperand(0));
1359    if (ICI->getOperand(0) != LHS) {
1360      assert(ICI->getOperand(1) == LHS);
1361      ICI->swapOperands();  // Swap the LHS and RHS of the ICmp
1362    }
1363
1364    ICmpInst *RHSICI = cast<ICmpInst>(Log.getOperand(1));
1365    unsigned LHSCode = getICmpCode(ICI);
1366    unsigned RHSCode = getICmpCode(RHSICI);
1367    unsigned Code;
1368    switch (Log.getOpcode()) {
1369    case Instruction::And: Code = LHSCode & RHSCode; break;
1370    case Instruction::Or:  Code = LHSCode | RHSCode; break;
1371    case Instruction::Xor: Code = LHSCode ^ RHSCode; break;
1372    default: llvm_unreachable("Illegal logical opcode!"); return 0;
1373    }
1374
1375    bool isSigned = RHSICI->isSigned() || ICI->isSigned();
1376    Value *RV = getICmpValue(isSigned, Code, LHS, RHS);
1377    if (Instruction *I = dyn_cast<Instruction>(RV))
1378      return I;
1379    // Otherwise, it's a constant boolean value...
1380    return IC.ReplaceInstUsesWith(Log, RV);
1381  }
1382};
1383} // end anonymous namespace
1384
1385// OptAndOp - This handles expressions of the form ((val OP C1) & C2), where
1386// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'.  Op is
1387// guaranteed to be a binary operator.
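// For example, with OP = Xor, OpRHS = 0xF0 and AndRHS = 0x0F, the Xor case
// below rewrites ((X ^ 0xF0) & 0x0F) into ((X & 0x0F) ^ (0xF0 & 0x0F)), and
// the constant half folds to zero, leaving just (X & 0x0F).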
1388Instruction *InstCombiner::OptAndOp(Instruction *Op,
1389                                    ConstantInt *OpRHS,
1390                                    ConstantInt *AndRHS,
1391                                    BinaryOperator &TheAnd) {
1392  Value *X = Op->getOperand(0);
1393  Constant *Together = 0;
1394  if (!Op->isShift())
1395    Together = ConstantExpr::getAnd(AndRHS, OpRHS);
1396
1397  switch (Op->getOpcode()) {
1398  case Instruction::Xor:
1399    if (Op->hasOneUse()) {
1400      // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
1401      Value *And = Builder->CreateAnd(X, AndRHS);
1402      And->takeName(Op);
1403      return BinaryOperator::CreateXor(And, Together);
1404    }
1405    break;
1406  case Instruction::Or:
1407    if (Together == AndRHS) // (X | C) & C --> C
1408      return ReplaceInstUsesWith(TheAnd, AndRHS);
1409
1410    if (Op->hasOneUse() && Together != OpRHS) {
1411      // (X | C1) & C2 --> (X | (C1&C2)) & C2
1412      Value *Or = Builder->CreateOr(X, Together);
1413      Or->takeName(Op);
1414      return BinaryOperator::CreateAnd(Or, AndRHS);
1415    }
1416    break;
1417  case Instruction::Add:
1418    if (Op->hasOneUse()) {
1419      // Adding a one to a single-bit bit-field should be turned into an XOR
1420      // of the bit.  First thing to check is to see if this AND is with a
1421      // single bit constant.
1422      const APInt &AndRHSV = cast<ConstantInt>(AndRHS)->getValue();
1423
1424      // If there is only one bit set.
1425      if (AndRHSV.isPowerOf2()) {
1426        // Ok, at this point, we know that we are masking the result of the
1427        // ADD down to exactly one bit.  If the constant we are adding has
1428        // no bits set below this bit, then we can eliminate the ADD.
1429        const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();
1430
1431        // Check to see if any bits below the one bit set in AndRHSV are set.
1432        if ((AddRHS & (AndRHSV-1)) == 0) {
1433          // If not, the only thing that can affect the output of the AND is
1434          // the bit specified by AndRHSV.  If that bit is set, the effect of
1435          // the XOR is to toggle the bit.  If it is clear, then the ADD has
1436          // no effect.
1437          if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
1438            TheAnd.setOperand(0, X);
1439            return &TheAnd;
1440          } else {
1441            // Pull the XOR out of the AND.
1442            Value *NewAnd = Builder->CreateAnd(X, AndRHS);
1443            NewAnd->takeName(Op);
1444            return BinaryOperator::CreateXor(NewAnd, AndRHS);
1445          }
1446        }
1447      }
1448    }
1449    break;
1450
1451  case Instruction::Shl: {
1452    // We know that the AND will not produce any of the bits shifted in, so if
1453    // the anded constant includes them, clear them now!
1454    //
1455    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
1456    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
1457    APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
1458    ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
1459                                       AndRHS->getValue() & ShlMask);
1460
1461    if (CI->getValue() == ShlMask) {
1462      // Masking out bits that the shift already masks.
1463      return ReplaceInstUsesWith(TheAnd, Op);   // No need for the and.
1464    } else if (CI != AndRHS) {                  // Reducing bits set in and.
1465      TheAnd.setOperand(1, CI);
1466      return &TheAnd;
1467    }
1468    break;
1469  }
1470  case Instruction::LShr: {
1471    // We know that the AND will not produce any of the bits shifted in, so if
1472    // the anded constant includes them, clear them now!  This only applies to
1473    // unsigned shifts, because a signed shr may bring in set bits!
1474    //
1475    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
1476    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
1477    APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
1478    ConstantInt *CI = ConstantInt::get(Op->getContext(),
1479                                       AndRHS->getValue() & ShrMask);
1480
1481    if (CI->getValue() == ShrMask) {
1482      // Masking out bits that the shift already masks.
1483      return ReplaceInstUsesWith(TheAnd, Op);
1484    } else if (CI != AndRHS) {
1485      TheAnd.setOperand(1, CI);  // Reduce bits set in and cst.
1486      return &TheAnd;
1487    }
1488    break;
1489  }
1490  case Instruction::AShr:
1491    // Signed shr.
1492    // See if this is shifting in some sign extension, then masking it out
1493    // with an and.
1494    if (Op->hasOneUse()) {
1495      uint32_t BitWidth = AndRHS->getType()->getBitWidth();
1496      uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
1497      APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
1498      Constant *C = ConstantInt::get(Op->getContext(),
1499                                     AndRHS->getValue() & ShrMask);
1500      if (C == AndRHS) {          // Masking out bits shifted in.
1501        // (Val ashr C1) & C2 -> (Val lshr C1) & C2
1502        // Make the argument unsigned.
1503        Value *ShVal = Op->getOperand(0);
1504        ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
1505        return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
1506      }
1507    }
1508    break;
1509  }
1510  return 0;
1511}
1512
1513
1514/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
1515/// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more
1516/// efficient (V-Lo) <u Hi-Lo.  This method expects that Lo <= Hi.  isSigned
1517/// indicates whether to treat V, Lo and Hi as signed or not.  IB is the
1518/// location at which to insert new instructions.
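/// For example, a test for V in the half-open range [5, 10) with Inside set
/// becomes (V - 5) <u 5, and with Inside clear becomes (V - 5) >u 4, so a
/// single unsigned compare replaces the original pair of compares.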
1519Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
1520                                           bool isSigned, bool Inside,
1521                                           Instruction &IB) {
1522  assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
1523            ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
1524         "Lo is not <= Hi in range emission code!");
1525
1526  if (Inside) {
1527    if (Lo == Hi)  // Trivially false.
1528      return new ICmpInst(ICmpInst::ICMP_NE, V, V);
1529
1530    // V >= Min && V < Hi --> V < Hi
1531    if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
1532      ICmpInst::Predicate pred = (isSigned ?
1533        ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
1534      return new ICmpInst(pred, V, Hi);
1535    }
1536
1537    // Emit V-Lo <u Hi-Lo
1538    Constant *NegLo = ConstantExpr::getNeg(Lo);
1539    Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
1540    Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
1541    return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
1542  }
1543
1544  if (Lo == Hi)  // Trivially true.
1545    return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
1546
1547  // V < Min || V >= Hi -> V > Hi-1
1548  Hi = SubOne(cast<ConstantInt>(Hi));
1549  if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
1550    ICmpInst::Predicate pred = (isSigned ?
1551        ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
1552    return new ICmpInst(pred, V, Hi);
1553  }
1554
1555  // Emit V-Lo >u Hi-1-Lo
1556  // Note that Hi has already had one subtracted from it, above.
1557  ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
1558  Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
1559  Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
1560  return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
1561}
1562
1563// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
1564// any number of 0s on either side.  The 1s are allowed to wrap from LSB to
1565// MSB, so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.  0x0F0F0000 is
1566// not, since its 1s are not contiguous.
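// For example, for an i32 value of 0x0000FF00 this returns true with MB == 9
// and ME == 16, the 1-based positions of the lowest and highest set bits;
// callers treat MB/ME as the inclusive begin/end bits of the run.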
1567static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
1568  const APInt& V = Val->getValue();
1569  uint32_t BitWidth = Val->getType()->getBitWidth();
1570  if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
1571
1572  // MB is the 1-based index of the lowest set bit in the run.
1573  MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
1574  // ME is the 1-based index of the highest set bit in the run.
1575  ME = V.getActiveBits();
1576  return true;
1577}
1578
1579/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
1580/// where isSub determines whether the operator is a sub.  If we can fold one of
1581/// the following xforms:
1582///
1583/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
1584/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
1585/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
1586///
1587/// return (A +/- B).
1588///
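/// For example, ((A & 0xFF) + B) & 0xFF can drop the inner mask, returning
/// (A + B) for the caller to re-mask: the inner mask only clears bits above
/// the outer 0xFF mask, and carries in the add only propagate upward.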
1589Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
1590                                        ConstantInt *Mask, bool isSub,
1591                                        Instruction &I) {
1592  Instruction *LHSI = dyn_cast<Instruction>(LHS);
1593  if (!LHSI || LHSI->getNumOperands() != 2 ||
1594      !isa<ConstantInt>(LHSI->getOperand(1))) return 0;
1595
1596  ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
1597
1598  switch (LHSI->getOpcode()) {
1599  default: return 0;
1600  case Instruction::And:
1601    if (ConstantExpr::getAnd(N, Mask) == Mask) {
1602      // If the AndRHS is a power of two minus one (0+1+), this is simple.
1603      if ((Mask->getValue().countLeadingZeros() +
1604           Mask->getValue().countPopulation()) ==
1605          Mask->getValue().getBitWidth())
1606        break;
1607
1608      // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
1609      // part, we don't need any explicit masks to take them out of A.  If that
1610      // is all N is, ignore it.
1611      uint32_t MB = 0, ME = 0;
1612      if (isRunOfOnes(Mask, MB, ME)) {  // begin/end bit of run, inclusive
1613        uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
1614        APInt LowMask(APInt::getLowBitsSet(BitWidth, MB-1));
1615        if (MaskedValueIsZero(RHS, LowMask))
1616          break;
1617      }
1618    }
1619    return 0;
1620  case Instruction::Or:
1621  case Instruction::Xor:
1622    // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
1623    if ((Mask->getValue().countLeadingZeros() +
1624         Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
1625        && ConstantExpr::getAnd(N, Mask)->isNullValue())
1626      break;
1627    return 0;
1628  }
1629
1630  if (isSub)
1631    return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
1632  return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
1633}
1634
1635/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
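/// For example, (X u> 13) & (X u< 20) becomes the single range test
/// (X - 14) u< 6 via InsertRangeTest below, while a contradictory pair such
/// as (X == 13) & (X == 15) folds directly to false.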
1636Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
1637                                          ICmpInst *LHS, ICmpInst *RHS) {
1638  Value *Val, *Val2;
1639  ConstantInt *LHSCst, *RHSCst;
1640  ICmpInst::Predicate LHSCC, RHSCC;
1641
1642  // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
1643  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val),
1644                         m_ConstantInt(LHSCst))) ||
1645      !match(RHS, m_ICmp(RHSCC, m_Value(Val2),
1646                         m_ConstantInt(RHSCst))))
1647    return 0;
1648
1649  if (LHSCst == RHSCst && LHSCC == RHSCC) {
1650    // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
1651    // where C is a power of 2
1652    if (LHSCC == ICmpInst::ICMP_ULT &&
1653        LHSCst->getValue().isPowerOf2()) {
1654      Value *NewOr = Builder->CreateOr(Val, Val2);
1655      return new ICmpInst(LHSCC, NewOr, LHSCst);
1656    }
1657
1658    // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
1659    if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
1660      Value *NewOr = Builder->CreateOr(Val, Val2);
1661      return new ICmpInst(LHSCC, NewOr, LHSCst);
1662    }
1663  }
1664
1665  // From here on, we only handle:
1666  //    (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
1667  if (Val != Val2) return 0;
1668
1669  // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
1670  if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
1671      RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
1672      LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
1673      RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
1674    return 0;
1675
1676  // We can't fold (ugt x, C) & (sgt x, C2).
1677  if (!PredicatesFoldable(LHSCC, RHSCC))
1678    return 0;
1679
1680  // Ensure that the larger constant is on the RHS.
1681  bool ShouldSwap;
1682  if (CmpInst::isSigned(LHSCC) ||
1683      (ICmpInst::isEquality(LHSCC) &&
1684       CmpInst::isSigned(RHSCC)))
1685    ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
1686  else
1687    ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
1688
1689  if (ShouldSwap) {
1690    std::swap(LHS, RHS);
1691    std::swap(LHSCst, RHSCst);
1692    std::swap(LHSCC, RHSCC);
1693  }
1694
1695  // At this point, we know we have two icmp instructions
1696  // comparing a value against two constants and and'ing the results
1697  // together.  Because of the above check, we know that we only have
1698  // icmp eq, icmp ne, icmp [su]lt, and icmp [su]gt here.  We also know
1699  // (from the FoldICmpLogical check above) that the two constants are not
1700  // equal and that the larger constant is on the RHS.
1701  assert(LHSCst != RHSCst && "Compares not folded above?");
1702
1703  switch (LHSCC) {
1704  default: llvm_unreachable("Unknown integer condition code!");
1705  case ICmpInst::ICMP_EQ:
1706    switch (RHSCC) {
1707    default: llvm_unreachable("Unknown integer condition code!");
1708    case ICmpInst::ICMP_EQ:         // (X == 13 & X == 15) -> false
1709    case ICmpInst::ICMP_UGT:        // (X == 13 & X >  15) -> false
1710    case ICmpInst::ICMP_SGT:        // (X == 13 & X >  15) -> false
1711      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
1712    case ICmpInst::ICMP_NE:         // (X == 13 & X != 15) -> X == 13
1713    case ICmpInst::ICMP_ULT:        // (X == 13 & X <  15) -> X == 13
1714    case ICmpInst::ICMP_SLT:        // (X == 13 & X <  15) -> X == 13
1715      return ReplaceInstUsesWith(I, LHS);
1716    }
1717  case ICmpInst::ICMP_NE:
1718    switch (RHSCC) {
1719    default: llvm_unreachable("Unknown integer condition code!");
1720    case ICmpInst::ICMP_ULT:
1721      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
1722        return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);
1723      break;                        // (X != 13 & X u< 15) -> no change
1724    case ICmpInst::ICMP_SLT:
1725      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
1726        return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);
1727      break;                        // (X != 13 & X s< 15) -> no change
1728    case ICmpInst::ICMP_EQ:         // (X != 13 & X == 15) -> X == 15
1729    case ICmpInst::ICMP_UGT:        // (X != 13 & X u> 15) -> X u> 15
1730    case ICmpInst::ICMP_SGT:        // (X != 13 & X s> 15) -> X s> 15
1731      return ReplaceInstUsesWith(I, RHS);
1732    case ICmpInst::ICMP_NE:
1733      if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
1734        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
1735        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
1736        return new ICmpInst(ICmpInst::ICMP_UGT, Add,
1737                            ConstantInt::get(Add->getType(), 1));
1738      }
1739      break;                        // (X != 13 & X != 15) -> no change
1740    }
1741    break;
1742  case ICmpInst::ICMP_ULT:
1743    switch (RHSCC) {
1744    default: llvm_unreachable("Unknown integer condition code!");
1745    case ICmpInst::ICMP_EQ:         // (X u< 13 & X == 15) -> false
1746    case ICmpInst::ICMP_UGT:        // (X u< 13 & X u> 15) -> false
1747      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
1748    case ICmpInst::ICMP_SGT:        // (X u< 13 & X s> 15) -> no change
1749      break;
1750    case ICmpInst::ICMP_NE:         // (X u< 13 & X != 15) -> X u< 13
1751    case ICmpInst::ICMP_ULT:        // (X u< 13 & X u< 15) -> X u< 13
1752      return ReplaceInstUsesWith(I, LHS);
1753    case ICmpInst::ICMP_SLT:        // (X u< 13 & X s< 15) -> no change
1754      break;
1755    }
1756    break;
1757  case ICmpInst::ICMP_SLT:
1758    switch (RHSCC) {
1759    default: llvm_unreachable("Unknown integer condition code!");
1760    case ICmpInst::ICMP_EQ:         // (X s< 13 & X == 15) -> false
1761    case ICmpInst::ICMP_SGT:        // (X s< 13 & X s> 15) -> false
1762      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
1763    case ICmpInst::ICMP_UGT:        // (X s< 13 & X u> 15) -> no change
1764      break;
1765    case ICmpInst::ICMP_NE:         // (X s< 13 & X != 15) -> X < 13
1766    case ICmpInst::ICMP_SLT:        // (X s< 13 & X s< 15) -> X < 13
1767      return ReplaceInstUsesWith(I, LHS);
1768    case ICmpInst::ICMP_ULT:        // (X s< 13 & X u< 15) -> no change
1769      break;
1770    }
1771    break;
1772  case ICmpInst::ICMP_UGT:
1773    switch (RHSCC) {
1774    default: llvm_unreachable("Unknown integer condition code!");
1775    case ICmpInst::ICMP_EQ:         // (X u> 13 & X == 15) -> X == 15
1776    case ICmpInst::ICMP_UGT:        // (X u> 13 & X u> 15) -> X u> 15
1777      return ReplaceInstUsesWith(I, RHS);
1778    case ICmpInst::ICMP_SGT:        // (X u> 13 & X s> 15) -> no change
1779      break;
1780    case ICmpInst::ICMP_NE:
1781      if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
1782        return new ICmpInst(LHSCC, Val, RHSCst);
1783      break;                        // (X u> 13 & X != 15) -> no change
1784    case ICmpInst::ICMP_ULT:        // (X u> 13 & X u< 15) -> (X-14) <u 1
1785      return InsertRangeTest(Val, AddOne(LHSCst),
1786                             RHSCst, false, true, I);
1787    case ICmpInst::ICMP_SLT:        // (X u> 13 & X s< 15) -> no change
1788      break;
1789    }
1790    break;
1791  case ICmpInst::ICMP_SGT:
1792    switch (RHSCC) {
1793    default: llvm_unreachable("Unknown integer condition code!");
1794    case ICmpInst::ICMP_EQ:         // (X s> 13 & X == 15) -> X == 15
1795    case ICmpInst::ICMP_SGT:        // (X s> 13 & X s> 15) -> X s> 15
1796      return ReplaceInstUsesWith(I, RHS);
1797    case ICmpInst::ICMP_UGT:        // (X s> 13 & X u> 15) -> no change
1798      break;
1799    case ICmpInst::ICMP_NE:
1800      if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
1801        return new ICmpInst(LHSCC, Val, RHSCst);
1802      break;                        // (X s> 13 & X != 15) -> no change
1803    case ICmpInst::ICMP_SLT:        // (X s> 13 & X s< 15) -> (X-14) s< 1
1804      return InsertRangeTest(Val, AddOne(LHSCst),
1805                             RHSCst, true, true, I);
1806    case ICmpInst::ICMP_ULT:        // (X s> 13 & X u< 15) -> no change
1807      break;
1808    }
1809    break;
1810  }
1811
1812  return 0;
1813}
1814
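/// FoldAndOfFCmps - Fold (fcmp)&(fcmp) if possible.  For example, two
/// "fcmp ord" tests against non-NaN constants are combined below into a
/// single (fcmp ord x, y) that checks both operands for NaN at once.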
1815Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
1816                                          FCmpInst *RHS) {
1817
1818  if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
1819      RHS->getPredicate() == FCmpInst::FCMP_ORD) {
1820    // (fcmp ord x, c1) & (fcmp ord y, c2)  -> (fcmp ord x, y)
1821    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
1822      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
1823        // If either of the constants is a NaN, then the whole thing returns
1824        // false.
1825        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
1826          return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
1827        return new FCmpInst(FCmpInst::FCMP_ORD,
1828                            LHS->getOperand(0), RHS->getOperand(0));
1829      }
1830
1831    // Handle vector zeros.  This occurs because the canonical form of
1832    // "fcmp ord x,x" is "fcmp ord x, 0".
1833    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
1834        isa<ConstantAggregateZero>(RHS->getOperand(1)))
1835      return new FCmpInst(FCmpInst::FCMP_ORD,
1836                          LHS->getOperand(0), RHS->getOperand(0));
1837    return 0;
1838  }
1839
1840  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
1841  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
1842  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
1843
1844
1845  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
1846    // Swap RHS operands to match LHS.
1847    Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
1848    std::swap(Op1LHS, Op1RHS);
1849  }
1850
1851  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
1852    // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
1853    if (Op0CC == Op1CC)
1854      return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);
1855
1856    if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE)
1857      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
1858    if (Op0CC == FCmpInst::FCMP_TRUE)
1859      return ReplaceInstUsesWith(I, RHS);
1860    if (Op1CC == FCmpInst::FCMP_TRUE)
1861      return ReplaceInstUsesWith(I, LHS);
1862
1863    bool Op0Ordered;
1864    bool Op1Ordered;
1865    unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
1866    unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
1867    if (Op1Pred == 0) {
1868      std::swap(LHS, RHS);
1869      std::swap(Op0Pred, Op1Pred);
1870      std::swap(Op0Ordered, Op1Ordered);
1871    }
1872    if (Op0Pred == 0) {
1873      // uno && ueq -> uno && (uno || eq) -> ueq
1874      // ord && olt -> ord && (ord && lt) -> olt
1875      if (Op0Ordered == Op1Ordered)
1876        return ReplaceInstUsesWith(I, RHS);
1877
1878      // uno && oeq -> uno && (ord && eq) -> false
1879      // uno && ord -> false
1880      if (!Op0Ordered)
1881        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
1882      // ord && ueq -> ord && (uno || eq) -> oeq
1883      return cast<Instruction>(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS));
1884    }
1885  }
1886
1887  return 0;
1888}
1889
1890
1891Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
1892  bool Changed = SimplifyCommutative(I);
1893  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1894
1895  if (Value *V = SimplifyAndInst(Op0, Op1, TD))
1896    return ReplaceInstUsesWith(I, V);
1897
1898  // See if we can simplify any instructions used by the instruction whose sole
1899  // purpose is to compute bits we don't care about.
1900  if (SimplifyDemandedInstructionBits(I))
1901    return &I;
1902
1903  if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
1904    const APInt &AndRHSMask = AndRHS->getValue();
1905    APInt NotAndRHS(~AndRHSMask);
1906
1907    // Optimize a variety of ((val OP C1) & C2) combinations...
1908    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
1909      Value *Op0LHS = Op0I->getOperand(0);
1910      Value *Op0RHS = Op0I->getOperand(1);
1911      switch (Op0I->getOpcode()) {
1912      default: break;
1913      case Instruction::Xor:
1914      case Instruction::Or:
1915        // If the mask is only needed on one incoming arm, push it up.
1916        if (!Op0I->hasOneUse()) break;
1917
1918        if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
1919          // Not masking anything out for the LHS, move to RHS.
1920          Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
1921                                             Op0RHS->getName()+".masked");
1922          return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
1923        }
1924        if (!isa<Constant>(Op0RHS) &&
1925            MaskedValueIsZero(Op0RHS, NotAndRHS)) {
1926          // Not masking anything out for the RHS, move to LHS.
1927          Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
1928                                             Op0LHS->getName()+".masked");
1929          return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
1930        }
1931
1932        break;
1933      case Instruction::Add:
1934        // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
1935        // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
1936        // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
1937        if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
1938          return BinaryOperator::CreateAnd(V, AndRHS);
1939        if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
1940          return BinaryOperator::CreateAnd(V, AndRHS);  // Add commutes
1941        break;
1942
1943      case Instruction::Sub:
1944        // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
1945        // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
1946        // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
1947        if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
1948          return BinaryOperator::CreateAnd(V, AndRHS);
1949
1950        // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
1951        // has 1's for all bits that the subtraction with A might affect.
1952        if (Op0I->hasOneUse()) {
1953          uint32_t BitWidth = AndRHSMask.getBitWidth();
1954          uint32_t Zeros = AndRHSMask.countLeadingZeros();
1955          APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
1956
1957          ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
1958          if (!(A && A->isZero()) &&               // avoid infinite recursion.
1959              MaskedValueIsZero(Op0LHS, Mask)) {
1960            Value *NewNeg = Builder->CreateNeg(Op0RHS);
1961            return BinaryOperator::CreateAnd(NewNeg, AndRHS);
1962          }
1963        }
1964        break;
1965
1966      case Instruction::Shl:
1967      case Instruction::LShr:
1968        // (1 << x) & 1 --> zext(x == 0)
1969        // (1 >> x) & 1 --> zext(x == 0)
1970        if (AndRHSMask == 1 && Op0LHS == AndRHS) {
1971          Value *NewICmp =
1972            Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
1973          return new ZExtInst(NewICmp, I.getType());
1974        }
1975        break;
1976      }
1977
1978      if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
1979        if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
1980          return Res;
1981    } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) {
1982      // If this is an integer truncation or change from signed-to-unsigned, and
1983      // if the source is an and/or with immediate, transform it.  This
1984      // frequently occurs for bitfield accesses.
1985      if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
1986        if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
1987            CastOp->getNumOperands() == 2)
1988          if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){
1989            if (CastOp->getOpcode() == Instruction::And) {
1990              // Change: and (cast (and X, C1) to T), C2
1991              // into  : and (cast X to T), trunc_or_bitcast(C1)&C2
1992              // This will fold the two constants together, which may allow
1993              // other simplifications.
1994              Value *NewCast = Builder->CreateTruncOrBitCast(
1995                CastOp->getOperand(0), I.getType(),
1996                CastOp->getName()+".shrunk");
1997              // trunc_or_bitcast(C1)&C2
1998              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
1999              C3 = ConstantExpr::getAnd(C3, AndRHS);
2000              return BinaryOperator::CreateAnd(NewCast, C3);
2001            } else if (CastOp->getOpcode() == Instruction::Or) {
2002              // Change: and (cast (or X, C1) to T), C2
2003              // into  : trunc(C1)&C2 iff trunc(C1)&C2 == C2
2004              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
2005              if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
2006                // trunc(C1)&C2
2007                return ReplaceInstUsesWith(I, AndRHS);
2008            }
2009          }
2010      }
2011    }
2012
2013    // Try to fold constant and into select arguments.
2014    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
2015      if (Instruction *R = FoldOpIntoSelect(I, SI))
2016        return R;
2017    if (isa<PHINode>(Op0))
2018      if (Instruction *NV = FoldOpIntoPhi(I))
2019        return NV;
2020  }
2021
2022
2023  // (~A & ~B) == (~(A | B)) - De Morgan's Law
2024  if (Value *Op0NotVal = dyn_castNotVal(Op0))
2025    if (Value *Op1NotVal = dyn_castNotVal(Op1))
2026      if (Op0->hasOneUse() && Op1->hasOneUse()) {
2027        Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
2028                                      I.getName()+".demorgan");
2029        return BinaryOperator::CreateNot(Or);
2030      }
2031
2032  {
2033    Value *A = 0, *B = 0, *C = 0, *D = 0;
2034    // (A|B) & ~(A&B) -> A^B
2035    if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
2036        match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
2037        ((A == C && B == D) || (A == D && B == C)))
2038      return BinaryOperator::CreateXor(A, B);
2039
2040    // ~(A&B) & (A|B) -> A^B
2041    if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
2042        match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
2043        ((A == C && B == D) || (A == D && B == C)))
2044      return BinaryOperator::CreateXor(A, B);
2045
2046    if (Op0->hasOneUse() &&
2047        match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
2048      if (A == Op1) {                                // (A^B)&A -> A&(A^B)
2049        I.swapOperands();     // Simplify below
2050        std::swap(Op0, Op1);
2051      } else if (B == Op1) {                         // (A^B)&B -> B&(B^A)
2052        cast<BinaryOperator>(Op0)->swapOperands();
2053        I.swapOperands();     // Simplify below
2054        std::swap(Op0, Op1);
2055      }
2056    }
2057
2058    if (Op1->hasOneUse() &&
2059        match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
2060      if (B == Op0) {                                // B&(A^B) -> B&(B^A)
2061        cast<BinaryOperator>(Op1)->swapOperands();
2062        std::swap(A, B);
2063      }
2064      if (A == Op0)                                // A&(A^B) -> A & ~B
2065        return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
2066    }
2067
2068    // (A&((~A)|B)) -> A&B
2069    if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
2070        match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
2071      return BinaryOperator::CreateAnd(A, Op1);
2072    if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
2073        match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
2074      return BinaryOperator::CreateAnd(A, Op0);
2075  }
2076
2077  if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) {
2078    // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
2079    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
2080      return R;
2081
2082    if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
2083      if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
2084        return Res;
2085  }
2086
2087  // fold (and (cast A), (cast B)) -> (cast (and A, B))
2088  if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
2089    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
2090      if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
2091        const Type *SrcTy = Op0C->getOperand(0)->getType();
2092        if (SrcTy == Op1C->getOperand(0)->getType() &&
2093            SrcTy->isIntOrIntVector() &&
2094            // Only do this if the casts both really cause code to be generated.
2095            ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
2096                              I.getType()) &&
2097            ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
2098                              I.getType())) {
2099          Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0),
2100                                            Op1C->getOperand(0), I.getName());
2101          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
2102        }
2103      }
2104
2105  // (X >> Z) & (Y >> Z)  -> (X&Y) >> Z  for all shifts.
2106  if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
2107    if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
2108      if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
2109          SI0->getOperand(1) == SI1->getOperand(1) &&
2110          (SI0->hasOneUse() || SI1->hasOneUse())) {
2111        Value *NewOp =
2112          Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
2113                             SI0->getName());
2114        return BinaryOperator::Create(SI1->getOpcode(), NewOp,
2115                                      SI1->getOperand(1));
2116      }
2117  }
2118
2119  // If and'ing two fcmps, try to combine them into one.
2120  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
2121    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
2122      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
2123        return Res;
2124  }
2125
2126  return Changed ? &I : 0;
2127}
2128
2129/// CollectBSwapParts - Analyze the specified subexpression and see if it is
2130/// capable of providing pieces of a bswap.  The subexpression provides pieces
2131/// of a bswap if it is proven that each of the non-zero bytes in the output of
2132/// the expression came from the corresponding "byte swapped" byte in some other
2133/// value.  For example, if the current subexpression is "(shl i32 %X, 24)" then
2134/// we know that the expression deposits the low byte of %X into the high byte
2135/// of the bswap result and that all other bytes are zero.  This expression is
2136/// accepted, and the high byte of ByteValues is set to %X to indicate a
2137/// correct match.
2138///
2139/// This function returns true on an unsuccessful match and false on success.
2140/// On entry to the function the "OverallLeftShift" is a signed integer value
2141/// indicating the number of bytes that the subexpression is later shifted.  For
2142/// example, if the expression is later right shifted by 16 bits, the
2143/// OverallLeftShift value would be -2 on entry.  This is used to specify which
2144/// byte of ByteValues is actually being set.
2145///
2146/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
2147/// byte is masked to zero by a user.  For example, in (X & 255), X will be
2148/// processed with a bytemask of 1.  Because bytemask is 32-bits, this limits
2149/// this function to working on up to 32-byte (256 bit) values.  ByteMask is
2150/// always in the local (OverallLeftShift) coordinate space.
2151///
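/// For example, when matching a bswap of i32 %X (illustrative IR), the
/// subexpression (shl i32 %X, 24) is visited with OverallLeftShift 0 and a
/// full ByteMask; the shift contributes +3 bytes, only byte 0 of %X survives
/// the adjusted mask, and %X is recorded in ByteValues[3], the high byte of
/// the result.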
2152static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
2153                              SmallVector<Value*, 8> &ByteValues) {
2154  if (Instruction *I = dyn_cast<Instruction>(V)) {
2155    // If this is an or instruction, it may be an inner node of the bswap.
2156    if (I->getOpcode() == Instruction::Or) {
2157      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
2158                               ByteValues) ||
2159             CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
2160                               ByteValues);
2161    }
2162
2163    // If this is a logical shift by a constant multiple of 8, recurse with
2164    // OverallLeftShift and ByteMask adjusted.
2165    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
2166      unsigned ShAmt =
2167        cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
2168      // Ensure the shift amount is defined and of a byte value.
2169      if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
2170        return true;
2171
2172      unsigned ByteShift = ShAmt >> 3;
2173      if (I->getOpcode() == Instruction::Shl) {
2174        // X << 2 -> collect(X, +2)
2175        OverallLeftShift += ByteShift;
2176        ByteMask >>= ByteShift;
2177      } else {
2178        // X >>u 2 -> collect(X, -2)
2179        OverallLeftShift -= ByteShift;
2180        ByteMask <<= ByteShift;
2181        ByteMask &= (~0U >> (32-ByteValues.size()));
2182      }
2183
2184      if (OverallLeftShift >= (int)ByteValues.size()) return true;
2185      if (OverallLeftShift <= -(int)ByteValues.size()) return true;
2186
2187      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
2188                               ByteValues);
2189    }
2190
2191    // If this is a logical 'and' with a mask that clears bytes, clear the
2192    // corresponding bytes in ByteMask.
2193    if (I->getOpcode() == Instruction::And &&
2194        isa<ConstantInt>(I->getOperand(1))) {
2195      // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
2196      unsigned NumBytes = ByteValues.size();
2197      APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
2198      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
2199
2200      for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
2201        // If this byte is masked out by a later operation, we don't care what
2202        // the and mask is.
2203        if ((ByteMask & (1 << i)) == 0)
2204          continue;
2205
2206        // If the AndMask is all zeros for this byte, clear the bit.
2207        APInt MaskB = AndMask & Byte;
2208        if (MaskB == 0) {
2209          ByteMask &= ~(1U << i);
2210          continue;
2211        }
2212
2213        // If the AndMask is not all ones for this byte, it's not a bytezap.
2214        if (MaskB != Byte)
2215          return true;
2216
2217        // Otherwise, this byte is kept.
2218      }
2219
2220      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
2221                               ByteValues);
2222    }
2223  }
2224
2225  // Okay, we got to something that isn't a shift, 'or' or 'and'.  This must be
2226  // the input value to the bswap.  Some observations: 1) if more than one byte
2227  // is demanded from this input, then it could not be successfully assembled
2228  // into a byteswap.  At least one of the two bytes would not be aligned with
2229  // its ultimate destination.
2230  if (!isPowerOf2_32(ByteMask)) return true;
2231  unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
2232
2233  // 2) The input and ultimate destinations must line up: if byte 3 of an i32
2234  // is demanded, it needs to go into byte 0 of the result.  This means that the
2235  // byte needs to be shifted until it lands in the right byte bucket.  The
2236  // shift amount depends on the position: if the byte is coming from the high
2237  // part of the value (e.g. byte 3) then it must be shifted right.  If from the
2238  // low part, it must be shifted left.
2239  unsigned DestByteNo = InputByteNo + OverallLeftShift;
2240  if (ByteValues.size()-1-DestByteNo != InputByteNo)
2241    return true;
2247
2248  // If the destination byte value is already defined, the values are or'd
2249  // together, which isn't a bswap (unless it's an or of the same bits).
2250  if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
2251    return true;
2252  ByteValues[DestByteNo] = V;
2253  return false;
2254}
2255
2256/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
2257/// If so, insert the new bswap intrinsic and return it.
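/// For example (illustrative IR), the i16 idiom
///   %t0 = shl i16 %x, 8
///   %t1 = lshr i16 %x, 8
///   %r  = or i16 %t0, %t1
/// is replaced with a call to the llvm.bswap.i16 intrinsic on %x.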
2258Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
2259  const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
2260  if (!ITy || ITy->getBitWidth() % 16 ||
2261      // ByteMask only allows up to 32-byte values.
2262      ITy->getBitWidth() > 32*8)
2263    return 0;   // Can only bswap pairs of bytes.  Can't do vectors.
2264
2265  /// ByteValues - For each byte of the result, we keep track of which value
2266  /// defines each byte.
2267  SmallVector<Value*, 8> ByteValues;
2268  ByteValues.resize(ITy->getBitWidth()/8);
2269
2270  // Try to find all the pieces corresponding to the bswap.
2271  uint32_t ByteMask = ~0U >> (32-ByteValues.size());
2272  if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
2273    return 0;
2274
2275  // Check to see if all of the bytes come from the same value.
2276  Value *V = ByteValues[0];
2277  if (V == 0) return 0;  // Didn't find a byte?  Must be zero.
2278
2279  // All of the remaining bytes must come from that same value.
2280  for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
2281    if (ByteValues[i] != V)
2282      return 0;
2283  const Type *Tys[] = { ITy };
2284  Module *M = I.getParent()->getParent()->getParent();
2285  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
2286  return CallInst::Create(F, V);
2287}
2288
2289/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D).  If A
2290/// is (cond?-1:0) and either B or D is ~(cond?-1:0) or (cond?0:-1), then we
2291/// can simplify this expression to "cond ? C : D" (or "cond ? C : B").
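/// For example (illustrative IR), with %m = select i1 %c, i32 -1, i32 0,
/// the expression (%m & %v1) | (%v2 & (select i1 %c, i32 0, i32 -1))
/// simplifies to select i1 %c, i32 %v1, i32 %v2.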
2292static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
2293                                         Value *C, Value *D) {
2294  // If A is not a select of -1/0, this cannot match.
2295  Value *Cond = 0;
2296  if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond))))
2297    return 0;
2298
2299  // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
2300  if (match(D, m_SelectCst<0, -1>(m_Specific(Cond))))
2301    return SelectInst::Create(Cond, C, B);
2302  if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
2303    return SelectInst::Create(Cond, C, B);
2304  // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
2305  if (match(B, m_SelectCst<0, -1>(m_Specific(Cond))))
2306    return SelectInst::Create(Cond, C, D);
2307  if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
2308    return SelectInst::Create(Cond, C, D);
2309  return 0;
2310}
2311
2312/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
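/// For example, (X == 13) | (X == 14) becomes the range test (X - 13) u< 2
/// below, while (X != 13) | (X != 15) folds directly to true.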
2313Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
2314                                         ICmpInst *LHS, ICmpInst *RHS) {
2315  Value *Val, *Val2;
2316  ConstantInt *LHSCst, *RHSCst;
2317  ICmpInst::Predicate LHSCC, RHSCC;
2318
2319  // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
2320  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) ||
2321      !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst))))
2322    return 0;
2323
2324
2325  // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
2326  if (LHSCst == RHSCst && LHSCC == RHSCC &&
2327      LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
2328    Value *NewOr = Builder->CreateOr(Val, Val2);
2329    return new ICmpInst(LHSCC, NewOr, LHSCst);
2330  }
2331
2332  // From here on, we only handle:
2333  //    (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
2334  if (Val != Val2) return 0;
2335
2336  // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
2337  if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
2338      RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
2339      LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
2340      RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
2341    return 0;
2342
2343  // We can't fold (ugt x, C) | (sgt x, C2).
2344  if (!PredicatesFoldable(LHSCC, RHSCC))
2345    return 0;
2346
2347  // Ensure that the larger constant is on the RHS.
2348  bool ShouldSwap;
2349  if (CmpInst::isSigned(LHSCC) ||
2350      (ICmpInst::isEquality(LHSCC) &&
2351       CmpInst::isSigned(RHSCC)))
2352    ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
2353  else
2354    ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
2355
2356  if (ShouldSwap) {
2357    std::swap(LHS, RHS);
2358    std::swap(LHSCst, RHSCst);
2359    std::swap(LHSCC, RHSCC);
2360  }
2361
2362  // At this point, we know we have two icmp instructions
2363  // comparing a value against two constants and or'ing the results
2364  // together.  Because of the above check, we know that we only have
2365  // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here.  We also know (from the
2366  // FoldICmpLogical check above) that the two constants are not
2367  // equal.
2368  assert(LHSCst != RHSCst && "Compares not folded above?");
2369
2370  switch (LHSCC) {
2371  default: llvm_unreachable("Unknown integer condition code!");
2372  case ICmpInst::ICMP_EQ:
2373    switch (RHSCC) {
2374    default: llvm_unreachable("Unknown integer condition code!");
2375    case ICmpInst::ICMP_EQ:
2376      if (LHSCst == SubOne(RHSCst)) {
2377        // (X == 13 | X == 14) -> X-13 <u 2
2378        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
2379        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
2380        AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
2381        return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
2382      }
2383      break;                         // (X == 13 | X == 15) -> no change
2384    case ICmpInst::ICMP_UGT:         // (X == 13 | X u> 14) -> no change
2385    case ICmpInst::ICMP_SGT:         // (X == 13 | X s> 14) -> no change
2386      break;
2387    case ICmpInst::ICMP_NE:          // (X == 13 | X != 15) -> X != 15
2388    case ICmpInst::ICMP_ULT:         // (X == 13 | X u< 15) -> X u< 15
2389    case ICmpInst::ICMP_SLT:         // (X == 13 | X s< 15) -> X s< 15
2390      return ReplaceInstUsesWith(I, RHS);
2391    }
2392    break;
2393  case ICmpInst::ICMP_NE:
2394    switch (RHSCC) {
2395    default: llvm_unreachable("Unknown integer condition code!");
2396    case ICmpInst::ICMP_EQ:          // (X != 13 | X == 15) -> X != 13
2397    case ICmpInst::ICMP_UGT:         // (X != 13 | X u> 15) -> X != 13
2398    case ICmpInst::ICMP_SGT:         // (X != 13 | X s> 15) -> X != 13
2399      return ReplaceInstUsesWith(I, LHS);
2400    case ICmpInst::ICMP_NE:          // (X != 13 | X != 15) -> true
2401    case ICmpInst::ICMP_ULT:         // (X != 13 | X u< 15) -> true
2402    case ICmpInst::ICMP_SLT:         // (X != 13 | X s< 15) -> true
2403      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
2404    }
2405    break;
2406  case ICmpInst::ICMP_ULT:
2407    switch (RHSCC) {
2408    default: llvm_unreachable("Unknown integer condition code!");
2409    case ICmpInst::ICMP_EQ:         // (X u< 13 | X == 14) -> no change
2410      break;
2411    case ICmpInst::ICMP_UGT:        // (X u< 13 | X u> 15) -> (X-13) u> 2
2412      // If RHSCst is [us]MAXINT, the RHS compare is always false.  Not
2413      // handling this case would let AddOne(RHSCst) overflow below.
2414      if (RHSCst->isMaxValue(false))
2415        return ReplaceInstUsesWith(I, LHS);
2416      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
2417                             false, false, I);
2418    case ICmpInst::ICMP_SGT:        // (X u< 13 | X s> 15) -> no change
2419      break;
2420    case ICmpInst::ICMP_NE:         // (X u< 13 | X != 15) -> X != 15
2421    case ICmpInst::ICMP_ULT:        // (X u< 13 | X u< 15) -> X u< 15
2422      return ReplaceInstUsesWith(I, RHS);
2423    case ICmpInst::ICMP_SLT:        // (X u< 13 | X s< 15) -> no change
2424      break;
2425    }
2426    break;
2427  case ICmpInst::ICMP_SLT:
2428    switch (RHSCC) {
2429    default: llvm_unreachable("Unknown integer condition code!");
2430    case ICmpInst::ICMP_EQ:         // (X s< 13 | X == 14) -> no change
2431      break;
2432    case ICmpInst::ICMP_SGT:        // (X s< 13 | X s> 15) -> (X-13) s> 2
2433      // If RHSCst is [us]MAXINT, the RHS compare is always false.  Not
2434      // handling this case would let AddOne(RHSCst) overflow below.
2435      if (RHSCst->isMaxValue(true))
2436        return ReplaceInstUsesWith(I, LHS);
2437      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
2438                             true, false, I);
2439    case ICmpInst::ICMP_UGT:        // (X s< 13 | X u> 15) -> no change
2440      break;
2441    case ICmpInst::ICMP_NE:         // (X s< 13 | X != 15) -> X != 15
2442    case ICmpInst::ICMP_SLT:        // (X s< 13 | X s< 15) -> X s< 15
2443      return ReplaceInstUsesWith(I, RHS);
2444    case ICmpInst::ICMP_ULT:        // (X s< 13 | X u< 15) -> no change
2445      break;
2446    }
2447    break;
2448  case ICmpInst::ICMP_UGT:
2449    switch (RHSCC) {
2450    default: llvm_unreachable("Unknown integer condition code!");
2451    case ICmpInst::ICMP_EQ:         // (X u> 13 | X == 15) -> X u> 13
2452    case ICmpInst::ICMP_UGT:        // (X u> 13 | X u> 15) -> X u> 13
2453      return ReplaceInstUsesWith(I, LHS);
2454    case ICmpInst::ICMP_SGT:        // (X u> 13 | X s> 15) -> no change
2455      break;
2456    case ICmpInst::ICMP_NE:         // (X u> 13 | X != 15) -> true
2457    case ICmpInst::ICMP_ULT:        // (X u> 13 | X u< 15) -> true
2458      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
2459    case ICmpInst::ICMP_SLT:        // (X u> 13 | X s< 15) -> no change
2460      break;
2461    }
2462    break;
2463  case ICmpInst::ICMP_SGT:
2464    switch (RHSCC) {
2465    default: llvm_unreachable("Unknown integer condition code!");
2466    case ICmpInst::ICMP_EQ:         // (X s> 13 | X == 15) -> X > 13
2467    case ICmpInst::ICMP_SGT:        // (X s> 13 | X s> 15) -> X > 13
2468      return ReplaceInstUsesWith(I, LHS);
2469    case ICmpInst::ICMP_UGT:        // (X s> 13 | X u> 15) -> no change
2470      break;
2471    case ICmpInst::ICMP_NE:         // (X s> 13 | X != 15) -> true
2472    case ICmpInst::ICMP_SLT:        // (X s> 13 | X s< 15) -> true
2473      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
2474    case ICmpInst::ICMP_ULT:        // (X s> 13 | X u< 15) -> no change
2475      break;
2476    }
2477    break;
2478  }
2479  return 0;
2480}
2481
2482Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
2483                                         FCmpInst *RHS) {
2484  if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
2485      RHS->getPredicate() == FCmpInst::FCMP_UNO &&
2486      LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
2487    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
2488      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
2489        // If either of the constants is a NaN, then the whole thing returns
2490        // true.
2491        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
2492          return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
2493
2494        // Otherwise, no need to compare the two constants, compare the
2495        // rest.
2496        return new FCmpInst(FCmpInst::FCMP_UNO,
2497                            LHS->getOperand(0), RHS->getOperand(0));
2498      }
2499
2500    // Handle vector zeros.  This occurs because the canonical form of
2501    // "fcmp uno x,x" is "fcmp uno x, 0".
2502    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
2503        isa<ConstantAggregateZero>(RHS->getOperand(1)))
2504      return new FCmpInst(FCmpInst::FCMP_UNO,
2505                          LHS->getOperand(0), RHS->getOperand(0));
2506
2507    return 0;
2508  }
2509
2510  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
2511  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
2512  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
2513
2514  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
2515    // Swap RHS operands to match LHS.
2516    Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
2517    std::swap(Op1LHS, Op1RHS);
2518  }
2519  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
2520    // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
2521    if (Op0CC == Op1CC)
2522      return new FCmpInst((FCmpInst::Predicate)Op0CC,
2523                          Op0LHS, Op0RHS);
2524    if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
2525      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
2526    if (Op0CC == FCmpInst::FCMP_FALSE)
2527      return ReplaceInstUsesWith(I, RHS);
2528    if (Op1CC == FCmpInst::FCMP_FALSE)
2529      return ReplaceInstUsesWith(I, LHS);
2530    bool Op0Ordered;
2531    bool Op1Ordered;
2532    unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
2533    unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
2534    if (Op0Ordered == Op1Ordered) {
2535      // If both are ordered or unordered, return a new fcmp with
2536      // or'ed predicates.
2537      Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS);
2538      if (Instruction *I = dyn_cast<Instruction>(RV))
2539        return I;
2540      // Otherwise, it's a constant boolean value...
2541      return ReplaceInstUsesWith(I, RV);
2542    }
2543  }
2544  return 0;
2545}
2546
2547/// FoldOrWithConstants - This helper function folds:
2548///
2549///     ((A | B) & C1) | (B & C2)
2550///
2551/// into:
2552///
2553///     (A & C1) | B
2554///
2555/// when the XOR of the two constants is "all ones" (-1).
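/// For example, with C1 = 0xF0 and C2 = 0x0F on i8 (0xF0 ^ 0x0F == -1),
/// ((A | B) & 0xF0) | (B & 0x0F) becomes (A & 0xF0) | B.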
2556Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
2557                                               Value *A, Value *B, Value *C) {
2558  ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
2559  if (!CI1) return 0;
2560
2561  Value *V1 = 0;
2562  ConstantInt *CI2 = 0;
2563  if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;
2564
2565  APInt Xor = CI1->getValue() ^ CI2->getValue();
2566  if (!Xor.isAllOnesValue()) return 0;
2567
2568  if (V1 == A || V1 == B) {
2569    Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
2570    return BinaryOperator::CreateOr(NewOp, V1);
2571  }
2572
2573  return 0;
2574}
2575
2576Instruction *InstCombiner::visitOr(BinaryOperator &I) {
2577  bool Changed = SimplifyCommutative(I);
2578  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2579
2580  if (Value *V = SimplifyOrInst(Op0, Op1, TD))
2581    return ReplaceInstUsesWith(I, V);
2582
2583
2584  // See if we can simplify any instructions used by the instruction whose sole
2585  // purpose is to compute bits we don't care about.
2586  if (SimplifyDemandedInstructionBits(I))
2587    return &I;
2588
2589  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
2590    ConstantInt *C1 = 0; Value *X = 0;
2591    // (X & C1) | C2 --> (X | C2) & (C1|C2)
2592    if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
2593        isOnlyUse(Op0)) {
2594      Value *Or = Builder->CreateOr(X, RHS);
2595      Or->takeName(Op0);
2596      return BinaryOperator::CreateAnd(Or,
2597                         ConstantInt::get(I.getContext(),
2598                                          RHS->getValue() | C1->getValue()));
2599    }
2600
2601    // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
2602    if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
2603        isOnlyUse(Op0)) {
2604      Value *Or = Builder->CreateOr(X, RHS);
2605      Or->takeName(Op0);
2606      return BinaryOperator::CreateXor(Or,
2607                 ConstantInt::get(I.getContext(),
2608                                  C1->getValue() & ~RHS->getValue()));
2609    }
2610
2611    // Try to fold constant and into select arguments.
2612    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
2613      if (Instruction *R = FoldOpIntoSelect(I, SI))
2614        return R;
2615    if (isa<PHINode>(Op0))
2616      if (Instruction *NV = FoldOpIntoPhi(I))
2617        return NV;
2618  }
2619
2620  Value *A = 0, *B = 0;
2621  ConstantInt *C1 = 0, *C2 = 0;
2622
2623  // (A | B) | C  and  A | (B | C)                  -> bswap if possible.
2624  // (A >> B) | (C << D)  and  (A << B) | (C >> D)  -> bswap if possible.
2625  if (match(Op0, m_Or(m_Value(), m_Value())) ||
2626      match(Op1, m_Or(m_Value(), m_Value())) ||
2627      (match(Op0, m_Shift(m_Value(), m_Value())) &&
2628       match(Op1, m_Shift(m_Value(), m_Value())))) {
2629    if (Instruction *BSwap = MatchBSwap(I))
2630      return BSwap;
2631  }
2632
2633  // (X^C)|Y -> (X|Y)^C iff Y&C == 0
2634  if (Op0->hasOneUse() &&
2635      match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
2636      MaskedValueIsZero(Op1, C1->getValue())) {
2637    Value *NOr = Builder->CreateOr(A, Op1);
2638    NOr->takeName(Op0);
2639    return BinaryOperator::CreateXor(NOr, C1);
2640  }
2641
2642  // Y|(X^C) -> (X|Y)^C iff Y&C == 0
2643  if (Op1->hasOneUse() &&
2644      match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
2645      MaskedValueIsZero(Op0, C1->getValue())) {
2646    Value *NOr = Builder->CreateOr(A, Op0);
2647    NOr->takeName(Op1);
2648    return BinaryOperator::CreateXor(NOr, C1);
2649  }
2650
2651  // (A & C)|(B & D)
2652  Value *C = 0, *D = 0;
2653  if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
2654      match(Op1, m_And(m_Value(B), m_Value(D)))) {
2655    Value *V1 = 0, *V2 = 0, *V3 = 0;
2656    C1 = dyn_cast<ConstantInt>(C);
2657    C2 = dyn_cast<ConstantInt>(D);
2658    if (C1 && C2) {  // (A & C1)|(B & C2)
2659      // If we have: ((V + N) & C1) | (V & C2)
2660      // .. and C2 = ~C1 and C2 is 0+1+ (a mask of contiguous low bits)
2661      // and (N & C2) == 0, replace with V+N.
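      // For instance (an illustrative i8 case): with C1 = 0xF0, C2 = 0x0F,
      // and (N & 0x0F) == 0, adding N cannot disturb V's low nibble, so
      // ((V+N) & 0xF0) | (V & 0x0F) == V+N.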
2662      if (C1->getValue() == ~C2->getValue()) {
2663        if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
2664            match(A, m_Add(m_Value(V1), m_Value(V2)))) {
2665          // Add commutes, try both ways.
2666          if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
2667            return ReplaceInstUsesWith(I, A);
2668          if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
2669            return ReplaceInstUsesWith(I, A);
2670        }
2671        // Or commutes, try both ways.
2672        if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
2673            match(B, m_Add(m_Value(V1), m_Value(V2)))) {
2674          // Add commutes, try both ways.
2675          if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
2676            return ReplaceInstUsesWith(I, B);
2677          if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
2678            return ReplaceInstUsesWith(I, B);
2679        }
2680      }
2681
2682      // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
2683      // iff (C1&C2) == 0 and (N&~C1) == 0
2684      if ((C1->getValue() & C2->getValue()) == 0) {
2685        if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
2686            ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) ||  // (V|N)
2687             (V2 == B && MaskedValueIsZero(V1, ~C1->getValue()))))   // (N|V)
2688          return BinaryOperator::CreateAnd(A,
2689                               ConstantInt::get(A->getContext(),
2690                                                C1->getValue()|C2->getValue()));
2691        // Or commutes, try both ways.
2692        if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
2693            ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) ||  // (V|N)
2694             (V2 == A && MaskedValueIsZero(V1, ~C2->getValue()))))   // (N|V)
2695          return BinaryOperator::CreateAnd(B,
2696                               ConstantInt::get(B->getContext(),
2697                                                C1->getValue()|C2->getValue()));
2698      }
2699    }
2700
2701    // Check to see if we have any common things being and'ed.  If so, find the
2702    // terms for V1 & (V2|V3).
2703    if (isOnlyUse(Op0) || isOnlyUse(Op1)) {
2704      V1 = 0;
2705      if (A == B)      // (A & C)|(A & D) == A & (C|D)
2706        V1 = A, V2 = C, V3 = D;
2707      else if (A == D) // (A & C)|(B & A) == A & (B|C)
2708        V1 = A, V2 = B, V3 = C;
2709      else if (C == B) // (A & C)|(C & D) == C & (A|D)
2710        V1 = C, V2 = A, V3 = D;
2711      else if (C == D) // (A & C)|(B & C) == C & (A|B)
2712        V1 = C, V2 = A, V3 = B;
2713
2714      if (V1) {
2715        Value *Or = Builder->CreateOr(V2, V3, "tmp");
2716        return BinaryOperator::CreateAnd(V1, Or);
2717      }
2718    }
2719
2720    // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) ->  C0 ? A : B, and commuted variants
2721    if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
2722      return Match;
2723    if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
2724      return Match;
2725    if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D))
2726      return Match;
2727    if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C))
2728      return Match;
2729
2730    // ((A&~B)|(~A&B)) -> A^B
2731    if ((match(C, m_Not(m_Specific(D))) &&
2732         match(B, m_Not(m_Specific(A)))))
2733      return BinaryOperator::CreateXor(A, D);
2734    // ((~B&A)|(~A&B)) -> A^B
2735    if ((match(A, m_Not(m_Specific(D))) &&
2736         match(B, m_Not(m_Specific(C)))))
2737      return BinaryOperator::CreateXor(C, D);
2738    // ((A&~B)|(B&~A)) -> A^B
2739    if ((match(C, m_Not(m_Specific(B))) &&
2740         match(D, m_Not(m_Specific(A)))))
2741      return BinaryOperator::CreateXor(A, B);
2742    // ((~B&A)|(B&~A)) -> A^B
2743    if ((match(A, m_Not(m_Specific(B))) &&
2744         match(D, m_Not(m_Specific(C)))))
2745      return BinaryOperator::CreateXor(C, B);
2746  }
2747
2748  // (X >> Z) | (Y >> Z)  -> (X|Y) >> Z  for all shifts.
2749  if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
2750    if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
2751      if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
2752          SI0->getOperand(1) == SI1->getOperand(1) &&
2753          (SI0->hasOneUse() || SI1->hasOneUse())) {
2754        Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
2755                                         SI0->getName());
2756        return BinaryOperator::Create(SI1->getOpcode(), NewOp,
2757                                      SI1->getOperand(1));
2758      }
2759  }
2760
2761  // ((A|B)&1)|(B&-2) -> (A&1) | B
2762  if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
2763      match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
2764    Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C);
2765    if (Ret) return Ret;
2766  }
2767  // (B&-2)|((A|B)&1) -> (A&1) | B
2768  if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
2769      match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
2770    Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C);
2771    if (Ret) return Ret;
2772  }
2773
2774  // (~A | ~B) == (~(A & B)) - De Morgan's Law
2775  if (Value *Op0NotVal = dyn_castNotVal(Op0))
2776    if (Value *Op1NotVal = dyn_castNotVal(Op1))
2777      if (Op0->hasOneUse() && Op1->hasOneUse()) {
2778        Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
2779                                        I.getName()+".demorgan");
2780        return BinaryOperator::CreateNot(And);
2781      }
2782
2783  // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
2784  if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) {
2785    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
2786      return R;
2787
2788    if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
2789      if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
2790        return Res;
2791  }
2792
2793  // fold (or (cast A), (cast B)) -> (cast (or A, B))
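  // e.g. (illustrative):  or (zext i8 %a to i32), (zext i8 %b to i32)
  //                   ->  zext (or i8 %a, %b) to i32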
2794  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
2795    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
2796      if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
2797        if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
2798            !isa<ICmpInst>(Op1C->getOperand(0))) {
2799          const Type *SrcTy = Op0C->getOperand(0)->getType();
2800          if (SrcTy == Op1C->getOperand(0)->getType() &&
2801              SrcTy->isIntOrIntVector() &&
2802              // Only do this if the casts both really cause code to be
2803              // generated.
2804              ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
2805                                I.getType()) &&
2806              ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
2807                                I.getType())) {
2808            Value *NewOp = Builder->CreateOr(Op0C->getOperand(0),
2809                                             Op1C->getOperand(0), I.getName());
2810            return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
2811          }
2812        }
2813      }
2814  }
2815
2816
2817  // (fcmp uno x, c) | (fcmp uno y, c)  -> (fcmp uno x, y)
2818  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
2819    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
2820      if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
2821        return Res;
2822  }
2823
2824  return Changed ? &I : 0;
2825}
2826
2827namespace {
2828
2829// XorSelf - Implements: X ^ X --> 0
2830struct XorSelf {
2831  Value *RHS;
2832  XorSelf(Value *rhs) : RHS(rhs) {}
2833  bool shouldApply(Value *LHS) const { return LHS == RHS; }
2834  Instruction *apply(BinaryOperator &Xor) const {
2835    return &Xor;
2836  }
2837};
2838
2839}
2840
2841Instruction *InstCombiner::visitXor(BinaryOperator &I) {
2842  bool Changed = SimplifyCommutative(I);
2843  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2844
2845  if (isa<UndefValue>(Op1)) {
2846    if (isa<UndefValue>(Op0))
2847      // Handle undef ^ undef -> 0 special case. This is a common
2848      // idiom (misuse).
2849      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
2850    return ReplaceInstUsesWith(I, Op1);  // X ^ undef -> undef
2851  }
2852
2853  // xor X, X = 0, even if X is nested in a sequence of Xor's.
2854  if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
2855    assert(Result == &I && "AssociativeOpt didn't work?"); (void)Result;
2856    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
2857  }
2858
2859  // See if we can simplify any instructions used by the instruction whose sole
2860  // purpose is to compute bits we don't care about.
2861  if (SimplifyDemandedInstructionBits(I))
2862    return &I;
2863  if (isa<VectorType>(I.getType()))
2864    if (isa<ConstantAggregateZero>(Op1))
2865      return ReplaceInstUsesWith(I, Op0);  // X ^ <0,0> -> X
2866
2867  // Is this a ~ operation?
2868  if (Value *NotOp = dyn_castNotVal(&I)) {
2869    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
2870      if (Op0I->getOpcode() == Instruction::And ||
2871          Op0I->getOpcode() == Instruction::Or) {
2872        // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
2873        // ~(~X | Y) --> (X & ~Y) - De Morgan's Law
2874        if (dyn_castNotVal(Op0I->getOperand(1)))
2875          Op0I->swapOperands();
2876        if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
2877          Value *NotY =
2878            Builder->CreateNot(Op0I->getOperand(1),
2879                               Op0I->getOperand(1)->getName()+".not");
2880          if (Op0I->getOpcode() == Instruction::And)
2881            return BinaryOperator::CreateOr(Op0NotVal, NotY);
2882          return BinaryOperator::CreateAnd(Op0NotVal, NotY);
2883        }
2884
2885        // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
2886        // ~(X | Y) --> (~X & ~Y) - De Morgan's Law
2887        if (isFreeToInvert(Op0I->getOperand(0)) &&
2888            isFreeToInvert(Op0I->getOperand(1))) {
2889          Value *NotX =
2890            Builder->CreateNot(Op0I->getOperand(0), "notlhs");
2891          Value *NotY =
2892            Builder->CreateNot(Op0I->getOperand(1), "notrhs");
2893          if (Op0I->getOpcode() == Instruction::And)
2894            return BinaryOperator::CreateOr(NotX, NotY);
2895          return BinaryOperator::CreateAnd(NotX, NotY);
2896        }
2897      }
2898    }
2899  }
2900
2901
2902  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
2903    if (RHS->isOne() && Op0->hasOneUse()) {
2904      // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
2905      if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
2906        return new ICmpInst(ICI->getInversePredicate(),
2907                            ICI->getOperand(0), ICI->getOperand(1));
2908
2909      if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0))
2910        return new FCmpInst(FCI->getInversePredicate(),
2911                            FCI->getOperand(0), FCI->getOperand(1));
2912    }
2913
2914    // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
2915    if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
2916      if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
2917        if (CI->hasOneUse() && Op0C->hasOneUse()) {
2918          Instruction::CastOps Opcode = Op0C->getOpcode();
2919          if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
2920              (RHS == ConstantExpr::getCast(Opcode,
2921                                           ConstantInt::getTrue(I.getContext()),
2922                                            Op0C->getDestTy()))) {
2923            CI->setPredicate(CI->getInversePredicate());
2924            return CastInst::Create(Opcode, CI, Op0C->getType());
2925          }
2926        }
2927      }
2928    }
2929
2930    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
2931      // ~(c-X) == X-c-1 == X+(-c-1)
2932      if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
2933        if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
2934          Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
2935          Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
2936                                      ConstantInt::get(I.getType(), 1));
2937          return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
2938        }
2939
2940      if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
2941        if (Op0I->getOpcode() == Instruction::Add) {
2942          // ~(X+c) --> (-c-1)-X
2943          if (RHS->isAllOnesValue()) {
2944            Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
2945            return BinaryOperator::CreateSub(
2946                           ConstantExpr::getSub(NegOp0CI,
2947                                      ConstantInt::get(I.getType(), 1)),
2948                                      Op0I->getOperand(0));
2949          } else if (RHS->getValue().isSignBit()) {
2950            // (X + C) ^ signbit -> (X + C + signbit)
2951            Constant *C = ConstantInt::get(I.getContext(),
2952                                           RHS->getValue() + Op0CI->getValue());
2953            return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
2954
2955          }
2956        } else if (Op0I->getOpcode() == Instruction::Or) {
2957          // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
2958          if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
2959            Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
2960            // Anything in both C1 and C2 is known to be zero, remove it from
2961            // NewRHS.
2962            Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
2963            NewRHS = ConstantExpr::getAnd(NewRHS,
2964                                       ConstantExpr::getNot(CommonBits));
2965            Worklist.Add(Op0I);
2966            I.setOperand(0, Op0I->getOperand(0));
2967            I.setOperand(1, NewRHS);
2968            return &I;
2969          }
2970        }
2971      }
2972    }
2973
2974    // Try to fold constant and into select arguments.
2975    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
2976      if (Instruction *R = FoldOpIntoSelect(I, SI))
2977        return R;
2978    if (isa<PHINode>(Op0))
2979      if (Instruction *NV = FoldOpIntoPhi(I))
2980        return NV;
2981  }
2982
2983  if (Value *X = dyn_castNotVal(Op0))   // ~A ^ A == -1
2984    if (X == Op1)
2985      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
2986
2987  if (Value *X = dyn_castNotVal(Op1))   // A ^ ~A == -1
2988    if (X == Op0)
2989      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
2990
2991
2992  BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
2993  if (Op1I) {
2994    Value *A, *B;
2995    if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
2996      if (A == Op0) {              // B^(B|A) == (A|B)^B
2997        Op1I->swapOperands();
2998        I.swapOperands();
2999        std::swap(Op0, Op1);
3000      } else if (B == Op0) {       // B^(A|B) == (A|B)^B
3001        I.swapOperands();     // Simplified below.
3002        std::swap(Op0, Op1);
3003      }
3004    } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) {
3005      return ReplaceInstUsesWith(I, B);                      // A^(A^B) == B
3006    } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
3007      return ReplaceInstUsesWith(I, A);                      // A^(B^A) == B
3008    } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
3009               Op1I->hasOneUse()){
3010      if (A == Op0) {                                      // A^(A&B) -> A^(B&A)
3011        Op1I->swapOperands();
3012        std::swap(A, B);
3013      }
3014      if (B == Op0) {                                      // A^(B&A) -> (B&A)^A
3015        I.swapOperands();     // Simplified below.
3016        std::swap(Op0, Op1);
3017      }
3018    }
3019  }
3020
3021  BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
3022  if (Op0I) {
3023    Value *A, *B;
3024    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
3025        Op0I->hasOneUse()) {
3026      if (A == Op1)                                  // (B|A)^B == (A|B)^B
3027        std::swap(A, B);
3028      if (B == Op1)                                  // (A|B)^B == A & ~B
3029        return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
3030    } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
3031      return ReplaceInstUsesWith(I, B);                      // (A^B)^A == B
3032    } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
3033      return ReplaceInstUsesWith(I, A);                      // (B^A)^A == B
3034    } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
3035               Op0I->hasOneUse()){
3036      if (A == Op1)                                        // (A&B)^A -> (B&A)^A
3037        std::swap(A, B);
3038      if (B == Op1 &&                                      // (B&A)^A == ~B & A
3039          !isa<ConstantInt>(Op1)) {  // Canonical form is (B&C)^C
3040        return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
3041      }
3042    }
3043  }
3044
3045  // (X >> Z) ^ (Y >> Z)  -> (X^Y) >> Z  for all shifts.
3046  if (Op0I && Op1I && Op0I->isShift() &&
3047      Op0I->getOpcode() == Op1I->getOpcode() &&
3048      Op0I->getOperand(1) == Op1I->getOperand(1) &&
3049      (Op0I->hasOneUse() || Op1I->hasOneUse())) {
3050    Value *NewOp =
3051      Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
3052                         Op0I->getName());
3053    return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
3054                                  Op1I->getOperand(1));
3055  }
3056
3057  if (Op0I && Op1I) {
3058    Value *A, *B, *C, *D;
3059    // (A & B)^(A | B) -> A ^ B
3060    if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
3061        match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
3062      if ((A == C && B == D) || (A == D && B == C))
3063        return BinaryOperator::CreateXor(A, B);
3064    }
3065    // (A | B)^(A & B) -> A ^ B
3066    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
3067        match(Op1I, m_And(m_Value(C), m_Value(D)))) {
3068      if ((A == C && B == D) || (A == D && B == C))
3069        return BinaryOperator::CreateXor(A, B);
3070    }
3071
3072    // (A & B)^(C & D)
3073    if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
3074        match(Op0I, m_And(m_Value(A), m_Value(B))) &&
3075        match(Op1I, m_And(m_Value(C), m_Value(D)))) {
3076      // (X & Y)^(X & Z) -> (Y^Z) & X
3077      Value *X = 0, *Y = 0, *Z = 0;
3078      if (A == C)
3079        X = A, Y = B, Z = D;
3080      else if (A == D)
3081        X = A, Y = B, Z = C;
3082      else if (B == C)
3083        X = B, Y = A, Z = D;
3084      else if (B == D)
3085        X = B, Y = A, Z = C;
3086
3087      if (X) {
3088        Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
3089        return BinaryOperator::CreateAnd(NewOp, X);
3090      }
3091    }
3092  }
3093
3094  // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
3095  if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
3096    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
3097      return R;
3098
3099  // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
3100  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
3101    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
3102      if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
3103        const Type *SrcTy = Op0C->getOperand(0)->getType();
3104        if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
3105            // Only do this if the casts both really cause code to be generated.
3106            ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
3107                              I.getType()) &&
3108            ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
3109                              I.getType())) {
3110          Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
3111                                            Op1C->getOperand(0), I.getName());
3112          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
3113        }
3114      }
3115  }
3116
3117  return Changed ? &I : 0;
3118}
3119
3120
3121Instruction *InstCombiner::visitShl(BinaryOperator &I) {
3122  return commonShiftTransforms(I);
3123}
3124
3125Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
3126  return commonShiftTransforms(I);
3127}
3128
3129Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
3130  if (Instruction *R = commonShiftTransforms(I))
3131    return R;
3132
3133  Value *Op0 = I.getOperand(0);
3134
3135  // ashr int -1, X = -1   (any arithmetic shift right of ~0 is still ~0)
3136  if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
3137    if (CSI->isAllOnesValue())
3138      return ReplaceInstUsesWith(I, CSI);
3139
3140  // See if we can turn a signed shr into an unsigned shr.
3141  if (MaskedValueIsZero(Op0,
3142                        APInt::getSignBit(I.getType()->getScalarSizeInBits())))
3143    return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
3144
3145  // Arithmetic shifting an all-sign-bit value is a no-op.
3146  unsigned NumSignBits = ComputeNumSignBits(Op0);
3147  if (NumSignBits == Op0->getType()->getScalarSizeInBits())
3148    return ReplaceInstUsesWith(I, Op0);
3149
3150  return 0;
3151}
3152
3153Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
3154  assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
3155  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
3156
3157  // shl X, 0 == X and shr X, 0 == X
3158  // shl 0, X == 0 and shr 0, X == 0
3159  if (Op1 == Constant::getNullValue(Op1->getType()) ||
3160      Op0 == Constant::getNullValue(Op0->getType()))
3161    return ReplaceInstUsesWith(I, Op0);
3162
3163  if (isa<UndefValue>(Op0)) {
3164    if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
3165      return ReplaceInstUsesWith(I, Op0);
3166    else                                    // undef << X -> 0, undef >>u X -> 0
3167      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
3168  }
3169  if (isa<UndefValue>(Op1)) {
3170    if (I.getOpcode() == Instruction::AShr)  // X >>s undef -> X
3171      return ReplaceInstUsesWith(I, Op0);
3172    else                                     // X << undef, X >>u undef -> 0
3173      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
3174  }
3175
3176  // See if we can fold away this shift.
3177  if (SimplifyDemandedInstructionBits(I))
3178    return &I;
3179
3180  // Try to fold constant and into select arguments.
3181  if (isa<Constant>(Op0))
3182    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
3183      if (Instruction *R = FoldOpIntoSelect(I, SI))
3184        return R;
3185
3186  if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
3187    if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
3188      return Res;
3189  return 0;
3190}
3191
3192Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
3193                                               BinaryOperator &I) {
3194  bool isLeftShift = I.getOpcode() == Instruction::Shl;
3195
3198  uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
3199
3200  // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
3201  // a signed shift.
3202  //
3203  if (Op1->uge(TypeBits)) {
3204    if (I.getOpcode() != Instruction::AShr)
3205      return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
3206    else {
3207      I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
3208      return &I;
3209    }
3210  }
3211
3212  // ((X*C1) << C2) == (X * (C1 << C2))
3213  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
3214    if (BO->getOpcode() == Instruction::Mul && isLeftShift)
3215      if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
3216        return BinaryOperator::CreateMul(BO->getOperand(0),
3217                                        ConstantExpr::getShl(BOOp, Op1));
3218
3219  // Try to fold constant and into select arguments.
3220  if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
3221    if (Instruction *R = FoldOpIntoSelect(I, SI))
3222      return R;
3223  if (isa<PHINode>(Op0))
3224    if (Instruction *NV = FoldOpIntoPhi(I))
3225      return NV;
3226
3227  // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
3228  if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
3229    Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
3230    // If 'shift2' is an ashr, we would have to get the sign bit into a funny
3231    // place.  Don't try to do this transformation in this case.  Also, we
3232    // require that the input operand is a shift-by-constant so that we have
3233    // confidence that the shifts will get folded together.  We could do this
3234    // xform in more cases, but it is unlikely to be profitable.
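    // A sketch of the xform (illustrative, i32 truncated to i8):
    //    %s1 = lshr i32 %x, 3
    //    %t  = trunc i32 %s1 to i8
    //    %s2 = shl i8 %t, 2
    // becomes, before the final truncation:
    //    %n  = shl i32 %s1, 2
    //    %m  = and i32 %n, 1020    ; 0xFF << 2 emulates the interposed trunc
    //    %s2 = trunc i32 %m to i8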
3235    if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
3236        isa<ConstantInt>(TrOp->getOperand(1))) {
3237      // Okay, we'll do this xform.  Make the shift of shift.
3238      Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
3239      // (shift2 (shift1 & 0x00FF), c2)
3240      Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
3241
3242      // For logical shifts, the truncation has the effect of making the high
3243      // part of the register be zeros.  Emulate this by inserting an AND to
3244      // clear the top bits as needed.  This 'and' will usually be zapped by
3245      // other xforms later if dead.
3246      unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
3247      unsigned DstSize = TI->getType()->getScalarSizeInBits();
3248      APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
3249
3250      // The mask we constructed says what the trunc would do if occurring
3251      // between the shifts.  We want to know the effect *after* the second
3252      // shift.  We know that it is a logical shift by a constant, so adjust the
3253      // mask as appropriate.
3254      if (I.getOpcode() == Instruction::Shl)
3255        MaskV <<= Op1->getZExtValue();
3256      else {
3257        assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
3258        MaskV = MaskV.lshr(Op1->getZExtValue());
3259      }
3260
3261      // shift1 & 0x00FF
3262      Value *And = Builder->CreateAnd(NSh,
3263                                      ConstantInt::get(I.getContext(), MaskV),
3264                                      TI->getName());
3265
3266      // Return the value truncated to the interesting size.
3267      return new TruncInst(And, I.getType());
3268    }
3269  }
3270
3271  if (Op0->hasOneUse()) {
3272    if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
3273      // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
3274      Value *V1, *V2;
3275      ConstantInt *CC;
3276      switch (Op0BO->getOpcode()) {
3277        default: break;
3278        case Instruction::Add:
3279        case Instruction::And:
3280        case Instruction::Or:
3281        case Instruction::Xor: {
3282          // These operators commute.
3283          // Turn (Y + (X >> C)) << C  ->  (X + (Y << C)) & (~0 << C)
3284          if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
3285              match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
3286                    m_Specific(Op1)))) {
3287            Value *YS =         // (Y << C)
3288              Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
3289            // (X + (Y << C))
3290            Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
3291                                            Op0BO->getOperand(1)->getName());
3292            uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
3293            return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
3294                       APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
3295          }
3296
3297          // Turn (Y + ((X >> C) & CC)) << C  ->  ((X & (CC << C)) + (Y << C))
3298          Value *Op0BOOp1 = Op0BO->getOperand(1);
3299          if (isLeftShift && Op0BOOp1->hasOneUse() &&
3300              match(Op0BOOp1,
3301                    m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
3302                          m_ConstantInt(CC))) &&
3303              cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
3304            Value *YS =   // (Y << C)
3305              Builder->CreateShl(Op0BO->getOperand(0), Op1,
3306                                           Op0BO->getName());
3307            // X & (CC << C)
3308            Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
3309                                           V1->getName()+".mask");
3310            return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
3311          }
3312        }
3313
3314        // FALL THROUGH.
3315        case Instruction::Sub: {
3316          // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
3317          if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
3318              match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
3319                    m_Specific(Op1)))) {
3320            Value *YS =  // (Y << C)
3321              Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
3322            // (X + (Y << C))
3323            Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
3324                                            Op0BO->getOperand(0)->getName());
3325            uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
3326            return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
3327                       APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
3328          }
3329
3330          // Turn (((X >> C)&CC) + Y) << C  ->  ((X & (CC << C)) + (Y << C))
3331          if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
3332              match(Op0BO->getOperand(0),
3333                    m_And(m_Shr(m_Value(V1), m_Value(V2)),
3334                          m_ConstantInt(CC))) && V2 == Op1 &&
3335              cast<BinaryOperator>(Op0BO->getOperand(0))
3336                  ->getOperand(0)->hasOneUse()) {
3337            Value *YS = // (Y << C)
3338              Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
3339            // X & (CC << C)
3340            Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
3341                                           V1->getName()+".mask");
3342
3343            return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
3344          }
3345
3346          break;
3347        }
3348      }
3349
3350
3351      // If the operand is a bitwise operator with a constant RHS, and the
3352      // shift is the only use, we can pull it out of the shift.
3353      if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
3354        bool isValid = true;     // Valid only for And, Or, Xor
3355        bool highBitSet = false; // Transform if high bit of constant set?
3356
3357        switch (Op0BO->getOpcode()) {
3358          default: isValid = false; break;   // Do not perform transform!
3359          case Instruction::Add:
3360            isValid = isLeftShift;
3361            break;
3362          case Instruction::Or:
3363          case Instruction::Xor:
3364            highBitSet = false;
3365            break;
3366          case Instruction::And:
3367            highBitSet = true;
3368            break;
3369        }
3370
3371        // If this is a signed shift right, and the high bit is modified
3372        // by the logical operation, do not perform the transformation.
3373        // The highBitSet boolean indicates the value of the high bit of
3374        // the constant which would cause it to be modified for this
3375        // operation.
3376        //
3377        if (isValid && I.getOpcode() == Instruction::AShr)
3378          isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
3379
3380        if (isValid) {
3381          Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
3382
3383          Value *NewShift =
3384            Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
3385          NewShift->takeName(Op0BO);
3386
3387          return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
3388                                        NewRHS);
3389        }
3390      }
3391    }
3392  }
3393
3394  // Find out if this is a shift of a shift by a constant.
3395  BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
3396  if (ShiftOp && !ShiftOp->isShift())
3397    ShiftOp = 0;
3398
3399  if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
3400    ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
3401    uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
3402    uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
3403    assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
3404    if (ShiftAmt1 == 0) return 0;  // Will be simplified in the future.
3405    Value *X = ShiftOp->getOperand(0);
3406
3407    uint32_t AmtSum = ShiftAmt1+ShiftAmt2;   // Fold into one big shift.
3408
3409    const IntegerType *Ty = cast<IntegerType>(I.getType());
3410
3411    // Check for (X << c1) << c2  and  (X >> c1) >> c2
3412    if (I.getOpcode() == ShiftOp->getOpcode()) {
3413      // If this is an oversized composite shift, then unsigned shifts get 0
3414      // and ashr saturates.
3415      if (AmtSum >= TypeBits) {
3416        if (I.getOpcode() != Instruction::AShr)
3417          return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
3418        AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.
3419      }
3420
3421      return BinaryOperator::Create(I.getOpcode(), X,
3422                                    ConstantInt::get(Ty, AmtSum));
3423    }
3424
3425    if (ShiftOp->getOpcode() == Instruction::LShr &&
3426        I.getOpcode() == Instruction::AShr) {
3427      if (AmtSum >= TypeBits)
3428        return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
3429
3430      // ((X >>u C1) >>s C2) -> (X >>u (C1+C2))  since C1 != 0.
3431      return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
3432    }
3433
3434    if (ShiftOp->getOpcode() == Instruction::AShr &&
3435        I.getOpcode() == Instruction::LShr) {
3436      // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
3437      if (AmtSum >= TypeBits)
3438        AmtSum = TypeBits-1;
3439
3440      Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));
3441
3442      APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
3443      return BinaryOperator::CreateAnd(Shift,
3444                                       ConstantInt::get(I.getContext(), Mask));
3445    }
3446
3447    // Okay, if we get here, one shift must be left, and the other shift must be
3448    // right.  See if the amounts are equal.
3449    if (ShiftAmt1 == ShiftAmt2) {
3450      // If we have ((X >>? C) << C), turn this into X & (-1 << C).
3451      if (I.getOpcode() == Instruction::Shl) {
3452        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
3453        return BinaryOperator::CreateAnd(X,
3454                                         ConstantInt::get(I.getContext(),Mask));
3455      }
3456      // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
3457      if (I.getOpcode() == Instruction::LShr) {
3458        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
3459        return BinaryOperator::CreateAnd(X,
3460                                        ConstantInt::get(I.getContext(), Mask));
3461      }
3462      // We can simplify ((X << C) >>s C) into a trunc + sext.
3463      // NOTE: we could do this for any C, but that would make 'unusual' integer
3464      // types.  For now, just stick to ones well-supported by the code
3465      // generators.
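      // e.g. (illustrative, i32): ((X << 24) >>s 24) becomes
      //    sext (trunc i32 %X to i8) to i32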
3466      const Type *SExtType = 0;
3467      switch (Ty->getBitWidth() - ShiftAmt1) {
3468      case 1  :
3469      case 8  :
3470      case 16 :
3471      case 32 :
3472      case 64 :
3473      case 128:
3474        SExtType = IntegerType::get(I.getContext(),
3475                                    Ty->getBitWidth() - ShiftAmt1);
3476        break;
3477      default: break;
3478      }
3479      if (SExtType)
3480        return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty);
3481      // Otherwise, we can't handle it yet.
3482    } else if (ShiftAmt1 < ShiftAmt2) {
3483      uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
3484
3485      // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
3486      if (I.getOpcode() == Instruction::Shl) {
3487        assert(ShiftOp->getOpcode() == Instruction::LShr ||
3488               ShiftOp->getOpcode() == Instruction::AShr);
3489        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
3490
3491        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
3492        return BinaryOperator::CreateAnd(Shift,
3493                                         ConstantInt::get(I.getContext(),Mask));
3494      }
3495
3496      // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
3497      if (I.getOpcode() == Instruction::LShr) {
3498        assert(ShiftOp->getOpcode() == Instruction::Shl);
3499        Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
3500
3501        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
3502        return BinaryOperator::CreateAnd(Shift,
3503                                         ConstantInt::get(I.getContext(),Mask));
3504      }
3505
3506      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
3507    } else {
3508      assert(ShiftAmt2 < ShiftAmt1);
3509      uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
3510
3511      // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
3512      if (I.getOpcode() == Instruction::Shl) {
3513        assert(ShiftOp->getOpcode() == Instruction::LShr ||
3514               ShiftOp->getOpcode() == Instruction::AShr);
3515        Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
3516                                            ConstantInt::get(Ty, ShiftDiff));
3517
3518        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
3519        return BinaryOperator::CreateAnd(Shift,
3520                                         ConstantInt::get(I.getContext(),Mask));
3521      }
3522
3523      // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
3524      if (I.getOpcode() == Instruction::LShr) {
3525        assert(ShiftOp->getOpcode() == Instruction::Shl);
3526        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
3527
3528        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
3529        return BinaryOperator::CreateAnd(Shift,
3530                                         ConstantInt::get(I.getContext(),Mask));
3531      }
3532
3533      // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
3534    }
3535  }
3536  return 0;
3537}
3538
3539
3540
3541/// FindElementAtOffset - Given a type and a constant offset, determine whether
3542/// or not there is a sequence of GEP indices into the type that will land us at
3543/// the specified offset.  If so, fill them into NewIndices and return the
3544/// resultant element type, otherwise return null.
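/// For example (illustrative): for Ty = {i32, [2 x i16]} and Offset = 6, this
/// fills NewIndices with [0, 1, 1] and returns i16, assuming a target where
/// i32 is four bytes and the struct has no tail padding.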
3545const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
3546                                          SmallVectorImpl<Value*> &NewIndices) {
3547  if (!TD) return 0;
3548  if (!Ty->isSized()) return 0;
3549
3550  // Start with the index over the outer type.  Note that the type size
3551  // might be zero (even if the offset isn't zero) if the indexed type
3552  // is something like [0 x {int, int}]
3553  const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
3554  int64_t FirstIdx = 0;
3555  if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
3556    FirstIdx = Offset/TySize;
3557    Offset -= FirstIdx*TySize;
3558
3559    // The division may leave a negative remainder; normalize into [0..TySize).
3560    if (Offset < 0) {
3561      --FirstIdx;
3562      Offset += TySize;
3563      assert(Offset >= 0);
3564    }
3565    assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
3566  }
3567
3568  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
3569
3570  // Index into the types.  If we fail, return null.
3571  while (Offset) {
3572    // Indexing into tail padding between struct/array elements.
3573    if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
3574      return 0;
3575
3576    if (const StructType *STy = dyn_cast<StructType>(Ty)) {
3577      const StructLayout *SL = TD->getStructLayout(STy);
3578      assert(Offset < (int64_t)SL->getSizeInBytes() &&
3579             "Offset must stay within the indexed type");
3580
3581      unsigned Elt = SL->getElementContainingOffset(Offset);
3582      NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
3583                                            Elt));
3584
3585      Offset -= SL->getElementOffset(Elt);
3586      Ty = STy->getElementType(Elt);
3587    } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
3588      uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
3589      assert(EltSize && "Cannot index into a zero-sized array");
3590      NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
3591      Offset %= EltSize;
3592      Ty = AT->getElementType();
3593    } else {
3594      // Otherwise, we can't index into the middle of this atomic type, bail.
3595      return 0;
3596    }
3597  }
3598
3599  return Ty;
3600}
3601
3602
3603/// EnforceKnownAlignment - If the specified pointer points to an object that
3604/// we control, modify the object's alignment to PrefAlign. This isn't
3605/// often possible though. If alignment is important, a more reliable approach
3606/// is to simply align all global variables and allocation instructions to
3607/// their preferred alignment from the beginning.
3608///
3609static unsigned EnforceKnownAlignment(Value *V,
3610                                      unsigned Align, unsigned PrefAlign) {
3611
3612  User *U = dyn_cast<User>(V);
3613  if (!U) return Align;
3614
3615  switch (Operator::getOpcode(U)) {
3616  default: break;
3617  case Instruction::BitCast:
3618    return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
3619  case Instruction::GetElementPtr: {
3620    // If all indexes are zero, it is just the alignment of the base pointer.
3621    bool AllZeroOperands = true;
3622    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
3623      if (!isa<Constant>(*i) ||
3624          !cast<Constant>(*i)->isNullValue()) {
3625        AllZeroOperands = false;
3626        break;
3627      }
3628
3629    if (AllZeroOperands) {
3630      // Treat this like a bitcast.
3631      return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
3632    }
3633    break;
3634  }
3635  }
3636
3637  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
3638    // If there is a large requested alignment and we can, bump up the alignment
3639    // of the global.
3640    if (!GV->isDeclaration()) {
3641      if (GV->getAlignment() >= PrefAlign)
3642        Align = GV->getAlignment();
3643      else {
3644        GV->setAlignment(PrefAlign);
3645        Align = PrefAlign;
3646      }
3647    }
3648  } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
3649    // If there is a requested alignment and if this is an alloca, round up.
3650    if (AI->getAlignment() >= PrefAlign)
3651      Align = AI->getAlignment();
3652    else {
3653      AI->setAlignment(PrefAlign);
3654      Align = PrefAlign;
3655    }
3656  }
3657
3658  return Align;
3659}
3660
3661/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
3662/// we can determine, return it, otherwise return 0.  If PrefAlign is specified,
3663/// and it is more than the alignment of the ultimate object, see if we can
3664/// increase the alignment of the ultimate object, making this check succeed.
3665unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
3666                                                  unsigned PrefAlign) {
3667  unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) :
3668                      sizeof(PrefAlign) * CHAR_BIT;
3669  APInt Mask = APInt::getAllOnesValue(BitWidth);
3670  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
3671  ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
3672  unsigned TrailZ = KnownZero.countTrailingOnes();
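  // e.g. four known-zero trailing bits imply (at least) 16-byte alignment.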
3673  unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
3674
3675  if (PrefAlign > Align)
3676    Align = EnforceKnownAlignment(V, Align, PrefAlign);
3677
3678  // Otherwise, no adjustment is needed.
3679  return Align;
3680}
3681
3682Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
3683  unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
3684  unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
3685  unsigned MinAlign = std::min(DstAlign, SrcAlign);
3686  unsigned CopyAlign = MI->getAlignment();
3687
3688  if (CopyAlign < MinAlign) {
3689    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
3690                                             MinAlign, false));
3691    return MI;
3692  }
3693
3694  // If the transfer length is 1/2/4/8 bytes, replace the memcpy/memmove
3695  // with a single load/store pair.
3696  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
3697  if (MemOpLength == 0) return 0;
3698
3699  // Source and destination pointer types are always "i8*" for the intrinsic.  See
3700  // if the size is something we can handle with a single primitive load/store.
3701  // A single load+store correctly handles overlapping memory in the memmove
3702  // case.
3703  unsigned Size = MemOpLength->getZExtValue();
3704  if (Size == 0) return MI;  // Delete this mem transfer.
3705
3706  if (Size > 8 || (Size&(Size-1)))
3707    return 0;  // If not 1/2/4/8 bytes, exit.
3708
3709  // Use an integer load+store unless we can find something better.
3710  Type *NewPtrTy =
3711            PointerType::getUnqual(IntegerType::get(MI->getContext(), Size<<3));
3712
3713  // Memcpy forces the use of i8* for the source and destination.  That means
3714  // that if you're using memcpy to move one double around, you'll get a cast
3715  // from double* to i8*.  We'd much rather use a double load+store rather than
3716  // an i64 load+store, here because this improves the odds that the source or
3717  // dest address will be promotable.  See if we can find a better type than the
3718  // integer datatype.
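  // For instance (illustrative): an 8-byte copy whose source pointer was
  // bitcast from a double* is better served by
  //    %v = load double* %src
  //    store double %v, double* %dst
  // than by an i64 load/store pair.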
3719  if (Value *Op = getBitCastOperand(MI->getOperand(1))) {
3720    const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType();
3721    if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
3722      // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
3723      // down through these levels if so.
3724      while (!SrcETy->isSingleValueType()) {
3725        if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
3726          if (STy->getNumElements() == 1)
3727            SrcETy = STy->getElementType(0);
3728          else
3729            break;
3730        } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
3731          if (ATy->getNumElements() == 1)
3732            SrcETy = ATy->getElementType();
3733          else
3734            break;
3735        } else
3736          break;
3737      }
3738
3739      if (SrcETy->isSingleValueType())
3740        NewPtrTy = PointerType::getUnqual(SrcETy);
3741    }
3742  }
3743
3744
3745  // If the memcpy/memmove provides better alignment info than we can
3746  // infer, use it.
3747  SrcAlign = std::max(SrcAlign, CopyAlign);
3748  DstAlign = std::max(DstAlign, CopyAlign);
3749
3750  Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy);
3751  Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy);
3752  Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
3753  InsertNewInstBefore(L, *MI);
3754  InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
3755
3756  // Set the size of the copy to 0; it will be deleted on the next iteration.
3757  MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
3758  return MI;
3759}
3760
3761Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
3762  unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
3763  if (MI->getAlignment() < Alignment) {
3764    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
3765                                             Alignment, false));
3766    return MI;
3767  }
3768
3769  // Extract the length and alignment and fill if they are constant.
3770  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
3771  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
3772  if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(MI->getContext()))
3773    return 0;
3774  uint64_t Len = LenC->getZExtValue();
3775  Alignment = MI->getAlignment();
3776
3777  // If the length is zero, this is a no-op
3778  if (Len == 0) return MI; // memset(d,c,0,a) -> noop
3779
3780  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
3781  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
3782    const Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.
3783
3784    Value *Dest = MI->getDest();
3785    Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));
3786
3787    // Alignment 0 means align 1 for memset, but not for store; be explicit.
3788    if (Alignment == 0) Alignment = 1;
3789
3790    // Extract the fill value and store.
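    // (Illustrative note: multiplying the fill byte by 0x0101010101010101
    //  splats it, e.g. 0xAB -> 0xABABABABABABABAB; the ConstantInt below is
    //  truncated to the store width.)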
3791    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
3792    InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill),
3793                                      Dest, false, Alignment), *MI);
3794
3795    // Set the length of the memset to 0; it will be deleted on the next iteration.
3796    MI->setLength(Constant::getNullValue(LenC->getType()));
3797    return MI;
3798  }
3799
3800  return 0;
3801}
3802
3803
3804/// visitCallInst - CallInst simplification.  This mostly only handles folding
3805/// of intrinsic instructions.  For normal calls, it allows visitCallSite to do
3806/// the heavy lifting.
3807///
3808Instruction *InstCombiner::visitCallInst(CallInst &CI) {
3809  if (isFreeCall(&CI))
3810    return visitFree(CI);
3811
3812  // If the caller function is nounwind, mark the call as nounwind, even if the
3813  // callee isn't.
3814  if (CI.getParent()->getParent()->doesNotThrow() &&
3815      !CI.doesNotThrow()) {
3816    CI.setDoesNotThrow();
3817    return &CI;
3818  }
3819
3820  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
3821  if (!II) return visitCallSite(&CI);
3822
3823  // Intrinsics cannot occur in an invoke, so handle them here instead of in
3824  // visitCallSite.
3825  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
3826    bool Changed = false;
3827
3828    // memmove/cpy/set of zero bytes is a noop.
3829    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
3830      if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
3831
3832      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
3833        if (CI->getZExtValue() == 1) {
3834          // FIXME: Replace the instruction with just byte operations.  We
3835          // would transform other cases to loads/stores, but we don't know
3836          // if the alignment is sufficient.
3837        }
3838    }
3839
3840    // If we have a memmove and the source operation is a constant global,
3841    // then the source and dest pointers can't alias, so we can change this
3842    // into a call to memcpy.
3843    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
3844      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
3845        if (GVSrc->isConstant()) {
3846          Module *M = CI.getParent()->getParent()->getParent();
3847          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
3848          const Type *Tys[1];
3849          Tys[0] = CI.getOperand(3)->getType();
3850          CI.setOperand(0,
3851                        Intrinsic::getDeclaration(M, MemCpyID, Tys, 1));
3852          Changed = true;
3853        }
3854    }
3855
3856    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
3857      // memmove(x,x,size) -> noop.
3858      if (MTI->getSource() == MTI->getDest())
3859        return EraseInstFromFunction(CI);
3860    }
3861
3862    // If we can determine a pointer alignment that is bigger than currently
3863    // set, update the alignment.
3864    if (isa<MemTransferInst>(MI)) {
3865      if (Instruction *I = SimplifyMemTransfer(MI))
3866        return I;
3867    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
3868      if (Instruction *I = SimplifyMemSet(MSI))
3869        return I;
3870    }
3871
3872    if (Changed) return II;
3873  }
3874
3875  switch (II->getIntrinsicID()) {
3876  default: break;
3877  case Intrinsic::bswap:
3878    // bswap(bswap(x)) -> x
3879    if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
3880      if (Operand->getIntrinsicID() == Intrinsic::bswap)
3881        return ReplaceInstUsesWith(CI, Operand->getOperand(1));
3882
3883    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
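    // e.g. (illustrative, i32 -> i16): this selects x's top two bytes in
    // their original order, so the result is trunc(lshr(x, 16)) with c = 16.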
3884    if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) {
3885      if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
3886        if (Operand->getIntrinsicID() == Intrinsic::bswap) {
3887          unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
3888                       TI->getType()->getPrimitiveSizeInBits();
3889          Value *CV = ConstantInt::get(Operand->getType(), C);
3890          Value *V = Builder->CreateLShr(Operand->getOperand(1), CV);
3891          return new TruncInst(V, TI->getType());
3892        }
3893    }
3894
3895    break;
3896  case Intrinsic::powi:
3897    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) {
3898      // powi(x, 0) -> 1.0
3899      if (Power->isZero())
3900        return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
3901      // powi(x, 1) -> x
3902      if (Power->isOne())
3903        return ReplaceInstUsesWith(CI, II->getOperand(1));
3904      // powi(x, -1) -> 1/x
3905      if (Power->isAllOnesValue())
3906        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
3907                                          II->getOperand(1));
3908    }
3909    break;
3910
3911  case Intrinsic::uadd_with_overflow: {
3912    Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
3913    const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
3914    uint32_t BitWidth = IT->getBitWidth();
3915    APInt Mask = APInt::getSignBit(BitWidth);
3916    APInt LHSKnownZero(BitWidth, 0);
3917    APInt LHSKnownOne(BitWidth, 0);
3918    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
3919    bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
3920    bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
3921
3922    if (LHSKnownNegative || LHSKnownPositive) {
3923      APInt RHSKnownZero(BitWidth, 0);
3924      APInt RHSKnownOne(BitWidth, 0);
3925      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
3926      bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
3927      bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
3928      if (LHSKnownNegative && RHSKnownNegative) {
3929        // The sign bit is set in both cases: this MUST overflow.
3930        // Create a simple add instruction, and insert it into the struct.
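            // (The intrinsic returns the pair {iN, i1}; the i1 overflow flag
            // is statically known true here, so only the add is computed.)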
3931        Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI);
3932        Worklist.Add(Add);
3933        Constant *V[] = {
3934          UndefValue::get(LHS->getType()),ConstantInt::getTrue(II->getContext())
3935        };
3936        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
3937        return InsertValueInst::Create(Struct, Add, 0);
3938      }
3939
3940      if (LHSKnownPositive && RHSKnownPositive) {
3941        // The sign bit is clear in both cases: this CANNOT overflow.
3942        // Create a simple add instruction, and insert it into the struct.
3943        Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI);
3944        Worklist.Add(Add);
3945        Constant *V[] = {
3946          UndefValue::get(LHS->getType()),
3947          ConstantInt::getFalse(II->getContext())
3948        };
3949        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
3950        return InsertValueInst::Create(Struct, Add, 0);
3951      }
3952    }
3953  }
3954  // FALL THROUGH uadd into sadd
3955  case Intrinsic::sadd_with_overflow:
3956    // Canonicalize constants into the RHS.
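        // e.g. sadd.with.overflow(i32 7, %x) -> sadd.with.overflow(%x, i32 7),
        // so the folds below only need to look for constants on the RHS.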
3957    if (isa<Constant>(II->getOperand(1)) &&
3958        !isa<Constant>(II->getOperand(2))) {
3959      Value *LHS = II->getOperand(1);
3960      II->setOperand(1, II->getOperand(2));
3961      II->setOperand(2, LHS);
3962      return II;
3963    }
3964
3965    // X + undef -> undef
3966    if (isa<UndefValue>(II->getOperand(2)))
3967      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
3968
3969    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
3970      // X + 0 -> {X, false}
3971      if (RHS->isZero()) {
3972        Constant *V[] = {
3973          UndefValue::get(II->getOperand(1)->getType()),
3974          ConstantInt::getFalse(II->getContext())
3975        };
3976        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
3977        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
3978      }
3979    }
3980    break;
3981  case Intrinsic::usub_with_overflow:
3982  case Intrinsic::ssub_with_overflow:
3983    // undef - X -> undef
3984    // X - undef -> undef
3985    if (isa<UndefValue>(II->getOperand(1)) ||
3986        isa<UndefValue>(II->getOperand(2)))
3987      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
3988
3989    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
3990      // X - 0 -> {X, false}
3991      if (RHS->isZero()) {
3992        Constant *V[] = {
3993          UndefValue::get(II->getOperand(1)->getType()),
3994          ConstantInt::getFalse(II->getContext())
3995        };
3996        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
3997        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
3998      }
3999    }
4000    break;
4001  case Intrinsic::umul_with_overflow:
4002  case Intrinsic::smul_with_overflow:
4003    // Canonicalize constants into the RHS.
4004    if (isa<Constant>(II->getOperand(1)) &&
4005        !isa<Constant>(II->getOperand(2))) {
4006      Value *LHS = II->getOperand(1);
4007      II->setOperand(1, II->getOperand(2));
4008      II->setOperand(2, LHS);
4009      return II;
4010    }
4011
4012    // X * undef -> undef
4013    if (isa<UndefValue>(II->getOperand(2)))
4014      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
4015
4016    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) {
4017      // X*0 -> {0, false}
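          // (The null value of the {iN, i1} result type is {0, false}, which
          // is exactly the required result, so one constant suffices.)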
4018      if (RHSI->isZero())
4019        return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
4020
4021      // X * 1 -> {X, false}
4022      if (RHSI->equalsInt(1)) {
4023        Constant *V[] = {
4024          UndefValue::get(II->getOperand(1)->getType()),
4025          ConstantInt::getFalse(II->getContext())
4026        };
4027        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
4028        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
4029      }
4030    }
4031    break;
4032  case Intrinsic::ppc_altivec_lvx:
4033  case Intrinsic::ppc_altivec_lvxl:
4034  case Intrinsic::x86_sse_loadu_ps:
4035  case Intrinsic::x86_sse2_loadu_pd:
4036  case Intrinsic::x86_sse2_loadu_dq:
4037    // Turn PPC lvx     -> load if the pointer is known aligned.
4038    // Turn X86 loadups -> load if the pointer is known aligned.
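        // For example (a sketch; %p is known to be 16-byte aligned):
        //   %v = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %p)
        // becomes
        //   %c = bitcast i8* %p to <4 x float>*
        //   %v = load <4 x float>* %c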
4039    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
4040      Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
4041                                         PointerType::getUnqual(II->getType()));
4042      return new LoadInst(Ptr);
4043    }
4044    break;
4045  case Intrinsic::ppc_altivec_stvx:
4046  case Intrinsic::ppc_altivec_stvxl:
4047    // Turn stvx -> store if the pointer is known aligned.
4048    if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
4049      const Type *OpPtrTy =
4050        PointerType::getUnqual(II->getOperand(1)->getType());
4051      Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
4052      return new StoreInst(II->getOperand(1), Ptr);
4053    }
4054    break;
4055  case Intrinsic::x86_sse_storeu_ps:
4056  case Intrinsic::x86_sse2_storeu_pd:
4057  case Intrinsic::x86_sse2_storeu_dq:
4058    // Turn X86 storeu -> store if the pointer is known aligned.
4059    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
4060      const Type *OpPtrTy =
4061        PointerType::getUnqual(II->getOperand(2)->getType());
4062      Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
4063      return new StoreInst(II->getOperand(2), Ptr);
4064    }
4065    break;
4066
4067  case Intrinsic::x86_sse_cvttss2si: {
4068    // This intrinsic only demands the 0th element of its input vector.  If
4069    // we can simplify the input based on that, do so now.
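        // e.g. only lane 0 of the <4 x float> operand feeds cvttss2si, so any
        // computation that only defines lanes 1-3 of the operand is dead.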
4070    unsigned VWidth =
4071      cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
4072    APInt DemandedElts(VWidth, 1);
4073    APInt UndefElts(VWidth, 0);
4074    if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
4075                                              UndefElts)) {
4076      II->setOperand(1, V);
4077      return II;
4078    }
4079    break;
4080  }
4081
4082  case Intrinsic::ppc_altivec_vperm:
4083    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
4084    if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
4085      assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
4086
4087      // Check that all of the elements are integer constants or undefs.
4088      bool AllEltsOk = true;
4089      for (unsigned i = 0; i != 16; ++i) {
4090        if (!isa<ConstantInt>(Mask->getOperand(i)) &&
4091            !isa<UndefValue>(Mask->getOperand(i))) {
4092          AllEltsOk = false;
4093          break;
4094        }
4095      }
4096
4097      if (AllEltsOk) {
4098        // Cast the input vectors to byte vectors.
4099        Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
4100        Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
4101        Value *Result = UndefValue::get(Op0->getType());
4102
4103        // Only extract each element once.
4104        Value *ExtractedElts[32];
4105        memset(ExtractedElts, 0, sizeof(ExtractedElts));
4106
4107        for (unsigned i = 0; i != 16; ++i) {
4108          if (isa<UndefValue>(Mask->getOperand(i)))
4109            continue;
4110          unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
4111          Idx &= 31;  // Match the hardware behavior.
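              // (vperm reads only the low 5 bits of each selector byte:
              // indices 0-15 pick from the first source vector, 16-31 from
              // the second.)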
4112
4113          if (ExtractedElts[Idx] == 0) {
4114            ExtractedElts[Idx] =
4115              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
4116                  ConstantInt::get(Type::getInt32Ty(II->getContext()),
4117                                   Idx&15, false), "tmp");
4118          }
4119
4120          // Insert this value into the result vector.
4121          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
4122                         ConstantInt::get(Type::getInt32Ty(II->getContext()),
4123                                          i, false), "tmp");
4124        }
4125        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
4126      }
4127    }
4128    break;
4129
4130  case Intrinsic::stackrestore: {
4131    // If the save is right next to the restore, remove the restore.  This can
4132    // happen when variable allocas are DCE'd.
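        // For example (a sketch):
        //   %sp = call i8* @llvm.stacksave()
        //   call void @llvm.stackrestore(i8* %sp)
        // The restore reinstates the state just saved, so it can be removed.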
4133    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
4134      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
4135        BasicBlock::iterator BI = SS;
4136        if (&*++BI == II)
4137          return EraseInstFromFunction(CI);
4138      }
4139    }
4140
4141    // Scan down this block to see if there is another stack restore in the
4142    // same block without an intervening call/alloca.
4143    BasicBlock::iterator BI = II;
4144    TerminatorInst *TI = II->getParent()->getTerminator();
4145    bool CannotRemove = false;
4146    for (++BI; &*BI != TI; ++BI) {
4147      if (isa<AllocaInst>(BI) || isMalloc(BI)) {
4148        CannotRemove = true;
4149        break;
4150      }
4151      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
4152        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
4153          // If there is a stackrestore below this one, remove this one.
4154          if (II->getIntrinsicID() == Intrinsic::stackrestore)
4155            return EraseInstFromFunction(CI);
4156          // Otherwise, ignore the intrinsic.
4157        } else {
4158          // If we found a non-intrinsic call, we can't remove the stack
4159          // restore.
4160          CannotRemove = true;
4161          break;
4162        }
4163      }
4164    }
4165
4166    // If the stack restore is in a return/unwind block and if there are no
4167    // allocas or calls between the restore and the return, nuke the restore.
4168    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
4169      return EraseInstFromFunction(CI);
4170    break;
4171  }
4172  }
4173
4174  return visitCallSite(II);
4175}
4176
4177// InvokeInst simplification
4178//
4179Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
4180  return visitCallSite(&II);
4181}
4182
4183/// isSafeToEliminateVarargsCast - If this cast does not affect the value
4184/// passed through the varargs area, we can eliminate the use of the cast.
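    /// For example (a sketch; @f is some hypothetical varargs callee), given
    ///   %c = bitcast i32* %p to i8*
    ///   call void (i32, ...)* @f(i32 1, i8* %c)
    /// the cast is lossless and %c is not byval, so %p can be passed directly.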
4185static bool isSafeToEliminateVarargsCast(const CallSite CS,
4186                                         const CastInst * const CI,
4187                                         const TargetData * const TD,
4188                                         const int ix) {
4189  if (!CI->isLosslessCast())
4190    return false;
4191
4192  // The size of ByVal arguments is derived from the type, so we
4193  // can't change to a type with a different size.  If the size were
4194  // passed explicitly we could avoid this check.
4195  if (!CS.paramHasAttr(ix, Attribute::ByVal))
4196    return true;
4197
4198  const Type* SrcTy =
4199            cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
4200  const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
4201  if (!SrcTy->isSized() || !DstTy->isSized())
4202    return false;
4203  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
4204    return false;
4205  return true;
4206}
4207
4208// visitCallSite - Improvements for call and invoke instructions.
4209//
4210Instruction *InstCombiner::visitCallSite(CallSite CS) {
4211  bool Changed = false;
4212
4213  // If the callee is a constexpr cast of a function, attempt to move the cast
4214  // to the arguments of the call/invoke.
4215  if (transformConstExprCastCall(CS)) return 0;
4216
4217  Value *Callee = CS.getCalledValue();
4218
4219  if (Function *CalleeF = dyn_cast<Function>(Callee))
4220    if (CalleeF->getCallingConv() != CS.getCallingConv()) {
4221      Instruction *OldCall = CS.getInstruction();
4222      // If the call and callee calling conventions don't match, this call must
4223      // be unreachable, as the call is undefined.
4224      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
4225                UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
4226                                  OldCall);
4227      // If OldCall does not return void then replaceAllUsesWith undef.
4228      // This allows ValueHandlers and custom metadata to adjust themselves.
4229      if (!OldCall->getType()->isVoidTy())
4230        OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
4231      if (isa<CallInst>(OldCall))   // Not worth removing an invoke here.
4232        return EraseInstFromFunction(*OldCall);
4233      return 0;
4234    }
4235
4236  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
4237    // This instruction is not reachable; just remove it.  We insert a store to
4238    // undef so that we know that this code is not reachable, despite the fact
4239    // that we can't modify the CFG here.
4240    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
4241               UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
4242                  CS.getInstruction());
4243
4244    // If CS does not return void then replaceAllUsesWith undef.
4245    // This allows ValueHandlers and custom metadata to adjust themselves.
4246    if (!CS.getInstruction()->getType()->isVoidTy())
4247      CS.getInstruction()->
4248        replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
4249
4250    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
4251      // Don't break the CFG; insert a dummy cond branch.
4252      BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
4253                         ConstantInt::getTrue(Callee->getContext()), II);
4254    }
4255    return EraseInstFromFunction(*CS.getInstruction());
4256  }
4257
4258  if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
4259    if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
4260      if (In->getIntrinsicID() == Intrinsic::init_trampoline)
4261        return transformCallThroughTrampoline(CS);
4262
4263  const PointerType *PTy = cast<PointerType>(Callee->getType());
4264  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4265  if (FTy->isVarArg()) {
4266    int ix = FTy->getNumParams() + (isa<InvokeInst>(CS.getInstruction()) ? 3:1);
4267    // See if we can optimize any arguments passed through the varargs area of
4268    // the call.
4269    for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
4270           E = CS.arg_end(); I != E; ++I, ++ix) {
4271      CastInst *CI = dyn_cast<CastInst>(*I);
4272      if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
4273        *I = CI->getOperand(0);
4274        Changed = true;
4275      }
4276    }
4277  }
4278
4279  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
4280    // Inline asm calls cannot throw - mark them 'nounwind'.
4281    CS.setDoesNotThrow();
4282    Changed = true;
4283  }
4284
4285  return Changed ? CS.getInstruction() : 0;
4286}
4287
4288// transformConstExprCastCall - If the callee is a constexpr cast of a function,
4289// attempt to move the cast to the arguments of the call/invoke.
4290//
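    // For example (a sketch):
    //   %r = call i32 bitcast (i32 (i8*)* @f to i32 (i32*)*)(i32* %p)
    // becomes a direct call with the argument cast instead:
    //   %c = bitcast i32* %p to i8*
    //   %r = call i32 @f(i8* %c)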
4291bool InstCombiner::transformConstExprCastCall(CallSite CS) {
4292  if (!isa<ConstantExpr>(CS.getCalledValue())) return false;
4293  ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue());
4294  if (CE->getOpcode() != Instruction::BitCast ||
4295      !isa<Function>(CE->getOperand(0)))
4296    return false;
4297  Function *Callee = cast<Function>(CE->getOperand(0));
4298  Instruction *Caller = CS.getInstruction();
4299  const AttrListPtr &CallerPAL = CS.getAttributes();
4300
4301  // Okay, this is a cast from a function to a different type.  Unless doing so
4302  // would cause a type conversion of one of our arguments, change this call to
4303    // be a direct call with arguments cast to the appropriate types.
4304  //
4305  const FunctionType *FT = Callee->getFunctionType();
4306  const Type *OldRetTy = Caller->getType();
4307  const Type *NewRetTy = FT->getReturnType();
4308
4309  if (isa<StructType>(NewRetTy))
4310    return false; // TODO: Handle multiple return values.
4311
4312  // Check to see if we are changing the return type...
4313  if (OldRetTy != NewRetTy) {
4314    if (Callee->isDeclaration() &&
4315        // Conversion is ok if changing from one pointer type to another or from
4316        // a pointer to an integer of the same size.
4317        !((isa<PointerType>(OldRetTy) || !TD ||
4318           OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
4319          (isa<PointerType>(NewRetTy) || !TD ||
4320           NewRetTy == TD->getIntPtrType(Caller->getContext()))))
4321      return false;   // Cannot transform this return value.
4322
4323    if (!Caller->use_empty() &&
4324        // void -> non-void is handled specially
4325        !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
4326      return false;   // Cannot transform this return value.
4327
4328    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4329      Attributes RAttrs = CallerPAL.getRetAttributes();
4330      if (RAttrs & Attribute::typeIncompatible(NewRetTy))
4331        return false;   // Attribute not compatible with transformed value.
4332    }
4333
4334    // If the callsite is an invoke instruction, and the return value is used by
4335    // a PHI node in a successor, we cannot change the return type of the call
4336    // because there is no place to put the cast instruction (without breaking
4337    // the critical edge).  Bail out in this case.
4338    if (!Caller->use_empty())
4339      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
4340        for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
4341             UI != E; ++UI)
4342          if (PHINode *PN = dyn_cast<PHINode>(*UI))
4343            if (PN->getParent() == II->getNormalDest() ||
4344                PN->getParent() == II->getUnwindDest())
4345              return false;
4346  }
4347
4348  unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
4349  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
4350
4351  CallSite::arg_iterator AI = CS.arg_begin();
4352  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
4353    const Type *ParamTy = FT->getParamType(i);
4354    const Type *ActTy = (*AI)->getType();
4355
4356    if (!CastInst::isCastable(ActTy, ParamTy))
4357      return false;   // Cannot transform this parameter value.
4358
4359    if (CallerPAL.getParamAttributes(i + 1)
4360        & Attribute::typeIncompatible(ParamTy))
4361      return false;   // Attribute not compatible with transformed value.
4362
4363    // Converting from one pointer type to another or between a pointer and an
4364    // integer of the same size is safe even if we do not have a body.
4365    bool isConvertible = ActTy == ParamTy ||
4366      (TD && ((isa<PointerType>(ParamTy) ||
4367      ParamTy == TD->getIntPtrType(Caller->getContext())) &&
4368              (isa<PointerType>(ActTy) ||
4369              ActTy == TD->getIntPtrType(Caller->getContext()))));
4370    if (Callee->isDeclaration() && !isConvertible) return false;
4371  }
4372
4373  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
4374      Callee->isDeclaration())
4375    return false;   // Do not delete arguments unless we have a function body.
4376
4377  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
4378      !CallerPAL.isEmpty())
4379    // In this case we have more arguments than the new function type, but we
4380    // won't be dropping them.  Check that these extra arguments have attributes
4381    // that are compatible with being a vararg call argument.
4382    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
4383      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
4384        break;
4385      Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
4386      if (PAttrs & Attribute::VarArgsIncompatible)
4387        return false;
4388    }
4389
4390  // Okay, we decided that this is a safe thing to do: go ahead and start
4391  // inserting cast instructions as necessary...
4392  std::vector<Value*> Args;
4393  Args.reserve(NumActualArgs);
4394  SmallVector<AttributeWithIndex, 8> attrVec;
4395  attrVec.reserve(NumCommonArgs);
4396
4397  // Get any return attributes.
4398  Attributes RAttrs = CallerPAL.getRetAttributes();
4399
4400  // If the return value is not being used, the type may not be compatible
4401  // with the existing attributes.  Wipe out any problematic attributes.
4402  RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
4403
4404  // Add the new return attributes.
4405  if (RAttrs)
4406    attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
4407
4408  AI = CS.arg_begin();
4409  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
4410    const Type *ParamTy = FT->getParamType(i);
4411    if ((*AI)->getType() == ParamTy) {
4412      Args.push_back(*AI);
4413    } else {
4414      Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
4415          false, ParamTy, false);
4416      Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
4417    }
4418
4419    // Add any parameter attributes.
4420    if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
4421      attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
4422  }
4423
4424  // If the new callee takes more parameters than the call supplied, pass
4425  // null values for the extra parameters now.
4426  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
4427    Args.push_back(Constant::getNullValue(FT->getParamType(i)));
4428
4429  // If we are removing arguments to the function, emit an obnoxious warning.
4430  if (FT->getNumParams() < NumActualArgs) {
4431    if (!FT->isVarArg()) {
4432      errs() << "WARNING: While resolving call to function '"
4433             << Callee->getName() << "' arguments were dropped!\n";
4434    } else {
4435      // Add all of the arguments in their promoted form to the arg list.
4436      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
4437        const Type *PTy = getPromotedType((*AI)->getType());
4438        if (PTy != (*AI)->getType()) {
4439          // Must promote to pass through va_arg area!
4440          Instruction::CastOps opcode =
4441            CastInst::getCastOpcode(*AI, false, PTy, false);
4442          Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
4443        } else {
4444          Args.push_back(*AI);
4445        }
4446
4447        // Add any parameter attributes.
4448        if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
4449          attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
4450      }
4451    }
4452  }
4453
4454  if (Attributes FnAttrs = CallerPAL.getFnAttributes())
4455    attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
4456
4457  if (NewRetTy->isVoidTy())
4458    Caller->setName("");   // Void type should not have a name.
4459
4460  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
4461                                                     attrVec.end());
4462
4463  Instruction *NC;
4464  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4465    NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
4466                            Args.begin(), Args.end(),
4467                            Caller->getName(), Caller);
4468    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
4469    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
4470  } else {
4471    NC = CallInst::Create(Callee, Args.begin(), Args.end(),
4472                          Caller->getName(), Caller);
4473    CallInst *CI = cast<CallInst>(Caller);
4474    if (CI->isTailCall())
4475      cast<CallInst>(NC)->setTailCall();
4476    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
4477    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
4478  }
4479
4480  // Insert a cast of the return type as necessary.
4481  Value *NV = NC;
4482  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
4483    if (!NV->getType()->isVoidTy()) {
4484      Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
4485                                                            OldRetTy, false);
4486      NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
4487
4488      // If this is an invoke instruction, we should insert it after the first
4489      // non-phi instruction in the normal successor block.
4490      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4491        BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
4492        InsertNewInstBefore(NC, *I);
4493      } else {
4494        // Otherwise, it's a call; just insert the cast right after the call instr.
4495        InsertNewInstBefore(NC, *Caller);
4496      }
4497      Worklist.AddUsersToWorkList(*Caller);
4498    } else {
4499      NV = UndefValue::get(Caller->getType());
4500    }
4501  }
4502
4503
4504  if (!Caller->use_empty())
4505    Caller->replaceAllUsesWith(NV);
4506
4507  EraseInstFromFunction(*Caller);
4508  return true;
4509}
4510
4511// transformCallThroughTrampoline - Turn a call to a function created by the
4512// init_trampoline intrinsic into a direct call to the underlying function.
4513//
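    // For example (a sketch), if the trampoline was initialized over
    //   void @f(i8* nest %chain, i32 %x)
    // then "call void %tramp(i32 %x)" becomes "call void @f(i8* %c, i32 %x)",
    // where %c is the chain value recorded by the init_trampoline call.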
4514Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
4515  Value *Callee = CS.getCalledValue();
4516  const PointerType *PTy = cast<PointerType>(Callee->getType());
4517  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4518  const AttrListPtr &Attrs = CS.getAttributes();
4519
4520  // If the call already has the 'nest' attribute somewhere then give up -
4521  // otherwise 'nest' would occur twice after splicing in the chain.
4522  if (Attrs.hasAttrSomewhere(Attribute::Nest))
4523    return 0;
4524
4525  IntrinsicInst *Tramp =
4526    cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
4527
4528  Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
4529  const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
4530  const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
4531
4532  const AttrListPtr &NestAttrs = NestF->getAttributes();
4533  if (!NestAttrs.isEmpty()) {
4534    unsigned NestIdx = 1;
4535    const Type *NestTy = 0;
4536    Attributes NestAttr = Attribute::None;
4537
4538    // Look for a parameter marked with the 'nest' attribute.
4539    for (FunctionType::param_iterator I = NestFTy->param_begin(),
4540         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
4541      if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
4542        // Record the parameter type and any other attributes.
4543        NestTy = *I;
4544        NestAttr = NestAttrs.getParamAttributes(NestIdx);
4545        break;
4546      }
4547
4548    if (NestTy) {
4549      Instruction *Caller = CS.getInstruction();
4550      std::vector<Value*> NewArgs;
4551      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
4552
4553      SmallVector<AttributeWithIndex, 8> NewAttrs;
4554      NewAttrs.reserve(Attrs.getNumSlots() + 1);
4555
4556      // Insert the nest argument into the call argument list, which may
4557      // mean appending it.  Likewise for attributes.
4558
4559      // Add any result attributes.
4560      if (Attributes Attr = Attrs.getRetAttributes())
4561        NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
4562
4563      {
4564        unsigned Idx = 1;
4565        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
4566        do {
4567          if (Idx == NestIdx) {
4568            // Add the chain argument and attributes.
4569            Value *NestVal = Tramp->getOperand(3);
4570            if (NestVal->getType() != NestTy)
4571              NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
4572            NewArgs.push_back(NestVal);
4573            NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
4574          }
4575
4576          if (I == E)
4577            break;
4578
4579          // Add the original argument and attributes.
4580          NewArgs.push_back(*I);
4581          if (Attributes Attr = Attrs.getParamAttributes(Idx))
4582            NewAttrs.push_back
4583              (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
4584
4585          ++Idx, ++I;
4586        } while (1);
4587      }
4588
4589      // Add any function attributes.
4590      if (Attributes Attr = Attrs.getFnAttributes())
4591        NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
4592
4593      // The trampoline may have been bitcast to a bogus type (FTy).
4594      // Handle this by synthesizing a new function type, equal to FTy
4595      // with the chain parameter inserted.
4596
4597      std::vector<const Type*> NewTypes;
4598      NewTypes.reserve(FTy->getNumParams()+1);
4599
4600      // Insert the chain's type into the list of parameter types, which may
4601      // mean appending it.
4602      {
4603        unsigned Idx = 1;
4604        FunctionType::param_iterator I = FTy->param_begin(),
4605          E = FTy->param_end();
4606
4607        do {
4608          if (Idx == NestIdx)
4609            // Add the chain's type.
4610            NewTypes.push_back(NestTy);
4611
4612          if (I == E)
4613            break;
4614
4615          // Add the original type.
4616          NewTypes.push_back(*I);
4617
4618          ++Idx, ++I;
4619        } while (1);
4620      }
4621
4622      // Replace the trampoline call with a direct call.  Let the generic
4623      // code sort out any function type mismatches.
4624      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
4625                                                FTy->isVarArg());
4626      Constant *NewCallee =
4627        NestF->getType() == PointerType::getUnqual(NewFTy) ?
4628        NestF : ConstantExpr::getBitCast(NestF,
4629                                         PointerType::getUnqual(NewFTy));
4630      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
4631                                                   NewAttrs.end());
4632
4633      Instruction *NewCaller;
4634      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4635        NewCaller = InvokeInst::Create(NewCallee,
4636                                       II->getNormalDest(), II->getUnwindDest(),
4637                                       NewArgs.begin(), NewArgs.end(),
4638                                       Caller->getName(), Caller);
4639        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
4640        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
4641      } else {
4642        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
4643                                     Caller->getName(), Caller);
4644        if (cast<CallInst>(Caller)->isTailCall())
4645          cast<CallInst>(NewCaller)->setTailCall();
4646        cast<CallInst>(NewCaller)->
4647          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
4648        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
4649      }
4650      if (!Caller->getType()->isVoidTy())
4651        Caller->replaceAllUsesWith(NewCaller);
4652      Caller->eraseFromParent();
4653      Worklist.Remove(Caller);
4654      return 0;
4655    }
4656  }
4657
4658  // Replace the trampoline call with a direct call.  Since there is no 'nest'
4659  // parameter, there is no need to adjust the argument list.  Let the generic
4660  // code sort out any function type mismatches.
4661  Constant *NewCallee =
4662    NestF->getType() == PTy ? NestF :
4663                              ConstantExpr::getBitCast(NestF, PTy);
4664  CS.setCalledFunction(NewCallee);
4665  return CS.getInstruction();
4666}
4667
4668
4669
4670Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
4671  SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
4672
4673  if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
4674    return ReplaceInstUsesWith(GEP, V);
4675
4676  Value *PtrOp = GEP.getOperand(0);
4677
4678  if (isa<UndefValue>(GEP.getOperand(0)))
4679    return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
4680
4681  // Eliminate unneeded casts for indices.
4682  if (TD) {
4683    bool MadeChange = false;
4684    unsigned PtrSize = TD->getPointerSizeInBits();
4685
4686    gep_type_iterator GTI = gep_type_begin(GEP);
4687    for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
4688         I != E; ++I, ++GTI) {
4689      if (!isa<SequentialType>(*GTI)) continue;
4690
4691      // If we are using a wider index than needed for this platform, shrink it
4692      // to what we need.  If narrower, sign-extend it to what we need.  This
4693      // explicit cast can make subsequent optimizations more obvious.
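          // e.g. with 64-bit pointers, "gep %p, i32 %i" becomes
          // "%e = sext i32 %i to i64" followed by "gep %p, i64 %e" (a sketch).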
4694      unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
4695      if (OpBits == PtrSize)
4696        continue;
4697
4698      *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
4699      MadeChange = true;
4700    }
4701    if (MadeChange) return &GEP;
4702  }
4703
4704  // Combine Indices - If the source pointer to this getelementptr instruction
4705  // is a getelementptr instruction, combine the indices of the two
4706  // getelementptr instructions into a single instruction.
4707  //
4708  if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
4709    // Note that if our source is a gep chain itself, we wait for that
4710    // chain to be resolved before we perform this transformation.  This
4711    // avoids us creating a TON of code in some cases.
4712    //
4713    if (GetElementPtrInst *SrcGEP =
4714          dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
4715      if (SrcGEP->getNumOperands() == 2)
4716        return 0;   // Wait until our source is folded to completion.
4717
4718    SmallVector<Value*, 8> Indices;
4719
4720    // Find out whether the last index in the source GEP is a sequential idx.
4721    bool EndsWithSequential = false;
4722    for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
4723         I != E; ++I)
4724      EndsWithSequential = !isa<StructType>(*I);
4725
4726    // Can we combine the two pointer arithmetic offsets?
4727    if (EndsWithSequential) {
4728      // Replace: gep (gep %P, long B), long A, ...
4729      // With:    T = long A+B; gep %P, T, ...
4730      //
4731      Value *Sum;
4732      Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
4733      Value *GO1 = GEP.getOperand(1);
4734      if (SO1 == Constant::getNullValue(SO1->getType())) {
4735        Sum = GO1;
4736      } else if (GO1 == Constant::getNullValue(GO1->getType())) {
4737        Sum = SO1;
4738      } else {
4739        // If they aren't the same type, then the input hasn't been processed
4740        // by the loop above yet (which canonicalizes sequential index types to
4741        // intptr_t).  Just avoid transforming this until the input has been
4742        // normalized.
4743        if (SO1->getType() != GO1->getType())
4744          return 0;
4745        Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
4746      }
4747
4748      // Update the GEP in place if possible.
4749      if (Src->getNumOperands() == 2) {
4750        GEP.setOperand(0, Src->getOperand(0));
4751        GEP.setOperand(1, Sum);
4752        return &GEP;
4753      }
4754      Indices.append(Src->op_begin()+1, Src->op_end()-1);
4755      Indices.push_back(Sum);
4756      Indices.append(GEP.op_begin()+2, GEP.op_end());
4757    } else if (isa<Constant>(*GEP.idx_begin()) &&
4758               cast<Constant>(*GEP.idx_begin())->isNullValue() &&
4759               Src->getNumOperands() != 1) {
4760      // Otherwise we can do the fold if the first index of the GEP is a zero
4761      Indices.append(Src->op_begin()+1, Src->op_end());
4762      Indices.append(GEP.idx_begin()+1, GEP.idx_end());
4763    }
4764
4765    if (!Indices.empty())
4766      return (cast<GEPOperator>(&GEP)->isInBounds() &&
4767              Src->isInBounds()) ?
4768        GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
4769                                          Indices.end(), GEP.getName()) :
4770        GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
4771                                  Indices.end(), GEP.getName());
4772  }
4773
4774  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
4775  if (Value *X = getBitCastOperand(PtrOp)) {
4776    assert(isa<PointerType>(X->getType()) && "Must be cast from pointer");
4777
4778    // If the input bitcast is actually "bitcast(bitcast(x))", then we don't
4779    // want to change the gep until the bitcasts are eliminated.
4780    if (getBitCastOperand(X)) {
4781      Worklist.AddValue(PtrOp);
4782      return 0;
4783    }
4784
4785    bool HasZeroPointerIndex = false;
4786    if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
4787      HasZeroPointerIndex = C->isZero();
4788
4789    // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
4790    // into     : GEP [10 x i8]* X, i32 0, ...
4791    //
4792    // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
4793    //           into     : GEP i8* X, ...
4794    //
4795    // This occurs when the program declares an array extern like "int X[];"
4796    if (HasZeroPointerIndex) {
4797      const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
4798      const PointerType *XTy = cast<PointerType>(X->getType());
4799      if (const ArrayType *CATy =
4800          dyn_cast<ArrayType>(CPTy->getElementType())) {
4801        // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
4802        if (CATy->getElementType() == XTy->getElementType()) {
4803          // -> GEP i8* X, ...
4804          SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end());
4805          return cast<GEPOperator>(&GEP)->isInBounds() ?
4806            GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(),
4807                                              GEP.getName()) :
4808            GetElementPtrInst::Create(X, Indices.begin(), Indices.end(),
4809                                      GEP.getName());
4810        }
4811
4812        if (const ArrayType *XATy = dyn_cast<ArrayType>(XTy->getElementType())){
4813          // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
4814          if (CATy->getElementType() == XATy->getElementType()) {
4815            // -> GEP [10 x i8]* X, i32 0, ...
4816            // At this point, we know that the cast source type is a pointer
4817            // to an array of the same type as the destination pointer
4818            // array.  Because the array type is never stepped over (there
4819            // is a leading zero) we can fold the cast into this GEP.
4820            GEP.setOperand(0, X);
4821            return &GEP;
4822          }
4823        }
4824      }
4825    } else if (GEP.getNumOperands() == 2) {
4826      // Transform things like:
4827      // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
4828      // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
4829      const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType();
4830      const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
4831      if (TD && isa<ArrayType>(SrcElTy) &&
4832          TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
4833          TD->getTypeAllocSize(ResElTy)) {
4834        Value *Idx[2];
4835        Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
4836        Idx[1] = GEP.getOperand(1);
4837        Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
4838          Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
4839          Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
4840        // V and GEP are both pointer types --> BitCast
4841        return new BitCastInst(NewGEP, GEP.getType());
4842      }
4843
4844      // Transform things like:
4845      // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
4846      //   (where tmp = 8*tmp2) into:
4847      // getelementptr [100 x double]* X, i32 0, i32 %tmp2; bitcast
4848
4849      if (TD && isa<ArrayType>(SrcElTy) &&
4850          ResElTy == Type::getInt8Ty(GEP.getContext())) {
4851        uint64_t ArrayEltSize =
4852            TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
4853
4854        // Check to see if "tmp" is scaled by a multiple of ArrayEltSize;
4855        // we allow either a mul, shift, or constant here.
4856        Value *NewIdx = 0;
4857        ConstantInt *Scale = 0;
4858        if (ArrayEltSize == 1) {
4859          NewIdx = GEP.getOperand(1);
4860          Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
4861        } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
4862          NewIdx = ConstantInt::get(CI->getType(), 1);
4863          Scale = CI;
4864        } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
4865          if (Inst->getOpcode() == Instruction::Shl &&
4866              isa<ConstantInt>(Inst->getOperand(1))) {
4867            ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
4868            uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
4869            Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
4870                                     1ULL << ShAmtVal);
4871            NewIdx = Inst->getOperand(0);
4872          } else if (Inst->getOpcode() == Instruction::Mul &&
4873                     isa<ConstantInt>(Inst->getOperand(1))) {
4874            Scale = cast<ConstantInt>(Inst->getOperand(1));
4875            NewIdx = Inst->getOperand(0);
4876          }
4877        }
4878
4879        // If the index yields exactly the right offset once the scale is
4880        // taken out, perform the transformation.  We don't know whether Scale
4881        // is signed, so we use the unsigned versions of the division/modulo
4882        // operations after making sure Scale's sign bit is clear.
4883        if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
4884            Scale->getZExtValue() % ArrayEltSize == 0) {
4885          Scale = ConstantInt::get(Scale->getType(),
4886                                   Scale->getZExtValue() / ArrayEltSize);
4887          if (Scale->getZExtValue() != 1) {
4888            Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
4889                                                       false /*ZExt*/);
4890            NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
4891          }
4892
4893          // Insert the new GEP instruction.
4894          Value *Idx[2];
4895          Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
4896          Idx[1] = NewIdx;
4897          Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
4898            Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
4899            Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
4900          // The NewGEP must be pointer typed, so must the old one -> BitCast
4901          return new BitCastInst(NewGEP, GEP.getType());
4902        }
4903      }
4904    }
4905  }
4906
4907  /// See if we can simplify:
4908  ///   X = bitcast A* to B*
4909  ///   Y = gep X, <...constant indices...>
4910  /// into a gep of the original struct.  This is important for SROA and alias
4911  /// analysis of unions.  If "A" is also a bitcast, wait for A/X to be merged.
4912  if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
4913    if (TD &&
4914        !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
4915      // Determine how much the GEP moves the pointer.  We are guaranteed to get
4916      // a constant back from EmitGEPOffset.
4917      ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP));
4918      int64_t Offset = OffsetV->getSExtValue();
4919
4920      // If this GEP instruction doesn't move the pointer, just replace the GEP
4921      // with a bitcast of the real input to the dest type.
4922      if (Offset == 0) {
4923        // If the bitcast is of an allocation, and the allocation will be
4924        // converted to match the type of the cast, don't touch this.
4925        if (isa<AllocaInst>(BCI->getOperand(0)) ||
4926            isMalloc(BCI->getOperand(0))) {
4927          // See if the bitcast simplifies, if so, don't nuke this GEP yet.
4928          if (Instruction *I = visitBitCast(*BCI)) {
4929            if (I != BCI) {
4930              I->takeName(BCI);
4931              BCI->getParent()->getInstList().insert(BCI, I);
4932              ReplaceInstUsesWith(*BCI, I);
4933            }
4934            return &GEP;
4935          }
4936        }
4937        return new BitCastInst(BCI->getOperand(0), GEP.getType());
4938      }
4939
4940      // Otherwise, if the offset is non-zero, we need to find out if there is a
4941      // field at Offset in 'A's type.  If so, we can pull the cast through the
4942      // GEP.
4943      SmallVector<Value*, 8> NewIndices;
4944      const Type *InTy =
4945        cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
4946      if (FindElementAtOffset(InTy, Offset, NewIndices)) {
4947        Value *NGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
4948          Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
4949                                     NewIndices.end()) :
4950          Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
4951                             NewIndices.end());
4952
4953        if (NGEP->getType() == GEP.getType())
4954          return ReplaceInstUsesWith(GEP, NGEP);
4955        NGEP->takeName(&GEP);
4956        return new BitCastInst(NGEP, GEP.getType());
4957      }
4958    }
4959  }
4960
4961  return 0;
4962}
4963
4964Instruction *InstCombiner::visitFree(Instruction &FI) {
4965  Value *Op = FI.getOperand(1);
4966
4967  // free undef -> unreachable.
4968  if (isa<UndefValue>(Op)) {
4969    // Insert a new store to undef because we cannot modify the CFG here.
4970    new StoreInst(ConstantInt::getTrue(FI.getContext()),
4971           UndefValue::get(Type::getInt1PtrTy(FI.getContext())), &FI);
4972    return EraseInstFromFunction(FI);
4973  }
4974
4975  // If we have 'free null', delete the instruction.  This can happen in STL code
4976  // when lots of inlining happens.
4977  if (isa<ConstantPointerNull>(Op))
4978    return EraseInstFromFunction(FI);
4979
4980  // If we have a malloc call whose only use is a free call, delete both.
4981  if (isMalloc(Op)) {
4982    if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
4983      if (Op->hasOneUse() && CI->hasOneUse()) {
4984        EraseInstFromFunction(FI);
4985        EraseInstFromFunction(*CI);
4986        return EraseInstFromFunction(*cast<Instruction>(Op));
4987      }
4988    } else {
4989      // Op is a call to malloc
4990      if (Op->hasOneUse()) {
4991        EraseInstFromFunction(FI);
4992        return EraseInstFromFunction(*cast<Instruction>(Op));
4993      }
4994    }
4995  }
4996
4997  return 0;
4998}
4999
5000
5001
5002Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
5003  // Change br (not X), label True, label False to: br X, label False, True
5004  Value *X = 0;
5005  BasicBlock *TrueDest;
5006  BasicBlock *FalseDest;
5007  if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
5008      !isa<Constant>(X)) {
5009    // Swap Destinations and condition...
5010    BI.setCondition(X);
5011    BI.setSuccessor(0, FalseDest);
5012    BI.setSuccessor(1, TrueDest);
5013    return &BI;
5014  }
5015
5016  // Canonicalize fcmp_one -> fcmp_ueq
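      // e.g. "br (fcmp one %a, %b), label %T, label %F" becomes
      //      "br (fcmp ueq %a, %b), label %F, label %T".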
5017  FCmpInst::Predicate FPred; Value *Y;
5018  if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
5019                             TrueDest, FalseDest)) &&
5020      BI.getCondition()->hasOneUse())
5021    if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
5022        FPred == FCmpInst::FCMP_OGE) {
5023      FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
5024      Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
5025
5026      // Swap Destinations and condition.
5027      BI.setSuccessor(0, FalseDest);
5028      BI.setSuccessor(1, TrueDest);
5029      Worklist.Add(Cond);
5030      return &BI;
5031    }
5032
5033  // Canonicalize icmp_ne -> icmp_eq
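      // e.g. "br (icmp ne %a, %b), label %T, label %F" becomes
      //      "br (icmp eq %a, %b), label %F, label %T".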
5034  ICmpInst::Predicate IPred;
5035  if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
5036                      TrueDest, FalseDest)) &&
5037      BI.getCondition()->hasOneUse())
5038    if (IPred == ICmpInst::ICMP_NE  || IPred == ICmpInst::ICMP_ULE ||
5039        IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
5040        IPred == ICmpInst::ICMP_SGE) {
5041      ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
5042      Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
5043      // Swap Destinations and condition.
5044      BI.setSuccessor(0, FalseDest);
5045      BI.setSuccessor(1, TrueDest);
5046      Worklist.Add(Cond);
5047      return &BI;
5048    }
5049
5050  return 0;
5051}
5052
5053Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
5054  Value *Cond = SI.getCondition();
5055  if (Instruction *I = dyn_cast<Instruction>(Cond)) {
5056    if (I->getOpcode() == Instruction::Add)
5057      if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
5058        // change 'switch (X+4) case 1:' into 'switch (X) case -3'
5059        for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
5060          SI.setOperand(i,
5061                   ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
5062                                                AddRHS));
5063        SI.setOperand(0, I->getOperand(0));
5064        Worklist.Add(I);
5065        return &SI;
5066      }
5067  }
5068  return 0;
5069}
5070
5071Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
5072  Value *Agg = EV.getAggregateOperand();
5073
5074  if (!EV.hasIndices())
5075    return ReplaceInstUsesWith(EV, Agg);
5076
5077  if (Constant *C = dyn_cast<Constant>(Agg)) {
5078    if (isa<UndefValue>(C))
5079      return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
5080
5081    if (isa<ConstantAggregateZero>(C))
5082      return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
5083
5084    if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
5085      // Extract the element indexed by the first index out of the constant
5086      Value *V = C->getOperand(*EV.idx_begin());
5087      if (EV.getNumIndices() > 1)
5088        // Extract the remaining indices out of the constant indexed by the
5089        // first index
5090        return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
5091      else
5092        return ReplaceInstUsesWith(EV, V);
5093    }
5094    return 0; // Can't handle other constants
5095  }
5096  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
5097    // We're extracting from an insertvalue instruction, compare the indices
5098    const unsigned *exti, *exte, *insi, *inse;
5099    for (exti = EV.idx_begin(), insi = IV->idx_begin(),
5100         exte = EV.idx_end(), inse = IV->idx_end();
5101         exti != exte && insi != inse;
5102         ++exti, ++insi) {
5103      if (*insi != *exti)
5104        // The insert and extract both reference distinct elements.
5105        // This means the extract is not influenced by the insert, and we can
5106        // replace the aggregate operand of the extract with the aggregate
5107        // operand of the insert. i.e., replace
5108        // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
5109        // %E = extractvalue { i32, { i32 } } %I, 0
5110        // with
5111        // %E = extractvalue { i32, { i32 } } %A, 0
5112        return ExtractValueInst::Create(IV->getAggregateOperand(),
5113                                        EV.idx_begin(), EV.idx_end());
5114    }
5115    if (exti == exte && insi == inse)
5116      // Both iterators are at the end: Index lists are identical. Replace
5117      // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
5118      // %C = extractvalue { i32, { i32 } } %B, 1, 0
5119      // with "i32 42"
5120      return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
5121    if (exti == exte) {
5122      // The extract list is a prefix of the insert list. i.e. replace
5123      // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
5124      // %E = extractvalue { i32, { i32 } } %I, 1
5125      // with
5126      // %X = extractvalue { i32, { i32 } } %A, 1
5127      // %E = insertvalue { i32 } %X, i32 42, 0
5128      // by switching the order of the insert and extract (though the
5129      // insertvalue should be left in, since it may have other uses).
5130      Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
5131                                                 EV.idx_begin(), EV.idx_end());
5132      return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
5133                                     insi, inse);
5134    }
5135    if (insi == inse)
5136      // The insert list is a prefix of the extract list.
5137      // We can simply remove the common indices from the extract and make it
5138      // operate on the inserted value instead of the insertvalue result.
5139      // i.e., replace
5140      // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
5141      // %E = extractvalue { i32, { i32 } } %I, 1, 0
5142      // with
5143      // %E = extractvalue { i32 } { i32 42 }, 0
5144      return ExtractValueInst::Create(IV->getInsertedValueOperand(),
5145                                      exti, exte);
5146  }
5147  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
5148    // We're extracting from an intrinsic; see if we're the only user, which
5149    // allows us to simplify multiple-result intrinsics to simpler things that
5150    // just get one value.
5151    if (II->hasOneUse()) {
5152      // Check if we're grabbing the overflow bit or the result of a 'with
5153      // overflow' intrinsic.  If it's the latter we can remove the intrinsic
5154      // and replace it with a traditional binary instruction.
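          // e.g. if this extract of field 0 is the only user of
          // uadd.with.overflow(%a, %b), the call becomes a plain "add %a, %b"
          // (handled below).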
5155      switch (II->getIntrinsicID()) {
5156      case Intrinsic::uadd_with_overflow:
5157      case Intrinsic::sadd_with_overflow:
5158        if (*EV.idx_begin() == 0) {  // Normal result.
5159          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
5160          II->replaceAllUsesWith(UndefValue::get(II->getType()));
5161          EraseInstFromFunction(*II);
5162          return BinaryOperator::CreateAdd(LHS, RHS);
5163        }
5164        break;
5165      case Intrinsic::usub_with_overflow:
5166      case Intrinsic::ssub_with_overflow:
5167        if (*EV.idx_begin() == 0) {  // Normal result.
5168          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
5169          II->replaceAllUsesWith(UndefValue::get(II->getType()));
5170          EraseInstFromFunction(*II);
5171          return BinaryOperator::CreateSub(LHS, RHS);
5172        }
5173        break;
5174      case Intrinsic::umul_with_overflow:
5175      case Intrinsic::smul_with_overflow:
5176        if (*EV.idx_begin() == 0) {  // Normal result.
5177          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
5178          II->replaceAllUsesWith(UndefValue::get(II->getType()));
5179          EraseInstFromFunction(*II);
5180          return BinaryOperator::CreateMul(LHS, RHS);
5181        }
5182        break;
5183      default:
5184        break;
5185      }
5186    }
5187  }
5188  // Can't simplify extracts from other values. Note that nested extracts are
5189  // already simplified implicitly by the above (extract ( extract (insert) )
5190  // will be translated into extract ( insert ( extract ) ) first and then just
5191  // the value inserted, if appropriate).
5192  return 0;
5193}


/// TryToSinkInstruction - Try to move the specified instruction from its
/// current block into the beginning of DestBlock, which can only happen if it's
/// safe to move the instruction past all of the instructions between it and the
/// end of its block.
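///
/// For example (using made-up names): if %tmp = load i32* %p appears in block
/// %entry, its single use is in successor block %next, and nothing after the
/// load in %entry may write to memory, the load can be moved to the top of
/// %next.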
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
  assert(I->hasOneUse() && "Invariants didn't hold!");

  // Cannot sink instructions that involve control flow (PHIs, terminators)
  // or that may have side effects (e.g. volatile loads, calls).
  if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
    return false;

  // Do not sink alloca instructions out of the entry block.
  if (isa<AllocaInst>(I) && I->getParent() ==
        &DestBlock->getParent()->getEntryBlock())
    return false;

  // We can only sink load instructions if there is nothing between the load
  // and the end of its block that could change the loaded value.
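  // For instance, any intervening store or call that mayWriteToMemory()
  // blocks the sink, since it could clobber the loaded location.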
  if (I->mayReadFromMemory()) {
    for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
         Scan != E; ++Scan)
      if (Scan->mayWriteToMemory())
        return false;
  }

  BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();

  I->moveBefore(InsertPos);
  ++NumSunkInst;
  return true;
}


/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
/// all reachable code to the worklist.
///
/// This has a couple of tricks to make the code faster and more powerful.  In
/// particular, we constant fold and DCE instructions as we go, to avoid adding
/// them to the worklist (this significantly speeds up instcombine on code where
/// many instructions are dead or constant).  Additionally, if we find a branch
/// whose condition is a known constant, we only visit the reachable successors.
///
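/// For example, given a conditional branch whose condition has been folded to
/// a constant (block names here are placeholders):
///   br i1 true, label %live, label %dead
/// only %live is pushed onto the block worklist; %dead is never visited.
///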
static bool AddReachableCodeToWorklist(BasicBlock *BB,
                                       SmallPtrSet<BasicBlock*, 64> &Visited,
                                       InstCombiner &IC,
                                       const TargetData *TD) {
  bool MadeIRChange = false;
  SmallVector<BasicBlock*, 256> Worklist;
  Worklist.push_back(BB);

  std::vector<Instruction*> InstrsForInstCombineWorklist;
  InstrsForInstCombineWorklist.reserve(128);

  SmallPtrSet<ConstantExpr*, 64> FoldedConstants;

  while (!Worklist.empty()) {
    BB = Worklist.back();
    Worklist.pop_back();

    // We have now visited this block!  If we've already been here, ignore it.
    if (!Visited.insert(BB)) continue;

    for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
      Instruction *Inst = BBI++;

      // DCE instruction if trivially dead.
      if (isInstructionTriviallyDead(Inst)) {
        ++NumDeadInst;
        DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
        Inst->eraseFromParent();
        continue;
      }

      // Constant-propagate the instruction if it is trivially constant.
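      // (e.g. an 'add i32 2, 3' exposed by earlier folds is replaced by the
      // constant 5 here, without ever reaching the instcombine worklist.)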
      if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
        if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
          DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
                       << *Inst << '\n');
          Inst->replaceAllUsesWith(C);
          ++NumConstProp;
          Inst->eraseFromParent();
          continue;
        }

      if (TD) {
        // See if we can constant fold its operands.
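        // (e.g. a ConstantExpr GEP whose byte offset only becomes computable
        // once TargetData supplies the target's type sizes.)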
        for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
             i != e; ++i) {
          ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
          if (CE == 0) continue;

          // If we already folded this constant, don't try again.
          if (!FoldedConstants.insert(CE))
            continue;

          Constant *NewC = ConstantFoldConstantExpression(CE, TD);
          if (NewC && NewC != CE) {
            *i = NewC;
            MadeIRChange = true;
          }
        }
      }

      InstrsForInstCombineWorklist.push_back(Inst);
    }

    // Recursively visit successors.  If this is a branch or switch on a
    // constant, only visit the reachable successor.
    TerminatorInst *TI = BB->getTerminator();
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
        bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
        BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
        Worklist.push_back(ReachableBB);
        continue;
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
        // See if this is an explicit destination.
        bool FoundCase = false;
        for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
          if (SI->getCaseValue(i) == Cond) {
            Worklist.push_back(SI->getSuccessor(i));
            FoundCase = true;
            break;
          }

        // Otherwise it is the default destination.
        if (!FoundCase)
          Worklist.push_back(SI->getSuccessor(0));
        continue;
      }
    }

    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
      Worklist.push_back(TI->getSuccessor(i));
  }

  // Once we've found all of the instructions to add to instcombine's worklist,
  // add them in reverse order.  This way instcombine will visit from the top
  // of the function down.  This jibes well with the way that it adds all uses
  // of instructions to the worklist after doing a transformation, thus avoiding
  // some N^2 behavior in pathological cases.
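  // (AddInitialGroup is assumed to push the batch so that the first
  // instruction collected above is the first one RemoveOne hands back.)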
  IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
                              InstrsForInstCombineWorklist.size());

  return MadeIRChange;
}

bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
  MadeIRChange = false;

  DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
        << F.getNameStr() << "\n");

  {
    // Do a depth-first traversal of the function, populating the worklist
    // with the reachable instructions.  Ignore blocks that are not reachable.
    // Keep track of which blocks we visit.
    SmallPtrSet<BasicBlock*, 64> Visited;
    MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);

    // Do a quick scan over the function.  If we find any blocks that are
    // unreachable, remove any instructions inside of them.  This prevents
    // the instcombine code from having to deal with some bad special cases.
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      if (!Visited.count(BB)) {
        Instruction *Term = BB->getTerminator();
        while (Term != BB->begin()) {   // Remove instrs bottom-up
          BasicBlock::iterator I = Term; --I;

          DEBUG(errs() << "IC: DCE: " << *I << '\n');
          // A debug intrinsic shouldn't force another iteration if we weren't
          // going to do one without it.
          if (!isa<DbgInfoIntrinsic>(I)) {
            ++NumDeadInst;
            MadeIRChange = true;
          }

          // If I is not of void type, replace all of its uses with undef.
          // This allows ValueHandles and custom metadata to adjust themselves.
          if (!I->getType()->isVoidTy())
            I->replaceAllUsesWith(UndefValue::get(I->getType()));
          I->eraseFromParent();
        }
      }
  }

  while (!Worklist.isEmpty()) {
    Instruction *I = Worklist.RemoveOne();
    if (I == 0) continue;  // Skip null values.

    // Check to see if we can DCE the instruction.
    if (isInstructionTriviallyDead(I)) {
      DEBUG(errs() << "IC: DCE: " << *I << '\n');
      EraseInstFromFunction(*I);
      ++NumDeadInst;
      MadeIRChange = true;
      continue;
    }

    // The instruction isn't dead; see if we can constant propagate it.
    if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
      if (Constant *C = ConstantFoldInstruction(I, TD)) {
        DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');

        // Add operands to the worklist.
        ReplaceInstUsesWith(*I, C);
        ++NumConstProp;
        EraseInstFromFunction(*I);
        MadeIRChange = true;
        continue;
      }

    // See if we can trivially sink this instruction to a successor basic
    // block.
    if (I->hasOneUse()) {
      BasicBlock *BB = I->getParent();
      Instruction *UserInst = cast<Instruction>(I->use_back());
      BasicBlock *UserParent;

      // Get the block the use occurs in.
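      // (For a PHI, the use is considered to occur in the predecessor block
      // that supplies the incoming value, not in the block holding the PHI.)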
      if (PHINode *PN = dyn_cast<PHINode>(UserInst))
        UserParent = PN->getIncomingBlock(I->use_begin().getUse());
      else
        UserParent = UserInst->getParent();

      if (UserParent != BB) {
        bool UserIsSuccessor = false;
        // See if the user is one of our successors.
        for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
          if (*SI == UserParent) {
            UserIsSuccessor = true;
            break;
          }

        // If the user is one of our immediate successors, and if that
        // successor only has us as a predecessor (we'd have to split the
        // critical edge otherwise), we can keep going.
        if (UserIsSuccessor && UserParent->getSinglePredecessor())
          // Okay, the CFG is simple enough, try to sink this instruction.
          MadeIRChange |= TryToSinkInstruction(I, UserParent);
      }
    }

    // Now that we have an instruction, try combining it to simplify it.
    Builder->SetInsertPoint(I->getParent(), I);

#ifndef NDEBUG
    std::string OrigI;
#endif
    DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str());
    DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');

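    // visit() dispatches on the opcode (visitAdd, visitICmpInst, etc.) and
    // returns a replacement instruction, I itself if it was changed in place,
    // or null if no combine applied.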
    if (Instruction *Result = visit(*I)) {
      ++NumCombined;
      // Should we replace the old instruction with a new one?
      if (Result != I) {
        DEBUG(errs() << "IC: Old = " << *I << '\n'
                     << "    New = " << *Result << '\n');

        // Everything uses the new instruction now.
        I->replaceAllUsesWith(Result);

        // Push the new instruction and any users onto the worklist.
        Worklist.Add(Result);
        Worklist.AddUsersToWorkList(*Result);

        // Move the name to the new instruction first.
        Result->takeName(I);

        // Insert the new instruction into the basic block...
        BasicBlock *InstParent = I->getParent();
        BasicBlock::iterator InsertPos = I;

        if (!isa<PHINode>(Result))        // If combining a PHI, don't insert
          while (isa<PHINode>(InsertPos)) // it into the middle of a block
            ++InsertPos;                  // of PHIs.

        InstParent->getInstList().insert(InsertPos, Result);

        EraseInstFromFunction(*I);
      } else {
#ifndef NDEBUG
        DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
                     << "    New = " << *I << '\n');
#endif

        // If the instruction was modified, it's possible that it is now dead.
        // If so, remove it.
        if (isInstructionTriviallyDead(I)) {
          EraseInstFromFunction(*I);
        } else {
          Worklist.Add(I);
          Worklist.AddUsersToWorkList(*I);
        }
      }
      MadeIRChange = true;
    }
  }

  Worklist.Zap();
  return MadeIRChange;
}


bool InstCombiner::runOnFunction(Function &F) {
  MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
  TD = getAnalysisIfAvailable<TargetData>();

  /// Builder - This is an IRBuilder that automatically adds new
  /// instructions to the worklist when they are created.
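  /// (So something like Builder->CreateAdd(X, Y) is constant-folded by
  /// TargetFolder when both operands are constant, and any instruction that
  /// is actually emitted is queued for a visit by InstCombineIRInserter.)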
  IRBuilder<true, TargetFolder, InstCombineIRInserter>
    TheBuilder(F.getContext(), TargetFolder(TD),
               InstCombineIRInserter(Worklist));
  Builder = &TheBuilder;

  bool EverMadeChange = false;

  // Iterate while there is work to do.
  unsigned Iteration = 0;
  while (DoOneIteration(F, Iteration++))
    EverMadeChange = true;

  Builder = 0;
  return EverMadeChange;
}

FunctionPass *llvm::createInstructionCombiningPass() {
  return new InstCombiner();
}
