InstructionCombining.cpp revision d0db8e8bad3060d5c016483ecfc62d822bc84fb7
//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// InstructionCombining - Combine instructions to form fewer, simple
// instructions.  This pass does not modify the CFG.  This pass is where
// algebraic simplification happens.
//
// This pass combines things like:
//    %Y = add i32 %X, 1
//    %Z = add i32 %Y, 1
// into:
//    %Z = add i32 %X, 2
//
// This is a simple worklist driven algorithm.
//
// This pass guarantees that the following canonicalizations are performed on
// the program:
//    1. If a binary operator has a constant operand, it is moved to the RHS
//    2. Bitwise operators with constant operands are always grouped so that
//       shifts are performed first, then or's, then and's, then xor's.
//    3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
//    4. All cmp instructions on boolean values are replaced with logical ops
//    5. add X, X is represented as (X*2) => (X << 1)
//    6. Multiplies with a power-of-two constant argument are transformed into
//       shifts.
//   ... etc.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Scalar.h"
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <climits>
using namespace llvm;
using namespace llvm::PatternMatch;

STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");


char InstCombiner::ID = 0;
static RegisterPass<InstCombiner>
X("instcombine", "Combine redundant instructions");

void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addPreservedID(LCSSAID);
  AU.setPreservesCFG();
}


// isOnlyUse - Return true if this instruction will be deleted if we stop using
// it.
static bool isOnlyUse(Value *V) {
  return V->hasOneUse() || isa<Constant>(V);
}

// getPromotedType - Return the specified type promoted as it would be to pass
// through a va_arg area.
static const Type *getPromotedType(const Type *Ty) {
  if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}
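// Illustrative sketch of the rule above: sub-i32 integer types widen to i32,
// everything else is returned unchanged.
//   getPromotedType(i16) --> i32
//   getPromotedType(i64) --> i64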

/// ShouldChangeType - Return true if it is desirable to convert a computation
/// from 'From' to 'To'.  We don't want to convert from a legal to an illegal
/// type for example, or from a smaller to a larger illegal type.
bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
  assert(isa<IntegerType>(From) && isa<IntegerType>(To));

  // If we don't have TD, we don't know if the source/dest are legal.
  if (!TD) return false;

  unsigned FromWidth = From->getPrimitiveSizeInBits();
  unsigned ToWidth = To->getPrimitiveSizeInBits();
  bool FromLegal = TD->isLegalInteger(FromWidth);
  bool ToLegal = TD->isLegalInteger(ToWidth);

  // If this is a legal integer from type, and the result would be an illegal
  // type, don't do the transformation.
  if (FromLegal && !ToLegal)
    return false;

  // Otherwise, if both are illegal, do not increase the size of the result. We
  // do allow things like i160 -> i64, but not i64 -> i160.
  if (!FromLegal && !ToLegal && ToWidth > FromWidth)
    return false;

  return true;
}
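// Illustrative examples, assuming target data that declares only i32 and i64
// as legal integer types:
//   ShouldChangeType(i32,  i16)  --> false  (legal source, illegal result)
//   ShouldChangeType(i64,  i160) --> false  (legal source, illegal result)
//   ShouldChangeType(i160, i64)  --> true   (shrinking to a legal type)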

/// getBitCastOperand - If the specified operand is a CastInst, a constant
/// expression bitcast, or a GetElementPtrInst with all zero indices, return the
/// operand value, otherwise return null.
static Value *getBitCastOperand(Value *V) {
  if (Operator *O = dyn_cast<Operator>(V)) {
    if (O->getOpcode() == Instruction::BitCast)
      return O->getOperand(0);
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
      if (GEP->hasAllZeroIndices())
        return GEP->getPointerOperand();
  }
  return 0;
}



// SimplifyCommutative - This performs a few simplifications for commutative
// operators:
//
//  1. Order operands such that they are listed from right (least complex) to
//     left (most complex).  This puts constants before unary operators before
//     binary operators.
//
//  2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2))
//  3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
//
bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
  bool Changed = false;
  if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1)))
    Changed = !I.swapOperands();

  if (!I.isAssociative()) return Changed;
  Instruction::BinaryOps Opcode = I.getOpcode();
  if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
    if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
      if (isa<Constant>(I.getOperand(1))) {
        Constant *Folded = ConstantExpr::get(I.getOpcode(),
                                             cast<Constant>(I.getOperand(1)),
                                             cast<Constant>(Op->getOperand(1)));
        I.setOperand(0, Op->getOperand(0));
        I.setOperand(1, Folded);
        return true;
      } else if (BinaryOperator *Op1=dyn_cast<BinaryOperator>(I.getOperand(1)))
        if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) &&
            isOnlyUse(Op) && isOnlyUse(Op1)) {
          Constant *C1 = cast<Constant>(Op->getOperand(1));
          Constant *C2 = cast<Constant>(Op1->getOperand(1));

          // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
          Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
          Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
                                                    Op1->getOperand(0),
                                                    Op1->getName(), &I);
          Worklist.Add(New);
          I.setOperand(0, New);
          I.setOperand(1, Folded);
          return true;
        }
    }
  return Changed;
}
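// For example, rule 2 above folds
//    %t = add i32 %X, 3
//    %r = add i32 %t, 4
// into
//    %r = add i32 %X, 7
// and rule 3 merges (add (add %V1, 1), (add %V2, 2)) into
// (add (add %V1, %V2), 3) when both inner adds have no other uses.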

// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
// if the LHS is a constant zero (which is the 'negate' form).
//
Value *InstCombiner::dyn_castNegVal(Value *V) const {
  if (BinaryOperator::isNeg(V))
    return BinaryOperator::getNegArgument(V);

  // Constants can be considered to be negated values if they can be folded.
  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
    return ConstantExpr::getNeg(C);

  if (ConstantVector *C = dyn_cast<ConstantVector>(V))
    if (C->getType()->getElementType()->isInteger())
      return ConstantExpr::getNeg(C);

  return 0;
}

// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
// instruction if the LHS is a constant negative zero (which is the 'negate'
// form).
//
Value *InstCombiner::dyn_castFNegVal(Value *V) const {
  if (BinaryOperator::isFNeg(V))
    return BinaryOperator::getFNegArgument(V);

  // Constants can be considered to be negated values if they can be folded.
  if (ConstantFP *C = dyn_cast<ConstantFP>(V))
    return ConstantExpr::getFNeg(C);

  if (ConstantVector *C = dyn_cast<ConstantVector>(V))
    if (C->getType()->getElementType()->isFloatingPoint())
      return ConstantExpr::getFNeg(C);

  return 0;
}

/// isFreeToInvert - Return true if the specified value is free to invert (apply
/// ~ to).  This happens in cases where the ~ can be eliminated.
static inline bool isFreeToInvert(Value *V) {
  // ~(~(X)) -> X.
  if (BinaryOperator::isNot(V))
    return true;

  // Constants can be considered to be not'ed values.
  if (isa<ConstantInt>(V))
    return true;

  // Compares can be inverted if they have a single use.
  if (CmpInst *CI = dyn_cast<CmpInst>(V))
    return CI->hasOneUse();

  return false;
}

static inline Value *dyn_castNotVal(Value *V) {
  // If this is not(not(x)) don't return that this is a not: we want the two
  // not's to be folded first.
  if (BinaryOperator::isNot(V)) {
    Value *Operand = BinaryOperator::getNotArgument(V);
    if (!isFreeToInvert(Operand))
      return Operand;
  }

  // Constants can be considered to be not'ed values...
  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
    return ConstantInt::get(C->getType(), ~C->getValue());
  return 0;
}



// dyn_castFoldableMul - If this value is a multiply that can be folded into
// other computations (because it has a constant operand), return the
// non-constant operand of the multiply, and set CST to point to the multiplier.
// Otherwise, return null.
//
static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
  if (V->hasOneUse() && V->getType()->isInteger())
    if (Instruction *I = dyn_cast<Instruction>(V)) {
      if (I->getOpcode() == Instruction::Mul)
        if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
          return I->getOperand(0);
      if (I->getOpcode() == Instruction::Shl)
        if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) {
          // The multiplier is really 1 << CST.
          uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
          uint32_t CSTVal = CST->getLimitedValue(BitWidth);
          CST = ConstantInt::get(V->getType()->getContext(),
                                 APInt(BitWidth, 1).shl(CSTVal));
          return I->getOperand(0);
        }
    }
  return 0;
}
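// Sketch: for %V = mul i32 %X, 5 this returns %X with CST set to 5; for
// %V = shl i32 %X, 3 it returns %X with CST rewritten to 8 (i.e. 1 << 3).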

/// AddOne - Add one to a ConstantInt.
static Constant *AddOne(Constant *C) {
  return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
}
/// SubOne - Subtract one from a ConstantInt.
static Constant *SubOne(ConstantInt *C) {
  return ConstantInt::get(C->getContext(), C->getValue()-1);
}


/// AssociativeOpt - Perform an optimization on an associative operator.  This
/// function is designed to check a chain of associative operators for a
/// potential to apply a certain optimization.  Since the optimization may be
/// applicable if the expression was reassociated, this checks the chain, then
/// reassociates the expression as necessary to expose the optimization
/// opportunity.  This makes use of a special Functor, which must define
/// 'shouldApply' and 'apply' methods.
///
template<typename Functor>
static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
  // Quick check, see if the immediate LHS matches...
  if (F.shouldApply(Root.getOperand(0)))
    return F.apply(Root);

  return 0;
}

namespace {

// AddRHS - Implements: X + X --> X << 1
struct AddRHS {
  Value *RHS;
  explicit AddRHS(Value *rhs) : RHS(rhs) {}
  bool shouldApply(Value *LHS) const { return LHS == RHS; }
  Instruction *apply(BinaryOperator &Add) const {
    return BinaryOperator::CreateShl(Add.getOperand(0),
                                     ConstantInt::get(Add.getType(), 1));
  }
};

// AddMaskingAnd - Implements (A & C1)+(B & C2) --> (A & C1)|(B & C2)
//                 iff C1&C2 == 0
struct AddMaskingAnd {
  Constant *C2;
  explicit AddMaskingAnd(Constant *c) : C2(c) {}
  bool shouldApply(Value *LHS) const {
    ConstantInt *C1;
    return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) &&
           ConstantExpr::getAnd(C1, C2)->isNullValue();
  }
  Instruction *apply(BinaryOperator &Add) const {
    return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1));
  }
};

}
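// Illustrative IR for AddMaskingAnd: 0xF0 & 0x0F == 0, so the two operands
// share no set bits and the add can never produce a carry:
//    %a = and i32 %A, 240
//    %b = and i32 %B, 15
//    %s = add i32 %a, %b        ; --> %s = or i32 %a, %b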

static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
                                             InstCombiner *IC) {
  if (CastInst *CI = dyn_cast<CastInst>(&I))
    return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());

  // Figure out if the constant is the left or the right argument.
  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));

  if (Constant *SOC = dyn_cast<Constant>(SO)) {
    if (ConstIsRHS)
      return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
    return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
  }

  Value *Op0 = SO, *Op1 = ConstOperand;
  if (!ConstIsRHS)
    std::swap(Op0, Op1);

  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
    return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
                                    SO->getName()+".op");
  if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
    return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
                                   SO->getName()+".cmp");
  if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
    return IC->Builder->CreateFCmp(CI->getPredicate(), Op0, Op1,
                                   SO->getName()+".cmp");
  llvm_unreachable("Unknown binary instruction type!");
}

// FoldOpIntoSelect - Given an instruction with a select as one operand and a
// constant as the other operand, try to fold the binary operator into the
// select arguments.  This also works for Cast instructions, which obviously do
// not have a second operand.
Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
  // Don't modify shared select instructions.
  if (!SI->hasOneUse()) return 0;
  Value *TV = SI->getOperand(1);
  Value *FV = SI->getOperand(2);

  if (isa<Constant>(TV) || isa<Constant>(FV)) {
    // Bool selects with constant operands can be folded to logical ops.
    if (SI->getType() == Type::getInt1Ty(SI->getContext())) return 0;

    Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
    Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);

    return SelectInst::Create(SI->getCondition(), SelectTrueVal,
                              SelectFalseVal);
  }
  return 0;
}
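// A sketch of the fold, for a single-use select with constant arms:
//    %s = select i1 %c, i32 1, i32 2
//    %r = add i32 %s, 4         ; --> %r = select i1 %c, i32 5, i32 6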


/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
/// has a PHI node as operand #0, see if we can fold the instruction into the
/// PHI (which is only possible if all operands to the PHI are constants).
///
/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
/// that would normally be unprofitable because they strongly encourage jump
/// threading.
Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
                                         bool AllowAggressive) {
  AllowAggressive = false;
  PHINode *PN = cast<PHINode>(I.getOperand(0));
  unsigned NumPHIValues = PN->getNumIncomingValues();
  if (NumPHIValues == 0 ||
      // We normally only transform phis with a single use, unless we're trying
      // hard to make jump threading happen.
      (!PN->hasOneUse() && !AllowAggressive))
    return 0;


  // Check to see if all of the operands of the PHI are simple constants
  // (constantint/constantfp/undef).  If there is one non-constant value,
  // remember the BB it is in.  If there is more than one or if *it* is a PHI,
  // bail out.  We don't do arbitrary constant expressions here because moving
  // their computation can be expensive without a cost model.
  BasicBlock *NonConstBB = 0;
  for (unsigned i = 0; i != NumPHIValues; ++i)
    if (!isa<Constant>(PN->getIncomingValue(i)) ||
        isa<ConstantExpr>(PN->getIncomingValue(i))) {
      if (NonConstBB) return 0;  // More than one non-const value.
      if (isa<PHINode>(PN->getIncomingValue(i))) return 0;  // Itself a phi.
      NonConstBB = PN->getIncomingBlock(i);

      // If the incoming non-constant value is in I's block, we have an infinite
      // loop.
      if (NonConstBB == I.getParent())
        return 0;
    }

  // If there is exactly one non-constant value, we can insert a copy of the
  // operation in that block.  However, if this is a critical edge, we would be
  // inserting the computation on some other paths (e.g. inside a loop).  Only
  // do this if the pred block is unconditionally branching into the phi block.
  if (NonConstBB != 0 && !AllowAggressive) {
    BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
    if (!BI || !BI->isUnconditional()) return 0;
  }

  // Okay, we can do the transformation: create the new PHI node.
  PHINode *NewPN = PHINode::Create(I.getType(), "");
  NewPN->reserveOperandSpace(PN->getNumOperands()/2);
  InsertNewInstBefore(NewPN, *PN);
  NewPN->takeName(PN);

  // Next, add all of the operands to the PHI.
  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
    // We only currently try to fold the condition of a select when it is a phi,
    // not the true/false values.
    Value *TrueV = SI->getTrueValue();
    Value *FalseV = SI->getFalseValue();
    BasicBlock *PhiTransBB = PN->getParent();
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      BasicBlock *ThisBB = PN->getIncomingBlock(i);
      Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
      Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
      Value *InV = 0;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
                                 FalseVInPred,
                                 "phitmp", NonConstBB->getTerminator());
        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, ThisBB);
    }
  } else if (I.getNumOperands() == 2) {
    Constant *C = cast<Constant>(I.getOperand(1));
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV = 0;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        if (CmpInst *CI = dyn_cast<CmpInst>(&I))
          InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
        else
          InV = ConstantExpr::get(I.getOpcode(), InC, C);
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
          InV = BinaryOperator::Create(BO->getOpcode(),
                                       PN->getIncomingValue(i), C, "phitmp",
                                       NonConstBB->getTerminator());
        else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
          InV = CmpInst::Create(CI->getOpcode(),
                                CI->getPredicate(),
                                PN->getIncomingValue(i), C, "phitmp",
                                NonConstBB->getTerminator());
        else
          llvm_unreachable("Unknown binop!");

        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  } else {
    CastInst *CI = cast<CastInst>(&I);
    const Type *RetTy = CI->getType();
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
        InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
      } else {
        assert(PN->getIncomingBlock(i) == NonConstBB);
        InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
                               I.getType(), "phitmp",
                               NonConstBB->getTerminator());
        Worklist.Add(cast<Instruction>(InV));
      }
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  }
  return ReplaceInstUsesWith(I, NewPN);
}
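// A sketch of the common (non-aggressive) case: every incoming value is a
// constant, so the operation folds into each incoming edge:
//    %p = phi i32 [ 1, %bb0 ], [ 2, %bb1 ]
//    %r = add i32 %p, 10        ; --> %r = phi i32 [ 11, %bb0 ], [ 12, %bb1 ]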


/// WillNotOverflowSignedAdd - Return true if we can prove that:
///    (sext (add LHS, RHS))  === (add (sext LHS), (sext RHS))
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
  // There are different heuristics we can use for this.  Here are some simple
  // ones.

  // Add has the property that adding any two 2's complement numbers can only
  // have one carry bit which can change a sign.  As such, if LHS and RHS each
  // have at least two sign bits, we know that the addition of the two values
  // will sign extend fine.
  if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
    return true;


  // If one of the operands only has one non-zero bit, and if the other operand
  // has a known-zero bit in a more significant place than it (not including the
  // sign bit) the ripple may go up to and fill the zero, but won't change the
  // sign.  For example, (X & ~4) + 1.

  // TODO: Implement.

  return false;
}
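// Sketch: each operand below is an ashr, so it has at least two sign bits and
// the heuristic above proves the add cannot overflow the signed range:
//    %a = ashr i32 %x, 1
//    %b = ashr i32 %y, 1
//    ; WillNotOverflowSignedAdd(%a, %b) returns true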


Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
  bool Changed = SimplifyCommutative(I);
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
                                 I.hasNoUnsignedWrap(), TD))
    return ReplaceInstUsesWith(I, V);


  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
    if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
      // X + (signbit) --> X ^ signbit
      const APInt& Val = CI->getValue();
      uint32_t BitWidth = Val.getBitWidth();
      if (Val == APInt::getSignBit(BitWidth))
        return BinaryOperator::CreateXor(LHS, RHS);

      // See if SimplifyDemandedBits can simplify this.  This handles stuff like
      // (X & 254)+1 -> (X&254)|1
      if (SimplifyDemandedInstructionBits(I))
        return &I;

      // zext(bool) + C -> bool ? C + 1 : C
      if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
        if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
          return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
    }

    if (isa<PHINode>(LHS))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;

    ConstantInt *XorRHS = 0;
    Value *XorLHS = 0;
    if (isa<ConstantInt>(RHSC) &&
        match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
      uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
      const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();

      uint32_t Size = TySizeBits / 2;
      APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1));
      APInt CFF80Val(-C0080Val);
      do {
        if (TySizeBits > Size) {
          // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
          // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
          if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) ||
              (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) {
            // This is a sign extend if the top bits are known zero.
            if (!MaskedValueIsZero(XorLHS,
                   APInt::getHighBitsSet(TySizeBits, TySizeBits - Size)))
              Size = 0;  // Not a sign ext, but can't be any others either.
            break;
          }
        }
        Size >>= 1;
        C0080Val = APIntOps::lshr(C0080Val, Size);
        CFF80Val = APIntOps::ashr(CFF80Val, Size);
      } while (Size >= 1);

      // FIXME: This shouldn't be necessary. When the backends can handle types
      // with funny bit widths then this switch statement should be removed. It
      // is just here to get the size of the "middle" type back up to something
      // that the back ends can handle.
      const Type *MiddleType = 0;
      switch (Size) {
        default: break;
        case 32:
        case 16:
        case  8: MiddleType = IntegerType::get(I.getContext(), Size); break;
      }
      if (MiddleType) {
        Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext");
        return new SExtInst(NewTrunc, I.getType(), I.getName());
      }
    }
  }

  if (I.getType() == Type::getInt1Ty(I.getContext()))
    return BinaryOperator::CreateXor(LHS, RHS);

  // X + X --> X << 1
  if (I.getType()->isInteger()) {
    if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS)))
      return Result;

    if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
      if (RHSI->getOpcode() == Instruction::Sub)
        if (LHS == RHSI->getOperand(1))                   // A + (B - A) --> B
          return ReplaceInstUsesWith(I, RHSI->getOperand(0));
    }
    if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
      if (LHSI->getOpcode() == Instruction::Sub)
        if (RHS == LHSI->getOperand(1))                   // (B - A) + A --> B
          return ReplaceInstUsesWith(I, LHSI->getOperand(0));
    }
  }

  // -A + B  -->  B - A
  // -A + -B  -->  -(A + B)
  if (Value *LHSV = dyn_castNegVal(LHS)) {
    if (LHS->getType()->isIntOrIntVector()) {
      if (Value *RHSV = dyn_castNegVal(RHS)) {
        Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
        return BinaryOperator::CreateNeg(NewAdd);
      }
    }

    return BinaryOperator::CreateSub(RHS, LHSV);
  }

  // A + -B  -->  A - B
  if (!isa<Constant>(RHS))
    if (Value *V = dyn_castNegVal(RHS))
      return BinaryOperator::CreateSub(LHS, V);


  ConstantInt *C2;
  if (Value *X = dyn_castFoldableMul(LHS, C2)) {
    if (X == RHS)   // X*C + X --> X * (C+1)
      return BinaryOperator::CreateMul(RHS, AddOne(C2));

    // X*C1 + X*C2 --> X * (C1+C2)
    ConstantInt *C1;
    if (X == dyn_castFoldableMul(RHS, C1))
      return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
  }

  // X + X*C --> X * (C+1)
  if (dyn_castFoldableMul(RHS, C2) == LHS)
    return BinaryOperator::CreateMul(LHS, AddOne(C2));

  // X + ~X --> -1   since   ~X = -X-1
  if (dyn_castNotVal(LHS) == RHS ||
      dyn_castNotVal(RHS) == LHS)
    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));


  // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0
  if (match(RHS, m_And(m_Value(), m_ConstantInt(C2))))
    if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2)))
      return R;

  // A+B --> A|B iff A and B have no bits set in common.
  if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
    APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
    APInt LHSKnownOne(IT->getBitWidth(), 0);
    APInt LHSKnownZero(IT->getBitWidth(), 0);
    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
    if (LHSKnownZero != 0) {
      APInt RHSKnownOne(IT->getBitWidth(), 0);
      APInt RHSKnownZero(IT->getBitWidth(), 0);
      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);

      // No bits in common -> bitwise or.
      if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
        return BinaryOperator::CreateOr(LHS, RHS);
    }
  }

  // W*X + Y*Z --> W * (X+Z)  iff W == Y
  if (I.getType()->isIntOrIntVector()) {
    Value *W, *X, *Y, *Z;
    if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
        match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
      if (W != Y) {
        if (W == Z) {
          std::swap(Y, Z);
        } else if (Y == X) {
          std::swap(W, X);
        } else if (X == Z) {
          std::swap(Y, Z);
          std::swap(W, X);
        }
      }

      if (W == Y) {
        Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
        return BinaryOperator::CreateMul(W, NewAdd);
      }
    }
  }

  if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
    Value *X = 0;
    if (match(LHS, m_Not(m_Value(X))))    // ~X + C --> (C-1) - X
      return BinaryOperator::CreateSub(SubOne(CRHS), X);

    // (X & FF00) + xx00  -> (X+xx00) & FF00
    if (LHS->hasOneUse() &&
        match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
      Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
      if (Anded == CRHS) {
        // See if all bits from the first bit set in the Add RHS up are included
        // in the mask.  First, get the rightmost bit.
        const APInt& AddRHSV = CRHS->getValue();

        // Form a mask of all bits from the lowest bit added through the top.
        APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));

        // See if the and mask includes all of these bits.
        APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());

        if (AddRHSHighBits == AddRHSHighBitsAnd) {
          // Okay, the xform is safe.  Insert the new add pronto.
          Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
          return BinaryOperator::CreateAnd(NewAdd, C2);
        }
      }
    }

    // Try to fold constant add into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;
  }

  // add (select X 0 (sub n A)) A  -->  select X A n
  {
    SelectInst *SI = dyn_cast<SelectInst>(LHS);
    Value *A = RHS;
    if (!SI) {
      SI = dyn_cast<SelectInst>(RHS);
      A = LHS;
    }
    if (SI && SI->hasOneUse()) {
      Value *TV = SI->getTrueValue();
      Value *FV = SI->getFalseValue();
      Value *N;

      // Can we fold the add into the argument of the select?
      // We check both true and false select arguments for a matching subtract.
      if (match(FV, m_Zero()) &&
          match(TV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the true select value.
        return SelectInst::Create(SI->getCondition(), N, A);
      if (match(TV, m_Zero()) &&
          match(FV, m_Sub(m_Value(N), m_Specific(A))))
        // Fold the add into the false select value.
        return SelectInst::Create(SI->getCondition(), A, N);
    }
  }

  // Check for (add (sext x), y), see if we can merge this into an
  // integer add followed by a sext.
  if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
    // (add (sext x), cst) --> (sext (add x, cst'))
    if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
      Constant *CI =
        ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
      if (LHSConv->hasOneUse() &&
          ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
        // Insert the new, smaller add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              CI, "addconv");
        return new SExtInst(NewAdd, I.getType());
      }
    }

    // (add (sext x), (sext y)) --> (sext (add int x, y))
    if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
      // Only do this if x/y have the same type, if at least one of them has a
      // single use (so we don't increase the number of sexts), and if the
      // integer add will not overflow.
      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                   RHSConv->getOperand(0))) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              RHSConv->getOperand(0), "addconv");
        return new SExtInst(NewAdd, I.getType());
      }
    }
  }

  return Changed ? &I : 0;
}
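// One of the folds above, written out as IR (illustrative):
//    %z = zext i1 %b to i32
//    %r = add i32 %z, 40        ; --> %r = select i1 %b, i32 41, i32 40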

Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
  bool Changed = SimplifyCommutative(I);
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
    // X + 0 --> X
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
      if (CFP->isExactlyValue(ConstantFP::getNegativeZero
                              (I.getType())->getValueAPF()))
        return ReplaceInstUsesWith(I, LHS);
    }

    if (isa<PHINode>(LHS))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;
  }

  // -A + B  -->  B - A
  // -A + -B  -->  -(A + B)
  if (Value *LHSV = dyn_castFNegVal(LHS))
    return BinaryOperator::CreateFSub(RHS, LHSV);

  // A + -B  -->  A - B
  if (!isa<Constant>(RHS))
    if (Value *V = dyn_castFNegVal(RHS))
      return BinaryOperator::CreateFSub(LHS, V);

  // Check for X+0.0.  Simplify it to X if we know X is not -0.0.
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
      return ReplaceInstUsesWith(I, LHS);

  // Check for (add double (sitofp x), y), see if we can merge this into an
  // integer add followed by a promotion.
  if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
    // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
    // ... if the constant fits in the integer value.  This is useful for things
    // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
    // requires a constant pool load, and generally allows the add to be better
    // instcombined.
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
      Constant *CI =
      ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
      if (LHSConv->hasOneUse() &&
          ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              CI, "addconv");
        return new SIToFPInst(NewAdd, I.getType());
      }
    }

    // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
    if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
      // Only do this if x/y have the same type, if at least one of them has a
      // single use (so we don't increase the number of int->fp conversions),
      // and if the integer add will not overflow.
      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
                                   RHSConv->getOperand(0))) {
        // Insert the new integer add.
        Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
                                              RHSConv->getOperand(0),"addconv");
        return new SIToFPInst(NewAdd, I.getType());
      }
    }
  }

  return Changed ? &I : 0;
}


/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
/// code necessary to compute the offset from the base pointer (without adding
/// in the base pointer).  Return the result as a signed integer of intptr size.
Value *InstCombiner::EmitGEPOffset(User *GEP) {
  TargetData &TD = *getTargetData();
  gep_type_iterator GTI = gep_type_begin(GEP);
  const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
  Value *Result = Constant::getNullValue(IntPtrTy);

  // Build a mask for high order bits.
  unsigned IntPtrWidth = TD.getPointerSizeInBits();
  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);

  for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
       ++i, ++GTI) {
    Value *Op = *i;
    uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
    if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
      if (OpC->isZero()) continue;

      // Handle a struct index, which adds its field offset to the pointer.
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());

        Result = Builder->CreateAdd(Result,
                                    ConstantInt::get(IntPtrTy, Size),
                                    GEP->getName()+".offs");
        continue;
      }

      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
      Constant *OC =
              ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
      Scale = ConstantExpr::getMul(OC, Scale);
      // Emit an add instruction.
      Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
      continue;
    }
    // Convert to correct type.
    if (Op->getType() != IntPtrTy)
      Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
    if (Size != 1) {
      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
      // We'll let instcombine(mul) convert this to a shl if possible.
      Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
    }

    // Emit an add instruction.
    Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
  }
  return Result;
}
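// A rough sketch of the emitted IR, assuming a 64-bit target and
//   %p = getelementptr { i32, [10 x i16] }* %base, i64 %i, i32 1, i64 %j
// (the struct is 24 bytes and field 1 starts at byte offset 4):
//    %p.idx   = mul i64 %i, 24          ; scale the outer index
//    %p.offs  = add i64 %p.idx, 4       ; add the field offset of member 1
//    %p.idx1  = mul i64 %j, 2           ; scale by the i16 element size
//    %p.offs2 = add i64 %p.idx1, %p.offs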



/// Optimize differences of pointers into the same array into a size.  Consider:
///  &A[10] - &A[0]: we should compile this to "10".  LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
///
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
                                               const Type *Ty) {
  assert(TD && "Must have target data info for this");

  // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
  // this.
  bool Swapped = false;
  GetElementPtrInst *GEP = 0;
  ConstantExpr *CstGEP = 0;

  // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo".
  // For now we require one side to be the base pointer "A" or a constant
  // expression derived from it.
  if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) {
    // (gep X, ...) - X
    if (LHSGEP->getOperand(0) == RHS) {
      GEP = LHSGEP;
      Swapped = false;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) {
      // (gep X, ...) - (ce_gep X, ...)
      if (CE->getOpcode() == Instruction::GetElementPtr &&
          LHSGEP->getOperand(0) == CE->getOperand(0)) {
        CstGEP = CE;
        GEP = LHSGEP;
        Swapped = false;
      }
    }
  }

  if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) {
    // X - (gep X, ...)
    if (RHSGEP->getOperand(0) == LHS) {
      GEP = RHSGEP;
      Swapped = true;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) {
      // (ce_gep X, ...) - (gep X, ...)
      if (CE->getOpcode() == Instruction::GetElementPtr &&
          RHSGEP->getOperand(0) == CE->getOperand(0)) {
        CstGEP = CE;
        GEP = RHSGEP;
        Swapped = true;
      }
    }
  }

  if (GEP == 0)
    return 0;

  // Emit the offset of the GEP as an intptr_t.
  Value *Result = EmitGEPOffset(GEP);

  // If we had a constant expression GEP on the other side offsetting the
  // pointer, subtract it from the offset we have.
  if (CstGEP) {
    Value *CstOffset = EmitGEPOffset(CstGEP);
    Result = Builder->CreateSub(Result, CstOffset);
  }


  // If we have p - gep(p, ...)  then we have to negate the result.
  if (Swapped)
    Result = Builder->CreateNeg(Result, "diff.neg");

  return Builder->CreateIntCast(Result, Ty, true);
}
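// For example (a sketch, with 4-byte i32 elements):
//    %gep = getelementptr i32* %A, i64 10
//    %l   = ptrtoint i32* %gep to i64
//    %r   = ptrtoint i32* %A to i64
//    %d   = sub i64 %l, %r              ; folds to the byte offset 40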


Instruction *InstCombiner::visitSub(BinaryOperator &I) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  if (Op0 == Op1)                        // sub X, X  -> 0
    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));

  // If this is a 'B = x-(-A)', change to B = x+A.  This preserves NSW/NUW.
  if (Value *V = dyn_castNegVal(Op1)) {
    BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
    Res->setHasNoSignedWrap(I.hasNoSignedWrap());
    Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
    return Res;
  }

  if (isa<UndefValue>(Op0))
    return ReplaceInstUsesWith(I, Op0);    // undef - X -> undef
  if (isa<UndefValue>(Op1))
    return ReplaceInstUsesWith(I, Op1);    // X - undef -> undef
  if (I.getType() == Type::getInt1Ty(I.getContext()))
    return BinaryOperator::CreateXor(Op0, Op1);

  if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
    // Replace (-1 - A) with (~A).
    if (C->isAllOnesValue())
      return BinaryOperator::CreateNot(Op1);

    // C - ~X == X + (1+C)
    Value *X = 0;
    if (match(Op1, m_Not(m_Value(X))))
      return BinaryOperator::CreateAdd(X, AddOne(C));

    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (C->isZero()) {
      if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
        if (SI->getOpcode() == Instruction::LShr) {
          if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
            // Check to see if we are shifting out everything but the sign bit.
            if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
                SI->getType()->getPrimitiveSizeInBits()-1) {
              // Ok, the transformation is safe.  Insert AShr.
              return BinaryOperator::Create(Instruction::AShr,
                                          SI->getOperand(0), CU, SI->getName());
            }
          }
        } else if (SI->getOpcode() == Instruction::AShr) {
          if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
            // Check to see if we are shifting out everything but the sign bit.
            if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
                SI->getType()->getPrimitiveSizeInBits()-1) {
              // Ok, the transformation is safe.  Insert LShr.
              return BinaryOperator::CreateLShr(
                                          SI->getOperand(0), CU, SI->getName());
            }
          }
        }
      }
    }

    // Try to fold constant sub into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

    // C - zext(bool) -> bool ? C - 1 : C
    if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
      if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
        return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
  }

  if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
    if (Op1I->getOpcode() == Instruction::Add) {
      if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
        return BinaryOperator::CreateNeg(Op1I->getOperand(1),
                                         I.getName());
      else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
        return BinaryOperator::CreateNeg(Op1I->getOperand(0),
                                         I.getName());
      else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
        if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
          // C1-(X+C2) --> (C1-C2)-X
          return BinaryOperator::CreateSub(
            ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));
      }
    }

    if (Op1I->hasOneUse()) {
      // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
      // is not used by anyone else...
      //
      if (Op1I->getOpcode() == Instruction::Sub) {
        // Swap the two operands of the subexpr...
        Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
        Op1I->setOperand(0, IIOp1);
        Op1I->setOperand(1, IIOp0);

        // Create the new top level add instruction...
        return BinaryOperator::CreateAdd(Op0, Op1);
      }

      // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
      //
      if (Op1I->getOpcode() == Instruction::And &&
          (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
        Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);

        Value *NewNot = Builder->CreateNot(OtherOp, "B.not");
        return BinaryOperator::CreateAnd(Op0, NewNot);
      }

      // 0 - (X sdiv C)  -> (X sdiv -C)
      if (Op1I->getOpcode() == Instruction::SDiv)
        if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
          if (CSI->isZero())
            if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
              return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
                                          ConstantExpr::getNeg(DivRHS));

      // X - X*C --> X * (1-C)
      ConstantInt *C2 = 0;
      if (dyn_castFoldableMul(Op1I, C2) == Op0) {
        Constant *CP1 =
          ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
                                             C2);
        return BinaryOperator::CreateMul(Op0, CP1);
      }
    }
  }

  if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
    if (Op0I->getOpcode() == Instruction::Add) {
      if (Op0I->getOperand(0) == Op1)             // (Y+X)-Y == X
        return ReplaceInstUsesWith(I, Op0I->getOperand(1));
      else if (Op0I->getOperand(1) == Op1)        // (X+Y)-Y == X
        return ReplaceInstUsesWith(I, Op0I->getOperand(0));
    } else if (Op0I->getOpcode() == Instruction::Sub) {
      if (Op0I->getOperand(0) == Op1)             // (X-Y)-X == -Y
        return BinaryOperator::CreateNeg(Op0I->getOperand(1),
                                         I.getName());
    }
  }

  ConstantInt *C1;
  if (Value *X = dyn_castFoldableMul(Op0, C1)) {
    if (X == Op1)  // X*C - X --> X * (C-1)
      return BinaryOperator::CreateMul(Op1, SubOne(C1));

    ConstantInt *C2;   // X*C1 - X*C2 -> X * (C1-C2)
    if (X == dyn_castFoldableMul(Op1, C2))
      return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
  }

  // Optimize pointer differences into the same array into a size.  Consider:
  //  &A[10] - &A[0]: we should compile this to "10".
  if (TD) {
    Value *LHSOp, *RHSOp;
    if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
        match(Op1, m_PtrToInt(m_Value(RHSOp))))
      if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
        return ReplaceInstUsesWith(I, Res);

    // trunc(p)-trunc(q) -> trunc(p-q)
    if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
        match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
      if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
        return ReplaceInstUsesWith(I, Res);
  }

  return 0;
}
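// Two of the folds above, written out as IR (illustrative):
//    %t = add i32 %X, %Y
//    %r = sub i32 %X, %t        ; --> %r = sub i32 0, %Y   (X-(X+Y) == -Y)
//    %n = sub i32 0, %A
//    %s = sub i32 %B, %n        ; --> %s = add i32 %B, %A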

Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  // If this is a 'B = x-(-A)', change to B = x+A...
  if (Value *V = dyn_castFNegVal(Op1))
    return BinaryOperator::CreateFAdd(Op0, V);

  if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
    if (Op1I->getOpcode() == Instruction::FAdd) {
      if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
        return BinaryOperator::CreateFNeg(Op1I->getOperand(1),
                                          I.getName());
      else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
        return BinaryOperator::CreateFNeg(Op1I->getOperand(0),
                                          I.getName());
    }
  }

  return 0;
}

/// getICmpCode - Encode an icmp predicate into a three bit mask.  These bits
/// are carefully arranged to allow folding of expressions such as:
///
///      (A < B) | (A > B) --> (A != B)
///
/// Note that this is only valid if the first and second predicates have the
/// same sign.  It is illegal to do: (A u< B) | (A s> B)
///
/// Three bits are used to represent the condition, as follows:
///   bit 0  A > B
///   bit 1  A == B
///   bit 2  A < B
///
/// <=>  Value  Definition
/// 000     0   Always false
/// 001     1   A >  B
/// 010     2   A == B
/// 011     3   A >= B
/// 100     4   A <  B
/// 101     5   A != B
/// 110     6   A <= B
/// 111     7   Always true
///
static unsigned getICmpCode(const ICmpInst *ICI) {
  switch (ICI->getPredicate()) {
    // False -> 0
  case ICmpInst::ICMP_UGT: return 1;  // 001
  case ICmpInst::ICMP_SGT: return 1;  // 001
  case ICmpInst::ICMP_EQ:  return 2;  // 010
  case ICmpInst::ICMP_UGE: return 3;  // 011
  case ICmpInst::ICMP_SGE: return 3;  // 011
  case ICmpInst::ICMP_ULT: return 4;  // 100
  case ICmpInst::ICMP_SLT: return 4;  // 100
  case ICmpInst::ICMP_NE:  return 5;  // 101
  case ICmpInst::ICMP_ULE: return 6;  // 110
  case ICmpInst::ICMP_SLE: return 6;  // 110
    // True -> 7
  default:
    llvm_unreachable("Invalid ICmp predicate!");
    return 0;
  }
}

/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes an fcmp
/// predicate into a three bit mask. It also returns whether it is an ordered
/// predicate by reference.
static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
  isOrdered = false;
  switch (CC) {
  case FCmpInst::FCMP_ORD: isOrdered = true; return 0;  // 000
  case FCmpInst::FCMP_UNO:                   return 0;  // 000
  case FCmpInst::FCMP_OGT: isOrdered = true; return 1;  // 001
  case FCmpInst::FCMP_UGT:                   return 1;  // 001
  case FCmpInst::FCMP_OEQ: isOrdered = true; return 2;  // 010
  case FCmpInst::FCMP_UEQ:                   return 2;  // 010
  case FCmpInst::FCMP_OGE: isOrdered = true; return 3;  // 011
  case FCmpInst::FCMP_UGE:                   return 3;  // 011
  case FCmpInst::FCMP_OLT: isOrdered = true; return 4;  // 100
  case FCmpInst::FCMP_ULT:                   return 4;  // 100
  case FCmpInst::FCMP_ONE: isOrdered = true; return 5;  // 101
  case FCmpInst::FCMP_UNE:                   return 5;  // 101
  case FCmpInst::FCMP_OLE: isOrdered = true; return 6;  // 110
  case FCmpInst::FCMP_ULE:                   return 6;  // 110
    // True -> 7
  default:
    // Not expecting FCMP_FALSE and FCMP_TRUE.
    llvm_unreachable("Unexpected FCmp predicate!");
    return 0;
  }
}

/// getICmpValue - This is the complement of getICmpCode, which turns an
/// opcode and two operands into either a constant true or false, or a brand
/// new ICmp instruction. The sign is passed in to determine which kind
/// of predicate to use in the new icmp instruction.
static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) {
  switch (code) {
  default: llvm_unreachable("Illegal ICmp code!");
  case  0: return ConstantInt::getFalse(LHS->getContext());
  case  1:
    if (sign)
      return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
    else
      return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS);
  case  2: return new ICmpInst(ICmpInst::ICMP_EQ,  LHS, RHS);
  case  3:
    if (sign)
      return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS);
    else
      return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS);
  case  4:
    if (sign)
      return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS);
    else
      return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS);
  case  5: return new ICmpInst(ICmpInst::ICMP_NE,  LHS, RHS);
  case  6:
    if (sign)
      return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
    else
      return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
  case  7: return ConstantInt::getTrue(LHS->getContext());
  }
}

/// getFCmpValue - This is the complement of getFCmpCode, which turns an
/// opcode and two operands into either an FCmp instruction or a constant.
/// isordered is passed in to determine which kind of predicate to use in the
/// new fcmp instruction.
static Value *getFCmpValue(bool isordered, unsigned code,
                           Value *LHS, Value *RHS) {
  switch (code) {
  default: llvm_unreachable("Illegal FCmp code!");
  case  0:
    if (isordered)
      return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS);
    else
      return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS);
  case  1:
    if (isordered)
      return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS);
    else
      return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS);
  case  2:
    if (isordered)
      return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS);
    else
      return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS);
  case  3:
    if (isordered)
      return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS);
    else
      return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS);
  case  4:
    if (isordered)
      return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS);
    else
      return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS);
  case  5:
    if (isordered)
      return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS);
    else
      return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS);
  case  6:
    if (isordered)
      return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS);
    else
      return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS);
  case  7: return ConstantInt::getTrue(LHS->getContext());
  }
}

/// PredicatesFoldable - Return true if both predicates match sign or if at
/// least one of them is an equality comparison (which is signless).
static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
  return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
         (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
         (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
}

namespace {
// FoldICmpLogical - Implements (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
struct FoldICmpLogical {
  InstCombiner &IC;
  Value *LHS, *RHS;
  ICmpInst::Predicate pred;
  FoldICmpLogical(InstCombiner &ic, ICmpInst *ICI)
    : IC(ic), LHS(ICI->getOperand(0)), RHS(ICI->getOperand(1)),
      pred(ICI->getPredicate()) {}
  bool shouldApply(Value *V) const {
    if (ICmpInst *ICI = dyn_cast<ICmpInst>(V))
      if (PredicatesFoldable(pred, ICI->getPredicate()))
        return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) ||
                (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS));
    return false;
  }
  Instruction *apply(Instruction &Log) const {
    ICmpInst *ICI = cast<ICmpInst>(Log.getOperand(0));
    if (ICI->getOperand(0) != LHS) {
      assert(ICI->getOperand(1) == LHS);
      ICI->swapOperands();  // Swap the LHS and RHS of the ICmp
    }

    ICmpInst *RHSICI = cast<ICmpInst>(Log.getOperand(1));
    unsigned LHSCode = getICmpCode(ICI);
    unsigned RHSCode = getICmpCode(RHSICI);
    unsigned Code;
    switch (Log.getOpcode()) {
    case Instruction::And: Code = LHSCode & RHSCode; break;
    case Instruction::Or:  Code = LHSCode | RHSCode; break;
    case Instruction::Xor: Code = LHSCode ^ RHSCode; break;
    default: llvm_unreachable("Illegal logical opcode!"); return 0;
    }

    bool isSigned = RHSICI->isSigned() || ICI->isSigned();
    Value *RV = getICmpValue(isSigned, Code, LHS, RHS);
    if (Instruction *I = dyn_cast<Instruction>(RV))
      return I;
    // Otherwise, it's a constant boolean value...
    return IC.ReplaceInstUsesWith(Log, RV);
  }
};
} // end anonymous namespace

// OptAndOp - This handles expressions of the form ((val OP C1) & C2), where
// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'.  Op is
// guaranteed to be a binary operator.
Instruction *InstCombiner::OptAndOp(Instruction *Op,
                                    ConstantInt *OpRHS,
                                    ConstantInt *AndRHS,
                                    BinaryOperator &TheAnd) {
  Value *X = Op->getOperand(0);
  Constant *Together = 0;
  if (!Op->isShift())
    Together = ConstantExpr::getAnd(AndRHS, OpRHS);

  switch (Op->getOpcode()) {
  case Instruction::Xor:
    if (Op->hasOneUse()) {
      // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
      Value *And = Builder->CreateAnd(X, AndRHS);
      And->takeName(Op);
      return BinaryOperator::CreateXor(And, Together);
    }
    break;
  case Instruction::Or:
    if (Together == AndRHS) // (X | C) & C --> C
      return ReplaceInstUsesWith(TheAnd, AndRHS);

    if (Op->hasOneUse() && Together != OpRHS) {
      // (X | C1) & C2 --> (X | (C1&C2)) & C2
      Value *Or = Builder->CreateOr(X, Together);
      Or->takeName(Op);
      return BinaryOperator::CreateAnd(Or, AndRHS);
    }
    break;
  case Instruction::Add:
    if (Op->hasOneUse()) {
      // Adding one to a single-bit bitfield should be turned into an XOR of
      // the bit.  The first thing to check is whether this AND is with a
      // single-bit constant.
      const APInt &AndRHSV = cast<ConstantInt>(AndRHS)->getValue();

      // If there is only one bit set.
      if (AndRHSV.isPowerOf2()) {
        // Ok, at this point, we know that we are masking the result of the
        // ADD down to exactly one bit.  If the constant we are adding has
        // no bits set below this bit, then we can eliminate the ADD.
        const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();

        // Check to see if any bits below the one bit set in AndRHSV are set.
        if ((AddRHS & (AndRHSV-1)) == 0) {
          // If not, the only thing that can affect the output of the AND is
          // the bit specified by AndRHSV.  If that bit is set, the effect of
          // the XOR is to toggle the bit.  If it is clear, then the ADD has
          // no effect.
          if ((AddRHS & AndRHSV) == 0) { // Bit is not set, no-op.
            TheAnd.setOperand(0, X);
            return &TheAnd;
          } else {
            // Pull the XOR out of the AND.
            Value *NewAnd = Builder->CreateAnd(X, AndRHS);
            NewAnd->takeName(Op);
            return BinaryOperator::CreateXor(NewAnd, AndRHS);
          }
        }
      }
    }
    break;

  case Instruction::Shl: {
    // We know that the AND will not produce any of the bits shifted in, so if
    // the anded constant includes them, clear them now!
    //
    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
    APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
    ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
                                       AndRHS->getValue() & ShlMask);

    if (CI->getValue() == ShlMask) {
      // Masking out bits that the shift already masks.
      return ReplaceInstUsesWith(TheAnd, Op);   // No need for the and.
    } else if (CI != AndRHS) {                  // Reducing bits set in and.
      TheAnd.setOperand(1, CI);
      return &TheAnd;
    }
    break;
  }
  case Instruction::LShr: {
    // We know that the AND will not produce any of the bits shifted in, so if
    // the anded constant includes them, clear them now!  This only applies to
    // unsigned shifts, because a signed shr may bring in set bits!
    //
    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
    APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
    ConstantInt *CI = ConstantInt::get(Op->getContext(),
                                       AndRHS->getValue() & ShrMask);

    if (CI->getValue() == ShrMask) {
      // Masking out bits that the shift already masks.
      return ReplaceInstUsesWith(TheAnd, Op);
    } else if (CI != AndRHS) {
      TheAnd.setOperand(1, CI);  // Reduce bits set in and cst.
      return &TheAnd;
    }
    break;
  }
  case Instruction::AShr:
    // Signed shr.
    // See if this is shifting in some sign extension, then masking it out
    // with an and.
    if (Op->hasOneUse()) {
      uint32_t BitWidth = AndRHS->getType()->getBitWidth();
      uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
      APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
      Constant *C = ConstantInt::get(Op->getContext(),
                                     AndRHS->getValue() & ShrMask);
      if (C == AndRHS) {          // Masking out bits shifted in.
        // (Val ashr C1) & C2 -> (Val lshr C1) & C2
        // Make the argument unsigned.
        Value *ShVal = Op->getOperand(0);
        ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
        return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
      }
    }
    break;
  }
  return 0;
}
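
// A worked example of the Xor case above, with made-up constants
// (illustrative IR, not from this file):
//   %t = xor i32 %x, 5
//   %r = and i32 %t, 12
// has Together == (5 & 12) == 4, so it becomes:
//   %t2 = and i32 %x, 12
//   %r  = xor i32 %t2, 4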


/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
/// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more
/// efficient (V-Lo) <u Hi-Lo.  This method expects that Lo <= Hi.  isSigned
/// indicates whether to treat V, Lo, and Hi as signed or not.  IB is the
/// location to insert new instructions.
Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
                                           bool isSigned, bool Inside,
                                           Instruction &IB) {
  assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
            ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
         "Lo is not <= Hi in range emission code!");

  if (Inside) {
    if (Lo == Hi)  // Trivially false.
      return new ICmpInst(ICmpInst::ICMP_NE, V, V);

    // V >= Min && V < Hi --> V < Hi
    if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
      ICmpInst::Predicate pred = (isSigned ?
        ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
      return new ICmpInst(pred, V, Hi);
    }

    // Emit V-Lo <u Hi-Lo
    Constant *NegLo = ConstantExpr::getNeg(Lo);
    Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
    Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
    return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
  }

  if (Lo == Hi)  // Trivially true.
    return new ICmpInst(ICmpInst::ICMP_EQ, V, V);

  // V < Min || V >= Hi -> V > Hi-1
  Hi = SubOne(cast<ConstantInt>(Hi));
  if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
    ICmpInst::Predicate pred = (isSigned ?
        ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
    return new ICmpInst(pred, V, Hi);
  }

  // Emit V-Lo >u Hi-1-Lo
  // Note that Hi has already had one subtracted from it, above.
  ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
  Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
  Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
  return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
}
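
// A worked example (illustrative, unsigned, Inside == true): the range test
// "V >= 5 && V < 10" is emitted as the single unsigned compare
//   %off = add i32 %V, -5
//   %cmp = icmp ult i32 %off, 5
// which is exactly the (V-Lo) <u Hi-Lo form described above.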

// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
// any number of 0s on either side.  The 1s are allowed to wrap from LSB to
// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.  0x0F0F0000 is
// not, since all 1s are not contiguous.
static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
  const APInt& V = Val->getValue();
  uint32_t BitWidth = Val->getType()->getBitWidth();
  if (!APIntOps::isShiftedMask(BitWidth, V)) return false;

  // Look for the first zero bit after the run of ones.
  MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
  // Look for the first non-zero bit.
  ME = V.getActiveBits();
  return true;
}
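
// For example (values worked by hand from the code above): for an i16
// constant 0x0FF0, whose run occupies bits 4..11, this computes MB == 5 and
// ME == 12.  That is, MB and ME are 1-based bit positions counted from the
// LSB, which is why FoldLogicalPlusAnd below uses getLowBitsSet(BitWidth,
// MB-1) to select the bits strictly below the run.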

/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
/// where isSub determines whether the operator is a sub.  If we can fold one of
/// the following xforms:
///
/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
///
/// return (A +/- B).
///
Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
                                        ConstantInt *Mask, bool isSub,
                                        Instruction &I) {
  Instruction *LHSI = dyn_cast<Instruction>(LHS);
  if (!LHSI || LHSI->getNumOperands() != 2 ||
      !isa<ConstantInt>(LHSI->getOperand(1))) return 0;

  ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));

  switch (LHSI->getOpcode()) {
  default: return 0;
  case Instruction::And:
    if (ConstantExpr::getAnd(N, Mask) == Mask) {
      // If the AndRHS is a power of two minus one (0+1+), this is simple.
      if ((Mask->getValue().countLeadingZeros() +
           Mask->getValue().countPopulation()) ==
          Mask->getValue().getBitWidth())
        break;

      // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
      // part, we don't need any explicit masks to take them out of A.  If that
      // is all N is, ignore it.
      uint32_t MB = 0, ME = 0;
      if (isRunOfOnes(Mask, MB, ME)) {  // begin/end bit of run, inclusive
        uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
        APInt LowMask(APInt::getLowBitsSet(BitWidth, MB-1));
        if (MaskedValueIsZero(RHS, LowMask))
          break;
      }
    }
    return 0;
  case Instruction::Or:
  case Instruction::Xor:
    // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
    if ((Mask->getValue().countLeadingZeros() +
         Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
        && ConstantExpr::getAnd(N, Mask)->isNullValue())
      break;
    return 0;
  }

  if (isSub)
    return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
  return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
}
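
// A worked example of the And case above (illustrative): with Mask == 0xFF,
//   ((A & 0xFF) + B) & 0xFF  -->  (A + B) & 0xFF
// is safe because carries only propagate upward, so the high bits of A that
// the inner mask clears can never influence the low byte of the sum.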

/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,
                                          ICmpInst *LHS, ICmpInst *RHS) {
  Value *Val, *Val2;
  ConstantInt *LHSCst, *RHSCst;
  ICmpInst::Predicate LHSCC, RHSCC;

  // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val),
                         m_ConstantInt(LHSCst))) ||
      !match(RHS, m_ICmp(RHSCC, m_Value(Val2),
                         m_ConstantInt(RHSCst))))
    return 0;

  if (LHSCst == RHSCst && LHSCC == RHSCC) {
    // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
    // where C is a power of 2
    if (LHSCC == ICmpInst::ICMP_ULT &&
        LHSCst->getValue().isPowerOf2()) {
      Value *NewOr = Builder->CreateOr(Val, Val2);
      return new ICmpInst(LHSCC, NewOr, LHSCst);
    }

    // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
    if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
      Value *NewOr = Builder->CreateOr(Val, Val2);
      return new ICmpInst(LHSCC, NewOr, LHSCst);
    }
  }

  // From here on, we only handle:
  //    (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
  if (Val != Val2) return 0;

  // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
  if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
      RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
      LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
      RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
    return 0;

  // We can't fold (ugt x, C) & (sgt x, C2).
  if (!PredicatesFoldable(LHSCC, RHSCC))
    return 0;

  // Ensure that the larger constant is on the RHS.
  bool ShouldSwap;
  if (CmpInst::isSigned(LHSCC) ||
      (ICmpInst::isEquality(LHSCC) &&
       CmpInst::isSigned(RHSCC)))
    ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
  else
    ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());

  if (ShouldSwap) {
    std::swap(LHS, RHS);
    std::swap(LHSCst, RHSCst);
    std::swap(LHSCC, RHSCC);
  }

  // At this point, we know we have two icmp instructions
  // comparing a value against two constants and and'ing the result
  // together.  Because of the above check, we know that we only have
  // icmp eq, icmp ne, icmp [su]lt, and icmp [su]gt here.  We also know
  // (from the FoldICmpLogical check above) that the two constants
  // are not equal and that the larger constant is on the RHS.
  assert(LHSCst != RHSCst && "Compares not folded above?");

  switch (LHSCC) {
  default: llvm_unreachable("Unknown integer condition code!");
  case ICmpInst::ICMP_EQ:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X == 13 & X == 15) -> false
    case ICmpInst::ICMP_UGT:        // (X == 13 & X >  15) -> false
    case ICmpInst::ICMP_SGT:        // (X == 13 & X >  15) -> false
      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
    case ICmpInst::ICMP_NE:         // (X == 13 & X != 15) -> X == 13
    case ICmpInst::ICMP_ULT:        // (X == 13 & X <  15) -> X == 13
    case ICmpInst::ICMP_SLT:        // (X == 13 & X <  15) -> X == 13
      return ReplaceInstUsesWith(I, LHS);
    }
  case ICmpInst::ICMP_NE:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_ULT:
      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
        return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);
      break;                        // (X != 13 & X u< 15) -> no change
    case ICmpInst::ICMP_SLT:
      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
        return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);
      break;                        // (X != 13 & X s< 15) -> no change
    case ICmpInst::ICMP_EQ:         // (X != 13 & X == 15) -> X == 15
    case ICmpInst::ICMP_UGT:        // (X != 13 & X u> 15) -> X u> 15
    case ICmpInst::ICMP_SGT:        // (X != 13 & X s> 15) -> X s> 15
      return ReplaceInstUsesWith(I, RHS);
    case ICmpInst::ICMP_NE:
      if (LHSCst == SubOne(RHSCst)) { // (X != 13 & X != 14) -> X-13 >u 1
        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
        return new ICmpInst(ICmpInst::ICMP_UGT, Add,
                            ConstantInt::get(Add->getType(), 1));
      }
      break;                        // (X != 13 & X != 15) -> no change
    }
    break;
  case ICmpInst::ICMP_ULT:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X u< 13 & X == 15) -> false
    case ICmpInst::ICMP_UGT:        // (X u< 13 & X u> 15) -> false
      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
    case ICmpInst::ICMP_SGT:        // (X u< 13 & X s> 15) -> no change
      break;
    case ICmpInst::ICMP_NE:         // (X u< 13 & X != 15) -> X u< 13
    case ICmpInst::ICMP_ULT:        // (X u< 13 & X u< 15) -> X u< 13
      return ReplaceInstUsesWith(I, LHS);
    case ICmpInst::ICMP_SLT:        // (X u< 13 & X s< 15) -> no change
      break;
    }
    break;
  case ICmpInst::ICMP_SLT:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X s< 13 & X == 15) -> false
    case ICmpInst::ICMP_SGT:        // (X s< 13 & X s> 15) -> false
      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
    case ICmpInst::ICMP_UGT:        // (X s< 13 & X u> 15) -> no change
      break;
    case ICmpInst::ICMP_NE:         // (X s< 13 & X != 15) -> X < 13
    case ICmpInst::ICMP_SLT:        // (X s< 13 & X s< 15) -> X < 13
      return ReplaceInstUsesWith(I, LHS);
    case ICmpInst::ICMP_ULT:        // (X s< 13 & X u< 15) -> no change
      break;
    }
    break;
  case ICmpInst::ICMP_UGT:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X u> 13 & X == 15) -> X == 15
    case ICmpInst::ICMP_UGT:        // (X u> 13 & X u> 15) -> X u> 15
      return ReplaceInstUsesWith(I, RHS);
    case ICmpInst::ICMP_SGT:        // (X u> 13 & X s> 15) -> no change
      break;
    case ICmpInst::ICMP_NE:
      if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
        return new ICmpInst(LHSCC, Val, RHSCst);
      break;                        // (X u> 13 & X != 15) -> no change
    case ICmpInst::ICMP_ULT:        // (X u> 13 & X u< 15) -> (X-14) <u 1
      return InsertRangeTest(Val, AddOne(LHSCst),
                             RHSCst, false, true, I);
    case ICmpInst::ICMP_SLT:        // (X u> 13 & X s< 15) -> no change
      break;
    }
    break;
  case ICmpInst::ICMP_SGT:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X s> 13 & X == 15) -> X == 15
    case ICmpInst::ICMP_SGT:        // (X s> 13 & X s> 15) -> X s> 15
      return ReplaceInstUsesWith(I, RHS);
    case ICmpInst::ICMP_UGT:        // (X s> 13 & X u> 15) -> no change
      break;
    case ICmpInst::ICMP_NE:
      if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
        return new ICmpInst(LHSCC, Val, RHSCst);
      break;                        // (X s> 13 & X != 15) -> no change
    case ICmpInst::ICMP_SLT:        // (X s> 13 & X s< 15) -> (X-14) s< 1
      return InsertRangeTest(Val, AddOne(LHSCst),
                             RHSCst, true, true, I);
    case ICmpInst::ICMP_ULT:        // (X s> 13 & X u< 15) -> no change
      break;
    }
    break;
  }

  return 0;
}
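
// A worked example of the UGT/ULT arm above (illustrative IR): the compare
// pair "(X u> 13) & (X u< 15)" only admits X == 14, and InsertRangeTest
// emits it as:
//   %off = add i32 %X, -14
//   %cmp = icmp ult i32 %off, 1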

Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS,
                                          FCmpInst *RHS) {
  if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
      RHS->getPredicate() == FCmpInst::FCMP_ORD) {
    // (fcmp ord x, c) & (fcmp ord y, c)  -> (fcmp ord x, y)
    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
        // If either of the constants are nans, then the whole thing returns
        // false.
        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
          return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
        return new FCmpInst(FCmpInst::FCMP_ORD,
                            LHS->getOperand(0), RHS->getOperand(0));
      }

    // Handle vector zeros.  This occurs because the canonical form of
    // "fcmp ord x,x" is "fcmp ord x, 0".
    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
        isa<ConstantAggregateZero>(RHS->getOperand(1)))
      return new FCmpInst(FCmpInst::FCMP_ORD,
                          LHS->getOperand(0), RHS->getOperand(0));
    return 0;
  }

  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();

  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
    // Swap RHS operands to match LHS.
    Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
    std::swap(Op1LHS, Op1RHS);
  }

  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
    // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
    if (Op0CC == Op1CC)
      return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS);

    if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE)
      return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
    if (Op0CC == FCmpInst::FCMP_TRUE)
      return ReplaceInstUsesWith(I, RHS);
    if (Op1CC == FCmpInst::FCMP_TRUE)
      return ReplaceInstUsesWith(I, LHS);

    bool Op0Ordered;
    bool Op1Ordered;
    unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
    unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
    if (Op1Pred == 0) {
      std::swap(LHS, RHS);
      std::swap(Op0Pred, Op1Pred);
      std::swap(Op0Ordered, Op1Ordered);
    }
    if (Op0Pred == 0) {
      // uno && ueq -> uno && (uno || eq) -> ueq
      // ord && olt -> ord && (ord && lt) -> olt
      if (Op0Ordered == Op1Ordered)
        return ReplaceInstUsesWith(I, RHS);

      // uno && oeq -> uno && (ord && eq) -> false
      // uno && ord -> false
      if (!Op0Ordered)
        return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
      // ord && ueq -> ord && (uno || eq) -> oeq
      return cast<Instruction>(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS));
    }
  }

  return 0;
}


Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
  bool Changed = SimplifyCommutative(I);
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  if (Value *V = SimplifyAndInst(Op0, Op1, TD))
    return ReplaceInstUsesWith(I, V);

  // See if we can simplify any instructions that this instruction uses whose
  // sole purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(I))
    return &I;

  if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
    const APInt &AndRHSMask = AndRHS->getValue();
    APInt NotAndRHS(~AndRHSMask);

    // Optimize a variety of ((val OP C1) & C2) combinations...
    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
      Value *Op0LHS = Op0I->getOperand(0);
      Value *Op0RHS = Op0I->getOperand(1);
      switch (Op0I->getOpcode()) {
      default: break;
      case Instruction::Xor:
      case Instruction::Or:
        // If the mask is only needed on one incoming arm, push it up.
        if (!Op0I->hasOneUse()) break;

        if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
          // Not masking anything out for the LHS, move to RHS.
          Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
                                             Op0RHS->getName()+".masked");
          return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
        }
        if (!isa<Constant>(Op0RHS) &&
            MaskedValueIsZero(Op0RHS, NotAndRHS)) {
          // Not masking anything out for the RHS, move to LHS.
          Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
                                             Op0LHS->getName()+".masked");
          return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);
        }

        break;
      case Instruction::Add:
        // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
        // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
        // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
        if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
          return BinaryOperator::CreateAnd(V, AndRHS);
        if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
          return BinaryOperator::CreateAnd(V, AndRHS);  // Add commutes
        break;

      case Instruction::Sub:
        // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
        // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
        // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
        if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
          return BinaryOperator::CreateAnd(V, AndRHS);

        // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
        // has 1's for all bits that the subtraction with A might affect.
        if (Op0I->hasOneUse()) {
          uint32_t BitWidth = AndRHSMask.getBitWidth();
          uint32_t Zeros = AndRHSMask.countLeadingZeros();
          APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);

          ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
          if (!(A && A->isZero()) &&               // avoid infinite recursion.
              MaskedValueIsZero(Op0LHS, Mask)) {
            Value *NewNeg = Builder->CreateNeg(Op0RHS);
            return BinaryOperator::CreateAnd(NewNeg, AndRHS);
          }
        }
        break;

      case Instruction::Shl:
      case Instruction::LShr:
        // (1 << x) & 1 --> zext(x == 0)
        // (1 >> x) & 1 --> zext(x == 0)
        if (AndRHSMask == 1 && Op0LHS == AndRHS) {
          Value *NewICmp =
            Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
          return new ZExtInst(NewICmp, I.getType());
        }
        break;
      }

      if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
        if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
          return Res;
    } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) {
      // If this is an integer truncation or change from signed-to-unsigned, and
      // if the source is an and/or with immediate, transform it.  This
      // frequently occurs for bitfield accesses.
      if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
        if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
            CastOp->getNumOperands() == 2)
          if (ConstantInt *AndCI =
                dyn_cast<ConstantInt>(CastOp->getOperand(1))) {
            if (CastOp->getOpcode() == Instruction::And) {
              // Change: and (cast (and X, C1) to T), C2
              // into  : and (cast X to T), trunc_or_bitcast(C1)&C2
              // This will fold the two constants together, which may allow
              // other simplifications.
              Value *NewCast = Builder->CreateTruncOrBitCast(
                CastOp->getOperand(0), I.getType(),
                CastOp->getName()+".shrunk");
              // trunc_or_bitcast(C1)&C2
              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
              C3 = ConstantExpr::getAnd(C3, AndRHS);
              return BinaryOperator::CreateAnd(NewCast, C3);
            } else if (CastOp->getOpcode() == Instruction::Or) {
              // Change: and (cast (or X, C1) to T), C2
              // into  : trunc(C1)&C2 iff trunc(C1)&C2 == C2
              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
              if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
                // trunc(C1)&C2
                return ReplaceInstUsesWith(I, AndRHS);
            }
          }
      }
    }

    // Try to fold constant and into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;
    if (isa<PHINode>(Op0))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;
  }

  // (~A & ~B) == (~(A | B)) - De Morgan's Law
  if (Value *Op0NotVal = dyn_castNotVal(Op0))
    if (Value *Op1NotVal = dyn_castNotVal(Op1))
      if (Op0->hasOneUse() && Op1->hasOneUse()) {
        Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
                                      I.getName()+".demorgan");
        return BinaryOperator::CreateNot(Or);
      }

  {
    Value *A = 0, *B = 0, *C = 0, *D = 0;
    // (A|B) & ~(A&B) -> A^B
    if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
        match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
        ((A == C && B == D) || (A == D && B == C)))
      return BinaryOperator::CreateXor(A, B);

    // ~(A&B) & (A|B) -> A^B
    if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
        match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
        ((A == C && B == D) || (A == D && B == C)))
      return BinaryOperator::CreateXor(A, B);

    if (Op0->hasOneUse() &&
        match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
      if (A == Op1) {                                // (A^B)&A -> A&(A^B)
        I.swapOperands();     // Simplify below
        std::swap(Op0, Op1);
      } else if (B == Op1) {                         // (A^B)&B -> B&(B^A)
        cast<BinaryOperator>(Op0)->swapOperands();
        I.swapOperands();     // Simplify below
        std::swap(Op0, Op1);
      }
    }

    if (Op1->hasOneUse() &&
        match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
      if (B == Op0) {                                // B&(A^B) -> B&(B^A)
        cast<BinaryOperator>(Op1)->swapOperands();
        std::swap(A, B);
      }
      if (A == Op0)                                // A&(A^B) -> A & ~B
        return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
    }

    // (A&((~A)|B)) -> A&B
    if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
        match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
      return BinaryOperator::CreateAnd(A, Op1);
    if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
        match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
      return BinaryOperator::CreateAnd(A, Op0);
  }

  if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) {
    // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
      return R;

    if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
      if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS))
        return Res;
  }

  // fold (and (cast A), (cast B)) -> (cast (and A, B))
  if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
      if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?
        const Type *SrcTy = Op0C->getOperand(0)->getType();
        if (SrcTy == Op1C->getOperand(0)->getType() &&
            SrcTy->isIntOrIntVector() &&
            // Only do this if the casts both really cause code to be generated.
            ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
                              I.getType()) &&
            ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
                              I.getType())) {
          Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0),
                                            Op1C->getOperand(0), I.getName());
          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
        }
      }

  // (X >> Z) & (Y >> Z)  -> (X&Y) >> Z  for all shifts.
  if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
    if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
      if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
          SI0->getOperand(1) == SI1->getOperand(1) &&
          (SI0->hasOneUse() || SI1->hasOneUse())) {
        Value *NewOp =
          Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
                             SI0->getName());
        return BinaryOperator::Create(SI1->getOpcode(), NewOp,
                                      SI1->getOperand(1));
      }
  }

  // If and'ing two fcmp, try combine them into one.
  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
        return Res;
  }

  return Changed ? &I : 0;
}

/// CollectBSwapParts - Analyze the specified subexpression and see if it is
/// capable of providing pieces of a bswap.  The subexpression provides pieces
/// of a bswap if it is proven that each of the non-zero bytes in the output of
/// the expression came from the corresponding "byte swapped" byte in some other
/// value.  For example, if the current subexpression is "(shl i32 %X, 24)" then
/// we know that the expression deposits the low byte of %X into the high byte
/// of the bswap result and that all other bytes are zero.  This expression is
/// accepted, and the high byte of ByteValues is set to %X to indicate a
/// correct match.
///
/// This function returns true if the match was unsuccessful and false if it
/// succeeded.  On entry to the function the "OverallLeftShift" is a signed
/// integer value indicating the number of bytes that the subexpression is
/// later shifted.  For example, if the expression is later right shifted by
/// 16 bits, the OverallLeftShift value would be -2 on entry.  This is used to
/// specify which byte of ByteValues is actually being set.
///
/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
/// byte is masked to zero by a user.  For example, in (X & 255), X will be
/// processed with a bytemask of 1.  Because bytemask is 32 bits, this limits
/// this function to working on up to 32-byte (256 bit) values.  ByteMask is
/// always in the local (OverallLeftShift) coordinate space.
///
static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
                              SmallVector<Value*, 8> &ByteValues) {
  if (Instruction *I = dyn_cast<Instruction>(V)) {
    // If this is an or instruction, it may be an inner node of the bswap.
    if (I->getOpcode() == Instruction::Or) {
      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
                               ByteValues) ||
             CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
                               ByteValues);
    }

    // If this is a logical shift by a constant multiple of 8, recurse with
    // OverallLeftShift and ByteMask adjusted.
    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
      unsigned ShAmt =
        cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
      // Ensure the shift amount is a whole number of bytes and in range.
      if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
        return true;

      unsigned ByteShift = ShAmt >> 3;
      if (I->getOpcode() == Instruction::Shl) {
        // X << 2 -> collect(X, +2)
        OverallLeftShift += ByteShift;
        ByteMask >>= ByteShift;
      } else {
        // X >>u 2 -> collect(X, -2)
        OverallLeftShift -= ByteShift;
        ByteMask <<= ByteShift;
        ByteMask &= (~0U >> (32-ByteValues.size()));
      }

      if (OverallLeftShift >= (int)ByteValues.size()) return true;
      if (OverallLeftShift <= -(int)ByteValues.size()) return true;

      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
                               ByteValues);
    }

    // If this is a logical 'and' with a mask that clears bytes, clear the
    // corresponding bytes in ByteMask.
    if (I->getOpcode() == Instruction::And &&
        isa<ConstantInt>(I->getOperand(1))) {
      // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
      unsigned NumBytes = ByteValues.size();
      APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();

      for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
        // If this byte is masked out by a later operation, we don't care what
        // the and mask is.
        if ((ByteMask & (1 << i)) == 0)
          continue;

        // If the AndMask is all zeros for this byte, clear the bit.
        APInt MaskB = AndMask & Byte;
        if (MaskB == 0) {
          ByteMask &= ~(1U << i);
          continue;
        }

        // If the AndMask is not all ones for this byte, it's not a bytezap.
        if (MaskB != Byte)
          return true;

        // Otherwise, this byte is kept.
      }

      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
                               ByteValues);
    }
  }

  // Okay, we got to something that isn't a shift, 'or' or 'and'.  This must be
  // the input value to the bswap.  Some observations: 1) if more than one byte
  // is demanded from this input, then it could not be successfully assembled
  // into a byteswap.  At least one of the two bytes would not be aligned with
  // their ultimate destination.
  if (!isPowerOf2_32(ByteMask)) return true;
  unsigned InputByteNo = CountTrailingZeros_32(ByteMask);

  // 2) The input and ultimate destinations must line up: if byte 3 of an i32
  // is demanded, it needs to go into byte 0 of the result.  This means that the
  // byte needs to be shifted until it lands in the right byte bucket.  The
  // shift amount depends on the position: if the byte is coming from the high
  // part of the value (e.g. byte 3) then it must be shifted right.  If from the
  // low part, it must be shifted left.  Either way, the destination byte must
  // be the mirror image of the input byte position.
  unsigned DestByteNo = InputByteNo + OverallLeftShift;
  if (ByteValues.size()-1-DestByteNo != InputByteNo)
    return true;

  // If the destination byte value is already defined, the values are or'd
  // together, which isn't a bswap (unless it's an or of the same bits).
  if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
    return true;
  ByteValues[DestByteNo] = V;
  return false;
}

/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
/// If so, insert the new bswap intrinsic and return it.
Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
  const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
  if (!ITy || ITy->getBitWidth() % 16 ||
      // ByteMask only allows up to 32-byte values.
      ITy->getBitWidth() > 32*8)
    return 0;   // Can only bswap pairs of bytes.  Can't do vectors.

  /// ByteValues - For each byte of the result, we keep track of which value
  /// defines each byte.
  SmallVector<Value*, 8> ByteValues;
  ByteValues.resize(ITy->getBitWidth()/8);

  // Try to find all the pieces corresponding to the bswap.
  uint32_t ByteMask = ~0U >> (32-ByteValues.size());
  if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
    return 0;

  // Check to see if all of the bytes come from the same value.
  Value *V = ByteValues[0];
  if (V == 0) return 0;  // Didn't find a byte?  Must be zero.

  // Check to make sure that all of the bytes come from the same value.
  for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
    if (ByteValues[i] != V)
      return 0;
  const Type *Tys[] = { ITy };
  Module *M = I.getParent()->getParent()->getParent();
  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
  return CallInst::Create(F, V);
}
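
// For example, the classic i32 byte-swap idiom (illustrative IR, assuming
// the usual shift/mask form) is recognized by the walk above:
//   %t0 = shl i32 %X, 24
//   %t1 = shl i32 %X, 8
//   %t2 = and i32 %t1, 16711680        ; 0x00FF0000
//   %t3 = lshr i32 %X, 8
//   %t4 = and i32 %t3, 65280           ; 0x0000FF00
//   %t5 = lshr i32 %X, 24
//   %t6 = or i32 %t0, %t2
//   %t7 = or i32 %t6, %t4
//   %t8 = or i32 %t7, %t5
// and is replaced with:
//   %t8 = call i32 @llvm.bswap.i32(i32 %X)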

/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D).  Check
/// if A is (cond ? -1 : 0) and either B or D is ~(cond ? -1 : 0) or
/// (cond ? 0 : -1); if so, we can simplify this expression to
/// "cond ? C : D or B".
static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
                                         Value *C, Value *D) {
  // If A is not a select of -1/0, this cannot match.
  Value *Cond = 0;
  if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond))))
    return 0;

  // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
  if (match(D, m_SelectCst<0, -1>(m_Specific(Cond))))
    return SelectInst::Create(Cond, C, B);
  if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
    return SelectInst::Create(Cond, C, B);
  // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
  if (match(B, m_SelectCst<0, -1>(m_Specific(Cond))))
    return SelectInst::Create(Cond, C, D);
  if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
    return SelectInst::Create(Cond, C, D);
  return 0;
}
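
// For example (illustrative): with A = select i1 %c, i32 -1, i32 0 and
// D = select i1 %c, i32 0, i32 -1, the expression (A & C) | (B & D)
// evaluates to C when %c is true and to B when %c is false, so it folds to
//   %r = select i1 %c, i32 %C, i32 %B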

/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
                                         ICmpInst *LHS, ICmpInst *RHS) {
  Value *Val, *Val2;
  ConstantInt *LHSCst, *RHSCst;
  ICmpInst::Predicate LHSCC, RHSCC;

  // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) ||
      !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst))))
    return 0;

  // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
  if (LHSCst == RHSCst && LHSCC == RHSCC &&
      LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
    Value *NewOr = Builder->CreateOr(Val, Val2);
    return new ICmpInst(LHSCC, NewOr, LHSCst);
  }

  // From here on, we only handle:
  //    (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
  if (Val != Val2) return 0;

  // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
  if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
      RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
      LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
      RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
    return 0;

  // We can't fold (ugt x, C) | (sgt x, C2).
  if (!PredicatesFoldable(LHSCC, RHSCC))
    return 0;

  // Ensure that the larger constant is on the RHS.
  bool ShouldSwap;
  if (CmpInst::isSigned(LHSCC) ||
      (ICmpInst::isEquality(LHSCC) &&
       CmpInst::isSigned(RHSCC)))
    ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
  else
    ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());

  if (ShouldSwap) {
    std::swap(LHS, RHS);
    std::swap(LHSCst, RHSCst);
    std::swap(LHSCC, RHSCC);
  }

  // At this point, we know we have two icmp instructions
  // comparing a value against two constants and or'ing the result
  // together.  Because of the above check, we know that we only have
  // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here.  We also know (from the
  // FoldICmpLogical check above) that the two constants are not
  // equal.
  assert(LHSCst != RHSCst && "Compares not folded above?");

  switch (LHSCC) {
  default: llvm_unreachable("Unknown integer condition code!");
  case ICmpInst::ICMP_EQ:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:
      if (LHSCst == SubOne(RHSCst)) {
        // (X == 13 | X == 14) -> X-13 <u 2
        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
        AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
        return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
      }
      break;                         // (X == 13 | X == 15) -> no change
    case ICmpInst::ICMP_UGT:         // (X == 13 | X u> 14) -> no change
    case ICmpInst::ICMP_SGT:         // (X == 13 | X s> 14) -> no change
      break;
    case ICmpInst::ICMP_NE:          // (X == 13 | X != 15) -> X != 15
    case ICmpInst::ICMP_ULT:         // (X == 13 | X u< 15) -> X u< 15
    case ICmpInst::ICMP_SLT:         // (X == 13 | X s< 15) -> X s< 15
      return ReplaceInstUsesWith(I, RHS);
    }
    break;
  case ICmpInst::ICMP_NE:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:          // (X != 13 | X == 15) -> X != 13
    case ICmpInst::ICMP_UGT:         // (X != 13 | X u> 15) -> X != 13
    case ICmpInst::ICMP_SGT:         // (X != 13 | X s> 15) -> X != 13
      return ReplaceInstUsesWith(I, LHS);
    case ICmpInst::ICMP_NE:          // (X != 13 | X != 15) -> true
    case ICmpInst::ICMP_ULT:         // (X != 13 | X u< 15) -> true
    case ICmpInst::ICMP_SLT:         // (X != 13 | X s< 15) -> true
      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
    }
    break;
  case ICmpInst::ICMP_ULT:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X u< 13 | X == 14) -> no change
      break;
    case ICmpInst::ICMP_UGT:        // (X u< 13 | X u> 15) -> (X-13) u> 2
      // If RHSCst is UMAXINT, the RHS compare is always false.  Not handling
      // this can cause overflow.
      if (RHSCst->isMaxValue(false))
        return ReplaceInstUsesWith(I, LHS);
      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
                             false, false, I);
    case ICmpInst::ICMP_SGT:        // (X u< 13 | X s> 15) -> no change
      break;
    case ICmpInst::ICMP_NE:         // (X u< 13 | X != 15) -> X != 15
    case ICmpInst::ICMP_ULT:        // (X u< 13 | X u< 15) -> X u< 15
      return ReplaceInstUsesWith(I, RHS);
    case ICmpInst::ICMP_SLT:        // (X u< 13 | X s< 15) -> no change
      break;
    }
    break;
  case ICmpInst::ICMP_SLT:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X s< 13 | X == 14) -> no change
      break;
    case ICmpInst::ICMP_SGT:        // (X s< 13 | X s> 15) -> (X-13) s> 2
      // If RHSCst is SMAXINT, the RHS compare is always false.  Not handling
      // this can cause overflow.
      if (RHSCst->isMaxValue(true))
        return ReplaceInstUsesWith(I, LHS);
      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
                             true, false, I);
    case ICmpInst::ICMP_UGT:        // (X s< 13 | X u> 15) -> no change
      break;
    case ICmpInst::ICMP_NE:         // (X s< 13 | X != 15) -> X != 15
    case ICmpInst::ICMP_SLT:        // (X s< 13 | X s< 15) -> X s< 15
      return ReplaceInstUsesWith(I, RHS);
    case ICmpInst::ICMP_ULT:        // (X s< 13 | X u< 15) -> no change
      break;
    }
    break;
  case ICmpInst::ICMP_UGT:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X u> 13 | X == 15) -> X u> 13
    case ICmpInst::ICMP_UGT:        // (X u> 13 | X u> 15) -> X u> 13
      return ReplaceInstUsesWith(I, LHS);
    case ICmpInst::ICMP_SGT:        // (X u> 13 | X s> 15) -> no change
      break;
    case ICmpInst::ICMP_NE:         // (X u> 13 | X != 15) -> true
    case ICmpInst::ICMP_ULT:        // (X u> 13 | X u< 15) -> true
      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
    case ICmpInst::ICMP_SLT:        // (X u> 13 | X s< 15) -> no change
      break;
    }
    break;
  case ICmpInst::ICMP_SGT:
    switch (RHSCC) {
    default: llvm_unreachable("Unknown integer condition code!");
    case ICmpInst::ICMP_EQ:         // (X s> 13 | X == 15) -> X > 13
    case ICmpInst::ICMP_SGT:        // (X s> 13 | X s> 15) -> X > 13
      return ReplaceInstUsesWith(I, LHS);
    case ICmpInst::ICMP_UGT:        // (X s> 13 | X u> 15) -> no change
      break;
    case ICmpInst::ICMP_NE:         // (X s> 13 | X != 15) -> true
    case ICmpInst::ICMP_SLT:        // (X s> 13 | X s< 15) -> true
      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
    case ICmpInst::ICMP_ULT:        // (X s> 13 | X u< 15) -> no change
      break;
    }
    break;
  }
  return 0;
}

Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
                                         FCmpInst *RHS) {
  if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
      RHS->getPredicate() == FCmpInst::FCMP_UNO &&
      LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
        // If either of the constants are nans, then the whole thing returns
        // true.
        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
          return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));

        // Otherwise, no need to compare the two constants, compare the
        // rest.
        return new FCmpInst(FCmpInst::FCMP_UNO,
                            LHS->getOperand(0), RHS->getOperand(0));
      }

    // Handle vector zeros.  This occurs because the canonical form of
    // "fcmp uno x,x" is "fcmp uno x, 0".
    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
        isa<ConstantAggregateZero>(RHS->getOperand(1)))
      return new FCmpInst(FCmpInst::FCMP_UNO,
                          LHS->getOperand(0), RHS->getOperand(0));

    return 0;
  }

  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();

  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
    // Swap RHS operands to match LHS.
    Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
    std::swap(Op1LHS, Op1RHS);
  }
  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
    // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
    if (Op0CC == Op1CC)
      return new FCmpInst((FCmpInst::Predicate)Op0CC,
                          Op0LHS, Op0RHS);
    if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
      return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
    if (Op0CC == FCmpInst::FCMP_FALSE)
      return ReplaceInstUsesWith(I, RHS);
    if (Op1CC == FCmpInst::FCMP_FALSE)
      return ReplaceInstUsesWith(I, LHS);
    bool Op0Ordered;
    bool Op1Ordered;
    unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
    unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
    if (Op0Ordered == Op1Ordered) {
      // If both are ordered or unordered, return a new fcmp with
      // or'ed predicates.
      Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS);
      if (Instruction *I = dyn_cast<Instruction>(RV))
        return I;
      // Otherwise, it's a constant boolean value.
      return ReplaceInstUsesWith(I, RV);
    }
  }
  return 0;
}

/// FoldOrWithConstants - This helper function folds:
///
///     ((A | B) & C1) | (B & C2)
///
/// into:
///
///     (A & C1) | B
///
/// when the XOR of the two constants is "all ones" (-1).
Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
                                               Value *A, Value *B, Value *C) {
  ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
  if (!CI1) return 0;

  Value *V1 = 0;
  ConstantInt *CI2 = 0;
  if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;

  APInt Xor = CI1->getValue() ^ CI2->getValue();
  if (!Xor.isAllOnesValue()) return 0;

  if (V1 == A || V1 == B) {
    Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
    return BinaryOperator::CreateOr(NewOp, V1);
  }

  return 0;
}
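
// A worked example (illustrative, over i8, with C1 == 0xF0 and C2 == 0x0F,
// so C1 ^ C2 == -1):
//   ((A | B) & 0xF0) | (B & 0x0F)  -->  (A & 0xF0) | B
// B's low nibble arrives through the right-hand arm, and B's high nibble is
// already covered by the (A | B) arm, so both masks on B can be dropped.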
2610
2611Instruction *InstCombiner::visitOr(BinaryOperator &I) {
2612  bool Changed = SimplifyCommutative(I);
2613  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2614
2615  if (Value *V = SimplifyOrInst(Op0, Op1, TD))
2616    return ReplaceInstUsesWith(I, V);
2617
2618
2619  // See if we can simplify any instructions used by the instruction whose sole
2620  // purpose is to compute bits we don't care about.
2621  if (SimplifyDemandedInstructionBits(I))
2622    return &I;
2623
2624  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
2625    ConstantInt *C1 = 0; Value *X = 0;
2626    // (X & C1) | C2 --> (X | C2) & (C1|C2)
2627    if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
2628        isOnlyUse(Op0)) {
2629      Value *Or = Builder->CreateOr(X, RHS);
2630      Or->takeName(Op0);
2631      return BinaryOperator::CreateAnd(Or,
2632                         ConstantInt::get(I.getContext(),
2633                                          RHS->getValue() | C1->getValue()));
2634    }
2635
2636    // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
2637    if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
2638        isOnlyUse(Op0)) {
2639      Value *Or = Builder->CreateOr(X, RHS);
2640      Or->takeName(Op0);
2641      return BinaryOperator::CreateXor(Or,
2642                 ConstantInt::get(I.getContext(),
2643                                  C1->getValue() & ~RHS->getValue()));
2644    }
2645
2646    // Try to fold constant and into select arguments.
2647    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
2648      if (Instruction *R = FoldOpIntoSelect(I, SI))
2649        return R;
2650    if (isa<PHINode>(Op0))
2651      if (Instruction *NV = FoldOpIntoPhi(I))
2652        return NV;
2653  }
2654
2655  Value *A = 0, *B = 0;
2656  ConstantInt *C1 = 0, *C2 = 0;
2657
2658  // (A | B) | C  and  A | (B | C)                  -> bswap if possible.
2659  // (A >> B) | (C << D)  and  (A << B) | (C >> D)  -> bswap if possible.
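  // A worked instance of the shift form, for an i16 byte swap:
  //   %hi = lshr i16 %x, 8
  //   %lo = shl i16 %x, 8
  //   %r  = or i16 %hi, %lo         ; --> call i16 @llvm.bswap.i16(i16 %x)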
2660  if (match(Op0, m_Or(m_Value(), m_Value())) ||
2661      match(Op1, m_Or(m_Value(), m_Value())) ||
2662      (match(Op0, m_Shift(m_Value(), m_Value())) &&
2663       match(Op1, m_Shift(m_Value(), m_Value())))) {
2664    if (Instruction *BSwap = MatchBSwap(I))
2665      return BSwap;
2666  }
2667
2668  // (X^C)|Y -> (X|Y)^C iff Y&C == 0
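  // e.g. with C = 8 and bit 3 of Y known zero:
  //   (%x ^ 8) | %y  -->  (%x | %y) ^ 8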
2669  if (Op0->hasOneUse() &&
2670      match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
2671      MaskedValueIsZero(Op1, C1->getValue())) {
2672    Value *NOr = Builder->CreateOr(A, Op1);
2673    NOr->takeName(Op0);
2674    return BinaryOperator::CreateXor(NOr, C1);
2675  }
2676
2677  // Y|(X^C) -> (X|Y)^C iff Y&C == 0
2678  if (Op1->hasOneUse() &&
2679      match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
2680      MaskedValueIsZero(Op0, C1->getValue())) {
2681    Value *NOr = Builder->CreateOr(A, Op0);
2682    NOr->takeName(Op0);
2683    return BinaryOperator::CreateXor(NOr, C1);
2684  }
2685
2686  // (A & C)|(B & D)
2687  Value *C = 0, *D = 0;
2688  if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
2689      match(Op1, m_And(m_Value(B), m_Value(D)))) {
2690    Value *V1 = 0, *V2 = 0, *V3 = 0;
2691    C1 = dyn_cast<ConstantInt>(C);
2692    C2 = dyn_cast<ConstantInt>(D);
2693    if (C1 && C2) {  // (A & C1)|(B & C2)
2694      // If we have: ((V + N) & C1) | (V & C2)
2695      // .. and C2 = ~C1, C2 is a low-bit mask (binary 0+1+), and (N & C2) == 0
2696      // replace with V+N.
2697      if (C1->getValue() == ~C2->getValue()) {
2698        if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
2699            match(A, m_Add(m_Value(V1), m_Value(V2)))) {
2700          // Add commutes, try both ways.
2701          if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
2702            return ReplaceInstUsesWith(I, A);
2703          if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
2704            return ReplaceInstUsesWith(I, A);
2705        }
2706        // Or commutes, try both ways.
2707        if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
2708            match(B, m_Add(m_Value(V1), m_Value(V2)))) {
2709          // Add commutes, try both ways.
2710          if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
2711            return ReplaceInstUsesWith(I, B);
2712          if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
2713            return ReplaceInstUsesWith(I, B);
2714        }
2715      }
2716
2717      // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
2718      // iff (C1&C2) == 0 and (N&~C1) == 0
2719      if ((C1->getValue() & C2->getValue()) == 0) {
2720        if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
2721            ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) ||  // (V|N)
2722             (V2 == B && MaskedValueIsZero(V1, ~C1->getValue()))))   // (N|V)
2723          return BinaryOperator::CreateAnd(A,
2724                               ConstantInt::get(A->getContext(),
2725                                                C1->getValue()|C2->getValue()));
2726        // Or commutes, try both ways.
2727        if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
2728            ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) ||  // (V|N)
2729             (V2 == A && MaskedValueIsZero(V1, ~C2->getValue()))))   // (N|V)
2730          return BinaryOperator::CreateAnd(B,
2731                               ConstantInt::get(B->getContext(),
2732                                                C1->getValue()|C2->getValue()));
2733      }
2734    }
2735
2736    // Check to see if we have any common things being and'ed.  If so, find the
2737    // terms for V1 & (V2|V3).
2738    if (isOnlyUse(Op0) || isOnlyUse(Op1)) {
2739      V1 = 0;
2740      if (A == B)      // (A & C)|(A & D) == A & (C|D)
2741        V1 = A, V2 = C, V3 = D;
2742      else if (A == D) // (A & C)|(B & A) == A & (B|C)
2743        V1 = A, V2 = B, V3 = C;
2744      else if (C == B) // (A & C)|(C & D) == C & (A|D)
2745        V1 = C, V2 = A, V3 = D;
2746      else if (C == D) // (A & C)|(B & C) == C & (A|B)
2747        V1 = C, V2 = A, V3 = B;
2748
2749      if (V1) {
2750        Value *Or = Builder->CreateOr(V2, V3, "tmp");
2751        return BinaryOperator::CreateAnd(V1, Or);
2752      }
2753    }
2754
2755    // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) ->  C0 ? A : B, and commuted variants
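    // Illustratively, when C is a sign-extended i1 condition and D is its
    // complement:
    //   %m  = sext i1 %cond to i32
    //   %nm = xor i32 %m, -1
    //   (%a & %m) | (%b & %nm)  -->  select i1 %cond, i32 %a, i32 %b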
2756    if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
2757      return Match;
2758    if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
2759      return Match;
2760    if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D))
2761      return Match;
2762    if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C))
2763      return Match;
2764
2765    // ((A&~B)|(~A&B)) -> A^B
2766    if ((match(C, m_Not(m_Specific(D))) &&
2767         match(B, m_Not(m_Specific(A)))))
2768      return BinaryOperator::CreateXor(A, D);
2769    // ((~B&A)|(~A&B)) -> A^B
2770    if ((match(A, m_Not(m_Specific(D))) &&
2771         match(B, m_Not(m_Specific(C)))))
2772      return BinaryOperator::CreateXor(C, D);
2773    // ((A&~B)|(B&~A)) -> A^B
2774    if ((match(C, m_Not(m_Specific(B))) &&
2775         match(D, m_Not(m_Specific(A)))))
2776      return BinaryOperator::CreateXor(A, B);
2777    // ((~B&A)|(B&~A)) -> A^B
2778    if ((match(A, m_Not(m_Specific(B))) &&
2779         match(D, m_Not(m_Specific(C)))))
2780      return BinaryOperator::CreateXor(C, B);
2781  }
2782
2783  // (X >> Z) | (Y >> Z)  -> (X|Y) >> Z  for all shifts.
2784  if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
2785    if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
2786      if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
2787          SI0->getOperand(1) == SI1->getOperand(1) &&
2788          (SI0->hasOneUse() || SI1->hasOneUse())) {
2789        Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
2790                                         SI0->getName());
2791        return BinaryOperator::Create(SI1->getOpcode(), NewOp,
2792                                      SI1->getOperand(1));
2793      }
2794  }
2795
2796  // ((A|B)&1)|(B&-2) -> (A&1) | B
2797  if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
2798      match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
2799    Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C);
2800    if (Ret) return Ret;
2801  }
2802  // (B&-2)|((A|B)&1) -> (A&1) | B
2803  if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
2804      match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
2805    Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C);
2806    if (Ret) return Ret;
2807  }
2808
2809  // (~A | ~B) == (~(A & B)) - De Morgan's Law
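  // e.g.:
  //   %na = xor i32 %a, -1
  //   %nb = xor i32 %b, -1
  //   %r  = or i32 %na, %nb    -->  %r = xor i32 (and i32 %a, %b), -1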
2810  if (Value *Op0NotVal = dyn_castNotVal(Op0))
2811    if (Value *Op1NotVal = dyn_castNotVal(Op1))
2812      if (Op0->hasOneUse() && Op1->hasOneUse()) {
2813        Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
2814                                        I.getName()+".demorgan");
2815        return BinaryOperator::CreateNot(And);
2816      }
2817
2818  // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
2819  if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) {
2820    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
2821      return R;
2822
2823    if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
2824      if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
2825        return Res;
2826  }
2827
2828  // fold (or (cast A), (cast B)) -> (cast (or A, B))
2829  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
2830    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
2831      if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
2832        if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
2833            !isa<ICmpInst>(Op1C->getOperand(0))) {
2834          const Type *SrcTy = Op0C->getOperand(0)->getType();
2835          if (SrcTy == Op1C->getOperand(0)->getType() &&
2836              SrcTy->isIntOrIntVector() &&
2837              // Only do this if the casts both really cause code to be
2838              // generated.
2839              ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
2840                                I.getType()) &&
2841              ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
2842                                I.getType())) {
2843            Value *NewOp = Builder->CreateOr(Op0C->getOperand(0),
2844                                             Op1C->getOperand(0), I.getName());
2845            return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
2846          }
2847        }
2848      }
2849  }
2850
2851
2852  // (fcmp uno x, c) | (fcmp uno y, c)  -> (fcmp uno x, y)
2853  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
2854    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
2855      if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS))
2856        return Res;
2857  }
2858
2859  return Changed ? &I : 0;
2860}
2861
2862namespace {
2863
2864// XorSelf - Implements: X ^ X --> 0
2865struct XorSelf {
2866  Value *RHS;
2867  XorSelf(Value *rhs) : RHS(rhs) {}
2868  bool shouldApply(Value *LHS) const { return LHS == RHS; }
2869  Instruction *apply(BinaryOperator &Xor) const {
2870    return &Xor;
2871  }
2872};
2873
2874}
2875
2876Instruction *InstCombiner::visitXor(BinaryOperator &I) {
2877  bool Changed = SimplifyCommutative(I);
2878  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2879
2880  if (isa<UndefValue>(Op1)) {
2881    if (isa<UndefValue>(Op0))
2882      // Handle undef ^ undef -> 0 special case. This is a common
2883      // idiom (misuse).
2884      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
2885    return ReplaceInstUsesWith(I, Op1);  // X ^ undef -> undef
2886  }
2887
2888  // xor X, X = 0, even if X is nested in a sequence of Xor's.
2889  if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
2890    assert(Result == &I && "AssociativeOpt didn't work?"); (void)Result;
2891    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
2892  }
2893
2894  // See if we can simplify any instructions used by the instruction whose sole
2895  // purpose is to compute bits we don't care about.
2896  if (SimplifyDemandedInstructionBits(I))
2897    return &I;
2898  if (isa<VectorType>(I.getType()))
2899    if (isa<ConstantAggregateZero>(Op1))
2900      return ReplaceInstUsesWith(I, Op0);  // X ^ <0,0> -> X
2901
2902  // Is this a ~ operation?
2903  if (Value *NotOp = dyn_castNotVal(&I)) {
2904    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
2905      if (Op0I->getOpcode() == Instruction::And ||
2906          Op0I->getOpcode() == Instruction::Or) {
2907        // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
2908        // ~(~X | Y) --> (X & ~Y) - De Morgan's Law
2909        if (dyn_castNotVal(Op0I->getOperand(1)))
2910          Op0I->swapOperands();
2911        if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
2912          Value *NotY =
2913            Builder->CreateNot(Op0I->getOperand(1),
2914                               Op0I->getOperand(1)->getName()+".not");
2915          if (Op0I->getOpcode() == Instruction::And)
2916            return BinaryOperator::CreateOr(Op0NotVal, NotY);
2917          return BinaryOperator::CreateAnd(Op0NotVal, NotY);
2918        }
2919
2920        // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
2921        // ~(X | Y) --> (~X & ~Y) - De Morgan's Law
2922        if (isFreeToInvert(Op0I->getOperand(0)) &&
2923            isFreeToInvert(Op0I->getOperand(1))) {
2924          Value *NotX =
2925            Builder->CreateNot(Op0I->getOperand(0), "notlhs");
2926          Value *NotY =
2927            Builder->CreateNot(Op0I->getOperand(1), "notrhs");
2928          if (Op0I->getOpcode() == Instruction::And)
2929            return BinaryOperator::CreateOr(NotX, NotY);
2930          return BinaryOperator::CreateAnd(NotX, NotY);
2931        }
2932      }
2933    }
2934  }
2935
2936
2937  if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
2938    if (RHS->isOne() && Op0->hasOneUse()) {
2939      // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
2940      if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
2941        return new ICmpInst(ICI->getInversePredicate(),
2942                            ICI->getOperand(0), ICI->getOperand(1));
2943
2944      if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0))
2945        return new FCmpInst(FCI->getInversePredicate(),
2946                            FCI->getOperand(0), FCI->getOperand(1));
2947    }
2948
2949    // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
2950    if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
2951      if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
2952        if (CI->hasOneUse() && Op0C->hasOneUse()) {
2953          Instruction::CastOps Opcode = Op0C->getOpcode();
2954          if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
2955              (RHS == ConstantExpr::getCast(Opcode,
2956                                           ConstantInt::getTrue(I.getContext()),
2957                                            Op0C->getDestTy()))) {
2958            CI->setPredicate(CI->getInversePredicate());
2959            return CastInst::Create(Opcode, CI, Op0C->getType());
2960          }
2961        }
2962      }
2963    }
2964
2965    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
2966      // ~(c-X) == X-c-1 == X+(-c-1)
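      // e.g. ~(5 - %x) == %x - 6 == %x + (-5 - 1)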
2967      if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
2968        if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
2969          Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
2970          Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
2971                                      ConstantInt::get(I.getType(), 1));
2972          return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
2973        }
2974
2975      if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
2976        if (Op0I->getOpcode() == Instruction::Add) {
2977          // ~(X+c) --> (-c-1)-X
2978          if (RHS->isAllOnesValue()) {
2979            Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
2980            return BinaryOperator::CreateSub(
2981                           ConstantExpr::getSub(NegOp0CI,
2982                                      ConstantInt::get(I.getType(), 1)),
2983                                      Op0I->getOperand(0));
2984          } else if (RHS->getValue().isSignBit()) {
2985            // (X + C) ^ signbit -> (X + C + signbit)
2986            Constant *C = ConstantInt::get(I.getContext(),
2987                                           RHS->getValue() + Op0CI->getValue());
2988            return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
2989
2990          }
2991        } else if (Op0I->getOpcode() == Instruction::Or) {
2992          // (X|C1)^C2 -> X^(C1|C2) iff (X & C1) == 0
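          // e.g. with (%x & 12) known zero:
          //   (%x | 12) ^ 53  -->  %x ^ 57
          // (12|53 == 61, and clearing the common bits 12&53 == 4 gives 57.)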
2993          if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
2994            Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
2995            // Anything in both C1 and C2 is known to be zero, remove it from
2996            // NewRHS.
2997            Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
2998            NewRHS = ConstantExpr::getAnd(NewRHS,
2999                                       ConstantExpr::getNot(CommonBits));
3000            Worklist.Add(Op0I);
3001            I.setOperand(0, Op0I->getOperand(0));
3002            I.setOperand(1, NewRHS);
3003            return &I;
3004          }
3005        }
3006      }
3007    }
3008
3009    // Try to fold constant and into select arguments.
3010    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
3011      if (Instruction *R = FoldOpIntoSelect(I, SI))
3012        return R;
3013    if (isa<PHINode>(Op0))
3014      if (Instruction *NV = FoldOpIntoPhi(I))
3015        return NV;
3016  }
3017
3018  if (Value *X = dyn_castNotVal(Op0))   // ~A ^ A == -1
3019    if (X == Op1)
3020      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
3021
3022  if (Value *X = dyn_castNotVal(Op1))   // A ^ ~A == -1
3023    if (X == Op0)
3024      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
3025
3026
3027  BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
3028  if (Op1I) {
3029    Value *A, *B;
3030    if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
3031      if (A == Op0) {              // B^(B|A) == (A|B)^B
3032        Op1I->swapOperands();
3033        I.swapOperands();
3034        std::swap(Op0, Op1);
3035      } else if (B == Op0) {       // B^(A|B) == (A|B)^B
3036        I.swapOperands();     // Simplified below.
3037        std::swap(Op0, Op1);
3038      }
3039    } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) {
3040      return ReplaceInstUsesWith(I, B);                      // A^(A^B) == B
3041    } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
3042      return ReplaceInstUsesWith(I, A);                      // A^(B^A) == B
3043    } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
3044               Op1I->hasOneUse()){
3045      if (A == Op0) {                                      // A^(A&B) -> A^(B&A)
3046        Op1I->swapOperands();
3047        std::swap(A, B);
3048      }
3049      if (B == Op0) {                                      // A^(B&A) -> (B&A)^A
3050        I.swapOperands();     // Simplified below.
3051        std::swap(Op0, Op1);
3052      }
3053    }
3054  }
3055
3056  BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
3057  if (Op0I) {
3058    Value *A, *B;
3059    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
3060        Op0I->hasOneUse()) {
3061      if (A == Op1)                                  // (B|A)^B == (A|B)^B
3062        std::swap(A, B);
3063      if (B == Op1)                                  // (A|B)^B == A & ~B
3064        return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
3065    } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
3066      return ReplaceInstUsesWith(I, B);                      // (A^B)^A == B
3067    } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
3068      return ReplaceInstUsesWith(I, A);                      // (B^A)^A == B
3069    } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
3070               Op0I->hasOneUse()){
3071      if (A == Op1)                                        // (A&B)^A -> (B&A)^A
3072        std::swap(A, B);
3073      if (B == Op1 &&                                      // (B&A)^A == ~B & A
3074          !isa<ConstantInt>(Op1)) {  // Canonical form is (B&C)^C
3075        return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
3076      }
3077    }
3078  }
3079
3080  // (X >> Z) ^ (Y >> Z)  -> (X^Y) >> Z  for all shifts.
3081  if (Op0I && Op1I && Op0I->isShift() &&
3082      Op0I->getOpcode() == Op1I->getOpcode() &&
3083      Op0I->getOperand(1) == Op1I->getOperand(1) &&
3084      (Op0I->hasOneUse() || Op1I->hasOneUse())) {
3085    Value *NewOp =
3086      Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
3087                         Op0I->getName());
3088    return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
3089                                  Op1I->getOperand(1));
3090  }
3091
3092  if (Op0I && Op1I) {
3093    Value *A, *B, *C, *D;
3094    // (A & B)^(A | B) -> A ^ B
3095    if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
3096        match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
3097      if ((A == C && B == D) || (A == D && B == C))
3098        return BinaryOperator::CreateXor(A, B);
3099    }
3100    // (A | B)^(A & B) -> A ^ B
3101    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
3102        match(Op1I, m_And(m_Value(C), m_Value(D)))) {
3103      if ((A == C && B == D) || (A == D && B == C))
3104        return BinaryOperator::CreateXor(A, B);
3105    }
3106
3107    // (A & B)^(C & D)
3108    if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
3109        match(Op0I, m_And(m_Value(A), m_Value(B))) &&
3110        match(Op1I, m_And(m_Value(C), m_Value(D)))) {
3111      // (X & Y)^(X & Z) -> (Y^Z) & X
3112      Value *X = 0, *Y = 0, *Z = 0;
3113      if (A == C)
3114        X = A, Y = B, Z = D;
3115      else if (A == D)
3116        X = A, Y = B, Z = C;
3117      else if (B == C)
3118        X = B, Y = A, Z = D;
3119      else if (B == D)
3120        X = B, Y = A, Z = C;
3121
3122      if (X) {
3123        Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
3124        return BinaryOperator::CreateAnd(NewOp, X);
3125      }
3126    }
3127  }
3128
3129  // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
3130  if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
3131    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
3132      return R;
3133
3134  // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
3135  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
3136    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
3137      if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
3138        const Type *SrcTy = Op0C->getOperand(0)->getType();
3139        if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
3140            // Only do this if the casts both really cause code to be generated.
3141            ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
3142                              I.getType()) &&
3143            ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
3144                              I.getType())) {
3145          Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
3146                                            Op1C->getOperand(0), I.getName());
3147          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
3148        }
3149      }
3150  }
3151
3152  return Changed ? &I : 0;
3153}
3154
3155
3156Instruction *InstCombiner::visitShl(BinaryOperator &I) {
3157  return commonShiftTransforms(I);
3158}
3159
3160Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
3161  return commonShiftTransforms(I);
3162}
3163
3164Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
3165  if (Instruction *R = commonShiftTransforms(I))
3166    return R;
3167
3168  Value *Op0 = I.getOperand(0);
3169
3170  // ashr i32 -1, X = -1   (any arithmetic shift right of ~0 yields ~0)
3171  if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
3172    if (CSI->isAllOnesValue())
3173      return ReplaceInstUsesWith(I, CSI);
3174
3175  // See if we can turn a signed shr into an unsigned shr.
3176  if (MaskedValueIsZero(Op0,
3177                        APInt::getSignBit(I.getType()->getScalarSizeInBits())))
3178    return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
3179
3180  // Arithmetic shifting an all-sign-bit value is a no-op.
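  // (e.g. %v = sext i1 %b to i32 is all sign bits, so ashr'ing it by any
  // amount leaves it unchanged.)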
3181  unsigned NumSignBits = ComputeNumSignBits(Op0);
3182  if (NumSignBits == Op0->getType()->getScalarSizeInBits())
3183    return ReplaceInstUsesWith(I, Op0);
3184
3185  return 0;
3186}
3187
3188Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
3189  assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
3190  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
3191
3192  // shl X, 0 == X and shr X, 0 == X
3193  // shl 0, X == 0 and shr 0, X == 0
3194  if (Op1 == Constant::getNullValue(Op1->getType()) ||
3195      Op0 == Constant::getNullValue(Op0->getType()))
3196    return ReplaceInstUsesWith(I, Op0);
3197
3198  if (isa<UndefValue>(Op0)) {
3199    if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
3200      return ReplaceInstUsesWith(I, Op0);
3201    else                                    // undef << X -> 0, undef >>u X -> 0
3202      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
3203  }
3204  if (isa<UndefValue>(Op1)) {
3205    if (I.getOpcode() == Instruction::AShr)  // X >>s undef -> X
3206      return ReplaceInstUsesWith(I, Op0);
3207    else                                     // X << undef, X >>u undef -> 0
3208      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
3209  }
3210
3211  // See if we can fold away this shift.
3212  if (SimplifyDemandedInstructionBits(I))
3213    return &I;
3214
3215  // Try to fold constant and into select arguments.
3216  if (isa<Constant>(Op0))
3217    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
3218      if (Instruction *R = FoldOpIntoSelect(I, SI))
3219        return R;
3220
3221  if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
3222    if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
3223      return Res;
3224  return 0;
3225}
3226
3227Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
3228                                               BinaryOperator &I) {
3229  bool isLeftShift = I.getOpcode() == Instruction::Shl;
3230
3233  uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
3234
3235  // shl i32 X, 32 == 0 and lshr i8 Y, 9 == 0: oversized logical shifts
3236  // yield zero, but don't eliminate an oversized ashr here; it is clamped
3237  // to TypeBits-1 below instead.
3238  if (Op1->uge(TypeBits)) {
3239    if (I.getOpcode() != Instruction::AShr)
3240      return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
3241    else {
3242      I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
3243      return &I;
3244    }
3245  }
3246
3247  // ((X*C1) << C2) == (X * (C1 << C2))
3248  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
3249    if (BO->getOpcode() == Instruction::Mul && isLeftShift)
3250      if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
3251        return BinaryOperator::CreateMul(BO->getOperand(0),
3252                                        ConstantExpr::getShl(BOOp, Op1));
3253
3254  // Try to fold constant and into select arguments.
3255  if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
3256    if (Instruction *R = FoldOpIntoSelect(I, SI))
3257      return R;
3258  if (isa<PHINode>(Op0))
3259    if (Instruction *NV = FoldOpIntoPhi(I))
3260      return NV;
3261
3262  // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
3263  if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
3264    Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
3265    // If 'shift2' is an ashr, we would have to get the sign bit into a funny
3266    // place.  Don't try to do this transformation in this case.  Also, we
3267    // require that the input operand is a shift-by-constant so that we have
3268    // confidence that the shifts will get folded together.  We could do this
3269    // xform in more cases, but it is unlikely to be profitable.
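    // An illustrative instance (i32 source, i16 destination):
    //   %s1 = lshr i32 %x, 3
    //   %t  = trunc i32 %s1 to i16
    //   %s2 = shl i16 %t, 2
    // becomes:
    //   %s1 = lshr i32 %x, 3
    //   %n  = shl i32 %s1, 2
    //   %a  = and i32 %n, 262140     ; 0x0003FFFC, the adjusted trunc mask
    //   %s2 = trunc i32 %a to i16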
3270    if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
3271        isa<ConstantInt>(TrOp->getOperand(1))) {
3272      // Okay, we'll do this xform.  Make the shift of shift.
3273      Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
3274      // (shift2 (shift1 & 0x00FF), c2)
3275      Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
3276
3277      // For logical shifts, the truncation has the effect of making the high
3278      // part of the register be zeros.  Emulate this by inserting an AND to
3279      // clear the top bits as needed.  This 'and' will usually be zapped by
3280      // other xforms later if dead.
3281      unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
3282      unsigned DstSize = TI->getType()->getScalarSizeInBits();
3283      APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
3284
3285      // The mask we constructed says what the trunc would do if occurring
3286      // between the shifts.  We want to know the effect *after* the second
3287      // shift.  We know that it is a logical shift by a constant, so adjust the
3288      // mask as appropriate.
3289      if (I.getOpcode() == Instruction::Shl)
3290        MaskV <<= Op1->getZExtValue();
3291      else {
3292        assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
3293        MaskV = MaskV.lshr(Op1->getZExtValue());
3294      }
3295
3296      // shift1 & 0x00FF
3297      Value *And = Builder->CreateAnd(NSh,
3298                                      ConstantInt::get(I.getContext(), MaskV),
3299                                      TI->getName());
3300
3301      // Return the value truncated to the interesting size.
3302      return new TruncInst(And, I.getType());
3303    }
3304  }
3305
3306  if (Op0->hasOneUse()) {
3307    if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
3308      // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
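      // e.g. ((X >> 4) + Y) << 4  ->  (X + (Y << 4)) & -16   (~0 << 4 == -16)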
3309      Value *V1, *V2;
3310      ConstantInt *CC;
3311      switch (Op0BO->getOpcode()) {
3312        default: break;
3313        case Instruction::Add:
3314        case Instruction::And:
3315        case Instruction::Or:
3316        case Instruction::Xor: {
3317          // These operators commute.
3318          // Turn (Y + (X >> C)) << C  ->  (X + (Y << C)) & (~0 << C)
3319          if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
3320              match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
3321                    m_Specific(Op1)))) {
3322            Value *YS =         // (Y << C)
3323              Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
3324            // (X + (Y << C))
3325            Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
3326                                            Op0BO->getOperand(1)->getName());
3327            uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
3328            return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
3329                       APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
3330          }
3331
3332          // Turn (Y + ((X >> C) & CC)) << C  ->  ((X & (CC << C)) + (Y << C))
3333          Value *Op0BOOp1 = Op0BO->getOperand(1);
3334          if (isLeftShift && Op0BOOp1->hasOneUse() &&
3335              match(Op0BOOp1,
3336                    m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
3337                          m_ConstantInt(CC))) &&
3338              cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
3339            Value *YS =   // (Y << C)
3340              Builder->CreateShl(Op0BO->getOperand(0), Op1,
3341                                           Op0BO->getName());
3342            // X & (CC << C)
3343            Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
3344                                           V1->getName()+".mask");
3345            return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
3346          }
3347        }
3348
3349        // FALL THROUGH.
3350        case Instruction::Sub: {
3351          // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
3352          if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
3353              match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
3354                    m_Specific(Op1)))) {
3355            Value *YS =  // (Y << C)
3356              Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
3357            // (X + (Y << C))
3358            Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
3359                                            Op0BO->getOperand(0)->getName());
3360            uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
3361            return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
3362                       APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
3363          }
3364
3365          // Turn (((X >> C)&CC) + Y) << C  ->  (X + (Y << C)) & (CC << C)
3366          if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
3367              match(Op0BO->getOperand(0),
3368                    m_And(m_Shr(m_Value(V1), m_Value(V2)),
3369                          m_ConstantInt(CC))) && V2 == Op1 &&
3370              cast<BinaryOperator>(Op0BO->getOperand(0))
3371                  ->getOperand(0)->hasOneUse()) {
3372            Value *YS = // (Y << C)
3373              Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
3374            // X & (CC << C)
3375            Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
3376                                           V1->getName()+".mask");
3377
3378            return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
3379          }
3380
3381          break;
3382        }
3383      }
3384
3385
3386      // If the operand is an bitwise operator with a constant RHS, and the
3387      // shift is the only use, we can pull it out of the shift.
3388      if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
3389        bool isValid = true;     // Valid only for And, Or, Xor
3390        bool highBitSet = false; // Transform if high bit of constant set?
3391
3392        switch (Op0BO->getOpcode()) {
3393          default: isValid = false; break;   // Do not perform transform!
3394          case Instruction::Add:
3395            isValid = isLeftShift;
3396            break;
3397          case Instruction::Or:
3398          case Instruction::Xor:
3399            highBitSet = false;
3400            break;
3401          case Instruction::And:
3402            highBitSet = true;
3403            break;
3404        }
3405
3406        // If this is a signed shift right, and the high bit is modified
3407        // by the logical operation, do not perform the transformation.
3408        // The highBitSet boolean indicates the value of the high bit of
3409        // the constant which would cause it to be modified for this
3410        // operation.
3411        //
3412        if (isValid && I.getOpcode() == Instruction::AShr)
3413          isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
3414
3415        if (isValid) {
3416          Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
3417
3418          Value *NewShift =
3419            Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
3420          NewShift->takeName(Op0BO);
3421
3422          return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
3423                                        NewRHS);
3424        }
3425      }
3426    }
3427  }
3428
3429  // Find out if this is a shift of a shift by a constant.
3430  BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
3431  if (ShiftOp && !ShiftOp->isShift())
3432    ShiftOp = 0;
3433
3434  if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
3435    ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
3436    uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
3437    uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
3438    assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
3439    if (ShiftAmt1 == 0) return 0;  // Will be simplified in the future.
3440    Value *X = ShiftOp->getOperand(0);
3441
3442    uint32_t AmtSum = ShiftAmt1+ShiftAmt2;   // Fold into one big shift.
3443
3444    const IntegerType *Ty = cast<IntegerType>(I.getType());
3445
3446    // Check for (X << c1) << c2  and  (X >> c1) >> c2
3447    if (I.getOpcode() == ShiftOp->getOpcode()) {
3448      // If this is an oversized composite shift, then unsigned shifts yield
3449      // zero and ashr saturates.
3450      if (AmtSum >= TypeBits) {
3451        if (I.getOpcode() != Instruction::AShr)
3452          return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
3453        AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.
3454      }
3455
3456      return BinaryOperator::Create(I.getOpcode(), X,
3457                                    ConstantInt::get(Ty, AmtSum));
3458    }
3459
3460    if (ShiftOp->getOpcode() == Instruction::LShr &&
3461        I.getOpcode() == Instruction::AShr) {
3462      if (AmtSum >= TypeBits)
3463        return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
3464
3465      // ((X >>u C1) >>s C2) -> (X >>u (C1+C2))  since C1 != 0.
3466      return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
3467    }
3468
3469    if (ShiftOp->getOpcode() == Instruction::AShr &&
3470        I.getOpcode() == Instruction::LShr) {
3471      // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
3472      if (AmtSum >= TypeBits)
3473        AmtSum = TypeBits-1;
3474
3475      Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));
3476
3477      APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
3478      return BinaryOperator::CreateAnd(Shift,
3479                                       ConstantInt::get(I.getContext(), Mask));
3480    }
3481
3482    // Okay, if we get here, one shift must be left, and the other shift must be
3483    // right.  See if the amounts are equal.
3484    if (ShiftAmt1 == ShiftAmt2) {
3485      // If we have ((X >>? C) << C), turn this into X & (-1 << C).
3486      if (I.getOpcode() == Instruction::Shl) {
3487        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
3488        return BinaryOperator::CreateAnd(X,
3489                                         ConstantInt::get(I.getContext(),Mask));
3490      }
3491      // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
3492      if (I.getOpcode() == Instruction::LShr) {
3493        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
3494        return BinaryOperator::CreateAnd(X,
3495                                        ConstantInt::get(I.getContext(), Mask));
3496      }
3497      // We can simplify ((X << C) >>s C) into a trunc + sext.
3498      // NOTE: we could do this for any C, but that would make 'unusual' integer
3499      // types.  For now, just stick to ones well-supported by the code
3500      // generators.
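      // e.g. for i32: ((X << 24) >>s 24) --> sext(trunc(X) to i8) to i32.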
3501      const Type *SExtType = 0;
3502      switch (Ty->getBitWidth() - ShiftAmt1) {
3503      case 1  :
3504      case 8  :
3505      case 16 :
3506      case 32 :
3507      case 64 :
3508      case 128:
3509        SExtType = IntegerType::get(I.getContext(),
3510                                    Ty->getBitWidth() - ShiftAmt1);
3511        break;
3512      default: break;
3513      }
3514      if (SExtType)
3515        return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty);
3516      // Otherwise, we can't handle it yet.
3517    } else if (ShiftAmt1 < ShiftAmt2) {
3518      uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
3519
3520      // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
3521      if (I.getOpcode() == Instruction::Shl) {
3522        assert(ShiftOp->getOpcode() == Instruction::LShr ||
3523               ShiftOp->getOpcode() == Instruction::AShr);
3524        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
3525
3526        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
3527        return BinaryOperator::CreateAnd(Shift,
3528                                         ConstantInt::get(I.getContext(),Mask));
3529      }
3530
3531      // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
3532      if (I.getOpcode() == Instruction::LShr) {
3533        assert(ShiftOp->getOpcode() == Instruction::Shl);
3534        Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
3535
3536        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
3537        return BinaryOperator::CreateAnd(Shift,
3538                                         ConstantInt::get(I.getContext(),Mask));
3539      }
3540
3541      // We can't handle (X << C1) >>s C2; it shifts arbitrary bits in.
3542    } else {
3543      assert(ShiftAmt2 < ShiftAmt1);
3544      uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
3545
3546      // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
3547      if (I.getOpcode() == Instruction::Shl) {
3548        assert(ShiftOp->getOpcode() == Instruction::LShr ||
3549               ShiftOp->getOpcode() == Instruction::AShr);
3550        Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
3551                                            ConstantInt::get(Ty, ShiftDiff));
3552
3553        APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
3554        return BinaryOperator::CreateAnd(Shift,
3555                                         ConstantInt::get(I.getContext(),Mask));
3556      }
3557
3558      // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
3559      if (I.getOpcode() == Instruction::LShr) {
3560        assert(ShiftOp->getOpcode() == Instruction::Shl);
3561        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
3562
3563        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
3564        return BinaryOperator::CreateAnd(Shift,
3565                                         ConstantInt::get(I.getContext(),Mask));
3566      }
3567
3568      // We can't handle (X << C1) >>s C2; it shifts arbitrary bits in.
3569    }
3570  }
3571  return 0;
3572}
3573
3574
3575
3576/// FindElementAtOffset - Given a type and a constant offset, determine whether
3577/// or not there is a sequence of GEP indices into the type that will land us at
3578/// the specified offset.  If so, fill them into NewIndices and return the
3579/// resultant element type, otherwise return null.
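///
/// For example (illustrative layout, as given by TargetData), with
/// Ty = {i32, [4 x i8]} and Offset = 6, NewIndices becomes [0, 1, 2] and the
/// function returns i8.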
3580const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
3581                                          SmallVectorImpl<Value*> &NewIndices) {
3582  if (!TD) return 0;
3583  if (!Ty->isSized()) return 0;
3584
3585  // Start with the index over the outer type.  Note that the type size
3586  // might be zero (even if the offset isn't zero) if the indexed type
3587  // is something like [0 x {int, int}]
3588  const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
3589  int64_t FirstIdx = 0;
3590  if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
3591    FirstIdx = Offset/TySize;
3592    Offset -= FirstIdx*TySize;
3593
3594    // Handle hosts where % returns negative instead of values [0..TySize).
3595    if (Offset < 0) {
3596      --FirstIdx;
3597      Offset += TySize;
3598      assert(Offset >= 0);
3599    }
3600    assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
3601  }
3602
3603  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
3604
3605  // Index into the types.  If we fail, return null.
3606  while (Offset) {
3607    // Bail if this indexes into tail padding between struct/array elements.
3608    if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
3609      return 0;
3610
3611    if (const StructType *STy = dyn_cast<StructType>(Ty)) {
3612      const StructLayout *SL = TD->getStructLayout(STy);
3613      assert(Offset < (int64_t)SL->getSizeInBytes() &&
3614             "Offset must stay within the indexed type");
3615
3616      unsigned Elt = SL->getElementContainingOffset(Offset);
3617      NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
3618                                            Elt));
3619
3620      Offset -= SL->getElementOffset(Elt);
3621      Ty = STy->getElementType(Elt);
3622    } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
3623      uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
3624      assert(EltSize && "Cannot index into a zero-sized array");
3625      NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
3626      Offset %= EltSize;
3627      Ty = AT->getElementType();
3628    } else {
3629      // Otherwise, we can't index into the middle of this atomic type, bail.
3630      return 0;
3631    }
3632  }
3633
3634  return Ty;
3635}
3636
3637
3638/// EnforceKnownAlignment - If the specified pointer points to an object that
3639/// we control, modify the object's alignment to PrefAlign. This isn't
3640/// often possible though. If alignment is important, a more reliable approach
3641/// is to simply align all global variables and allocation instructions to
3642/// their preferred alignment from the beginning.
3643///
3644static unsigned EnforceKnownAlignment(Value *V,
3645                                      unsigned Align, unsigned PrefAlign) {
3646
3647  User *U = dyn_cast<User>(V);
3648  if (!U) return Align;
3649
3650  switch (Operator::getOpcode(U)) {
3651  default: break;
3652  case Instruction::BitCast:
3653    return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
3654  case Instruction::GetElementPtr: {
3655    // If all indexes are zero, it is just the alignment of the base pointer.
3656    bool AllZeroOperands = true;
3657    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
3658      if (!isa<Constant>(*i) ||
3659          !cast<Constant>(*i)->isNullValue()) {
3660        AllZeroOperands = false;
3661        break;
3662      }
3663
3664    if (AllZeroOperands) {
3665      // Treat this like a bitcast.
3666      return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
3667    }
3668    break;
3669  }
3670  }
3671
3672  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
3673    // If there is a large requested alignment and we can, bump up the alignment
3674    // of the global.
3675    if (!GV->isDeclaration()) {
3676      if (GV->getAlignment() >= PrefAlign)
3677        Align = GV->getAlignment();
3678      else {
3679        GV->setAlignment(PrefAlign);
3680        Align = PrefAlign;
3681      }
3682    }
3683  } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
3684    // If there is a requested alignment and if this is an alloca, round up.
3685    if (AI->getAlignment() >= PrefAlign)
3686      Align = AI->getAlignment();
3687    else {
3688      AI->setAlignment(PrefAlign);
3689      Align = PrefAlign;
3690    }
3691  }
3692
3693  return Align;
3694}
3695
3696/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
3697/// we can determine, return it, otherwise return 0.  If PrefAlign is specified,
3698/// and it is more than the alignment of the ultimate object, see if we can
3699/// increase the alignment of the ultimate object, making this check succeed.
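/// (For instance, a pointer whose low four bits are known zero yields an
/// alignment of 16.)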
3700unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
3701                                                  unsigned PrefAlign) {
3702  unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) :
3703                      sizeof(PrefAlign) * CHAR_BIT;
3704  APInt Mask = APInt::getAllOnesValue(BitWidth);
3705  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
3706  ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
3707  unsigned TrailZ = KnownZero.countTrailingOnes();
3708  unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
3709
3710  // If a larger alignment is preferred, try to enforce it; otherwise no
3711  // adjustment is needed.
3712  if (PrefAlign > Align)
3713    Align = EnforceKnownAlignment(V, Align, PrefAlign);
3714  return Align;
3715}
3716
3717Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
3718  unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
3719  unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
3720  unsigned MinAlign = std::min(DstAlign, SrcAlign);
3721  unsigned CopyAlign = MI->getAlignment();
3722
3723  if (CopyAlign < MinAlign) {
3724    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
3725                                             MinAlign, false));
3726    return MI;
3727  }
3728
3729  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
3730  // load/store.
3731  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
3732  if (MemOpLength == 0) return 0;
3733
3734  // Source and destination pointer types are always "i8*" for the intrinsic.  See
3735  // if the size is something we can handle with a single primitive load/store.
3736  // A single load+store correctly handles overlapping memory in the memmove
3737  // case.
3738  unsigned Size = MemOpLength->getZExtValue();
3739  if (Size == 0) return MI;  // Delete this mem transfer.
3740
3741  if (Size > 8 || (Size&(Size-1)))
3742    return 0;  // If not 1/2/4/8 bytes, exit.
3743
3744  // Use an integer load+store unless we can find something better.
3745  Type *NewPtrTy =
3746            PointerType::getUnqual(IntegerType::get(MI->getContext(), Size<<3));
3747
3748  // Memcpy forces the use of i8* for the source and destination.  That means
3749  // that if you're using memcpy to move one double around, you'll get a cast
3750  // from double* to i8*.  We'd much rather use a double load+store than an
3751  // i64 load+store here, because that improves the odds that the source or
3752  // dest address will be promotable.  See if we can find a better type than
3753  // the integer datatype.
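  // For instance (illustrative IR), an 8-byte copy whose pointers are
  // bitcasts from double*:
  //   call void @llvm.memcpy.i32(i8* %d, i8* %s, i32 8, i32 8)
  // is better lowered as a double load+store than as an i64 load+store.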
3754  if (Value *Op = getBitCastOperand(MI->getOperand(1))) {
3755    const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType();
3756    if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
3757      // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
3758      // down through these levels if so.
3759      while (!SrcETy->isSingleValueType()) {
3760        if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
3761          if (STy->getNumElements() == 1)
3762            SrcETy = STy->getElementType(0);
3763          else
3764            break;
3765        } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
3766          if (ATy->getNumElements() == 1)
3767            SrcETy = ATy->getElementType();
3768          else
3769            break;
3770        } else
3771          break;
3772      }
3773
3774      if (SrcETy->isSingleValueType())
3775        NewPtrTy = PointerType::getUnqual(SrcETy);
3776    }
3777  }
3778
3779
3780  // If the memcpy/memmove provides better alignment info than we can
3781  // infer, use it.
3782  SrcAlign = std::max(SrcAlign, CopyAlign);
3783  DstAlign = std::max(DstAlign, CopyAlign);
3784
3785  Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy);
3786  Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy);
3787  Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
3788  InsertNewInstBefore(L, *MI);
3789  InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
3790
3791  // Set the size of the copy to 0; it will be deleted on the next iteration.
3792  MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
3793  return MI;
3794}
3795
3796Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
3797  unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
3798  if (MI->getAlignment() < Alignment) {
3799    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
3800                                             Alignment, false));
3801    return MI;
3802  }
3803
3804  // Extract the length and alignment and fill if they are constant.
3805  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
3806  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
3807  if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(MI->getContext()))
3808    return 0;
3809  uint64_t Len = LenC->getZExtValue();
3810  Alignment = MI->getAlignment();
3811
3812  // If the length is zero, this is a no-op
3813  if (Len == 0) return MI; // memset(d,c,0,a) -> noop
3814
3815  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
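  // e.g. memset(%p, 1, 4) --> store i32 16843009, i32* %p   ; 0x01010101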
3816  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
3817    const Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.
3818
3819    Value *Dest = MI->getDest();
3820    Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));
3821
3822    // For memset, alignment 0 means alignment 1; a store needs a real alignment.
3823    if (Alignment == 0) Alignment = 1;
3824
3825    // Extract the fill value and store.
3826    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
3827    InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill),
3828                                      Dest, false, Alignment), *MI);
3829
3830    // Set the length to 0; the memset will be deleted on the next iteration.
3831    MI->setLength(Constant::getNullValue(LenC->getType()));
3832    return MI;
3833  }
3834
3835  return 0;
3836}
3837
3838
3839/// visitCallInst - CallInst simplification.  This mostly only handles folding
3840/// of intrinsic instructions.  For normal calls, it allows visitCallSite to do
3841/// the heavy lifting.
3842///
3843Instruction *InstCombiner::visitCallInst(CallInst &CI) {
3844  if (isFreeCall(&CI))
3845    return visitFree(CI);
3846
3847  // If the caller function is nounwind, mark the call as nounwind, even if the
3848  // callee isn't.
3849  if (CI.getParent()->getParent()->doesNotThrow() &&
3850      !CI.doesNotThrow()) {
3851    CI.setDoesNotThrow();
3852    return &CI;
3853  }
3854
3855  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
3856  if (!II) return visitCallSite(&CI);
3857
3858  // Intrinsics cannot occur in an invoke, so handle them here instead of in
3859  // visitCallSite.
3860  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
3861    bool Changed = false;
3862
3863    // memmove/cpy/set of zero bytes is a noop.
3864    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
3865      if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
3866
3867      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
3868        if (CI->getZExtValue() == 1) {
3869          // Replace the instruction with just byte operations.  We would
3870          // transform other cases to loads/stores, but we don't know if
3871          // alignment is sufficient.
3872        }
3873    }
3874
3875    // If we have a memmove and the source operand is a constant global,
3876    // then the source and dest pointers can't alias, so we can change this
3877    // into a call to memcpy.
3878    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
3879      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
3880        if (GVSrc->isConstant()) {
3881          Module *M = CI.getParent()->getParent()->getParent();
3882          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
3883          const Type *Tys[1];
3884          Tys[0] = CI.getOperand(3)->getType();
3885          CI.setOperand(0,
3886                        Intrinsic::getDeclaration(M, MemCpyID, Tys, 1));
3887          Changed = true;
3888        }
3889    }
3890
3891    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
3892      // memmove(x,x,size) -> noop.
3893      if (MTI->getSource() == MTI->getDest())
3894        return EraseInstFromFunction(CI);
3895    }
3896
3897    // If we can determine a pointer alignment that is bigger than currently
3898    // set, update the alignment.
3899    if (isa<MemTransferInst>(MI)) {
3900      if (Instruction *I = SimplifyMemTransfer(MI))
3901        return I;
3902    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
3903      if (Instruction *I = SimplifyMemSet(MSI))
3904        return I;
3905    }
3906
3907    if (Changed) return II;
3908  }
3909
3910  switch (II->getIntrinsicID()) {
3911  default: break;
3912  case Intrinsic::bswap:
3913    // bswap(bswap(x)) -> x
3914    if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
3915      if (Operand->getIntrinsicID() == Intrinsic::bswap)
3916        return ReplaceInstUsesWith(CI, Operand->getOperand(1));
3917
3918    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
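    // e.g. for i32 -> i16: bswap(trunc(bswap(%x))) == trunc(%x lshr 16)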
3919    if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) {
3920      if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
3921        if (Operand->getIntrinsicID() == Intrinsic::bswap) {
3922          unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
3923                       TI->getType()->getPrimitiveSizeInBits();
3924          Value *CV = ConstantInt::get(Operand->getType(), C);
3925          Value *V = Builder->CreateLShr(Operand->getOperand(1), CV);
3926          return new TruncInst(V, TI->getType());
3927        }
3928    }
3929
3930    break;
3931  case Intrinsic::powi:
3932    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) {
3933      // powi(x, 0) -> 1.0
3934      if (Power->isZero())
3935        return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
3936      // powi(x, 1) -> x
3937      if (Power->isOne())
3938        return ReplaceInstUsesWith(CI, II->getOperand(1));
3939      // powi(x, -1) -> 1/x
3940      if (Power->isAllOnesValue())
3941        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
3942                                          II->getOperand(1));
3943    }
3944    break;
3945
3946  case Intrinsic::uadd_with_overflow: {
3947    Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
3948    const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
3949    uint32_t BitWidth = IT->getBitWidth();
3950    APInt Mask = APInt::getSignBit(BitWidth);
3951    APInt LHSKnownZero(BitWidth, 0);
3952    APInt LHSKnownOne(BitWidth, 0);
3953    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
3954    bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
3955    bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
3956
3957    if (LHSKnownNegative || LHSKnownPositive) {
3958      APInt RHSKnownZero(BitWidth, 0);
3959      APInt RHSKnownOne(BitWidth, 0);
3960      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
3961      bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
3962      bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
3963      if (LHSKnownNegative && RHSKnownNegative) {
3964        // The sign bit is set in both cases: this MUST overflow.
3965        // Create a simple add instruction, and insert it into the struct.
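        // For example, for i32 operands this produces (a sketch):
        //   %sum = add i32 %lhs, %rhs
        //   %res = insertvalue { i32, i1 } { i32 undef, i1 true }, i32 %sum, 0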
3966        Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI);
3967        Worklist.Add(Add);
3968        Constant *V[] = {
          UndefValue::get(LHS->getType()),
          ConstantInt::getTrue(II->getContext())
3970        };
3971        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
3972        return InsertValueInst::Create(Struct, Add, 0);
3973      }
3974
3975      if (LHSKnownPositive && RHSKnownPositive) {
3976        // The sign bit is clear in both cases: this CANNOT overflow.
3977        // Create a simple add instruction, and insert it into the struct.
3978        Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI);
3979        Worklist.Add(Add);
3980        Constant *V[] = {
3981          UndefValue::get(LHS->getType()),
3982          ConstantInt::getFalse(II->getContext())
3983        };
3984        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
3985        return InsertValueInst::Create(Struct, Add, 0);
3986      }
3987    }
3988  }
  // FALL THROUGH from uadd_with_overflow into sadd_with_overflow.
3990  case Intrinsic::sadd_with_overflow:
3991    // Canonicalize constants into the RHS.
3992    if (isa<Constant>(II->getOperand(1)) &&
3993        !isa<Constant>(II->getOperand(2))) {
3994      Value *LHS = II->getOperand(1);
3995      II->setOperand(1, II->getOperand(2));
3996      II->setOperand(2, LHS);
3997      return II;
3998    }
3999
4000    // X + undef -> undef
4001    if (isa<UndefValue>(II->getOperand(2)))
4002      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
4003
4004    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
4005      // X + 0 -> {X, false}
4006      if (RHS->isZero()) {
4007        Constant *V[] = {
          UndefValue::get(II->getOperand(1)->getType()),
4009          ConstantInt::getFalse(II->getContext())
4010        };
4011        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
4012        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
4013      }
4014    }
4015    break;
4016  case Intrinsic::usub_with_overflow:
4017  case Intrinsic::ssub_with_overflow:
4018    // undef - X -> undef
4019    // X - undef -> undef
4020    if (isa<UndefValue>(II->getOperand(1)) ||
4021        isa<UndefValue>(II->getOperand(2)))
4022      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
4023
4024    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
4025      // X - 0 -> {X, false}
4026      if (RHS->isZero()) {
4027        Constant *V[] = {
4028          UndefValue::get(II->getOperand(1)->getType()),
4029          ConstantInt::getFalse(II->getContext())
4030        };
4031        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
4032        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
4033      }
4034    }
4035    break;
4036  case Intrinsic::umul_with_overflow:
4037  case Intrinsic::smul_with_overflow:
4038    // Canonicalize constants into the RHS.
4039    if (isa<Constant>(II->getOperand(1)) &&
4040        !isa<Constant>(II->getOperand(2))) {
4041      Value *LHS = II->getOperand(1);
4042      II->setOperand(1, II->getOperand(2));
4043      II->setOperand(2, LHS);
4044      return II;
4045    }
4046
4047    // X * undef -> undef
4048    if (isa<UndefValue>(II->getOperand(2)))
4049      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
4050
4051    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) {
4052      // X*0 -> {0, false}
4053      if (RHSI->isZero())
4054        return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
4055
4056      // X * 1 -> {X, false}
4057      if (RHSI->equalsInt(1)) {
4058        Constant *V[] = {
4059          UndefValue::get(II->getOperand(1)->getType()),
4060          ConstantInt::getFalse(II->getContext())
4061        };
4062        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
4063        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
4064      }
4065    }
4066    break;
4067  case Intrinsic::ppc_altivec_lvx:
4068  case Intrinsic::ppc_altivec_lvxl:
4069  case Intrinsic::x86_sse_loadu_ps:
4070  case Intrinsic::x86_sse2_loadu_pd:
4071  case Intrinsic::x86_sse2_loadu_dq:
4072    // Turn PPC lvx     -> load if the pointer is known aligned.
4073    // Turn X86 loadups -> load if the pointer is known aligned.
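    // For example (a sketch), when %p is known to be 16-byte aligned:
    //   %v = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %p)
    // becomes
    //   %q = bitcast i8* %p to <4 x float>*
    //   %v = load <4 x float>* %q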
4074    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
4075      Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
4076                                         PointerType::getUnqual(II->getType()));
4077      return new LoadInst(Ptr);
4078    }
4079    break;
4080  case Intrinsic::ppc_altivec_stvx:
4081  case Intrinsic::ppc_altivec_stvxl:
4082    // Turn stvx -> store if the pointer is known aligned.
4083    if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
4084      const Type *OpPtrTy =
4085        PointerType::getUnqual(II->getOperand(1)->getType());
4086      Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
4087      return new StoreInst(II->getOperand(1), Ptr);
4088    }
4089    break;
4090  case Intrinsic::x86_sse_storeu_ps:
4091  case Intrinsic::x86_sse2_storeu_pd:
4092  case Intrinsic::x86_sse2_storeu_dq:
4093    // Turn X86 storeu -> store if the pointer is known aligned.
4094    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
4095      const Type *OpPtrTy =
4096        PointerType::getUnqual(II->getOperand(2)->getType());
4097      Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
4098      return new StoreInst(II->getOperand(2), Ptr);
4099    }
4100    break;
4101
4102  case Intrinsic::x86_sse_cvttss2si: {
    // This intrinsic only demands the 0th element of its input vector.  If
    // we can simplify the input based on that, do so now.
4105    unsigned VWidth =
4106      cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
4107    APInt DemandedElts(VWidth, 1);
4108    APInt UndefElts(VWidth, 0);
4109    if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
4110                                              UndefElts)) {
4111      II->setOperand(1, V);
4112      return II;
4113    }
4114    break;
4115  }
4116
4117  case Intrinsic::ppc_altivec_vperm:
4118    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
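    // (The "shuffle" below is materialized as extractelement/insertelement
    // chains rather than a single shufflevector instruction.)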
4119    if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
4120      assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
4121
4122      // Check that all of the elements are integer constants or undefs.
4123      bool AllEltsOk = true;
4124      for (unsigned i = 0; i != 16; ++i) {
4125        if (!isa<ConstantInt>(Mask->getOperand(i)) &&
4126            !isa<UndefValue>(Mask->getOperand(i))) {
4127          AllEltsOk = false;
4128          break;
4129        }
4130      }
4131
4132      if (AllEltsOk) {
4133        // Cast the input vectors to byte vectors.
4134        Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
4135        Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
4136        Value *Result = UndefValue::get(Op0->getType());
4137
4138        // Only extract each element once.
4139        Value *ExtractedElts[32];
4140        memset(ExtractedElts, 0, sizeof(ExtractedElts));
4141
4142        for (unsigned i = 0; i != 16; ++i) {
4143          if (isa<UndefValue>(Mask->getOperand(i)))
4144            continue;
4145          unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
4146          Idx &= 31;  // Match the hardware behavior.
4147
4148          if (ExtractedElts[Idx] == 0) {
4149            ExtractedElts[Idx] =
4150              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
4151                  ConstantInt::get(Type::getInt32Ty(II->getContext()),
4152                                   Idx&15, false), "tmp");
4153          }
4154
4155          // Insert this value into the result vector.
4156          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
4157                         ConstantInt::get(Type::getInt32Ty(II->getContext()),
4158                                          i, false), "tmp");
4159        }
4160        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
4161      }
4162    }
4163    break;
4164
4165  case Intrinsic::stackrestore: {
4166    // If the save is right next to the restore, remove the restore.  This can
4167    // happen when variable allocas are DCE'd.
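    // For example (a sketch):
    //   %sp = call i8* @llvm.stacksave()
    //   call void @llvm.stackrestore(i8* %sp)  ; nothing between: removable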
4168    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
4169      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
4170        BasicBlock::iterator BI = SS;
4171        if (&*++BI == II)
4172          return EraseInstFromFunction(CI);
4173      }
4174    }
4175
4176    // Scan down this block to see if there is another stack restore in the
4177    // same block without an intervening call/alloca.
4178    BasicBlock::iterator BI = II;
4179    TerminatorInst *TI = II->getParent()->getTerminator();
4180    bool CannotRemove = false;
4181    for (++BI; &*BI != TI; ++BI) {
4182      if (isa<AllocaInst>(BI) || isMalloc(BI)) {
4183        CannotRemove = true;
4184        break;
4185      }
4186      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
4187        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
4188          // If there is a stackrestore below this one, remove this one.
4189          if (II->getIntrinsicID() == Intrinsic::stackrestore)
4190            return EraseInstFromFunction(CI);
4191          // Otherwise, ignore the intrinsic.
4192        } else {
4193          // If we found a non-intrinsic call, we can't remove the stack
4194          // restore.
4195          CannotRemove = true;
4196          break;
4197        }
4198      }
4199    }
4200
4201    // If the stack restore is in a return/unwind block and if there are no
4202    // allocas or calls between the restore and the return, nuke the restore.
4203    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
4204      return EraseInstFromFunction(CI);
4205    break;
4206  }
4207  }
4208
4209  return visitCallSite(II);
4210}
4211
4212// InvokeInst simplification
4213//
4214Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
4215  return visitCallSite(&II);
4216}
4217
4218/// isSafeToEliminateVarargsCast - If this cast does not affect the value
4219/// passed through the varargs area, we can eliminate the use of the cast.
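/// For example (a sketch; %q and @f are hypothetical):
///   %p = bitcast i32* %q to i8*
///   call void (i32, ...)* @f(i32 1, i8* %p)
/// can pass i32* %q directly, since a lossless cast leaves the bits passed
/// through the va_arg area unchanged.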
4220static bool isSafeToEliminateVarargsCast(const CallSite CS,
4221                                         const CastInst * const CI,
4222                                         const TargetData * const TD,
4223                                         const int ix) {
4224  if (!CI->isLosslessCast())
4225    return false;
4226
4227  // The size of ByVal arguments is derived from the type, so we
4228  // can't change to a type with a different size.  If the size were
4229  // passed explicitly we could avoid this check.
4230  if (!CS.paramHasAttr(ix, Attribute::ByVal))
4231    return true;
4232
4233  const Type* SrcTy =
4234            cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
4235  const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
4236  if (!SrcTy->isSized() || !DstTy->isSized())
4237    return false;
4238  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
4239    return false;
4240  return true;
4241}
4242
4243// visitCallSite - Improvements for call and invoke instructions.
4244//
4245Instruction *InstCombiner::visitCallSite(CallSite CS) {
4246  bool Changed = false;
4247
4248  // If the callee is a constexpr cast of a function, attempt to move the cast
4249  // to the arguments of the call/invoke.
4250  if (transformConstExprCastCall(CS)) return 0;
4251
4252  Value *Callee = CS.getCalledValue();
4253
4254  if (Function *CalleeF = dyn_cast<Function>(Callee))
4255    if (CalleeF->getCallingConv() != CS.getCallingConv()) {
4256      Instruction *OldCall = CS.getInstruction();
4257      // If the call and callee calling conventions don't match, this call must
4258      // be unreachable, as the call is undefined.
4259      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
4260                UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
4261                                  OldCall);
      // If OldCall does not return void then replaceAllUsesWith undef.
      // This allows ValueHandlers and custom metadata to adjust themselves.
4264      if (!OldCall->getType()->isVoidTy())
4265        OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
4266      if (isa<CallInst>(OldCall))   // Not worth removing an invoke here.
4267        return EraseInstFromFunction(*OldCall);
4268      return 0;
4269    }
4270
4271  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
4272    // This instruction is not reachable, just remove it.  We insert a store to
4273    // undef so that we know that this code is not reachable, despite the fact
4274    // that we can't modify the CFG here.
4275    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
4276               UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
4277                  CS.getInstruction());
4278
    // If CS does not return void then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust themselves.
4281    if (!CS.getInstruction()->getType()->isVoidTy())
4282      CS.getInstruction()->
4283        replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
4284
4285    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
4286      // Don't break the CFG, insert a dummy cond branch.
4287      BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
4288                         ConstantInt::getTrue(Callee->getContext()), II);
4289    }
4290    return EraseInstFromFunction(*CS.getInstruction());
4291  }
4292
4293  if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
4294    if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
4295      if (In->getIntrinsicID() == Intrinsic::init_trampoline)
4296        return transformCallThroughTrampoline(CS);
4297
4298  const PointerType *PTy = cast<PointerType>(Callee->getType());
4299  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4300  if (FTy->isVarArg()) {
    int ix = FTy->getNumParams() +
             (isa<InvokeInst>(CS.getInstruction()) ? 3 : 1);
4302    // See if we can optimize any arguments passed through the varargs area of
4303    // the call.
4304    for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
4305           E = CS.arg_end(); I != E; ++I, ++ix) {
4306      CastInst *CI = dyn_cast<CastInst>(*I);
4307      if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
4308        *I = CI->getOperand(0);
4309        Changed = true;
4310      }
4311    }
4312  }
4313
4314  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
4315    // Inline asm calls cannot throw - mark them 'nounwind'.
4316    CS.setDoesNotThrow();
4317    Changed = true;
4318  }
4319
4320  return Changed ? CS.getInstruction() : 0;
4321}
4322
4323// transformConstExprCastCall - If the callee is a constexpr cast of a function,
4324// attempt to move the cast to the arguments of the call/invoke.
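// For example (a sketch; @f and the pointer types are hypothetical):
//   %r = call i32* bitcast (i8* (i8*)* @f to i32* (i32*)*)(i32* %p)
// becomes a direct call with casts on the argument and the result:
//   %p.c = bitcast i32* %p to i8*
//   %r.c = call i8* @f(i8* %p.c)
//   %r   = bitcast i8* %r.c to i32*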
4325//
4326bool InstCombiner::transformConstExprCastCall(CallSite CS) {
4327  if (!isa<ConstantExpr>(CS.getCalledValue())) return false;
4328  ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue());
4329  if (CE->getOpcode() != Instruction::BitCast ||
4330      !isa<Function>(CE->getOperand(0)))
4331    return false;
4332  Function *Callee = cast<Function>(CE->getOperand(0));
4333  Instruction *Caller = CS.getInstruction();
4334  const AttrListPtr &CallerPAL = CS.getAttributes();
4335
4336  // Okay, this is a cast from a function to a different type.  Unless doing so
4337  // would cause a type conversion of one of our arguments, change this call to
4338  // be a direct call with arguments casted to the appropriate types.
4339  //
4340  const FunctionType *FT = Callee->getFunctionType();
4341  const Type *OldRetTy = Caller->getType();
4342  const Type *NewRetTy = FT->getReturnType();
4343
4344  if (isa<StructType>(NewRetTy))
4345    return false; // TODO: Handle multiple return values.
4346
4347  // Check to see if we are changing the return type...
4348  if (OldRetTy != NewRetTy) {
4349    if (Callee->isDeclaration() &&
4350        // Conversion is ok if changing from one pointer type to another or from
4351        // a pointer to an integer of the same size.
4352        !((isa<PointerType>(OldRetTy) || !TD ||
4353           OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
4354          (isa<PointerType>(NewRetTy) || !TD ||
4355           NewRetTy == TD->getIntPtrType(Caller->getContext()))))
4356      return false;   // Cannot transform this return value.
4357
4358    if (!Caller->use_empty() &&
4359        // void -> non-void is handled specially
4360        !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
4361      return false;   // Cannot transform this return value.
4362
4363    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4364      Attributes RAttrs = CallerPAL.getRetAttributes();
4365      if (RAttrs & Attribute::typeIncompatible(NewRetTy))
4366        return false;   // Attribute not compatible with transformed value.
4367    }
4368
4369    // If the callsite is an invoke instruction, and the return value is used by
4370    // a PHI node in a successor, we cannot change the return type of the call
4371    // because there is no place to put the cast instruction (without breaking
4372    // the critical edge).  Bail out in this case.
4373    if (!Caller->use_empty())
4374      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
4375        for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
4376             UI != E; ++UI)
4377          if (PHINode *PN = dyn_cast<PHINode>(*UI))
4378            if (PN->getParent() == II->getNormalDest() ||
4379                PN->getParent() == II->getUnwindDest())
4380              return false;
4381  }
4382
4383  unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
4384  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
4385
4386  CallSite::arg_iterator AI = CS.arg_begin();
4387  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
4388    const Type *ParamTy = FT->getParamType(i);
4389    const Type *ActTy = (*AI)->getType();
4390
4391    if (!CastInst::isCastable(ActTy, ParamTy))
4392      return false;   // Cannot transform this parameter value.
4393
4394    if (CallerPAL.getParamAttributes(i + 1)
4395        & Attribute::typeIncompatible(ParamTy))
4396      return false;   // Attribute not compatible with transformed value.
4397
4398    // Converting from one pointer type to another or between a pointer and an
4399    // integer of the same size is safe even if we do not have a body.
4400    bool isConvertible = ActTy == ParamTy ||
4401      (TD && ((isa<PointerType>(ParamTy) ||
4402      ParamTy == TD->getIntPtrType(Caller->getContext())) &&
4403              (isa<PointerType>(ActTy) ||
4404              ActTy == TD->getIntPtrType(Caller->getContext()))));
4405    if (Callee->isDeclaration() && !isConvertible) return false;
4406  }
4407
4408  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
4409      Callee->isDeclaration())
4410    return false;   // Do not delete arguments unless we have a function body.
4411
4412  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
4413      !CallerPAL.isEmpty())
4414    // In this case we have more arguments than the new function type, but we
4415    // won't be dropping them.  Check that these extra arguments have attributes
4416    // that are compatible with being a vararg call argument.
4417    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
4418      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
4419        break;
4420      Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
4421      if (PAttrs & Attribute::VarArgsIncompatible)
4422        return false;
4423    }
4424
4425  // Okay, we decided that this is a safe thing to do: go ahead and start
4426  // inserting cast instructions as necessary...
4427  std::vector<Value*> Args;
4428  Args.reserve(NumActualArgs);
4429  SmallVector<AttributeWithIndex, 8> attrVec;
4430  attrVec.reserve(NumCommonArgs);
4431
4432  // Get any return attributes.
4433  Attributes RAttrs = CallerPAL.getRetAttributes();
4434
4435  // If the return value is not being used, the type may not be compatible
4436  // with the existing attributes.  Wipe out any problematic attributes.
4437  RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
4438
4439  // Add the new return attributes.
4440  if (RAttrs)
4441    attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
4442
4443  AI = CS.arg_begin();
4444  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
4445    const Type *ParamTy = FT->getParamType(i);
4446    if ((*AI)->getType() == ParamTy) {
4447      Args.push_back(*AI);
4448    } else {
4449      Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
4450          false, ParamTy, false);
4451      Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
4452    }
4453
4454    // Add any parameter attributes.
4455    if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
4456      attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
4457  }
4458
4459  // If the function takes more arguments than the call was taking, add them
4460  // now.
4461  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
4462    Args.push_back(Constant::getNullValue(FT->getParamType(i)));
4463
4464  // If we are removing arguments to the function, emit an obnoxious warning.
4465  if (FT->getNumParams() < NumActualArgs) {
4466    if (!FT->isVarArg()) {
4467      errs() << "WARNING: While resolving call to function '"
4468             << Callee->getName() << "' arguments were dropped!\n";
4469    } else {
4470      // Add all of the arguments in their promoted form to the arg list.
4471      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
4472        const Type *PTy = getPromotedType((*AI)->getType());
4473        if (PTy != (*AI)->getType()) {
4474          // Must promote to pass through va_arg area!
4475          Instruction::CastOps opcode =
4476            CastInst::getCastOpcode(*AI, false, PTy, false);
4477          Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
4478        } else {
4479          Args.push_back(*AI);
4480        }
4481
4482        // Add any parameter attributes.
4483        if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
4484          attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
4485      }
4486    }
4487  }
4488
  if (Attributes FnAttrs = CallerPAL.getFnAttributes())
4490    attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
4491
4492  if (NewRetTy->isVoidTy())
4493    Caller->setName("");   // Void type should not have a name.
4494
4495  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
4496                                                     attrVec.end());
4497
4498  Instruction *NC;
4499  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4500    NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
4501                            Args.begin(), Args.end(),
4502                            Caller->getName(), Caller);
4503    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
4504    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
4505  } else {
4506    NC = CallInst::Create(Callee, Args.begin(), Args.end(),
4507                          Caller->getName(), Caller);
4508    CallInst *CI = cast<CallInst>(Caller);
4509    if (CI->isTailCall())
4510      cast<CallInst>(NC)->setTailCall();
4511    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
4512    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
4513  }
4514
4515  // Insert a cast of the return type as necessary.
4516  Value *NV = NC;
4517  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
4518    if (!NV->getType()->isVoidTy()) {
4519      Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
4520                                                            OldRetTy, false);
4521      NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
4522
      // If this is an invoke instruction, we should insert it after the first
      // non-phi instruction in the normal successor block.
4525      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4526        BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
4527        InsertNewInstBefore(NC, *I);
4528      } else {
        // Otherwise, it's a call: just insert the cast right after the call.
4530        InsertNewInstBefore(NC, *Caller);
4531      }
4532      Worklist.AddUsersToWorkList(*Caller);
4533    } else {
4534      NV = UndefValue::get(Caller->getType());
4535    }
4536  }
4537
4538
4539  if (!Caller->use_empty())
4540    Caller->replaceAllUsesWith(NV);
4541
4542  EraseInstFromFunction(*Caller);
4543  return true;
4544}
4545
4546// transformCallThroughTrampoline - Turn a call to a function created by the
4547// init_trampoline intrinsic into a direct call to the underlying function.
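// For example (a sketch; @f, %f.tramp, and %chain are hypothetical): if @f
// takes an i8* parameter marked 'nest' and %f.tramp was produced by
// init_trampoline with chain %chain, then
//   %r = call i32 %f.tramp(i32 %x)
// becomes
//   %r = call i32 @f(i8* nest %chain, i32 %x)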
4548//
4549Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
4550  Value *Callee = CS.getCalledValue();
4551  const PointerType *PTy = cast<PointerType>(Callee->getType());
4552  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4553  const AttrListPtr &Attrs = CS.getAttributes();
4554
4555  // If the call already has the 'nest' attribute somewhere then give up -
4556  // otherwise 'nest' would occur twice after splicing in the chain.
4557  if (Attrs.hasAttrSomewhere(Attribute::Nest))
4558    return 0;
4559
4560  IntrinsicInst *Tramp =
4561    cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
4562
4563  Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
4564  const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
4565  const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
4566
4567  const AttrListPtr &NestAttrs = NestF->getAttributes();
4568  if (!NestAttrs.isEmpty()) {
4569    unsigned NestIdx = 1;
4570    const Type *NestTy = 0;
4571    Attributes NestAttr = Attribute::None;
4572
4573    // Look for a parameter marked with the 'nest' attribute.
4574    for (FunctionType::param_iterator I = NestFTy->param_begin(),
4575         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
4576      if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
4577        // Record the parameter type and any other attributes.
4578        NestTy = *I;
4579        NestAttr = NestAttrs.getParamAttributes(NestIdx);
4580        break;
4581      }
4582
4583    if (NestTy) {
4584      Instruction *Caller = CS.getInstruction();
4585      std::vector<Value*> NewArgs;
4586      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
4587
4588      SmallVector<AttributeWithIndex, 8> NewAttrs;
4589      NewAttrs.reserve(Attrs.getNumSlots() + 1);
4590
4591      // Insert the nest argument into the call argument list, which may
4592      // mean appending it.  Likewise for attributes.
4593
4594      // Add any result attributes.
4595      if (Attributes Attr = Attrs.getRetAttributes())
4596        NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
4597
4598      {
4599        unsigned Idx = 1;
4600        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
4601        do {
4602          if (Idx == NestIdx) {
4603            // Add the chain argument and attributes.
4604            Value *NestVal = Tramp->getOperand(3);
4605            if (NestVal->getType() != NestTy)
4606              NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
4607            NewArgs.push_back(NestVal);
4608            NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
4609          }
4610
4611          if (I == E)
4612            break;
4613
4614          // Add the original argument and attributes.
4615          NewArgs.push_back(*I);
4616          if (Attributes Attr = Attrs.getParamAttributes(Idx))
4617            NewAttrs.push_back
4618              (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
4619
4620          ++Idx, ++I;
4621        } while (1);
4622      }
4623
4624      // Add any function attributes.
4625      if (Attributes Attr = Attrs.getFnAttributes())
4626        NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
4627
4628      // The trampoline may have been bitcast to a bogus type (FTy).
4629      // Handle this by synthesizing a new function type, equal to FTy
4630      // with the chain parameter inserted.
4631
4632      std::vector<const Type*> NewTypes;
4633      NewTypes.reserve(FTy->getNumParams()+1);
4634
4635      // Insert the chain's type into the list of parameter types, which may
4636      // mean appending it.
4637      {
4638        unsigned Idx = 1;
4639        FunctionType::param_iterator I = FTy->param_begin(),
4640          E = FTy->param_end();
4641
4642        do {
4643          if (Idx == NestIdx)
4644            // Add the chain's type.
4645            NewTypes.push_back(NestTy);
4646
4647          if (I == E)
4648            break;
4649
4650          // Add the original type.
4651          NewTypes.push_back(*I);
4652
4653          ++Idx, ++I;
4654        } while (1);
4655      }
4656
4657      // Replace the trampoline call with a direct call.  Let the generic
4658      // code sort out any function type mismatches.
4659      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
4660                                                FTy->isVarArg());
4661      Constant *NewCallee =
4662        NestF->getType() == PointerType::getUnqual(NewFTy) ?
4663        NestF : ConstantExpr::getBitCast(NestF,
4664                                         PointerType::getUnqual(NewFTy));
4665      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
4666                                                   NewAttrs.end());
4667
4668      Instruction *NewCaller;
4669      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4670        NewCaller = InvokeInst::Create(NewCallee,
4671                                       II->getNormalDest(), II->getUnwindDest(),
4672                                       NewArgs.begin(), NewArgs.end(),
4673                                       Caller->getName(), Caller);
4674        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
4675        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
4676      } else {
4677        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
4678                                     Caller->getName(), Caller);
4679        if (cast<CallInst>(Caller)->isTailCall())
4680          cast<CallInst>(NewCaller)->setTailCall();
4681        cast<CallInst>(NewCaller)->
4682          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
4683        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
4684      }
4685      if (!Caller->getType()->isVoidTy())
4686        Caller->replaceAllUsesWith(NewCaller);
4687      Caller->eraseFromParent();
4688      Worklist.Remove(Caller);
4689      return 0;
4690    }
4691  }
4692
4693  // Replace the trampoline call with a direct call.  Since there is no 'nest'
4694  // parameter, there is no need to adjust the argument list.  Let the generic
4695  // code sort out any function type mismatches.
4696  Constant *NewCallee =
4697    NestF->getType() == PTy ? NestF :
4698                              ConstantExpr::getBitCast(NestF, PTy);
4699  CS.setCalledFunction(NewCallee);
4700  return CS.getInstruction();
4701}
4702
4703
4704
4705Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
4706  SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
4707
4708  if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
4709    return ReplaceInstUsesWith(GEP, V);
4710
4711  Value *PtrOp = GEP.getOperand(0);
4712
4713  if (isa<UndefValue>(GEP.getOperand(0)))
4714    return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
4715
4716  // Eliminate unneeded casts for indices.
4717  if (TD) {
4718    bool MadeChange = false;
4719    unsigned PtrSize = TD->getPointerSizeInBits();
4720
4721    gep_type_iterator GTI = gep_type_begin(GEP);
4722    for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
4723         I != E; ++I, ++GTI) {
4724      if (!isa<SequentialType>(*GTI)) continue;
4725
4726      // If we are using a wider index than needed for this platform, shrink it
4727      // to what we need.  If narrower, sign-extend it to what we need.  This
4728      // explicit cast can make subsequent optimizations more obvious.
4729      unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
4730      if (OpBits == PtrSize)
4731        continue;
4732
4733      *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
4734      MadeChange = true;
4735    }
4736    if (MadeChange) return &GEP;
4737  }
4738
4739  // Combine Indices - If the source pointer to this getelementptr instruction
4740  // is a getelementptr instruction, combine the indices of the two
4741  // getelementptr instructions into a single instruction.
4742  //
4743  if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
    // Note that if our source is a gep chain itself, we wait for that chain
    // to be resolved before we perform this transformation.  This avoids us
    // creating a TON of code in some cases.
4747    //
4748    if (GetElementPtrInst *SrcGEP =
4749          dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
4750      if (SrcGEP->getNumOperands() == 2)
4751        return 0;   // Wait until our source is folded to completion.
4752
4753    SmallVector<Value*, 8> Indices;
4754
4755    // Find out whether the last index in the source GEP is a sequential idx.
4756    bool EndsWithSequential = false;
4757    for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
4758         I != E; ++I)
4759      EndsWithSequential = !isa<StructType>(*I);
4760
4761    // Can we combine the two pointer arithmetics offsets?
4762    if (EndsWithSequential) {
4763      // Replace: gep (gep %P, long B), long A, ...
4764      // With:    T = long A+B; gep %P, T, ...
4765      //
4766      Value *Sum;
4767      Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
4768      Value *GO1 = GEP.getOperand(1);
4769      if (SO1 == Constant::getNullValue(SO1->getType())) {
4770        Sum = GO1;
4771      } else if (GO1 == Constant::getNullValue(GO1->getType())) {
4772        Sum = SO1;
4773      } else {
4774        // If they aren't the same type, then the input hasn't been processed
4775        // by the loop above yet (which canonicalizes sequential index types to
4776        // intptr_t).  Just avoid transforming this until the input has been
4777        // normalized.
4778        if (SO1->getType() != GO1->getType())
4779          return 0;
4780        Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
4781      }
4782
4783      // Update the GEP in place if possible.
4784      if (Src->getNumOperands() == 2) {
4785        GEP.setOperand(0, Src->getOperand(0));
4786        GEP.setOperand(1, Sum);
4787        return &GEP;
4788      }
4789      Indices.append(Src->op_begin()+1, Src->op_end()-1);
4790      Indices.push_back(Sum);
4791      Indices.append(GEP.op_begin()+2, GEP.op_end());
4792    } else if (isa<Constant>(*GEP.idx_begin()) &&
4793               cast<Constant>(*GEP.idx_begin())->isNullValue() &&
4794               Src->getNumOperands() != 1) {
4795      // Otherwise we can do the fold if the first index of the GEP is a zero
4796      Indices.append(Src->op_begin()+1, Src->op_end());
4797      Indices.append(GEP.idx_begin()+1, GEP.idx_end());
4798    }
4799
4800    if (!Indices.empty())
4801      return (cast<GEPOperator>(&GEP)->isInBounds() &&
4802              Src->isInBounds()) ?
4803        GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
4804                                          Indices.end(), GEP.getName()) :
4805        GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
4806                                  Indices.end(), GEP.getName());
4807  }
4808
4809  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
4810  if (Value *X = getBitCastOperand(PtrOp)) {
4811    assert(isa<PointerType>(X->getType()) && "Must be cast from pointer");
4812
4813    // If the input bitcast is actually "bitcast(bitcast(x))", then we don't
4814    // want to change the gep until the bitcasts are eliminated.
4815    if (getBitCastOperand(X)) {
4816      Worklist.AddValue(PtrOp);
4817      return 0;
4818    }
4819
4820    bool HasZeroPointerIndex = false;
4821    if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
4822      HasZeroPointerIndex = C->isZero();
4823
4824    // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
4825    // into     : GEP [10 x i8]* X, i32 0, ...
4826    //
4827    // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
4828    //           into     : GEP i8* X, ...
4829    //
4830    // This occurs when the program declares an array extern like "int X[];"
4831    if (HasZeroPointerIndex) {
4832      const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
4833      const PointerType *XTy = cast<PointerType>(X->getType());
4834      if (const ArrayType *CATy =
4835          dyn_cast<ArrayType>(CPTy->getElementType())) {
4836        // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
4837        if (CATy->getElementType() == XTy->getElementType()) {
4838          // -> GEP i8* X, ...
4839          SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end());
4840          return cast<GEPOperator>(&GEP)->isInBounds() ?
4841            GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(),
4842                                              GEP.getName()) :
4843            GetElementPtrInst::Create(X, Indices.begin(), Indices.end(),
4844                                      GEP.getName());
4845        }
4846
4847        if (const ArrayType *XATy = dyn_cast<ArrayType>(XTy->getElementType())){
4848          // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
4849          if (CATy->getElementType() == XATy->getElementType()) {
4850            // -> GEP [10 x i8]* X, i32 0, ...
4851            // At this point, we know that the cast source type is a pointer
4852            // to an array of the same type as the destination pointer
4853            // array.  Because the array type is never stepped over (there
4854            // is a leading zero) we can fold the cast into this GEP.
4855            GEP.setOperand(0, X);
4856            return &GEP;
4857          }
4858        }
4859      }
4860    } else if (GEP.getNumOperands() == 2) {
4861      // Transform things like:
4862      // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
4863      // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
4864      const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType();
4865      const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
4866      if (TD && isa<ArrayType>(SrcElTy) &&
4867          TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
4868          TD->getTypeAllocSize(ResElTy)) {
4869        Value *Idx[2];
4870        Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
4871        Idx[1] = GEP.getOperand(1);
4872        Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
4873          Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
4874          Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
4875        // V and GEP are both pointer types --> BitCast
4876        return new BitCastInst(NewGEP, GEP.getType());
4877      }
4878
4879      // Transform things like:
4880      // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
4881      //   (where tmp = 8*tmp2) into:
4882      // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
4883
4884      if (TD && isa<ArrayType>(SrcElTy) &&
4885          ResElTy == Type::getInt8Ty(GEP.getContext())) {
4886        uint64_t ArrayEltSize =
4887            TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
4888
4889        // Check to see if "tmp" is a scale by a multiple of ArrayEltSize.  We
4890        // allow either a mul, shift, or constant here.
4891        Value *NewIdx = 0;
4892        ConstantInt *Scale = 0;
4893        if (ArrayEltSize == 1) {
4894          NewIdx = GEP.getOperand(1);
4895          Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
4896        } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
4897          NewIdx = ConstantInt::get(CI->getType(), 1);
4898          Scale = CI;
4899        } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
4900          if (Inst->getOpcode() == Instruction::Shl &&
4901              isa<ConstantInt>(Inst->getOperand(1))) {
4902            ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
4903            uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
4904            Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
4905                                     1ULL << ShAmtVal);
4906            NewIdx = Inst->getOperand(0);
4907          } else if (Inst->getOpcode() == Instruction::Mul &&
4908                     isa<ConstantInt>(Inst->getOperand(1))) {
4909            Scale = cast<ConstantInt>(Inst->getOperand(1));
4910            NewIdx = Inst->getOperand(0);
4911          }
4912        }
4913
        // If the index will be exactly the right offset with the scale taken
        // out, perform the transformation.  Note, we don't know whether Scale
        // is signed or not.  We'll use the unsigned versions of the division
        // and modulo operations after making sure Scale doesn't have the sign
        // bit set.
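        // For example (a sketch): with a [100 x double] source array
        // (ArrayEltSize == 8) and an index %tmp = mul i32 %tmp2, 24, Scale
        // is 24, so the new index becomes %tmp2 * (24 / 8) == %tmp2 * 3.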
4918        if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
4919            Scale->getZExtValue() % ArrayEltSize == 0) {
4920          Scale = ConstantInt::get(Scale->getType(),
4921                                   Scale->getZExtValue() / ArrayEltSize);
4922          if (Scale->getZExtValue() != 1) {
4923            Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
4924                                                       false /*ZExt*/);
4925            NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
4926          }
4927
4928          // Insert the new GEP instruction.
4929          Value *Idx[2];
4930          Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
4931          Idx[1] = NewIdx;
4932          Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
4933            Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
4934            Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
4935          // The NewGEP must be pointer typed, so must the old one -> BitCast
4936          return new BitCastInst(NewGEP, GEP.getType());
4937        }
4938      }
4939    }
4940  }
4941
4942  /// See if we can simplify:
4943  ///   X = bitcast A* to B*
4944  ///   Y = gep X, <...constant indices...>
4945  /// into a gep of the original struct.  This is important for SROA and alias
4946  /// analysis of unions.  If "A" is also a bitcast, wait for A/X to be merged.
4947  if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
4948    if (TD &&
4949        !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
4950      // Determine how much the GEP moves the pointer.  We are guaranteed to get
4951      // a constant back from EmitGEPOffset.
4952      ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP));
4953      int64_t Offset = OffsetV->getSExtValue();
4954
4955      // If this GEP instruction doesn't move the pointer, just replace the GEP
4956      // with a bitcast of the real input to the dest type.
4957      if (Offset == 0) {
4958        // If the bitcast is of an allocation, and the allocation will be
4959        // converted to match the type of the cast, don't touch this.
4960        if (isa<AllocaInst>(BCI->getOperand(0)) ||
4961            isMalloc(BCI->getOperand(0))) {
4962          // See if the bitcast simplifies, if so, don't nuke this GEP yet.
4963          if (Instruction *I = visitBitCast(*BCI)) {
4964            if (I != BCI) {
4965              I->takeName(BCI);
4966              BCI->getParent()->getInstList().insert(BCI, I);
4967              ReplaceInstUsesWith(*BCI, I);
4968            }
4969            return &GEP;
4970          }
4971        }
4972        return new BitCastInst(BCI->getOperand(0), GEP.getType());
4973      }
4974
4975      // Otherwise, if the offset is non-zero, we need to find out if there is a
4976      // field at Offset in 'A's type.  If so, we can pull the cast through the
4977      // GEP.
4978      SmallVector<Value*, 8> NewIndices;
4979      const Type *InTy =
4980        cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
4981      if (FindElementAtOffset(InTy, Offset, NewIndices)) {
4982        Value *NGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
4983          Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
4984                                     NewIndices.end()) :
4985          Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
4986                             NewIndices.end());
4987
4988        if (NGEP->getType() == GEP.getType())
4989          return ReplaceInstUsesWith(GEP, NGEP);
4990        NGEP->takeName(&GEP);
4991        return new BitCastInst(NGEP, GEP.getType());
4992      }
4993    }
4994  }
4995
4996  return 0;
4997}
4998
4999Instruction *InstCombiner::visitFree(Instruction &FI) {
5000  Value *Op = FI.getOperand(1);
5001
5002  // free undef -> unreachable.
5003  if (isa<UndefValue>(Op)) {
    // Insert a store to an undef pointer to mark this code unreachable,
    // since we cannot modify the CFG here.
5005    new StoreInst(ConstantInt::getTrue(FI.getContext()),
5006           UndefValue::get(Type::getInt1PtrTy(FI.getContext())), &FI);
5007    return EraseInstFromFunction(FI);
5008  }
5009
  // If we have 'free null', delete the instruction.  This can happen in STL
  // code when lots of inlining happens.
5012  if (isa<ConstantPointerNull>(Op))
5013    return EraseInstFromFunction(FI);
5014
5015  // If we have a malloc call whose only use is a free call, delete both.
5016  if (isMalloc(Op)) {
5017    if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
5018      if (Op->hasOneUse() && CI->hasOneUse()) {
5019        EraseInstFromFunction(FI);
5020        EraseInstFromFunction(*CI);
5021        return EraseInstFromFunction(*cast<Instruction>(Op));
5022      }
5023    } else {
5024      // Op is a call to malloc
5025      if (Op->hasOneUse()) {
5026        EraseInstFromFunction(FI);
5027        return EraseInstFromFunction(*cast<Instruction>(Op));
5028      }
5029    }
5030  }
5031
5032  return 0;
5033}
5034
5035
5036
5037Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
5038  // Change br (not X), label True, label False to: br X, label False, True
5039  Value *X = 0;
5040  BasicBlock *TrueDest;
5041  BasicBlock *FalseDest;
5042  if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
5043      !isa<Constant>(X)) {
5044    // Swap Destinations and condition...
5045    BI.setCondition(X);
5046    BI.setSuccessor(0, FalseDest);
5047    BI.setSuccessor(1, TrueDest);
5048    return &BI;
5049  }
5050
  // Canonicalize fcmp_one/ole/oge -> fcmp_ueq/ugt/ult by inverting the
  // predicate and swapping the successors.
5052  FCmpInst::Predicate FPred; Value *Y;
5053  if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
5054                             TrueDest, FalseDest)) &&
5055      BI.getCondition()->hasOneUse())
5056    if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
5057        FPred == FCmpInst::FCMP_OGE) {
5058      FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
5059      Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
5060
5061      // Swap Destinations and condition.
5062      BI.setSuccessor(0, FalseDest);
5063      BI.setSuccessor(1, TrueDest);
5064      Worklist.Add(Cond);
5065      return &BI;
5066    }
5067
  // Canonicalize icmp_ne/ule/sle/uge/sge -> icmp_eq/ugt/sgt/ult/slt by
  // inverting the predicate and swapping the successors.
5069  ICmpInst::Predicate IPred;
5070  if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
5071                      TrueDest, FalseDest)) &&
5072      BI.getCondition()->hasOneUse())
5073    if (IPred == ICmpInst::ICMP_NE  || IPred == ICmpInst::ICMP_ULE ||
5074        IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
5075        IPred == ICmpInst::ICMP_SGE) {
5076      ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
5077      Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
5078      // Swap Destinations and condition.
5079      BI.setSuccessor(0, FalseDest);
5080      BI.setSuccessor(1, TrueDest);
5081      Worklist.Add(Cond);
5082      return &BI;
5083    }
5084
5085  return 0;
5086}
5087
5088Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
5089  Value *Cond = SI.getCondition();
5090  if (Instruction *I = dyn_cast<Instruction>(Cond)) {
5091    if (I->getOpcode() == Instruction::Add)
5092      if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
5093        // change 'switch (X+4) case 1:' into 'switch (X) case -3'
5094        for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
5095          SI.setOperand(i,
5096                   ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
5097                                                AddRHS));
5098        SI.setOperand(0, I->getOperand(0));
5099        Worklist.Add(I);
5100        return &SI;
5101      }
5102  }
5103  return 0;
5104}
5105
5106Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
5107  Value *Agg = EV.getAggregateOperand();
5108
5109  if (!EV.hasIndices())
5110    return ReplaceInstUsesWith(EV, Agg);
5111
5112  if (Constant *C = dyn_cast<Constant>(Agg)) {
5113    if (isa<UndefValue>(C))
5114      return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
5115
5116    if (isa<ConstantAggregateZero>(C))
5117      return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
5118
5119    if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
5120      // Extract the element indexed by the first index out of the constant
5121      Value *V = C->getOperand(*EV.idx_begin());
5122      if (EV.getNumIndices() > 1)
5123        // Extract the remaining indices out of the constant indexed by the
5124        // first index
5125        return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
5126      else
5127        return ReplaceInstUsesWith(EV, V);
5128    }
5129    return 0; // Can't handle other constants
5130  }
5131  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
5132    // We're extracting from an insertvalue instruction, compare the indices
5133    const unsigned *exti, *exte, *insi, *inse;
5134    for (exti = EV.idx_begin(), insi = IV->idx_begin(),
5135         exte = EV.idx_end(), inse = IV->idx_end();
5136         exti != exte && insi != inse;
5137         ++exti, ++insi) {
5138      if (*insi != *exti)
        // The insert and extract reference two different elements.
5140        // This means the extract is not influenced by the insert, and we can
5141        // replace the aggregate operand of the extract with the aggregate
5142        // operand of the insert. i.e., replace
5143        // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
5144        // %E = extractvalue { i32, { i32 } } %I, 0
5145        // with
5146        // %E = extractvalue { i32, { i32 } } %A, 0
5147        return ExtractValueInst::Create(IV->getAggregateOperand(),
5148                                        EV.idx_begin(), EV.idx_end());
5149    }
5150    if (exti == exte && insi == inse)
5151      // Both iterators are at the end: Index lists are identical. Replace
5152      // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
5153      // %C = extractvalue { i32, { i32 } } %B, 1, 0
5154      // with "i32 42"
5155      return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand());
5156    if (exti == exte) {
5157      // The extract list is a prefix of the insert list. i.e. replace
5158      // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
5159      // %E = extractvalue { i32, { i32 } } %I, 1
5160      // with
5161      // %X = extractvalue { i32, { i32 } } %A, 1
5162      // %E = insertvalue { i32 } %X, i32 42, 0
5163      // by switching the order of the insert and extract (though the
5164      // insertvalue should be left in, since it may have other uses).
5165      Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
5166                                                 EV.idx_begin(), EV.idx_end());
5167      return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
5168                                     insi, inse);
5169    }
5170    if (insi == inse)
      // The insert list is a prefix of the extract list.  We can simply
      // remove the common indices from the extract and make it operate on
      // the inserted value instead of the insertvalue result.
5174      // i.e., replace
5175      // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
5176      // %E = extractvalue { i32, { i32 } } %I, 1, 0
5177      // with
      //      %E = extractvalue { i32 } { i32 42 }, 0
5179      return ExtractValueInst::Create(IV->getInsertedValueOperand(),
5180                                      exti, exte);
5181  }
5182  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
    // We're extracting from an intrinsic; see if we're the only user, which
    // allows us to simplify multiple-result intrinsics to simpler things
    // that just produce one value.
5186    if (II->hasOneUse()) {
5187      // Check if we're grabbing the overflow bit or the result of a 'with
5188      // overflow' intrinsic.  If it's the latter we can remove the intrinsic
5189      // and replace it with a traditional binary instruction.
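      // For example (a sketch):
      //   %s = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
      //   %v = extractvalue { i32, i1 } %s, 0
      // becomes plain "%v = add i32 %a, %b" when %s has no other uses.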
      switch (II->getIntrinsicID()) {
      case Intrinsic::uadd_with_overflow:
      case Intrinsic::sadd_with_overflow:
        if (*EV.idx_begin() == 0) {  // Normal result.
          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
          II->replaceAllUsesWith(UndefValue::get(II->getType()));
          EraseInstFromFunction(*II);
          return BinaryOperator::CreateAdd(LHS, RHS);
        }
        break;
      case Intrinsic::usub_with_overflow:
      case Intrinsic::ssub_with_overflow:
        if (*EV.idx_begin() == 0) {  // Normal result.
          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
          II->replaceAllUsesWith(UndefValue::get(II->getType()));
          EraseInstFromFunction(*II);
          return BinaryOperator::CreateSub(LHS, RHS);
        }
        break;
      case Intrinsic::umul_with_overflow:
      case Intrinsic::smul_with_overflow:
        if (*EV.idx_begin() == 0) {  // Normal result.
          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
          II->replaceAllUsesWith(UndefValue::get(II->getType()));
          EraseInstFromFunction(*II);
          return BinaryOperator::CreateMul(LHS, RHS);
        }
        break;
      default:
        break;
      }
    }
  }
  // Can't simplify extracts from other values. Note that nested extracts are
  // already simplified implicitly by the above (extract (extract (insert))
  // will be translated into extract (insert (extract)) first and then just
  // the value inserted, if appropriate).
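  // For example (illustrative IR), given
  //   %I  = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
  //   %E1 = extractvalue { i32, { i32 } } %I, 1
  //   %E2 = extractvalue { i32 } %E1, 0
  // the prefix rule above turns %E1 into an insertvalue of i32 42 into an
  // extract of %A, after which %E2 extracts with identical indices and
  // folds to i32 42.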
  return 0;
}

/// TryToSinkInstruction - Try to move the specified instruction from its
/// current block into the beginning of DestBlock, which can only happen if it's
/// safe to move the instruction past all of the instructions between it and the
/// end of its block.
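///
/// For example (illustrative), a load whose only use is in a successor block
/// can be sunk there, provided no instruction that may write to memory lies
/// between the load and the end of the load's current block.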
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
  assert(I->hasOneUse() && "Invariants didn't hold!");

  // Cannot sink PHI nodes, terminators, or instructions with side effects
  // (e.g. volatile loads, vaarg).
  if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
    return false;

  // Do not sink alloca instructions out of the entry block.
  if (isa<AllocaInst>(I) && I->getParent() ==
        &DestBlock->getParent()->getEntryBlock())
    return false;

  // We can only sink load instructions if there is nothing between the load and
  // the end of the block that could change the value.
  if (I->mayReadFromMemory()) {
    for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
         Scan != E; ++Scan)
      if (Scan->mayWriteToMemory())
        return false;
  }

  BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();

  I->moveBefore(InsertPos);
  ++NumSunkInst;
  return true;
}

/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
/// all reachable code to the worklist.
///
/// This has a couple of tricks to make the code faster and more powerful.  In
/// particular, we constant fold and DCE instructions as we go, to avoid adding
/// them to the worklist (this significantly speeds up instcombine on code where
/// many instructions are dead or constant).  Additionally, if we find a branch
/// whose condition is a known constant, we only visit the reachable successors.
///
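/// For example (illustrative IR), given "br i1 true, label %T, label %F",
/// only %T is pushed onto the block worklist; %F is visited only if some
/// other reachable edge targets it.
///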
static bool AddReachableCodeToWorklist(BasicBlock *BB,
                                       SmallPtrSet<BasicBlock*, 64> &Visited,
                                       InstCombiner &IC,
                                       const TargetData *TD) {
  bool MadeIRChange = false;
  SmallVector<BasicBlock*, 256> Worklist;
  Worklist.push_back(BB);

  std::vector<Instruction*> InstrsForInstCombineWorklist;
  InstrsForInstCombineWorklist.reserve(128);

  SmallPtrSet<ConstantExpr*, 64> FoldedConstants;

  while (!Worklist.empty()) {
    BB = Worklist.back();
    Worklist.pop_back();

    // We have now visited this block!  If we've already been here, ignore it.
    if (!Visited.insert(BB)) continue;

    for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
      Instruction *Inst = BBI++;

      // DCE instruction if trivially dead.
      if (isInstructionTriviallyDead(Inst)) {
        ++NumDeadInst;
        DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
        Inst->eraseFromParent();
        continue;
      }

      // ConstantProp instruction if trivially constant.
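      // For example (illustrative), "%r = add i32 2, 3" folds to the constant
      // i32 5: all uses of %r are rewritten to 5 and %r is erased.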
      if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
        if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
          DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
                       << *Inst << '\n');
          Inst->replaceAllUsesWith(C);
          ++NumConstProp;
          Inst->eraseFromParent();
          continue;
        }

      if (TD) {
        // See if we can constant fold its operands.
        for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
             i != e; ++i) {
          ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
          if (CE == 0) continue;

          // If we already folded this constant, don't try again.
          if (!FoldedConstants.insert(CE))
            continue;

          Constant *NewC = ConstantFoldConstantExpression(CE, TD);
          if (NewC && NewC != CE) {
            *i = NewC;
            MadeIRChange = true;
          }
        }
      }

      InstrsForInstCombineWorklist.push_back(Inst);
    }

    // Recursively visit successors.  If this is a branch or switch on a
    // constant, only visit the reachable successor.
    TerminatorInst *TI = BB->getTerminator();
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
        bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
        BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
        Worklist.push_back(ReachableBB);
        continue;
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
        // See if this is an explicit destination; if so, only that successor
        // is reachable.
        bool FoundCase = false;
        for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
          if (SI->getCaseValue(i) == Cond) {
            Worklist.push_back(SI->getSuccessor(i));
            FoundCase = true;
            break;
          }

        // Otherwise only the default destination is reachable.
        if (!FoundCase)
          Worklist.push_back(SI->getSuccessor(0));
        continue;
      }
    }

    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
      Worklist.push_back(TI->getSuccessor(i));
  }
5371
5372  // Once we've found all of the instructions to add to instcombine's worklist,
5373  // add them in reverse order.  This way instcombine will visit from the top
5374  // of the function down.  This jives well with the way that it adds all uses
5375  // of instructions to the worklist after doing a transformation, thus avoiding
5376  // some N^2 behavior in pathological cases.
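  // For example (illustrative IR), if "%B = sub i32 %A, 0" is visited before
  // its user "%C = mul i32 %B, %B", the simplification of %B to %A is already
  // in place when %C is visited, instead of forcing %C to be revisited.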
  IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
                              InstrsForInstCombineWorklist.size());

  return MadeIRChange;
}

bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
  MadeIRChange = false;

  DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
        << F.getNameStr() << "\n");

  {
    // Do a depth-first traversal of the function, populating the worklist with
    // the reachable instructions.  Ignore blocks that are not reachable.  Keep
    // track of which blocks we visit.
    SmallPtrSet<BasicBlock*, 64> Visited;
    MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);

    // Do a quick scan over the function.  If we find any blocks that are
    // unreachable, remove any instructions inside of them.  This prevents
    // the instcombine code from having to deal with some bad special cases.
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      if (!Visited.count(BB)) {
        Instruction *Term = BB->getTerminator();
        while (Term != BB->begin()) {   // Remove instrs bottom-up
          BasicBlock::iterator I = Term; --I;

          DEBUG(errs() << "IC: DCE: " << *I << '\n');
          // A debug intrinsic shouldn't force another iteration if we weren't
          // going to do one without it.
          if (!isa<DbgInfoIntrinsic>(I)) {
            ++NumDeadInst;
            MadeIRChange = true;
          }

          // If I is not of void type, replace all of its uses with undef.
          // This allows ValueHandles and custom metadata to adjust themselves.
          if (!I->getType()->isVoidTy())
            I->replaceAllUsesWith(UndefValue::get(I->getType()));
          I->eraseFromParent();
        }
      }
  }

  while (!Worklist.isEmpty()) {
    Instruction *I = Worklist.RemoveOne();
    if (I == 0) continue;  // skip null values.

    // Check to see if we can DCE the instruction.
    if (isInstructionTriviallyDead(I)) {
      DEBUG(errs() << "IC: DCE: " << *I << '\n');
      EraseInstFromFunction(*I);
      ++NumDeadInst;
      MadeIRChange = true;
      continue;
    }

    // Instruction isn't dead, see if we can constant propagate it.
    if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
      if (Constant *C = ConstantFoldInstruction(I, TD)) {
        DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');

        // Add operands to the worklist.
        ReplaceInstUsesWith(*I, C);
        ++NumConstProp;
        EraseInstFromFunction(*I);
        MadeIRChange = true;
        continue;
      }

    // See if we can trivially sink this instruction to a successor basic block.
    if (I->hasOneUse()) {
      BasicBlock *BB = I->getParent();
      Instruction *UserInst = cast<Instruction>(I->use_back());
      BasicBlock *UserParent;

      // Get the block the use occurs in.
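      // (For a PHI, the use is considered to occur in the predecessor block
      // that supplies the incoming value, not in the block containing the PHI.)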
      if (PHINode *PN = dyn_cast<PHINode>(UserInst))
        UserParent = PN->getIncomingBlock(I->use_begin().getUse());
      else
        UserParent = UserInst->getParent();

      if (UserParent != BB) {
        bool UserIsSuccessor = false;
        // See if the user is one of our successors.
        for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
          if (*SI == UserParent) {
            UserIsSuccessor = true;
            break;
          }

        // If the user is one of our immediate successors, and if that successor
        // only has us as a predecessor (we'd have to split the critical edge
        // otherwise), we can keep going.
        if (UserIsSuccessor && UserParent->getSinglePredecessor())
          // Okay, the CFG is simple enough, try to sink this instruction.
          MadeIRChange |= TryToSinkInstruction(I, UserParent);
      }
    }

    // Now that we have an instruction, try combining it to simplify it.
    Builder->SetInsertPoint(I->getParent(), I);

#ifndef NDEBUG
    std::string OrigI;
#endif
    DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
    DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');

    if (Instruction *Result = visit(*I)) {
      ++NumCombined;
      // Should we replace the old instruction with a new one?
      if (Result != I) {
        DEBUG(errs() << "IC: Old = " << *I << '\n'
                     << "    New = " << *Result << '\n');

        // Everything uses the new instruction now.
        I->replaceAllUsesWith(Result);

        // Push the new instruction and any users onto the worklist.
        Worklist.Add(Result);
        Worklist.AddUsersToWorkList(*Result);

        // Move the name to the new instruction first.
        Result->takeName(I);

        // Insert the new instruction into the basic block.  Unless we are
        // combining a PHI, skip past any leading PHIs so we don't insert
        // into the middle of a block of PHIs.
        BasicBlock *InstParent = I->getParent();
        BasicBlock::iterator InsertPos = I;

        if (!isa<PHINode>(Result))
          while (isa<PHINode>(InsertPos))
            ++InsertPos;

        InstParent->getInstList().insert(InsertPos, Result);

        EraseInstFromFunction(*I);
      } else {
#ifndef NDEBUG
        DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
                     << "    New = " << *I << '\n');
#endif

        // If the instruction was modified, it's possible that it is now dead.
        // If so, remove it.
        if (isInstructionTriviallyDead(I)) {
          EraseInstFromFunction(*I);
        } else {
          Worklist.Add(I);
          Worklist.AddUsersToWorkList(*I);
        }
      }
      MadeIRChange = true;
    }
  }

  Worklist.Zap();
  return MadeIRChange;
}


bool InstCombiner::runOnFunction(Function &F) {
  MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
  TD = getAnalysisIfAvailable<TargetData>();

  /// Builder - This is an IRBuilder that automatically adds new instructions
  /// to the worklist when they are created.
  IRBuilder<true, TargetFolder, InstCombineIRInserter>
    TheBuilder(F.getContext(), TargetFolder(TD),
               InstCombineIRInserter(Worklist));
  Builder = &TheBuilder;

  bool EverMadeChange = false;

  // Iterate while there is work to do.
  unsigned Iteration = 0;
  while (DoOneIteration(F, Iteration++))
    EverMadeChange = true;

  Builder = 0;
  return EverMadeChange;
}

FunctionPass *llvm::createInstructionCombiningPass() {
  return new InstCombiner();
}