InstCombineSimplifyDemanded.cpp revision e0b4b721aa82796c6ee5cf501ecd85f4974732ee
1//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains logic for simplifying instructions based on information
11// about how they are used.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "InstCombine.h"
17#include "llvm/Target/TargetData.h"
18#include "llvm/IntrinsicInst.h"
19
20using namespace llvm;
21
22
23/// ShrinkDemandedConstant - Check to see if the specified operand of the
24/// specified instruction is a constant integer.  If so, check to see if there
25/// are any bits set in the constant that are not demanded.  If so, shrink the
26/// constant and return true.
27static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
28                                   APInt Demanded) {
29  assert(I && "No instruction?");
30  assert(OpNo < I->getNumOperands() && "Operand index too large");
31
32  // If the operand is not a constant integer, nothing to do.
33  ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
34  if (!OpC) return false;
35
36  // If there are no bits set that aren't demanded, nothing to do.
37  Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
38  if ((~Demanded & OpC->getValue()) == 0)
39    return false;
40
41  // This instruction is producing bits that are not demanded. Shrink the RHS.
42  Demanded &= OpC->getValue();
43  I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
44  return true;
45}
46
47
48
49/// SimplifyDemandedInstructionBits - Inst is an integer instruction that
50/// SimplifyDemandedBits knows about.  See if the instruction has any
51/// properties that allow us to simplify its operands.
52bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
53  unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
54  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
55  APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
56
57  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
58                                     KnownZero, KnownOne, 0);
59  if (V == 0) return false;
60  if (V == &Inst) return true;
61  ReplaceInstUsesWith(Inst, V);
62  return true;
63}
64
65/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
66/// specified instruction operand if possible, updating it in place.  It returns
67/// true if it made any change and false otherwise.
68bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
69                                        APInt &KnownZero, APInt &KnownOne,
70                                        unsigned Depth) {
71  Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
72                                          KnownZero, KnownOne, Depth);
73  if (NewVal == 0) return false;
74  U = NewVal;
75  return true;
76}
77
78
79/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
80/// value based on the demanded bits.  When this function is called, it is known
81/// that only the bits set in DemandedMask of the result of V are ever used
82/// downstream. Consequently, depending on the mask and V, it may be possible
83/// to replace V with a constant or one of its operands. In such cases, this
84/// function does the replacement and returns true. In all other cases, it
85/// returns false after analyzing the expression and setting KnownOne and known
86/// to be one in the expression.  KnownZero contains all the bits that are known
87/// to be zero in the expression. These are provided to potentially allow the
88/// caller (which might recursively be SimplifyDemandedBits itself) to simplify
89/// the expression. KnownOne and KnownZero always follow the invariant that
90/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
91/// the bits in KnownOne and KnownZero may only be accurate for those bits set
92/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
93/// and KnownOne must all be the same.
94///
95/// This returns null if it did not change anything and it permits no
96/// simplification.  This returns V itself if it did some simplification of V's
97/// operands based on the information about what bits are demanded. This returns
98/// some other non-null value if it found out that V is equal to another value
99/// in the context where the specified bits are demanded, but not for all users.
100Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
101                                             APInt &KnownZero, APInt &KnownOne,
102                                             unsigned Depth) {
103  assert(V != 0 && "Null pointer of Value???");
104  assert(Depth <= 6 && "Limit Search Depth");
105  uint32_t BitWidth = DemandedMask.getBitWidth();
106  const Type *VTy = V->getType();
107  assert((TD || !isa<PointerType>(VTy)) &&
108         "SimplifyDemandedBits needs to know bit widths!");
109  assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
110         (!VTy->isIntOrIntVector() ||
111          VTy->getScalarSizeInBits() == BitWidth) &&
112         KnownZero.getBitWidth() == BitWidth &&
113         KnownOne.getBitWidth() == BitWidth &&
114         "Value *V, DemandedMask, KnownZero and KnownOne "
115         "must have same BitWidth");
116  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
117    // We know all of the bits for a constant!
118    KnownOne = CI->getValue() & DemandedMask;
119    KnownZero = ~KnownOne & DemandedMask;
120    return 0;
121  }
122  if (isa<ConstantPointerNull>(V)) {
123    // We know all of the bits for a constant!
124    KnownOne.clear();
125    KnownZero = DemandedMask;
126    return 0;
127  }
128
129  KnownZero.clear();
130  KnownOne.clear();
131  if (DemandedMask == 0) {   // Not demanding any bits from V.
132    if (isa<UndefValue>(V))
133      return 0;
134    return UndefValue::get(VTy);
135  }
136
137  if (Depth == 6)        // Limit search depth.
138    return 0;
139
140  APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
141  APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne;
142
143  Instruction *I = dyn_cast<Instruction>(V);
144  if (!I) {
145    ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
146    return 0;        // Only analyze instructions.
147  }
148
149  // If there are multiple uses of this value and we aren't at the root, then
150  // we can't do any simplifications of the operands, because DemandedMask
151  // only reflects the bits demanded by *one* of the users.
152  if (Depth != 0 && !I->hasOneUse()) {
153    // Despite the fact that we can't simplify this instruction in all User's
154    // context, we can at least compute the knownzero/knownone bits, and we can
155    // do simplifications that apply to *just* the one user if we know that
156    // this instruction has a simpler value in that context.
157    if (I->getOpcode() == Instruction::And) {
158      // If either the LHS or the RHS are Zero, the result is zero.
159      ComputeMaskedBits(I->getOperand(1), DemandedMask,
160                        RHSKnownZero, RHSKnownOne, Depth+1);
161      ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero,
162                        LHSKnownZero, LHSKnownOne, Depth+1);
163
164      // If all of the demanded bits are known 1 on one side, return the other.
165      // These bits cannot contribute to the result of the 'and' in this
166      // context.
167      if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
168          (DemandedMask & ~LHSKnownZero))
169        return I->getOperand(0);
170      if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
171          (DemandedMask & ~RHSKnownZero))
172        return I->getOperand(1);
173
174      // If all of the demanded bits in the inputs are known zeros, return zero.
175      if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
176        return Constant::getNullValue(VTy);
177
178    } else if (I->getOpcode() == Instruction::Or) {
179      // We can simplify (X|Y) -> X or Y in the user's context if we know that
180      // only bits from X or Y are demanded.
181
182      // If either the LHS or the RHS are One, the result is One.
183      ComputeMaskedBits(I->getOperand(1), DemandedMask,
184                        RHSKnownZero, RHSKnownOne, Depth+1);
185      ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne,
186                        LHSKnownZero, LHSKnownOne, Depth+1);
187
188      // If all of the demanded bits are known zero on one side, return the
189      // other.  These bits cannot contribute to the result of the 'or' in this
190      // context.
191      if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
192          (DemandedMask & ~LHSKnownOne))
193        return I->getOperand(0);
194      if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
195          (DemandedMask & ~RHSKnownOne))
196        return I->getOperand(1);
197
198      // If all of the potentially set bits on one side are known to be set on
199      // the other side, just use the 'other' side.
200      if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
201          (DemandedMask & (~RHSKnownZero)))
202        return I->getOperand(0);
203      if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
204          (DemandedMask & (~LHSKnownZero)))
205        return I->getOperand(1);
206    }
207
208    // Compute the KnownZero/KnownOne bits to simplify things downstream.
209    ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
210    return 0;
211  }
212
213  // If this is the root being simplified, allow it to have multiple uses,
214  // just set the DemandedMask to all bits so that we can try to simplify the
215  // operands.  This allows visitTruncInst (for example) to simplify the
216  // operand of a trunc without duplicating all the logic below.
217  if (Depth == 0 && !V->hasOneUse())
218    DemandedMask = APInt::getAllOnesValue(BitWidth);
219
220  switch (I->getOpcode()) {
221  default:
222    ComputeMaskedBits(I, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
223    break;
224  case Instruction::And:
225    // If either the LHS or the RHS are Zero, the result is zero.
226    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
227                             RHSKnownZero, RHSKnownOne, Depth+1) ||
228        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
229                             LHSKnownZero, LHSKnownOne, Depth+1))
230      return I;
231    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
232    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
233
234    // If all of the demanded bits are known 1 on one side, return the other.
235    // These bits cannot contribute to the result of the 'and'.
236    if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
237        (DemandedMask & ~LHSKnownZero))
238      return I->getOperand(0);
239    if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
240        (DemandedMask & ~RHSKnownZero))
241      return I->getOperand(1);
242
243    // If all of the demanded bits in the inputs are known zeros, return zero.
244    if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
245      return Constant::getNullValue(VTy);
246
247    // If the RHS is a constant, see if we can simplify it.
248    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
249      return I;
250
251    // Output known-1 bits are only known if set in both the LHS & RHS.
252    RHSKnownOne &= LHSKnownOne;
253    // Output known-0 are known to be clear if zero in either the LHS | RHS.
254    RHSKnownZero |= LHSKnownZero;
255    break;
256  case Instruction::Or:
257    // If either the LHS or the RHS are One, the result is One.
258    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
259                             RHSKnownZero, RHSKnownOne, Depth+1) ||
260        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
261                             LHSKnownZero, LHSKnownOne, Depth+1))
262      return I;
263    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
264    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
265
266    // If all of the demanded bits are known zero on one side, return the other.
267    // These bits cannot contribute to the result of the 'or'.
268    if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
269        (DemandedMask & ~LHSKnownOne))
270      return I->getOperand(0);
271    if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
272        (DemandedMask & ~RHSKnownOne))
273      return I->getOperand(1);
274
275    // If all of the potentially set bits on one side are known to be set on
276    // the other side, just use the 'other' side.
277    if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
278        (DemandedMask & (~RHSKnownZero)))
279      return I->getOperand(0);
280    if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
281        (DemandedMask & (~LHSKnownZero)))
282      return I->getOperand(1);
283
284    // If the RHS is a constant, see if we can simplify it.
285    if (ShrinkDemandedConstant(I, 1, DemandedMask))
286      return I;
287
288    // Output known-0 bits are only known if clear in both the LHS & RHS.
289    RHSKnownZero &= LHSKnownZero;
290    // Output known-1 are known to be set if set in either the LHS | RHS.
291    RHSKnownOne |= LHSKnownOne;
292    break;
293  case Instruction::Xor: {
294    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
295                             RHSKnownZero, RHSKnownOne, Depth+1) ||
296        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
297                             LHSKnownZero, LHSKnownOne, Depth+1))
298      return I;
299    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
300    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
301
302    // If all of the demanded bits are known zero on one side, return the other.
303    // These bits cannot contribute to the result of the 'xor'.
304    if ((DemandedMask & RHSKnownZero) == DemandedMask)
305      return I->getOperand(0);
306    if ((DemandedMask & LHSKnownZero) == DemandedMask)
307      return I->getOperand(1);
308
309    // Output known-0 bits are known if clear or set in both the LHS & RHS.
310    APInt KnownZeroOut = (RHSKnownZero & LHSKnownZero) |
311                         (RHSKnownOne & LHSKnownOne);
312    // Output known-1 are known to be set if set in only one of the LHS, RHS.
313    APInt KnownOneOut = (RHSKnownZero & LHSKnownOne) |
314                        (RHSKnownOne & LHSKnownZero);
315
316    // If all of the demanded bits are known to be zero on one side or the
317    // other, turn this into an *inclusive* or.
318    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
319    if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
320      Instruction *Or =
321        BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
322                                 I->getName());
323      return InsertNewInstBefore(Or, *I);
324    }
325
326    // If all of the demanded bits on one side are known, and all of the set
327    // bits on that side are also known to be set on the other side, turn this
328    // into an AND, as we know the bits will be cleared.
329    //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
330    if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
331      // all known
332      if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
333        Constant *AndC = Constant::getIntegerValue(VTy,
334                                                   ~RHSKnownOne & DemandedMask);
335        Instruction *And =
336          BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
337        return InsertNewInstBefore(And, *I);
338      }
339    }
340
341    // If the RHS is a constant, see if we can simplify it.
342    // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
343    if (ShrinkDemandedConstant(I, 1, DemandedMask))
344      return I;
345
346    // If our LHS is an 'and' and if it has one use, and if any of the bits we
347    // are flipping are known to be set, then the xor is just resetting those
348    // bits to zero.  We can just knock out bits from the 'and' and the 'xor',
349    // simplifying both of them.
350    if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
351      if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
352          isa<ConstantInt>(I->getOperand(1)) &&
353          isa<ConstantInt>(LHSInst->getOperand(1)) &&
354          (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
355        ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
356        ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
357        APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
358
359        Constant *AndC =
360          ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
361        Instruction *NewAnd =
362          BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
363        InsertNewInstBefore(NewAnd, *I);
364
365        Constant *XorC =
366          ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
367        Instruction *NewXor =
368          BinaryOperator::CreateXor(NewAnd, XorC, "tmp");
369        return InsertNewInstBefore(NewXor, *I);
370      }
371
372
373    RHSKnownZero = KnownZeroOut;
374    RHSKnownOne  = KnownOneOut;
375    break;
376  }
377  case Instruction::Select:
378    if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
379                             RHSKnownZero, RHSKnownOne, Depth+1) ||
380        SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
381                             LHSKnownZero, LHSKnownOne, Depth+1))
382      return I;
383    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
384    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
385
386    // If the operands are constants, see if we can simplify them.
387    if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
388        ShrinkDemandedConstant(I, 2, DemandedMask))
389      return I;
390
391    // Only known if known in both the LHS and RHS.
392    RHSKnownOne &= LHSKnownOne;
393    RHSKnownZero &= LHSKnownZero;
394    break;
395  case Instruction::Trunc: {
396    unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
397    DemandedMask.zext(truncBf);
398    RHSKnownZero.zext(truncBf);
399    RHSKnownOne.zext(truncBf);
400    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
401                             RHSKnownZero, RHSKnownOne, Depth+1))
402      return I;
403    DemandedMask.trunc(BitWidth);
404    RHSKnownZero.trunc(BitWidth);
405    RHSKnownOne.trunc(BitWidth);
406    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
407    break;
408  }
409  case Instruction::BitCast:
410    if (!I->getOperand(0)->getType()->isIntOrIntVector())
411      return false;  // vector->int or fp->int?
412
413    if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
414      if (const VectorType *SrcVTy =
415            dyn_cast<VectorType>(I->getOperand(0)->getType())) {
416        if (DstVTy->getNumElements() != SrcVTy->getNumElements())
417          // Don't touch a bitcast between vectors of different element counts.
418          return false;
419      } else
420        // Don't touch a scalar-to-vector bitcast.
421        return false;
422    } else if (isa<VectorType>(I->getOperand(0)->getType()))
423      // Don't touch a vector-to-scalar bitcast.
424      return false;
425
426    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
427                             RHSKnownZero, RHSKnownOne, Depth+1))
428      return I;
429    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
430    break;
431  case Instruction::ZExt: {
432    // Compute the bits in the result that are not present in the input.
433    unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
434
435    DemandedMask.trunc(SrcBitWidth);
436    RHSKnownZero.trunc(SrcBitWidth);
437    RHSKnownOne.trunc(SrcBitWidth);
438    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
439                             RHSKnownZero, RHSKnownOne, Depth+1))
440      return I;
441    DemandedMask.zext(BitWidth);
442    RHSKnownZero.zext(BitWidth);
443    RHSKnownOne.zext(BitWidth);
444    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
445    // The top bits are known to be zero.
446    RHSKnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
447    break;
448  }
449  case Instruction::SExt: {
450    // Compute the bits in the result that are not present in the input.
451    unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
452
453    APInt InputDemandedBits = DemandedMask &
454                              APInt::getLowBitsSet(BitWidth, SrcBitWidth);
455
456    APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
457    // If any of the sign extended bits are demanded, we know that the sign
458    // bit is demanded.
459    if ((NewBits & DemandedMask) != 0)
460      InputDemandedBits.set(SrcBitWidth-1);
461
462    InputDemandedBits.trunc(SrcBitWidth);
463    RHSKnownZero.trunc(SrcBitWidth);
464    RHSKnownOne.trunc(SrcBitWidth);
465    if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
466                             RHSKnownZero, RHSKnownOne, Depth+1))
467      return I;
468    InputDemandedBits.zext(BitWidth);
469    RHSKnownZero.zext(BitWidth);
470    RHSKnownOne.zext(BitWidth);
471    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
472
473    // If the sign bit of the input is known set or clear, then we know the
474    // top bits of the result.
475
476    // If the input sign bit is known zero, or if the NewBits are not demanded
477    // convert this into a zero extension.
478    if (RHSKnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
479      // Convert to ZExt cast
480      CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
481      return InsertNewInstBefore(NewCast, *I);
482    } else if (RHSKnownOne[SrcBitWidth-1]) {    // Input sign bit known set
483      RHSKnownOne |= NewBits;
484    }
485    break;
486  }
487  case Instruction::Add: {
488    // Figure out what the input bits are.  If the top bits of the and result
489    // are not demanded, then the add doesn't demand them from its input
490    // either.
491    unsigned NLZ = DemandedMask.countLeadingZeros();
492
493    // If there is a constant on the RHS, there are a variety of xformations
494    // we can do.
495    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
496      // If null, this should be simplified elsewhere.  Some of the xforms here
497      // won't work if the RHS is zero.
498      if (RHS->isZero())
499        break;
500
501      // If the top bit of the output is demanded, demand everything from the
502      // input.  Otherwise, we demand all the input bits except NLZ top bits.
503      APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
504
505      // Find information about known zero/one bits in the input.
506      if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
507                               LHSKnownZero, LHSKnownOne, Depth+1))
508        return I;
509
510      // If the RHS of the add has bits set that can't affect the input, reduce
511      // the constant.
512      if (ShrinkDemandedConstant(I, 1, InDemandedBits))
513        return I;
514
515      // Avoid excess work.
516      if (LHSKnownZero == 0 && LHSKnownOne == 0)
517        break;
518
519      // Turn it into OR if input bits are zero.
520      if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
521        Instruction *Or =
522          BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
523                                   I->getName());
524        return InsertNewInstBefore(Or, *I);
525      }
526
527      // We can say something about the output known-zero and known-one bits,
528      // depending on potential carries from the input constant and the
529      // unknowns.  For example if the LHS is known to have at most the 0x0F0F0
530      // bits set and the RHS constant is 0x01001, then we know we have a known
531      // one mask of 0x00001 and a known zero mask of 0xE0F0E.
532
533      // To compute this, we first compute the potential carry bits.  These are
534      // the bits which may be modified.  I'm not aware of a better way to do
535      // this scan.
536      const APInt &RHSVal = RHS->getValue();
537      APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
538
539      // Now that we know which bits have carries, compute the known-1/0 sets.
540
541      // Bits are known one if they are known zero in one operand and one in the
542      // other, and there is no input carry.
543      RHSKnownOne = ((LHSKnownZero & RHSVal) |
544                     (LHSKnownOne & ~RHSVal)) & ~CarryBits;
545
546      // Bits are known zero if they are known zero in both operands and there
547      // is no input carry.
548      RHSKnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
549    } else {
550      // If the high-bits of this ADD are not demanded, then it does not demand
551      // the high bits of its LHS or RHS.
552      if (DemandedMask[BitWidth-1] == 0) {
553        // Right fill the mask of bits for this ADD to demand the most
554        // significant bit and all those below it.
555        APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
556        if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
557                                 LHSKnownZero, LHSKnownOne, Depth+1) ||
558            SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
559                                 LHSKnownZero, LHSKnownOne, Depth+1))
560          return I;
561      }
562    }
563    break;
564  }
565  case Instruction::Sub:
566    // If the high-bits of this SUB are not demanded, then it does not demand
567    // the high bits of its LHS or RHS.
568    if (DemandedMask[BitWidth-1] == 0) {
569      // Right fill the mask of bits for this SUB to demand the most
570      // significant bit and all those below it.
571      uint32_t NLZ = DemandedMask.countLeadingZeros();
572      APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
573      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
574                               LHSKnownZero, LHSKnownOne, Depth+1) ||
575          SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
576                               LHSKnownZero, LHSKnownOne, Depth+1))
577        return I;
578    }
579    // Otherwise just hand the sub off to ComputeMaskedBits to fill in
580    // the known zeros and ones.
581    ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
582    break;
583  case Instruction::Shl:
584    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
585      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
586      APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
587      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
588                               RHSKnownZero, RHSKnownOne, Depth+1))
589        return I;
590      assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
591      RHSKnownZero <<= ShiftAmt;
592      RHSKnownOne  <<= ShiftAmt;
593      // low bits known zero.
594      if (ShiftAmt)
595        RHSKnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
596    }
597    break;
598  case Instruction::LShr:
599    // For a logical shift right
600    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
601      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
602
603      // Unsigned shift right.
604      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
605      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
606                               RHSKnownZero, RHSKnownOne, Depth+1))
607        return I;
608      assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
609      RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt);
610      RHSKnownOne  = APIntOps::lshr(RHSKnownOne, ShiftAmt);
611      if (ShiftAmt) {
612        // Compute the new bits that are at the top now.
613        APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
614        RHSKnownZero |= HighBits;  // high bits known zero.
615      }
616    }
617    break;
618  case Instruction::AShr:
619    // If this is an arithmetic shift right and only the low-bit is set, we can
620    // always convert this into a logical shr, even if the shift amount is
621    // variable.  The low bit of the shift cannot be an input sign bit unless
622    // the shift amount is >= the size of the datatype, which is undefined.
623    if (DemandedMask == 1) {
624      // Perform the logical shift right.
625      Instruction *NewVal = BinaryOperator::CreateLShr(
626                        I->getOperand(0), I->getOperand(1), I->getName());
627      return InsertNewInstBefore(NewVal, *I);
628    }
629
630    // If the sign bit is the only bit demanded by this ashr, then there is no
631    // need to do it, the shift doesn't change the high bit.
632    if (DemandedMask.isSignBit())
633      return I->getOperand(0);
634
635    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
636      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth);
637
638      // Signed shift right.
639      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
640      // If any of the "high bits" are demanded, we should set the sign bit as
641      // demanded.
642      if (DemandedMask.countLeadingZeros() <= ShiftAmt)
643        DemandedMaskIn.set(BitWidth-1);
644      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
645                               RHSKnownZero, RHSKnownOne, Depth+1))
646        return I;
647      assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
648      // Compute the new bits that are at the top now.
649      APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
650      RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt);
651      RHSKnownOne  = APIntOps::lshr(RHSKnownOne, ShiftAmt);
652
653      // Handle the sign bits.
654      APInt SignBit(APInt::getSignBit(BitWidth));
655      // Adjust to where it is now in the mask.
656      SignBit = APIntOps::lshr(SignBit, ShiftAmt);
657
658      // If the input sign bit is known to be zero, or if none of the top bits
659      // are demanded, turn this into an unsigned shift right.
660      if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] ||
661          (HighBits & ~DemandedMask) == HighBits) {
662        // Perform the logical shift right.
663        Instruction *NewVal = BinaryOperator::CreateLShr(
664                          I->getOperand(0), SA, I->getName());
665        return InsertNewInstBefore(NewVal, *I);
666      } else if ((RHSKnownOne & SignBit) != 0) { // New bits are known one.
667        RHSKnownOne |= HighBits;
668      }
669    }
670    break;
671  case Instruction::SRem:
672    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
673      APInt RA = Rem->getValue().abs();
674      if (RA.isPowerOf2()) {
675        if (DemandedMask.ult(RA))    // srem won't affect demanded bits
676          return I->getOperand(0);
677
678        APInt LowBits = RA - 1;
679        APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
680        if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
681                                 LHSKnownZero, LHSKnownOne, Depth+1))
682          return I;
683
684        if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
685          LHSKnownZero |= ~LowBits;
686
687        KnownZero |= LHSKnownZero & DemandedMask;
688
689        assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
690      }
691    }
692    break;
693  case Instruction::URem: {
694    APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
695    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
696    if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
697                             KnownZero2, KnownOne2, Depth+1) ||
698        SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
699                             KnownZero2, KnownOne2, Depth+1))
700      return I;
701
702    unsigned Leaders = KnownZero2.countLeadingOnes();
703    Leaders = std::max(Leaders,
704                       KnownZero2.countLeadingOnes());
705    KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
706    break;
707  }
708  case Instruction::Call:
709    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
710      switch (II->getIntrinsicID()) {
711      default: break;
712      case Intrinsic::bswap: {
713        // If the only bits demanded come from one byte of the bswap result,
714        // just shift the input byte into position to eliminate the bswap.
715        unsigned NLZ = DemandedMask.countLeadingZeros();
716        unsigned NTZ = DemandedMask.countTrailingZeros();
717
718        // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
719        // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
720        // have 14 leading zeros, round to 8.
721        NLZ &= ~7;
722        NTZ &= ~7;
723        // If we need exactly one byte, we can do this transformation.
724        if (BitWidth-NLZ-NTZ == 8) {
725          unsigned ResultBit = NTZ;
726          unsigned InputBit = BitWidth-NTZ-8;
727
728          // Replace this with either a left or right shift to get the byte into
729          // the right place.
730          Instruction *NewVal;
731          if (InputBit > ResultBit)
732            NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
733                    ConstantInt::get(I->getType(), InputBit-ResultBit));
734          else
735            NewVal = BinaryOperator::CreateShl(I->getOperand(1),
736                    ConstantInt::get(I->getType(), ResultBit-InputBit));
737          NewVal->takeName(I);
738          return InsertNewInstBefore(NewVal, *I);
739        }
740
741        // TODO: Could compute known zero/one bits based on the input.
742        break;
743      }
744      }
745    }
746    ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
747    break;
748  }
749
750  // If the client is only demanding bits that we know, return the known
751  // constant.
752  if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask)
753    return Constant::getIntegerValue(VTy, RHSKnownOne);
754  return false;
755}
756
757
758/// SimplifyDemandedVectorElts - The specified value produces a vector with
759/// any number of elements. DemandedElts contains the set of elements that are
760/// actually used by the caller.  This method analyzes which elements of the
761/// operand are undef and returns that information in UndefElts.
762///
763/// If the information about demanded elements can be used to simplify the
764/// operation, the operation is simplified, then the resultant value is
765/// returned.  This returns null if no change was made.
766Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
767                                                APInt& UndefElts,
768                                                unsigned Depth) {
769  unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
770  APInt EltMask(APInt::getAllOnesValue(VWidth));
771  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
772
773  if (isa<UndefValue>(V)) {
774    // If the entire vector is undefined, just return this info.
775    UndefElts = EltMask;
776    return 0;
777  } else if (DemandedElts == 0) { // If nothing is demanded, provide undef.
778    UndefElts = EltMask;
779    return UndefValue::get(V->getType());
780  }
781
782  UndefElts = 0;
783  if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) {
784    const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
785    Constant *Undef = UndefValue::get(EltTy);
786
787    std::vector<Constant*> Elts;
788    for (unsigned i = 0; i != VWidth; ++i)
789      if (!DemandedElts[i]) {   // If not demanded, set to undef.
790        Elts.push_back(Undef);
791        UndefElts.set(i);
792      } else if (isa<UndefValue>(CP->getOperand(i))) {   // Already undef.
793        Elts.push_back(Undef);
794        UndefElts.set(i);
795      } else {                               // Otherwise, defined.
796        Elts.push_back(CP->getOperand(i));
797      }
798
799    // If we changed the constant, return it.
800    Constant *NewCP = ConstantVector::get(Elts);
801    return NewCP != CP ? NewCP : 0;
802  } else if (isa<ConstantAggregateZero>(V)) {
803    // Simplify the CAZ to a ConstantVector where the non-demanded elements are
804    // set to undef.
805
806    // Check if this is identity. If so, return 0 since we are not simplifying
807    // anything.
808    if (DemandedElts == ((1ULL << VWidth) -1))
809      return 0;
810
811    const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
812    Constant *Zero = Constant::getNullValue(EltTy);
813    Constant *Undef = UndefValue::get(EltTy);
814    std::vector<Constant*> Elts;
815    for (unsigned i = 0; i != VWidth; ++i) {
816      Constant *Elt = DemandedElts[i] ? Zero : Undef;
817      Elts.push_back(Elt);
818    }
819    UndefElts = DemandedElts ^ EltMask;
820    return ConstantVector::get(Elts);
821  }
822
823  // Limit search depth.
824  if (Depth == 10)
825    return 0;
826
827  // If multiple users are using the root value, procede with
828  // simplification conservatively assuming that all elements
829  // are needed.
830  if (!V->hasOneUse()) {
831    // Quit if we find multiple users of a non-root value though.
832    // They'll be handled when it's their turn to be visited by
833    // the main instcombine process.
834    if (Depth != 0)
835      // TODO: Just compute the UndefElts information recursively.
836      return 0;
837
838    // Conservatively assume that all elements are needed.
839    DemandedElts = EltMask;
840  }
841
842  Instruction *I = dyn_cast<Instruction>(V);
843  if (!I) return 0;        // Only analyze instructions.
844
845  bool MadeChange = false;
846  APInt UndefElts2(VWidth, 0);
847  Value *TmpV;
848  switch (I->getOpcode()) {
849  default: break;
850
851  case Instruction::InsertElement: {
852    // If this is a variable index, we don't know which element it overwrites.
853    // demand exactly the same input as we produce.
854    ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
855    if (Idx == 0) {
856      // Note that we can't propagate undef elt info, because we don't know
857      // which elt is getting updated.
858      TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
859                                        UndefElts2, Depth+1);
860      if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
861      break;
862    }
863
864    // If this is inserting an element that isn't demanded, remove this
865    // insertelement.
866    unsigned IdxNo = Idx->getZExtValue();
867    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
868      Worklist.Add(I);
869      return I->getOperand(0);
870    }
871
872    // Otherwise, the element inserted overwrites whatever was there, so the
873    // input demanded set is simpler than the output set.
874    APInt DemandedElts2 = DemandedElts;
875    DemandedElts2.clear(IdxNo);
876    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
877                                      UndefElts, Depth+1);
878    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
879
880    // The inserted element is defined.
881    UndefElts.clear(IdxNo);
882    break;
883  }
884  case Instruction::ShuffleVector: {
885    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
886    uint64_t LHSVWidth =
887      cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
888    APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
889    for (unsigned i = 0; i < VWidth; i++) {
890      if (DemandedElts[i]) {
891        unsigned MaskVal = Shuffle->getMaskValue(i);
892        if (MaskVal != -1u) {
893          assert(MaskVal < LHSVWidth * 2 &&
894                 "shufflevector mask index out of range!");
895          if (MaskVal < LHSVWidth)
896            LeftDemanded.set(MaskVal);
897          else
898            RightDemanded.set(MaskVal - LHSVWidth);
899        }
900      }
901    }
902
903    APInt UndefElts4(LHSVWidth, 0);
904    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
905                                      UndefElts4, Depth+1);
906    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
907
908    APInt UndefElts3(LHSVWidth, 0);
909    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
910                                      UndefElts3, Depth+1);
911    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
912
913    bool NewUndefElts = false;
914    for (unsigned i = 0; i < VWidth; i++) {
915      unsigned MaskVal = Shuffle->getMaskValue(i);
916      if (MaskVal == -1u) {
917        UndefElts.set(i);
918      } else if (MaskVal < LHSVWidth) {
919        if (UndefElts4[MaskVal]) {
920          NewUndefElts = true;
921          UndefElts.set(i);
922        }
923      } else {
924        if (UndefElts3[MaskVal - LHSVWidth]) {
925          NewUndefElts = true;
926          UndefElts.set(i);
927        }
928      }
929    }
930
931    if (NewUndefElts) {
932      // Add additional discovered undefs.
933      std::vector<Constant*> Elts;
934      for (unsigned i = 0; i < VWidth; ++i) {
935        if (UndefElts[i])
936          Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
937        else
938          Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
939                                          Shuffle->getMaskValue(i)));
940      }
941      I->setOperand(2, ConstantVector::get(Elts));
942      MadeChange = true;
943    }
944    break;
945  }
946  case Instruction::BitCast: {
947    // Vector->vector casts only.
948    const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
949    if (!VTy) break;
950    unsigned InVWidth = VTy->getNumElements();
951    APInt InputDemandedElts(InVWidth, 0);
952    unsigned Ratio;
953
954    if (VWidth == InVWidth) {
955      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
956      // elements as are demanded of us.
957      Ratio = 1;
958      InputDemandedElts = DemandedElts;
959    } else if (VWidth > InVWidth) {
960      // Untested so far.
961      break;
962
963      // If there are more elements in the result than there are in the source,
964      // then an input element is live if any of the corresponding output
965      // elements are live.
966      Ratio = VWidth/InVWidth;
967      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
968        if (DemandedElts[OutIdx])
969          InputDemandedElts.set(OutIdx/Ratio);
970      }
971    } else {
972      // Untested so far.
973      break;
974
975      // If there are more elements in the source than there are in the result,
976      // then an input element is live if the corresponding output element is
977      // live.
978      Ratio = InVWidth/VWidth;
979      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
980        if (DemandedElts[InIdx/Ratio])
981          InputDemandedElts.set(InIdx);
982    }
983
984    // div/rem demand all inputs, because they don't want divide by zero.
985    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
986                                      UndefElts2, Depth+1);
987    if (TmpV) {
988      I->setOperand(0, TmpV);
989      MadeChange = true;
990    }
991
992    UndefElts = UndefElts2;
993    if (VWidth > InVWidth) {
994      llvm_unreachable("Unimp");
995      // If there are more elements in the result than there are in the source,
996      // then an output element is undef if the corresponding input element is
997      // undef.
998      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
999        if (UndefElts2[OutIdx/Ratio])
1000          UndefElts.set(OutIdx);
1001    } else if (VWidth < InVWidth) {
1002      llvm_unreachable("Unimp");
1003      // If there are more elements in the source than there are in the result,
1004      // then a result element is undef if all of the corresponding input
1005      // elements are undef.
1006      UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
1007      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
1008        if (!UndefElts2[InIdx])            // Not undef?
1009          UndefElts.clear(InIdx/Ratio);    // Clear undef bit.
1010    }
1011    break;
1012  }
1013  case Instruction::And:
1014  case Instruction::Or:
1015  case Instruction::Xor:
1016  case Instruction::Add:
1017  case Instruction::Sub:
1018  case Instruction::Mul:
1019    // div/rem demand all inputs, because they don't want divide by zero.
1020    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
1021                                      UndefElts, Depth+1);
1022    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
1023    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
1024                                      UndefElts2, Depth+1);
1025    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
1026
1027    // Output elements are undefined if both are undefined.  Consider things
1028    // like undef&0.  The result is known zero, not undef.
1029    UndefElts &= UndefElts2;
1030    break;
1031
1032  case Instruction::Call: {
1033    IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
1034    if (!II) break;
1035    switch (II->getIntrinsicID()) {
1036    default: break;
1037
1038    // Binary vector operations that work column-wise.  A dest element is a
1039    // function of the corresponding input elements from the two inputs.
1040    case Intrinsic::x86_sse_sub_ss:
1041    case Intrinsic::x86_sse_mul_ss:
1042    case Intrinsic::x86_sse_min_ss:
1043    case Intrinsic::x86_sse_max_ss:
1044    case Intrinsic::x86_sse2_sub_sd:
1045    case Intrinsic::x86_sse2_mul_sd:
1046    case Intrinsic::x86_sse2_min_sd:
1047    case Intrinsic::x86_sse2_max_sd:
1048      TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
1049                                        UndefElts, Depth+1);
1050      if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; }
1051      TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts,
1052                                        UndefElts2, Depth+1);
1053      if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; }
1054
1055      // If only the low elt is demanded and this is a scalarizable intrinsic,
1056      // scalarize it now.
1057      if (DemandedElts == 1) {
1058        switch (II->getIntrinsicID()) {
1059        default: break;
1060        case Intrinsic::x86_sse_sub_ss:
1061        case Intrinsic::x86_sse_mul_ss:
1062        case Intrinsic::x86_sse2_sub_sd:
1063        case Intrinsic::x86_sse2_mul_sd:
1064          // TODO: Lower MIN/MAX/ABS/etc
1065          Value *LHS = II->getOperand(1);
1066          Value *RHS = II->getOperand(2);
1067          // Extract the element as scalars.
1068          LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS,
1069            ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
1070          RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS,
1071            ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
1072
1073          switch (II->getIntrinsicID()) {
1074          default: llvm_unreachable("Case stmts out of sync!");
1075          case Intrinsic::x86_sse_sub_ss:
1076          case Intrinsic::x86_sse2_sub_sd:
1077            TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS,
1078                                                        II->getName()), *II);
1079            break;
1080          case Intrinsic::x86_sse_mul_ss:
1081          case Intrinsic::x86_sse2_mul_sd:
1082            TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS,
1083                                                         II->getName()), *II);
1084            break;
1085          }
1086
1087          Instruction *New =
1088            InsertElementInst::Create(
1089              UndefValue::get(II->getType()), TmpV,
1090              ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
1091                                      II->getName());
1092          InsertNewInstBefore(New, *II);
1093          return New;
1094        }
1095      }
1096
1097      // Output elements are undefined if both are undefined.  Consider things
1098      // like undef&0.  The result is known zero, not undef.
1099      UndefElts &= UndefElts2;
1100      break;
1101    }
1102    break;
1103  }
1104  }
1105  return MadeChange ? I : 0;
1106}
1107