1//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains logic for simplifying instructions based on information
11// about how they are used.
12//
13//===----------------------------------------------------------------------===//
14
15#include "InstCombineInternal.h"
16#include "llvm/IR/IntrinsicInst.h"
17#include "llvm/IR/PatternMatch.h"
18
19using namespace llvm;
20using namespace llvm::PatternMatch;
21
22#define DEBUG_TYPE "instcombine"
23
24/// ShrinkDemandedConstant - Check to see if the specified operand of the
25/// specified instruction is a constant integer.  If so, check to see if there
26/// are any bits set in the constant that are not demanded.  If so, shrink the
27/// constant and return true.
28static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
29                                   APInt Demanded) {
30  assert(I && "No instruction?");
31  assert(OpNo < I->getNumOperands() && "Operand index too large");
32
33  // If the operand is not a constant integer, nothing to do.
34  ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
35  if (!OpC) return false;
36
37  // If there are no bits set that aren't demanded, nothing to do.
38  Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
39  if ((~Demanded & OpC->getValue()) == 0)
40    return false;
41
42  // This instruction is producing bits that are not demanded. Shrink the RHS.
43  Demanded &= OpC->getValue();
44  I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
45
46  // If either 'nsw' or 'nuw' is set and the constant is negative,
47  // removing *any* bits from the constant could make overflow occur.
48  // Remove 'nsw' and 'nuw' from the instruction in this case.
49  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
50    assert(OBO->getOpcode() == Instruction::Add);
51    if (OBO->hasNoSignedWrap() || OBO->hasNoUnsignedWrap()) {
52      if (OpC->getValue().isNegative()) {
53        cast<BinaryOperator>(OBO)->setHasNoSignedWrap(false);
54        cast<BinaryOperator>(OBO)->setHasNoUnsignedWrap(false);
55      }
56    }
57  }
58
59  return true;
60}
61
62
63
64/// SimplifyDemandedInstructionBits - Inst is an integer instruction that
65/// SimplifyDemandedBits knows about.  See if the instruction has any
66/// properties that allow us to simplify its operands.
67bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
68  unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
69  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
70  APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
71
72  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
73                                     0, &Inst);
74  if (!V) return false;
75  if (V == &Inst) return true;
76  ReplaceInstUsesWith(Inst, V);
77  return true;
78}
79
80/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
81/// specified instruction operand if possible, updating it in place.  It returns
82/// true if it made any change and false otherwise.
83bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
84                                        APInt &KnownZero, APInt &KnownOne,
85                                        unsigned Depth) {
86  Value *NewVal =
87      SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, KnownOne, Depth,
88                              dyn_cast<Instruction>(U.getUser()));
89  if (!NewVal) return false;
90  U = NewVal;
91  return true;
92}
93
94
95/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
96/// value based on the demanded bits.  When this function is called, it is known
97/// that only the bits set in DemandedMask of the result of V are ever used
98/// downstream. Consequently, depending on the mask and V, it may be possible
99/// to replace V with a constant or one of its operands. In such cases, this
100/// function does the replacement and returns true. In all other cases, it
101/// returns false after analyzing the expression and setting KnownOne and known
102/// to be one in the expression.  KnownZero contains all the bits that are known
103/// to be zero in the expression. These are provided to potentially allow the
104/// caller (which might recursively be SimplifyDemandedBits itself) to simplify
105/// the expression. KnownOne and KnownZero always follow the invariant that
106/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
107/// the bits in KnownOne and KnownZero may only be accurate for those bits set
108/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
109/// and KnownOne must all be the same.
110///
111/// This returns null if it did not change anything and it permits no
112/// simplification.  This returns V itself if it did some simplification of V's
113/// operands based on the information about what bits are demanded. This returns
114/// some other non-null value if it found out that V is equal to another value
115/// in the context where the specified bits are demanded, but not for all users.
116Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
117                                             APInt &KnownZero, APInt &KnownOne,
118                                             unsigned Depth,
119                                             Instruction *CxtI) {
120  assert(V != nullptr && "Null pointer of Value???");
121  assert(Depth <= 6 && "Limit Search Depth");
122  uint32_t BitWidth = DemandedMask.getBitWidth();
123  Type *VTy = V->getType();
124  assert(
125      (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
126      KnownZero.getBitWidth() == BitWidth &&
127      KnownOne.getBitWidth() == BitWidth &&
128      "Value *V, DemandedMask, KnownZero and KnownOne "
129      "must have same BitWidth");
130  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
131    // We know all of the bits for a constant!
132    KnownOne = CI->getValue() & DemandedMask;
133    KnownZero = ~KnownOne & DemandedMask;
134    return nullptr;
135  }
136  if (isa<ConstantPointerNull>(V)) {
137    // We know all of the bits for a constant!
138    KnownOne.clearAllBits();
139    KnownZero = DemandedMask;
140    return nullptr;
141  }
142
143  KnownZero.clearAllBits();
144  KnownOne.clearAllBits();
145  if (DemandedMask == 0) {   // Not demanding any bits from V.
146    if (isa<UndefValue>(V))
147      return nullptr;
148    return UndefValue::get(VTy);
149  }
150
151  if (Depth == 6)        // Limit search depth.
152    return nullptr;
153
154  APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
155  APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
156
157  Instruction *I = dyn_cast<Instruction>(V);
158  if (!I) {
159    computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
160    return nullptr;        // Only analyze instructions.
161  }
162
163  // If there are multiple uses of this value and we aren't at the root, then
164  // we can't do any simplifications of the operands, because DemandedMask
165  // only reflects the bits demanded by *one* of the users.
166  if (Depth != 0 && !I->hasOneUse()) {
167    // Despite the fact that we can't simplify this instruction in all User's
168    // context, we can at least compute the knownzero/knownone bits, and we can
169    // do simplifications that apply to *just* the one user if we know that
170    // this instruction has a simpler value in that context.
171    if (I->getOpcode() == Instruction::And) {
172      // If either the LHS or the RHS are Zero, the result is zero.
173      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
174                       CxtI);
175      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
176                       CxtI);
177
178      // If all of the demanded bits are known 1 on one side, return the other.
179      // These bits cannot contribute to the result of the 'and' in this
180      // context.
181      if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
182          (DemandedMask & ~LHSKnownZero))
183        return I->getOperand(0);
184      if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
185          (DemandedMask & ~RHSKnownZero))
186        return I->getOperand(1);
187
188      // If all of the demanded bits in the inputs are known zeros, return zero.
189      if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
190        return Constant::getNullValue(VTy);
191
192    } else if (I->getOpcode() == Instruction::Or) {
193      // We can simplify (X|Y) -> X or Y in the user's context if we know that
194      // only bits from X or Y are demanded.
195
196      // If either the LHS or the RHS are One, the result is One.
197      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
198                       CxtI);
199      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
200                       CxtI);
201
202      // If all of the demanded bits are known zero on one side, return the
203      // other.  These bits cannot contribute to the result of the 'or' in this
204      // context.
205      if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
206          (DemandedMask & ~LHSKnownOne))
207        return I->getOperand(0);
208      if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
209          (DemandedMask & ~RHSKnownOne))
210        return I->getOperand(1);
211
212      // If all of the potentially set bits on one side are known to be set on
213      // the other side, just use the 'other' side.
214      if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
215          (DemandedMask & (~RHSKnownZero)))
216        return I->getOperand(0);
217      if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
218          (DemandedMask & (~LHSKnownZero)))
219        return I->getOperand(1);
220    } else if (I->getOpcode() == Instruction::Xor) {
221      // We can simplify (X^Y) -> X or Y in the user's context if we know that
222      // only bits from X or Y are demanded.
223
224      computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
225                       CxtI);
226      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
227                       CxtI);
228
229      // If all of the demanded bits are known zero on one side, return the
230      // other.
231      if ((DemandedMask & RHSKnownZero) == DemandedMask)
232        return I->getOperand(0);
233      if ((DemandedMask & LHSKnownZero) == DemandedMask)
234        return I->getOperand(1);
235    }
236
237    // Compute the KnownZero/KnownOne bits to simplify things downstream.
238    computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
239    return nullptr;
240  }
241
242  // If this is the root being simplified, allow it to have multiple uses,
243  // just set the DemandedMask to all bits so that we can try to simplify the
244  // operands.  This allows visitTruncInst (for example) to simplify the
245  // operand of a trunc without duplicating all the logic below.
246  if (Depth == 0 && !V->hasOneUse())
247    DemandedMask = APInt::getAllOnesValue(BitWidth);
248
249  switch (I->getOpcode()) {
250  default:
251    computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
252    break;
253  case Instruction::And:
254    // If either the LHS or the RHS are Zero, the result is zero.
255    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
256                             RHSKnownOne, Depth + 1) ||
257        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
258                             LHSKnownZero, LHSKnownOne, Depth + 1))
259      return I;
260    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
261    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
262
263    // If the client is only demanding bits that we know, return the known
264    // constant.
265    if ((DemandedMask & ((RHSKnownZero | LHSKnownZero)|
266                         (RHSKnownOne & LHSKnownOne))) == DemandedMask)
267      return Constant::getIntegerValue(VTy, RHSKnownOne & LHSKnownOne);
268
269    // If all of the demanded bits are known 1 on one side, return the other.
270    // These bits cannot contribute to the result of the 'and'.
271    if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
272        (DemandedMask & ~LHSKnownZero))
273      return I->getOperand(0);
274    if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
275        (DemandedMask & ~RHSKnownZero))
276      return I->getOperand(1);
277
278    // If all of the demanded bits in the inputs are known zeros, return zero.
279    if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
280      return Constant::getNullValue(VTy);
281
282    // If the RHS is a constant, see if we can simplify it.
283    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
284      return I;
285
286    // Output known-1 bits are only known if set in both the LHS & RHS.
287    KnownOne = RHSKnownOne & LHSKnownOne;
288    // Output known-0 are known to be clear if zero in either the LHS | RHS.
289    KnownZero = RHSKnownZero | LHSKnownZero;
290    break;
291  case Instruction::Or:
292    // If either the LHS or the RHS are One, the result is One.
293    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
294                             RHSKnownOne, Depth + 1) ||
295        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
296                             LHSKnownZero, LHSKnownOne, Depth + 1))
297      return I;
298    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
299    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
300
301    // If the client is only demanding bits that we know, return the known
302    // constant.
303    if ((DemandedMask & ((RHSKnownZero & LHSKnownZero)|
304                         (RHSKnownOne | LHSKnownOne))) == DemandedMask)
305      return Constant::getIntegerValue(VTy, RHSKnownOne | LHSKnownOne);
306
307    // If all of the demanded bits are known zero on one side, return the other.
308    // These bits cannot contribute to the result of the 'or'.
309    if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
310        (DemandedMask & ~LHSKnownOne))
311      return I->getOperand(0);
312    if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
313        (DemandedMask & ~RHSKnownOne))
314      return I->getOperand(1);
315
316    // If all of the potentially set bits on one side are known to be set on
317    // the other side, just use the 'other' side.
318    if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
319        (DemandedMask & (~RHSKnownZero)))
320      return I->getOperand(0);
321    if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
322        (DemandedMask & (~LHSKnownZero)))
323      return I->getOperand(1);
324
325    // If the RHS is a constant, see if we can simplify it.
326    if (ShrinkDemandedConstant(I, 1, DemandedMask))
327      return I;
328
329    // Output known-0 bits are only known if clear in both the LHS & RHS.
330    KnownZero = RHSKnownZero & LHSKnownZero;
331    // Output known-1 are known to be set if set in either the LHS | RHS.
332    KnownOne = RHSKnownOne | LHSKnownOne;
333    break;
334  case Instruction::Xor: {
335    if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
336                             RHSKnownOne, Depth + 1) ||
337        SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero,
338                             LHSKnownOne, Depth + 1))
339      return I;
340    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
341    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
342
343    // Output known-0 bits are known if clear or set in both the LHS & RHS.
344    APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
345                       (RHSKnownOne & LHSKnownOne);
346    // Output known-1 are known to be set if set in only one of the LHS, RHS.
347    APInt IKnownOne =  (RHSKnownZero & LHSKnownOne) |
348                       (RHSKnownOne & LHSKnownZero);
349
350    // If the client is only demanding bits that we know, return the known
351    // constant.
352    if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
353      return Constant::getIntegerValue(VTy, IKnownOne);
354
355    // If all of the demanded bits are known zero on one side, return the other.
356    // These bits cannot contribute to the result of the 'xor'.
357    if ((DemandedMask & RHSKnownZero) == DemandedMask)
358      return I->getOperand(0);
359    if ((DemandedMask & LHSKnownZero) == DemandedMask)
360      return I->getOperand(1);
361
362    // If all of the demanded bits are known to be zero on one side or the
363    // other, turn this into an *inclusive* or.
364    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
365    if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
366      Instruction *Or =
367        BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
368                                 I->getName());
369      return InsertNewInstWith(Or, *I);
370    }
371
372    // If all of the demanded bits on one side are known, and all of the set
373    // bits on that side are also known to be set on the other side, turn this
374    // into an AND, as we know the bits will be cleared.
375    //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
376    if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
377      // all known
378      if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
379        Constant *AndC = Constant::getIntegerValue(VTy,
380                                                   ~RHSKnownOne & DemandedMask);
381        Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
382        return InsertNewInstWith(And, *I);
383      }
384    }
385
386    // If the RHS is a constant, see if we can simplify it.
387    // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
388    if (ShrinkDemandedConstant(I, 1, DemandedMask))
389      return I;
390
391    // If our LHS is an 'and' and if it has one use, and if any of the bits we
392    // are flipping are known to be set, then the xor is just resetting those
393    // bits to zero.  We can just knock out bits from the 'and' and the 'xor',
394    // simplifying both of them.
395    if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
396      if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
397          isa<ConstantInt>(I->getOperand(1)) &&
398          isa<ConstantInt>(LHSInst->getOperand(1)) &&
399          (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
400        ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
401        ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
402        APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
403
404        Constant *AndC =
405          ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
406        Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
407        InsertNewInstWith(NewAnd, *I);
408
409        Constant *XorC =
410          ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
411        Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
412        return InsertNewInstWith(NewXor, *I);
413      }
414
415    // Output known-0 bits are known if clear or set in both the LHS & RHS.
416    KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
417    // Output known-1 are known to be set if set in only one of the LHS, RHS.
418    KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
419    break;
420  }
421  case Instruction::Select:
422    if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
423                             RHSKnownOne, Depth + 1) ||
424        SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
425                             LHSKnownOne, Depth + 1))
426      return I;
427    assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
428    assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
429
430    // If the operands are constants, see if we can simplify them.
431    if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
432        ShrinkDemandedConstant(I, 2, DemandedMask))
433      return I;
434
435    // Only known if known in both the LHS and RHS.
436    KnownOne = RHSKnownOne & LHSKnownOne;
437    KnownZero = RHSKnownZero & LHSKnownZero;
438    break;
439  case Instruction::Trunc: {
440    unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
441    DemandedMask = DemandedMask.zext(truncBf);
442    KnownZero = KnownZero.zext(truncBf);
443    KnownOne = KnownOne.zext(truncBf);
444    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
445                             KnownOne, Depth + 1))
446      return I;
447    DemandedMask = DemandedMask.trunc(BitWidth);
448    KnownZero = KnownZero.trunc(BitWidth);
449    KnownOne = KnownOne.trunc(BitWidth);
450    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
451    break;
452  }
453  case Instruction::BitCast:
454    if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
455      return nullptr;  // vector->int or fp->int?
456
457    if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
458      if (VectorType *SrcVTy =
459            dyn_cast<VectorType>(I->getOperand(0)->getType())) {
460        if (DstVTy->getNumElements() != SrcVTy->getNumElements())
461          // Don't touch a bitcast between vectors of different element counts.
462          return nullptr;
463      } else
464        // Don't touch a scalar-to-vector bitcast.
465        return nullptr;
466    } else if (I->getOperand(0)->getType()->isVectorTy())
467      // Don't touch a vector-to-scalar bitcast.
468      return nullptr;
469
470    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
471                             KnownOne, Depth + 1))
472      return I;
473    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
474    break;
475  case Instruction::ZExt: {
476    // Compute the bits in the result that are not present in the input.
477    unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
478
479    DemandedMask = DemandedMask.trunc(SrcBitWidth);
480    KnownZero = KnownZero.trunc(SrcBitWidth);
481    KnownOne = KnownOne.trunc(SrcBitWidth);
482    if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
483                             KnownOne, Depth + 1))
484      return I;
485    DemandedMask = DemandedMask.zext(BitWidth);
486    KnownZero = KnownZero.zext(BitWidth);
487    KnownOne = KnownOne.zext(BitWidth);
488    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
489    // The top bits are known to be zero.
490    KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
491    break;
492  }
493  case Instruction::SExt: {
494    // Compute the bits in the result that are not present in the input.
495    unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
496
497    APInt InputDemandedBits = DemandedMask &
498                              APInt::getLowBitsSet(BitWidth, SrcBitWidth);
499
500    APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
501    // If any of the sign extended bits are demanded, we know that the sign
502    // bit is demanded.
503    if ((NewBits & DemandedMask) != 0)
504      InputDemandedBits.setBit(SrcBitWidth-1);
505
506    InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
507    KnownZero = KnownZero.trunc(SrcBitWidth);
508    KnownOne = KnownOne.trunc(SrcBitWidth);
509    if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero,
510                             KnownOne, Depth + 1))
511      return I;
512    InputDemandedBits = InputDemandedBits.zext(BitWidth);
513    KnownZero = KnownZero.zext(BitWidth);
514    KnownOne = KnownOne.zext(BitWidth);
515    assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
516
517    // If the sign bit of the input is known set or clear, then we know the
518    // top bits of the result.
519
520    // If the input sign bit is known zero, or if the NewBits are not demanded
521    // convert this into a zero extension.
522    if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
523      // Convert to ZExt cast
524      CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
525      return InsertNewInstWith(NewCast, *I);
526    } else if (KnownOne[SrcBitWidth-1]) {    // Input sign bit known set
527      KnownOne |= NewBits;
528    }
529    break;
530  }
531  case Instruction::Add: {
532    // Figure out what the input bits are.  If the top bits of the and result
533    // are not demanded, then the add doesn't demand them from its input
534    // either.
535    unsigned NLZ = DemandedMask.countLeadingZeros();
536
537    // If there is a constant on the RHS, there are a variety of xformations
538    // we can do.
539    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
540      // If null, this should be simplified elsewhere.  Some of the xforms here
541      // won't work if the RHS is zero.
542      if (RHS->isZero())
543        break;
544
545      // If the top bit of the output is demanded, demand everything from the
546      // input.  Otherwise, we demand all the input bits except NLZ top bits.
547      APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
548
549      // Find information about known zero/one bits in the input.
550      if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
551                               LHSKnownZero, LHSKnownOne, Depth + 1))
552        return I;
553
554      // If the RHS of the add has bits set that can't affect the input, reduce
555      // the constant.
556      if (ShrinkDemandedConstant(I, 1, InDemandedBits))
557        return I;
558
559      // Avoid excess work.
560      if (LHSKnownZero == 0 && LHSKnownOne == 0)
561        break;
562
563      // Turn it into OR if input bits are zero.
564      if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
565        Instruction *Or =
566          BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
567                                   I->getName());
568        return InsertNewInstWith(Or, *I);
569      }
570
571      // We can say something about the output known-zero and known-one bits,
572      // depending on potential carries from the input constant and the
573      // unknowns.  For example if the LHS is known to have at most the 0x0F0F0
574      // bits set and the RHS constant is 0x01001, then we know we have a known
575      // one mask of 0x00001 and a known zero mask of 0xE0F0E.
576
577      // To compute this, we first compute the potential carry bits.  These are
578      // the bits which may be modified.  I'm not aware of a better way to do
579      // this scan.
580      const APInt &RHSVal = RHS->getValue();
581      APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
582
583      // Now that we know which bits have carries, compute the known-1/0 sets.
584
585      // Bits are known one if they are known zero in one operand and one in the
586      // other, and there is no input carry.
587      KnownOne = ((LHSKnownZero & RHSVal) |
588                  (LHSKnownOne & ~RHSVal)) & ~CarryBits;
589
590      // Bits are known zero if they are known zero in both operands and there
591      // is no input carry.
592      KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
593    } else {
594      // If the high-bits of this ADD are not demanded, then it does not demand
595      // the high bits of its LHS or RHS.
596      if (DemandedMask[BitWidth-1] == 0) {
597        // Right fill the mask of bits for this ADD to demand the most
598        // significant bit and all those below it.
599        APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
600        if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
601                                 LHSKnownZero, LHSKnownOne, Depth + 1) ||
602            SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
603                                 LHSKnownZero, LHSKnownOne, Depth + 1))
604          return I;
605      }
606    }
607    break;
608  }
609  case Instruction::Sub:
610    // If the high-bits of this SUB are not demanded, then it does not demand
611    // the high bits of its LHS or RHS.
612    if (DemandedMask[BitWidth-1] == 0) {
613      // Right fill the mask of bits for this SUB to demand the most
614      // significant bit and all those below it.
615      uint32_t NLZ = DemandedMask.countLeadingZeros();
616      APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
617      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
618                               LHSKnownZero, LHSKnownOne, Depth + 1) ||
619          SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
620                               LHSKnownZero, LHSKnownOne, Depth + 1))
621        return I;
622    }
623
624    // Otherwise just hand the sub off to computeKnownBits to fill in
625    // the known zeros and ones.
626    computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
627
628    // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
629    // zero.
630    if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
631      APInt I0 = C0->getValue();
632      if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
633        Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
634        return InsertNewInstWith(Xor, *I);
635      }
636    }
637    break;
638  case Instruction::Shl:
639    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
640      {
641        Value *VarX; ConstantInt *C1;
642        if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
643          Instruction *Shr = cast<Instruction>(I->getOperand(0));
644          Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
645                                                KnownZero, KnownOne);
646          if (R)
647            return R;
648        }
649      }
650
651      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
652      APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
653
654      // If the shift is NUW/NSW, then it does demand the high bits.
655      ShlOperator *IOp = cast<ShlOperator>(I);
656      if (IOp->hasNoSignedWrap())
657        DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
658      else if (IOp->hasNoUnsignedWrap())
659        DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
660
661      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
662                               KnownOne, Depth + 1))
663        return I;
664      assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
665      KnownZero <<= ShiftAmt;
666      KnownOne  <<= ShiftAmt;
667      // low bits known zero.
668      if (ShiftAmt)
669        KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
670    }
671    break;
672  case Instruction::LShr:
673    // For a logical shift right
674    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
675      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
676
677      // Unsigned shift right.
678      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
679
680      // If the shift is exact, then it does demand the low bits (and knows that
681      // they are zero).
682      if (cast<LShrOperator>(I)->isExact())
683        DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
684
685      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
686                               KnownOne, Depth + 1))
687        return I;
688      assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
689      KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
690      KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
691      if (ShiftAmt) {
692        // Compute the new bits that are at the top now.
693        APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
694        KnownZero |= HighBits;  // high bits known zero.
695      }
696    }
697    break;
698  case Instruction::AShr:
699    // If this is an arithmetic shift right and only the low-bit is set, we can
700    // always convert this into a logical shr, even if the shift amount is
701    // variable.  The low bit of the shift cannot be an input sign bit unless
702    // the shift amount is >= the size of the datatype, which is undefined.
703    if (DemandedMask == 1) {
704      // Perform the logical shift right.
705      Instruction *NewVal = BinaryOperator::CreateLShr(
706                        I->getOperand(0), I->getOperand(1), I->getName());
707      return InsertNewInstWith(NewVal, *I);
708    }
709
710    // If the sign bit is the only bit demanded by this ashr, then there is no
711    // need to do it, the shift doesn't change the high bit.
712    if (DemandedMask.isSignBit())
713      return I->getOperand(0);
714
715    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
716      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
717
718      // Signed shift right.
719      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
720      // If any of the "high bits" are demanded, we should set the sign bit as
721      // demanded.
722      if (DemandedMask.countLeadingZeros() <= ShiftAmt)
723        DemandedMaskIn.setBit(BitWidth-1);
724
725      // If the shift is exact, then it does demand the low bits (and knows that
726      // they are zero).
727      if (cast<AShrOperator>(I)->isExact())
728        DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
729
730      if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
731                               KnownOne, Depth + 1))
732        return I;
733      assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
734      // Compute the new bits that are at the top now.
735      APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
736      KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
737      KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
738
739      // Handle the sign bits.
740      APInt SignBit(APInt::getSignBit(BitWidth));
741      // Adjust to where it is now in the mask.
742      SignBit = APIntOps::lshr(SignBit, ShiftAmt);
743
744      // If the input sign bit is known to be zero, or if none of the top bits
745      // are demanded, turn this into an unsigned shift right.
746      if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
747          (HighBits & ~DemandedMask) == HighBits) {
748        // Perform the logical shift right.
749        BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
750                                                            SA, I->getName());
751        NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
752        return InsertNewInstWith(NewVal, *I);
753      } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
754        KnownOne |= HighBits;
755      }
756    }
757    break;
758  case Instruction::SRem:
759    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
760      // X % -1 demands all the bits because we don't want to introduce
761      // INT_MIN % -1 (== undef) by accident.
762      if (Rem->isAllOnesValue())
763        break;
764      APInt RA = Rem->getValue().abs();
765      if (RA.isPowerOf2()) {
766        if (DemandedMask.ult(RA))    // srem won't affect demanded bits
767          return I->getOperand(0);
768
769        APInt LowBits = RA - 1;
770        APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
771        if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero,
772                                 LHSKnownOne, Depth + 1))
773          return I;
774
775        // The low bits of LHS are unchanged by the srem.
776        KnownZero = LHSKnownZero & LowBits;
777        KnownOne = LHSKnownOne & LowBits;
778
779        // If LHS is non-negative or has all low bits zero, then the upper bits
780        // are all zero.
781        if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
782          KnownZero |= ~LowBits;
783
784        // If LHS is negative and not all low bits are zero, then the upper bits
785        // are all one.
786        if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
787          KnownOne |= ~LowBits;
788
789        assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
790      }
791    }
792
793    // The sign bit is the LHS's sign bit, except when the result of the
794    // remainder is zero.
795    if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
796      APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
797      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
798                       CxtI);
799      // If it's known zero, our sign bit is also zero.
800      if (LHSKnownZero.isNegative())
801        KnownZero.setBit(KnownZero.getBitWidth() - 1);
802    }
803    break;
804  case Instruction::URem: {
805    APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
806    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
807    if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2,
808                             KnownOne2, Depth + 1) ||
809        SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2,
810                             KnownOne2, Depth + 1))
811      return I;
812
813    unsigned Leaders = KnownZero2.countLeadingOnes();
814    Leaders = std::max(Leaders,
815                       KnownZero2.countLeadingOnes());
816    KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
817    break;
818  }
819  case Instruction::Call:
820    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
821      switch (II->getIntrinsicID()) {
822      default: break;
823      case Intrinsic::bswap: {
824        // If the only bits demanded come from one byte of the bswap result,
825        // just shift the input byte into position to eliminate the bswap.
826        unsigned NLZ = DemandedMask.countLeadingZeros();
827        unsigned NTZ = DemandedMask.countTrailingZeros();
828
829        // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
830        // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
831        // have 14 leading zeros, round to 8.
832        NLZ &= ~7;
833        NTZ &= ~7;
834        // If we need exactly one byte, we can do this transformation.
835        if (BitWidth-NLZ-NTZ == 8) {
836          unsigned ResultBit = NTZ;
837          unsigned InputBit = BitWidth-NTZ-8;
838
839          // Replace this with either a left or right shift to get the byte into
840          // the right place.
841          Instruction *NewVal;
842          if (InputBit > ResultBit)
843            NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
844                    ConstantInt::get(I->getType(), InputBit-ResultBit));
845          else
846            NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
847                    ConstantInt::get(I->getType(), ResultBit-InputBit));
848          NewVal->takeName(I);
849          return InsertNewInstWith(NewVal, *I);
850        }
851
852        // TODO: Could compute known zero/one bits based on the input.
853        break;
854      }
855      case Intrinsic::x86_sse42_crc32_64_64:
856        KnownZero = APInt::getHighBitsSet(64, 32);
857        return nullptr;
858      }
859    }
860    computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
861    break;
862  }
863
864  // If the client is only demanding bits that we know, return the known
865  // constant.
866  if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
867    return Constant::getIntegerValue(VTy, KnownOne);
868  return nullptr;
869}
870
871/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
872/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
873/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
874/// of "C2-C1".
875///
876/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
877/// ..., bn}, without considering the specific value X is holding.
878/// This transformation is legal iff one of following conditions is hold:
879///  1) All the bit in S are 0, in this case E1 == E2.
880///  2) We don't care those bits in S, per the input DemandedMask.
881///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
882///     rest bits.
883///
884/// Currently we only test condition 2).
885///
886/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
887/// not successful.
888Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
889  Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
890
891  const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
892  const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
893  if (!ShlOp1 || !ShrOp1)
894      return nullptr; // Noop.
895
896  Value *VarX = Shr->getOperand(0);
897  Type *Ty = VarX->getType();
898  unsigned BitWidth = Ty->getIntegerBitWidth();
899  if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
900    return nullptr; // Undef.
901
902  unsigned ShlAmt = ShlOp1.getZExtValue();
903  unsigned ShrAmt = ShrOp1.getZExtValue();
904
905  KnownOne.clearAllBits();
906  KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
907  KnownZero &= DemandedMask;
908
909  APInt BitMask1(APInt::getAllOnesValue(BitWidth));
910  APInt BitMask2(APInt::getAllOnesValue(BitWidth));
911
912  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
913  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
914                      (BitMask1.ashr(ShrAmt) << ShlAmt);
915
916  if (ShrAmt <= ShlAmt) {
917    BitMask2 <<= (ShlAmt - ShrAmt);
918  } else {
919    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
920                        BitMask2.ashr(ShrAmt - ShlAmt);
921  }
922
923  // Check if condition-2 (see the comment to this function) is satified.
924  if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
925    if (ShrAmt == ShlAmt)
926      return VarX;
927
928    if (!Shr->hasOneUse())
929      return nullptr;
930
931    BinaryOperator *New;
932    if (ShrAmt < ShlAmt) {
933      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
934      New = BinaryOperator::CreateShl(VarX, Amt);
935      BinaryOperator *Orig = cast<BinaryOperator>(Shl);
936      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
937      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
938    } else {
939      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
940      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
941                     BinaryOperator::CreateAShr(VarX, Amt);
942      if (cast<BinaryOperator>(Shr)->isExact())
943        New->setIsExact(true);
944    }
945
946    return InsertNewInstWith(New, *Shl);
947  }
948
949  return nullptr;
950}
951
952/// SimplifyDemandedVectorElts - The specified value produces a vector with
953/// any number of elements. DemandedElts contains the set of elements that are
954/// actually used by the caller.  This method analyzes which elements of the
955/// operand are undef and returns that information in UndefElts.
956///
957/// If the information about demanded elements can be used to simplify the
958/// operation, the operation is simplified, then the resultant value is
959/// returned.  This returns null if no change was made.
960Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
961                                                APInt &UndefElts,
962                                                unsigned Depth) {
963  unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
964  APInt EltMask(APInt::getAllOnesValue(VWidth));
965  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
966
967  if (isa<UndefValue>(V)) {
968    // If the entire vector is undefined, just return this info.
969    UndefElts = EltMask;
970    return nullptr;
971  }
972
973  if (DemandedElts == 0) { // If nothing is demanded, provide undef.
974    UndefElts = EltMask;
975    return UndefValue::get(V->getType());
976  }
977
978  UndefElts = 0;
979
980  // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
981  if (Constant *C = dyn_cast<Constant>(V)) {
982    // Check if this is identity. If so, return 0 since we are not simplifying
983    // anything.
984    if (DemandedElts.isAllOnesValue())
985      return nullptr;
986
987    Type *EltTy = cast<VectorType>(V->getType())->getElementType();
988    Constant *Undef = UndefValue::get(EltTy);
989
990    SmallVector<Constant*, 16> Elts;
991    for (unsigned i = 0; i != VWidth; ++i) {
992      if (!DemandedElts[i]) {   // If not demanded, set to undef.
993        Elts.push_back(Undef);
994        UndefElts.setBit(i);
995        continue;
996      }
997
998      Constant *Elt = C->getAggregateElement(i);
999      if (!Elt) return nullptr;
1000
1001      if (isa<UndefValue>(Elt)) {   // Already undef.
1002        Elts.push_back(Undef);
1003        UndefElts.setBit(i);
1004      } else {                               // Otherwise, defined.
1005        Elts.push_back(Elt);
1006      }
1007    }
1008
1009    // If we changed the constant, return it.
1010    Constant *NewCV = ConstantVector::get(Elts);
1011    return NewCV != C ? NewCV : nullptr;
1012  }
1013
1014  // Limit search depth.
1015  if (Depth == 10)
1016    return nullptr;
1017
1018  // If multiple users are using the root value, proceed with
1019  // simplification conservatively assuming that all elements
1020  // are needed.
1021  if (!V->hasOneUse()) {
1022    // Quit if we find multiple users of a non-root value though.
1023    // They'll be handled when it's their turn to be visited by
1024    // the main instcombine process.
1025    if (Depth != 0)
1026      // TODO: Just compute the UndefElts information recursively.
1027      return nullptr;
1028
1029    // Conservatively assume that all elements are needed.
1030    DemandedElts = EltMask;
1031  }
1032
1033  Instruction *I = dyn_cast<Instruction>(V);
1034  if (!I) return nullptr;        // Only analyze instructions.
1035
1036  bool MadeChange = false;
1037  APInt UndefElts2(VWidth, 0);
1038  Value *TmpV;
1039  switch (I->getOpcode()) {
1040  default: break;
1041
1042  case Instruction::InsertElement: {
1043    // If this is a variable index, we don't know which element it overwrites.
1044    // demand exactly the same input as we produce.
1045    ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
1046    if (!Idx) {
1047      // Note that we can't propagate undef elt info, because we don't know
1048      // which elt is getting updated.
1049      TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
1050                                        UndefElts2, Depth + 1);
1051      if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
1052      break;
1053    }
1054
1055    // If this is inserting an element that isn't demanded, remove this
1056    // insertelement.
1057    unsigned IdxNo = Idx->getZExtValue();
1058    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
1059      Worklist.Add(I);
1060      return I->getOperand(0);
1061    }
1062
1063    // Otherwise, the element inserted overwrites whatever was there, so the
1064    // input demanded set is simpler than the output set.
1065    APInt DemandedElts2 = DemandedElts;
1066    DemandedElts2.clearBit(IdxNo);
1067    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
1068                                      UndefElts, Depth + 1);
1069    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
1070
1071    // The inserted element is defined.
1072    UndefElts.clearBit(IdxNo);
1073    break;
1074  }
1075  case Instruction::ShuffleVector: {
1076    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
1077    uint64_t LHSVWidth =
1078      cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
1079    APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
1080    for (unsigned i = 0; i < VWidth; i++) {
1081      if (DemandedElts[i]) {
1082        unsigned MaskVal = Shuffle->getMaskValue(i);
1083        if (MaskVal != -1u) {
1084          assert(MaskVal < LHSVWidth * 2 &&
1085                 "shufflevector mask index out of range!");
1086          if (MaskVal < LHSVWidth)
1087            LeftDemanded.setBit(MaskVal);
1088          else
1089            RightDemanded.setBit(MaskVal - LHSVWidth);
1090        }
1091      }
1092    }
1093
1094    APInt UndefElts4(LHSVWidth, 0);
1095    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
1096                                      UndefElts4, Depth + 1);
1097    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
1098
1099    APInt UndefElts3(LHSVWidth, 0);
1100    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
1101                                      UndefElts3, Depth + 1);
1102    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
1103
1104    bool NewUndefElts = false;
1105    for (unsigned i = 0; i < VWidth; i++) {
1106      unsigned MaskVal = Shuffle->getMaskValue(i);
1107      if (MaskVal == -1u) {
1108        UndefElts.setBit(i);
1109      } else if (!DemandedElts[i]) {
1110        NewUndefElts = true;
1111        UndefElts.setBit(i);
1112      } else if (MaskVal < LHSVWidth) {
1113        if (UndefElts4[MaskVal]) {
1114          NewUndefElts = true;
1115          UndefElts.setBit(i);
1116        }
1117      } else {
1118        if (UndefElts3[MaskVal - LHSVWidth]) {
1119          NewUndefElts = true;
1120          UndefElts.setBit(i);
1121        }
1122      }
1123    }
1124
1125    if (NewUndefElts) {
1126      // Add additional discovered undefs.
1127      SmallVector<Constant*, 16> Elts;
1128      for (unsigned i = 0; i < VWidth; ++i) {
1129        if (UndefElts[i])
1130          Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
1131        else
1132          Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
1133                                          Shuffle->getMaskValue(i)));
1134      }
1135      I->setOperand(2, ConstantVector::get(Elts));
1136      MadeChange = true;
1137    }
1138    break;
1139  }
1140  case Instruction::Select: {
1141    APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
1142    if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
1143      for (unsigned i = 0; i < VWidth; i++) {
1144        if (CV->getAggregateElement(i)->isNullValue())
1145          LeftDemanded.clearBit(i);
1146        else
1147          RightDemanded.clearBit(i);
1148      }
1149    }
1150
1151    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
1152                                      Depth + 1);
1153    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
1154
1155    TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
1156                                      UndefElts2, Depth + 1);
1157    if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
1158
1159    // Output elements are undefined if both are undefined.
1160    UndefElts &= UndefElts2;
1161    break;
1162  }
1163  case Instruction::BitCast: {
1164    // Vector->vector casts only.
1165    VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
1166    if (!VTy) break;
1167    unsigned InVWidth = VTy->getNumElements();
1168    APInt InputDemandedElts(InVWidth, 0);
1169    unsigned Ratio;
1170
1171    if (VWidth == InVWidth) {
1172      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
1173      // elements as are demanded of us.
1174      Ratio = 1;
1175      InputDemandedElts = DemandedElts;
1176    } else if (VWidth > InVWidth) {
1177      // Untested so far.
1178      break;
1179
1180      // If there are more elements in the result than there are in the source,
1181      // then an input element is live if any of the corresponding output
1182      // elements are live.
1183      Ratio = VWidth/InVWidth;
1184      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
1185        if (DemandedElts[OutIdx])
1186          InputDemandedElts.setBit(OutIdx/Ratio);
1187      }
1188    } else {
1189      // Untested so far.
1190      break;
1191
1192      // If there are more elements in the source than there are in the result,
1193      // then an input element is live if the corresponding output element is
1194      // live.
1195      Ratio = InVWidth/VWidth;
1196      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
1197        if (DemandedElts[InIdx/Ratio])
1198          InputDemandedElts.setBit(InIdx);
1199    }
1200
1201    // div/rem demand all inputs, because they don't want divide by zero.
1202    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
1203                                      UndefElts2, Depth + 1);
1204    if (TmpV) {
1205      I->setOperand(0, TmpV);
1206      MadeChange = true;
1207    }
1208
1209    UndefElts = UndefElts2;
1210    if (VWidth > InVWidth) {
1211      llvm_unreachable("Unimp");
1212      // If there are more elements in the result than there are in the source,
1213      // then an output element is undef if the corresponding input element is
1214      // undef.
1215      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1216        if (UndefElts2[OutIdx/Ratio])
1217          UndefElts.setBit(OutIdx);
1218    } else if (VWidth < InVWidth) {
1219      llvm_unreachable("Unimp");
1220      // If there are more elements in the source than there are in the result,
1221      // then a result element is undef if all of the corresponding input
1222      // elements are undef.
1223      UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
1224      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
1225        if (!UndefElts2[InIdx])            // Not undef?
1226          UndefElts.clearBit(InIdx/Ratio);    // Clear undef bit.
1227    }
1228    break;
1229  }
1230  case Instruction::And:
1231  case Instruction::Or:
1232  case Instruction::Xor:
1233  case Instruction::Add:
1234  case Instruction::Sub:
1235  case Instruction::Mul:
1236    // div/rem demand all inputs, because they don't want divide by zero.
1237    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
1238                                      Depth + 1);
1239    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
1240    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
1241                                      UndefElts2, Depth + 1);
1242    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
1243
1244    // Output elements are undefined if both are undefined.  Consider things
1245    // like undef&0.  The result is known zero, not undef.
1246    UndefElts &= UndefElts2;
1247    break;
1248  case Instruction::FPTrunc:
1249  case Instruction::FPExt:
1250    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
1251                                      Depth + 1);
1252    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
1253    break;
1254
1255  case Instruction::Call: {
1256    IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
1257    if (!II) break;
1258    switch (II->getIntrinsicID()) {
1259    default: break;
1260
1261    // Binary vector operations that work column-wise.  A dest element is a
1262    // function of the corresponding input elements from the two inputs.
1263    case Intrinsic::x86_sse_sub_ss:
1264    case Intrinsic::x86_sse_mul_ss:
1265    case Intrinsic::x86_sse_min_ss:
1266    case Intrinsic::x86_sse_max_ss:
1267    case Intrinsic::x86_sse2_sub_sd:
1268    case Intrinsic::x86_sse2_mul_sd:
1269    case Intrinsic::x86_sse2_min_sd:
1270    case Intrinsic::x86_sse2_max_sd:
1271      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
1272                                        UndefElts, Depth + 1);
1273      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
1274      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
1275                                        UndefElts2, Depth + 1);
1276      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
1277
1278      // If only the low elt is demanded and this is a scalarizable intrinsic,
1279      // scalarize it now.
1280      if (DemandedElts == 1) {
1281        switch (II->getIntrinsicID()) {
1282        default: break;
1283        case Intrinsic::x86_sse_sub_ss:
1284        case Intrinsic::x86_sse_mul_ss:
1285        case Intrinsic::x86_sse2_sub_sd:
1286        case Intrinsic::x86_sse2_mul_sd:
1287          // TODO: Lower MIN/MAX/ABS/etc
1288          Value *LHS = II->getArgOperand(0);
1289          Value *RHS = II->getArgOperand(1);
1290          // Extract the element as scalars.
1291          LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
1292            ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
1293          RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
1294            ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
1295
1296          switch (II->getIntrinsicID()) {
1297          default: llvm_unreachable("Case stmts out of sync!");
1298          case Intrinsic::x86_sse_sub_ss:
1299          case Intrinsic::x86_sse2_sub_sd:
1300            TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
1301                                                        II->getName()), *II);
1302            break;
1303          case Intrinsic::x86_sse_mul_ss:
1304          case Intrinsic::x86_sse2_mul_sd:
1305            TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
1306                                                         II->getName()), *II);
1307            break;
1308          }
1309
1310          Instruction *New =
1311            InsertElementInst::Create(
1312              UndefValue::get(II->getType()), TmpV,
1313              ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
1314                                      II->getName());
1315          InsertNewInstWith(New, *II);
1316          return New;
1317        }
1318      }
1319
1320      // Output elements are undefined if both are undefined.  Consider things
1321      // like undef&0.  The result is known zero, not undef.
1322      UndefElts &= UndefElts2;
1323      break;
1324    }
1325    break;
1326  }
1327  }
1328  return MadeChange ? I : nullptr;
1329}
1330