InstCombineCalls.cpp revision ce52bc53538df8e5412ec507f2da3661c991baf1
//===- InstCombineCalls.cpp -----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall and visitInvoke functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

/// getPromotedType - Return the specified type promoted as it would be to pass
/// through a va_arg area.
static Type *getPromotedType(Type *Ty) {
  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}

/// reduceToSingleValueType - Given an aggregate type which ultimately holds a
/// single scalar element, like {{{type}}} or [1 x type], return type.
static Type *reduceToSingleValueType(Type *T) {
  while (!T->isSingleValueType()) {
    if (StructType *STy = dyn_cast<StructType>(T)) {
      if (STy->getNumElements() == 1)
        T = STy->getElementType(0);
      else
        break;
    } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
      if (ATy->getNumElements() == 1)
        T = ATy->getElementType();
      else
        break;
    } else
      break;
  }

  return T;
}

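/// SimplifyMemTransfer - Lower a memcpy/memmove of a small constant length
/// (1, 2, 4, or 8 bytes) to a single load+store pair, using a better-typed
/// pointee when one can be recovered from the stripped destination pointer,
/// and bump the intrinsic's alignment when a larger one can be proven.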
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD);
  unsigned MinAlign = std::min(DstAlign, SrcAlign);
  unsigned CopyAlign = MI->getAlignment();

  if (CopyAlign < MinAlign) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      MinAlign, false));
    return MI;
  }

  // If the memcpy/memmove length is 1/2/4/8 bytes then replace it with a
  // load/store pair.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
  if (MemOpLength == 0) return 0;

  // Source and destination pointer types are always "i8*" for the intrinsic.
  // See if the size is something we can handle with a single primitive
  // load/store.  A single load+store correctly handles overlapping memory in
  // the memmove case.
  uint64_t Size = MemOpLength->getLimitedValue();
  assert(Size && "0-sized memory transferring should be removed already.");

  if (Size > 8 || (Size&(Size-1)))
    return 0;  // If not 1/2/4/8 bytes, exit.

  // Use an integer load+store unless we can find something better.
  unsigned SrcAddrSp =
    cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
  unsigned DstAddrSp =
    cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();

  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);

  // Memcpy forces the use of i8* for the source and destination.  That means
  // that if you're using memcpy to move one double around, you'll get a cast
  // from double* to i8*.  We'd much rather use a double load+store than an
  // i64 load+store here, because this improves the odds that the source or
  // dest address will be promotable.  See if we can find a better type than
  // the integer datatype.
  Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
  if (StrippedDest != MI->getArgOperand(0)) {
    Type *SrcETy =
      cast<PointerType>(StrippedDest->getType())->getElementType();
    if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
      // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
      // down through these levels if so.
      SrcETy = reduceToSingleValueType(SrcETy);

      if (SrcETy->isSingleValueType()) {
        NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
        NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
      }
    }
  }

  // If the memcpy/memmove provides better alignment info than we can
  // infer, use it.
  SrcAlign = std::max(SrcAlign, CopyAlign);
  DstAlign = std::max(DstAlign, CopyAlign);

  Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
  Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
  LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
  L->setAlignment(SrcAlign);
  StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
  S->setAlignment(DstAlign);

  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
  return MI;
}

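/// SimplifyMemSet - Lower a memset of a small constant length (1, 2, 4, or 8
/// bytes) to a single store of the splatted fill value, and bump the
/// intrinsic's alignment when a larger one can be proven.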
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
  unsigned Alignment = getKnownAlignment(MI->getDest(), TD);
  if (MI->getAlignment() < Alignment) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      Alignment, false));
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
    return 0;
  uint64_t Len = LenC->getLimitedValue();
  Alignment = MI->getAlignment();
  assert(Len && "0-sized memory setting should be removed already.");

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.

    Value *Dest = MI->getDest();
    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
    Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);
    // For memset, an alignment of 0 is equivalent to an alignment of 1, but
    // for a store it means the ABI alignment, so use 1 explicitly.
    if (Alignment == 0) Alignment = 1;

    // Extract the fill value and store.
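    // (Multiplying by 0x0101010101010101 splats the byte across 64 bits;
    // ConstantInt::get truncates it to the store width, e.g. 0xAB -> 0xABAB
    // for an i16 store.)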
    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
    StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
                                        MI->isVolatile());
    S->setAlignment(Alignment);

    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return 0;
}

/// visitCallInst - CallInst simplification.  This mostly only handles folding
/// of intrinsic instructions.  For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
  if (isFreeCall(&CI, TLI))
    return visitFree(CI);

  // If the caller function is nounwind, mark the call as nounwind, even if the
  // callee isn't.
  if (CI.getParent()->getParent()->doesNotThrow() &&
      !CI.doesNotThrow()) {
    CI.setDoesNotThrow();
    return &CI;
  }

  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
  if (!II) return visitCallSite(&CI);

  // Intrinsics cannot occur in an invoke, so handle them here instead of in
  // visitCallSite.
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
    bool Changed = false;

    // memmove/cpy/set of zero bytes is a noop.
    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
      if (NumBytes->isNullValue())
        return EraseInstFromFunction(CI);

      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
        if (CI->getZExtValue() == 1) {
          // Replace the instruction with just byte operations.  We would
          // transform other cases to loads/stores, but we don't know if
          // alignment is sufficient.
        }
    }

    // No other transformations apply to volatile transfers.
    if (MI->isVolatile())
      return 0;

    // If we have a memmove and the source operation is a constant global,
    // then the source and dest pointers can't alias, so we can change this
    // into a call to memcpy.
    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
        if (GVSrc->isConstant()) {
          Module *M = CI.getParent()->getParent()->getParent();
          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
                           CI.getArgOperand(1)->getType(),
                           CI.getArgOperand(2)->getType() };
          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
          Changed = true;
        }
    }

    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
      // memmove/memcpy(x,x,size) -> noop.
      if (MTI->getSource() == MTI->getDest())
        return EraseInstFromFunction(CI);
    }

    // If we can determine a pointer alignment that is bigger than currently
    // set, update the alignment.
    if (isa<MemTransferInst>(MI)) {
      if (Instruction *I = SimplifyMemTransfer(MI))
        return I;
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
      if (Instruction *I = SimplifyMemSet(MSI))
        return I;
    }

    if (Changed) return II;
  }

  switch (II->getIntrinsicID()) {
  default: break;
  case Intrinsic::objectsize: {
    uint64_t Size;
    if (getObjectSize(II->getArgOperand(0), Size, TD, TLI))
      return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
    return 0;
  }
  case Intrinsic::bswap:
    // bswap(bswap(x)) -> x
    if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
      if (Operand->getIntrinsicID() == Intrinsic::bswap)
        return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));

    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
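    // (Here c is the difference between the two bit widths; e.g. for an
    // i32 -> i16 truncation the result is trunc(lshr(x, 16)).)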
    if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
      if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
        if (Operand->getIntrinsicID() == Intrinsic::bswap) {
          unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
                       TI->getType()->getPrimitiveSizeInBits();
          Value *CV = ConstantInt::get(Operand->getType(), C);
          Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV);
          return new TruncInst(V, TI->getType());
        }
    }

    break;
  case Intrinsic::powi:
    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // powi(x, 0) -> 1.0
      if (Power->isZero())
        return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
      // powi(x, 1) -> x
      if (Power->isOne())
        return ReplaceInstUsesWith(CI, II->getArgOperand(0));
      // powi(x, -1) -> 1/x
      if (Power->isAllOnesValue())
        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
                                          II->getArgOperand(0));
    }
    break;
  case Intrinsic::cttz: {
    // If all bits below the first known one are known zero,
    // this value is constant.
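    // (For example, if the low three bits are known zero and bit 3 is known
    // one, cttz is exactly 3.)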
    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
    // FIXME: Try to simplify vectors of integers.
    if (!IT) break;
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
    unsigned TrailingZeros = KnownOne.countTrailingZeros();
    APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, TrailingZeros)));
    break;
  }
  case Intrinsic::ctlz: {
    // If all bits above the first known one are known zero,
    // this value is constant.
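    // (For example, in an i8 where bit 4 is the highest bit known one and
    // bits 5-7 are known zero, ctlz is exactly 3.)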
    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
    // FIXME: Try to simplify vectors of integers.
    if (!IT) break;
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
    unsigned LeadingZeros = KnownOne.countLeadingZeros();
    APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, LeadingZeros)));
    break;
  }
  case Intrinsic::uadd_with_overflow: {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
    uint32_t BitWidth = IT->getBitWidth();
    APInt LHSKnownZero(BitWidth, 0);
    APInt LHSKnownOne(BitWidth, 0);
    ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
    bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
    bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];

    if (LHSKnownNegative || LHSKnownPositive) {
      APInt RHSKnownZero(BitWidth, 0);
      APInt RHSKnownOne(BitWidth, 0);
      ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
      bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
      bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
      if (LHSKnownNegative && RHSKnownNegative) {
        // The sign bit is set in both cases: this MUST overflow.
        // Create a simple add instruction, and insert it into the struct.
        Value *Add = Builder->CreateAdd(LHS, RHS);
        Add->takeName(&CI);
        Constant *V[] = {
          UndefValue::get(LHS->getType()),
          ConstantInt::getTrue(II->getContext())
        };
        StructType *ST = cast<StructType>(II->getType());
        Constant *Struct = ConstantStruct::get(ST, V);
        return InsertValueInst::Create(Struct, Add, 0);
      }

      if (LHSKnownPositive && RHSKnownPositive) {
        // The sign bit is clear in both cases: this CANNOT overflow.
        // Create a simple add instruction, and insert it into the struct.
        Value *Add = Builder->CreateNUWAdd(LHS, RHS);
        Add->takeName(&CI);
        Constant *V[] = {
          UndefValue::get(LHS->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        StructType *ST = cast<StructType>(II->getType());
        Constant *Struct = ConstantStruct::get(ST, V);
        return InsertValueInst::Create(Struct, Add, 0);
      }
    }
  }
  // FALL THROUGH uadd into sadd
  case Intrinsic::sadd_with_overflow:
    // Canonicalize constants into the RHS.
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }

    // X + undef -> undef
    if (isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X + 0 -> {X, false}
      if (RHS->isZero()) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::usub_with_overflow:
  case Intrinsic::ssub_with_overflow:
    // undef - X -> undef
    // X - undef -> undef
    if (isa<UndefValue>(II->getArgOperand(0)) ||
        isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X - 0 -> {X, false}
      if (RHS->isZero()) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::umul_with_overflow: {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();

    APInt LHSKnownZero(BitWidth, 0);
    APInt LHSKnownOne(BitWidth, 0);
    ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
    APInt RHSKnownZero(BitWidth, 0);
    APInt RHSKnownOne(BitWidth, 0);
    ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);

    // Get the largest possible values for each operand.
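    // (Any bit not known to be zero may be one, so ~KnownZero is a safe
    // upper bound for each operand.)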
    APInt LHSMax = ~LHSKnownZero;
    APInt RHSMax = ~RHSKnownZero;

    // If multiplying the maximum values does not overflow then we can turn
    // this into a plain NUW mul.
    bool Overflow;
    LHSMax.umul_ov(RHSMax, Overflow);
    if (!Overflow) {
      Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
      Constant *V[] = {
        UndefValue::get(LHS->getType()),
        Builder->getFalse()
      };
      Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
      return InsertValueInst::Create(Struct, Mul, 0);
    }
  } // FALL THROUGH
  case Intrinsic::smul_with_overflow:
    // Canonicalize constants into the RHS.
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }

    // X * undef -> undef
    if (isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X*0 -> {0, false}
      if (RHSI->isZero())
        return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));

      // X * 1 -> {X, false}
      if (RHSI->equalsInt(1)) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                         PointerType::getUnqual(II->getType()));
      return new LoadInst(Ptr);
    }
    break;
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
      Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(0)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(II->getArgOperand(0), Ptr);
    }
    break;
  case Intrinsic::x86_sse_storeu_ps:
  case Intrinsic::x86_sse2_storeu_pd:
  case Intrinsic::x86_sse2_storeu_dq:
    // Turn X86 storeu -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
      Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(1)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
      return new StoreInst(II->getArgOperand(1), Ptr);
    }
    break;

  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64: {
    // These intrinsics only demand the 0th element of their input vectors. If
    // we can simplify the input based on that, do so now.
    unsigned VWidth =
      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    APInt DemandedElts(VWidth, 1);
    APInt UndefElts(VWidth, 0);
    if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
                                              DemandedElts, UndefElts)) {
      II->setArgOperand(0, V);
      return II;
    }
    break;
  }

  case Intrinsic::x86_sse41_pmovsxbw:
  case Intrinsic::x86_sse41_pmovsxwd:
  case Intrinsic::x86_sse41_pmovsxdq:
  case Intrinsic::x86_sse41_pmovzxbw:
  case Intrinsic::x86_sse41_pmovzxwd:
  case Intrinsic::x86_sse41_pmovzxdq: {
    // pmov{s|z}x instructions ignore the upper half of their input vectors.
    unsigned VWidth =
      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    unsigned LowHalfElts = VWidth / 2;
    APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
    APInt UndefElts(VWidth, 0);
    if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
                                                 InputDemandedElts,
                                                 UndefElts)) {
      II->setArgOperand(0, TmpV);
      return II;
    }
    break;
  }

  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
      assert(Mask->getType()->getVectorNumElements() == 16 &&
             "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        Constant *Elt = Mask->getAggregateElement(i);
        if (Elt == 0 ||
            !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
                                            Mask->getType());
        Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
                                            Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getAggregateElement(i)))
            continue;
          unsigned Idx =
            cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
          Idx &= 31;  // Match the hardware behavior.

          if (ExtractedElts[Idx] == 0) {
            ExtractedElts[Idx] =
              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
                                            Builder->getInt32(Idx&15));
          }

          // Insert this value into the result vector.
          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
                                                Builder->getInt32(i));
        }
        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
      }
    }
    break;

  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD);
    unsigned AlignArg = II->getNumArgOperands() - 1;
    ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
    if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
      II->setArgOperand(AlignArg,
                        ConstantInt::get(Type::getInt32Ty(II->getContext()),
                                         MemAlign, false));
      return II;
    }
    break;
  }

  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu: {
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);

    // Handle mul by zero first:
    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
      return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
    }

    // Check for constant LHS & RHS - in this case we just simplify.
    bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu);
    VectorType *NewVT = cast<VectorType>(II->getType());
    unsigned NewWidth = NewVT->getElementType()->getIntegerBitWidth();
    if (ConstantDataVector *CV0 = dyn_cast<ConstantDataVector>(Arg0)) {
      if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
        VectorType* VT = cast<VectorType>(CV0->getType());
        SmallVector<Constant*, 4> NewElems;
        for (unsigned i = 0; i < VT->getNumElements(); ++i) {
          APInt CV0E =
            (cast<ConstantInt>(CV0->getAggregateElement(i)))->getValue();
          CV0E = Zext ? CV0E.zext(NewWidth) : CV0E.sext(NewWidth);
          APInt CV1E =
            (cast<ConstantInt>(CV1->getAggregateElement(i)))->getValue();
          CV1E = Zext ? CV1E.zext(NewWidth) : CV1E.sext(NewWidth);
          NewElems.push_back(
            ConstantInt::get(NewVT->getElementType(), CV0E * CV1E));
        }
        return ReplaceInstUsesWith(CI, ConstantVector::get(NewElems));
      }

      // Couldn't simplify - canonicalize constant to the RHS.
      std::swap(Arg0, Arg1);
    }

    // Handle mul by one:
    if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
      if (ConstantInt *Splat =
            dyn_cast_or_null<ConstantInt>(CV1->getSplatValue())) {
        if (Splat->isOne()) {
          if (Zext)
            return CastInst::CreateZExtOrBitCast(Arg0, II->getType());
          // else
          return CastInst::CreateSExtOrBitCast(Arg0, II->getType());
        }
      }
    }

    break;
  }

  case Intrinsic::stackrestore: {
    // If the save is right next to the restore, remove the restore.  This can
    // happen when variable allocas are DCE'd.
    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
        BasicBlock::iterator BI = SS;
        if (&*++BI == II)
          return EraseInstFromFunction(CI);
      }
    }

    // Scan down this block to see if there is another stack restore in the
    // same block without an intervening call/alloca.
    BasicBlock::iterator BI = II;
    TerminatorInst *TI = II->getParent()->getTerminator();
    bool CannotRemove = false;
    for (++BI; &*BI != TI; ++BI) {
      if (isa<AllocaInst>(BI)) {
        CannotRemove = true;
        break;
      }
      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
          // If there is a stackrestore below this one, remove this one.
          if (II->getIntrinsicID() == Intrinsic::stackrestore)
            return EraseInstFromFunction(CI);
          // Otherwise, ignore the intrinsic.
        } else {
          // If we found a non-intrinsic call, we can't remove the stack
          // restore.
          CannotRemove = true;
          break;
        }
      }
    }

    // If the stack restore is in a return or resume block and there are no
    // allocas or calls between the restore and the terminator, nuke the
    // restore.
    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
      return EraseInstFromFunction(CI);
    break;
  }
  }

  return visitCallSite(II);
}

// InvokeInst simplification
//
Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
  return visitCallSite(&II);
}

/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
                                         const CastInst * const CI,
                                         const TargetData * const TD,
                                         const int ix) {
  if (!CI->isLosslessCast())
    return false;

  // The size of ByVal arguments is derived from the type, so we
  // can't change to a type with a different size.  If the size were
  // passed explicitly we could avoid this check.
  if (!CS.isByValArgument(ix))
    return true;

  Type* SrcTy =
    cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
  if (!SrcTy->isSized() || !DstTy->isSized())
    return false;
  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
    return false;
  return true;
}

namespace {
class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls {
  InstCombiner *IC;
protected:
  void replaceCall(Value *With) {
    NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
  }
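  // isFoldable - Return true if the object-size check is known to be
  // satisfied: the size operand equals the length operand, is -1 (object
  // size unknown, so the check always passes), or is provably at least as
  // large as the known string or constant length.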
  bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
    if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
      return true;
    if (ConstantInt *SizeCI =
            dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
      if (SizeCI->isAllOnesValue())
        return true;
      if (isString) {
        uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
        // If the length is 0 we don't know how long it is and so we can't
        // remove the check.
        if (Len == 0) return false;
        return SizeCI->getZExtValue() >= Len;
      }
      if (ConstantInt *Arg =
            dyn_cast<ConstantInt>(CI->getArgOperand(SizeArgOp)))
        return SizeCI->getZExtValue() >= Arg->getZExtValue();
    }
    return false;
  }
public:
  InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { }
  Instruction *NewInstruction;
};
} // end anonymous namespace

// Try to fold some different types of calls here.
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
  if (CI->getCalledFunction() == 0) return 0;

  InstCombineFortifiedLibCalls Simplifier(this);
  Simplifier.fold(CI, TD, TLI);
  return Simplifier.NewInstruction;
}

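// FindInitTrampolineFromAlloca - Look for the unique init.trampoline that
// fills in TrampMem, where TrampMem is (possibly a cast of) an alloca used
// only by trampoline intrinsics.  Returns null if the memory is used in any
// other way.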
static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
  // Strip off at most one level of pointer casts, looking for an alloca.  This
  // is good enough in practice and simpler than handling any number of casts.
  Value *Underlying = TrampMem->stripPointerCasts();
  if (Underlying != TrampMem &&
      (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem))
    return 0;
  if (!isa<AllocaInst>(Underlying))
    return 0;

  IntrinsicInst *InitTrampoline = 0;
  for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end();
       I != E; I++) {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I);
    if (!II)
      return 0;
    if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
      if (InitTrampoline)
        // More than one init_trampoline writes to this value.  Give up.
        return 0;
      InitTrampoline = II;
      continue;
    }
    if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
      // Allow any number of calls to adjust.trampoline.
      continue;
    return 0;
  }

  // No call to init.trampoline found.
  if (!InitTrampoline)
    return 0;

  // Check that the alloca is being used in the expected way.
  if (InitTrampoline->getOperand(0) != TrampMem)
    return 0;

  return InitTrampoline;
}

static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
                                               Value *TrampMem) {
  // Visit all the previous instructions in the basic block, and try to find
  // an init.trampoline which has a direct path to the adjust.trampoline.
  for (BasicBlock::iterator I = AdjustTramp,
       E = AdjustTramp->getParent()->begin(); I != E; ) {
    Instruction *Inst = --I;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
          II->getOperand(0) == TrampMem)
        return II;
    if (Inst->mayWriteToMemory())
      return 0;
  }
  return 0;
}

// Given a call to llvm.adjust.trampoline, find and return the corresponding
// call to llvm.init.trampoline if the call to the trampoline can be optimized
// to a direct call to a function.  Otherwise return NULL.
//
static IntrinsicInst *FindInitTrampoline(Value *Callee) {
  Callee = Callee->stripPointerCasts();
  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
  if (!AdjustTramp ||
      AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
    return 0;

  Value *TrampMem = AdjustTramp->getOperand(0);

  if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem))
    return IT;
  if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem))
    return IT;
  return 0;
}

// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
  if (isAllocLikeFn(CS.getInstruction(), TLI))
    return visitAllocSite(*CS.getInstruction());

  bool Changed = false;

  // If the callee is a pointer to a function, attempt to move any casts to the
  // arguments of the call/invoke.
  Value *Callee = CS.getCalledValue();
  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
    return 0;

  if (Function *CalleeF = dyn_cast<Function>(Callee))
    // If the call and callee calling conventions don't match, this call must
    // be unreachable, as the call is undefined.
    if (CalleeF->getCallingConv() != CS.getCallingConv() &&
        // Only do this for calls to a function with a body.  A prototype may
        // not actually end up matching the implementation's calling conv for a
        // variety of reasons (e.g. it may be written in assembly).
        !CalleeF->isDeclaration()) {
      Instruction *OldCall = CS.getInstruction();
      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                    UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                    OldCall);
      // If OldCall does not return void then replaceAllUsesWith undef.
      // This allows ValueHandlers and custom metadata to adjust themselves.
      if (!OldCall->getType()->isVoidTy())
        ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
      if (isa<CallInst>(OldCall))
        return EraseInstFromFunction(*OldCall);

      // We cannot remove an invoke, because it would change the CFG; just
      // change the callee to a null pointer.
      cast<InvokeInst>(OldCall)->setCalledFunction(
                                    Constant::getNullValue(CalleeF->getType()));
      return 0;
    }

  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
    // If CS does not return void then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust themselves.
    if (!CS.getInstruction()->getType()->isVoidTy())
      ReplaceInstUsesWith(*CS.getInstruction(),
                          UndefValue::get(CS.getInstruction()->getType()));

    if (isa<InvokeInst>(CS.getInstruction())) {
      // Can't remove an invoke because we cannot change the CFG.
      return 0;
    }

    // This instruction is not reachable; just remove it.  We insert a store to
    // undef so that we know that this code is not reachable, despite the fact
    // that we can't modify the CFG here.
    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                  CS.getInstruction());

    return EraseInstFromFunction(*CS.getInstruction());
  }

  if (IntrinsicInst *II = FindInitTrampoline(Callee))
    return transformCallThroughTrampoline(CS, II);

  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  if (FTy->isVarArg()) {
    int ix = FTy->getNumParams();
    // See if we can optimize any arguments passed through the varargs area of
    // the call.
    for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
           E = CS.arg_end(); I != E; ++I, ++ix) {
      CastInst *CI = dyn_cast<CastInst>(*I);
      if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
        *I = CI->getOperand(0);
        Changed = true;
      }
    }
  }

  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
    // Inline asm calls cannot throw - mark them 'nounwind'.
    CS.setDoesNotThrow();
    Changed = true;
  }

  // Try to optimize the call if possible; we require TargetData for most of
  // this.  None of these calls is seen as possibly dead, so go ahead and
  // delete the instruction now.
  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
    Instruction *I = tryOptimizeCall(CI, TD);
    // If we changed something, return the result; otherwise fall through to
    // the generic handling below.
    if (I) return EraseInstFromFunction(*I);
  }

  return Changed ? CS.getInstruction() : 0;
}

// transformConstExprCastCall - If the callee is a constexpr cast of a function,
// attempt to move the cast to the arguments of the call/invoke.
//
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
  Function *Callee =
    dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
  if (Callee == 0)
    return false;
  Instruction *Caller = CS.getInstruction();
  const AttrListPtr &CallerPAL = CS.getAttributes();

  // Okay, this is a cast from a function to a different type.  Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments casted to the appropriate types.
  //
  FunctionType *FT = Callee->getFunctionType();
  Type *OldRetTy = Caller->getType();
  Type *NewRetTy = FT->getReturnType();

  if (NewRetTy->isStructTy())
    return false; // TODO: Handle multiple return values.

  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {
    if (Callee->isDeclaration() &&
        // Conversion is ok if changing from one pointer type to another or from
        // a pointer to an integer of the same size.
        !((OldRetTy->isPointerTy() || !TD ||
           OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
          (NewRetTy->isPointerTy() || !TD ||
           NewRetTy == TD->getIntPtrType(Caller->getContext()))))
      return false;   // Cannot transform this return value.

    if (!Caller->use_empty() &&
        // void -> non-void is handled specially
        !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
      return false;   // Cannot transform this return value.

    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      Attributes RAttrs = CallerPAL.getRetAttributes();
      if (RAttrs & Attribute::typeIncompatible(NewRetTy))
        return false;   // Attribute not compatible with transformed value.
    }

    // If the callsite is an invoke instruction, and the return value is used by
    // a PHI node in a successor, we cannot change the return type of the call
    // because there is no place to put the cast instruction (without breaking
    // the critical edge).  Bail out in this case.
    if (!Caller->use_empty())
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
        for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
             UI != E; ++UI)
          if (PHINode *PN = dyn_cast<PHINode>(*UI))
            if (PN->getParent() == II->getNormalDest() ||
                PN->getParent() == II->getUnwindDest())
              return false;
  }

  unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);

  CallSite::arg_iterator AI = CS.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);
    Type *ActTy = (*AI)->getType();

    if (!CastInst::isCastable(ActTy, ParamTy))
      return false;   // Cannot transform this parameter value.

    Attributes Attrs = CallerPAL.getParamAttributes(i + 1);
    if (Attrs & Attribute::typeIncompatible(ParamTy))
      return false;   // Attribute not compatible with transformed value.

    // If the parameter is passed as a byval argument, then we have to have a
    // sized type and the sized type has to have the same size as the old type.
    if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
      PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
      if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
        return false;

      Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
      if (TD->getTypeAllocSize(CurElTy) !=
          TD->getTypeAllocSize(ParamPTy->getElementType()))
        return false;
    }

    // Converting from one pointer type to another or between a pointer and an
    // integer of the same size is safe even if we do not have a body.
    bool isConvertible = ActTy == ParamTy ||
      (TD && ((ParamTy->isPointerTy() ||
               ParamTy == TD->getIntPtrType(Caller->getContext())) &&
              (ActTy->isPointerTy() ||
               ActTy == TD->getIntPtrType(Caller->getContext()))));
    if (Callee->isDeclaration() && !isConvertible) return false;
  }

  if (Callee->isDeclaration()) {
    // Do not delete arguments unless we have a function body.
    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
      return false;

    // If the callee is just a declaration, don't change the varargsness of the
    // call.  We don't want to introduce a varargs call where one doesn't
    // already exist.
    PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
      return false;

    // If both the callee and the cast type are varargs, we still have to make
    // sure the number of fixed parameters are the same or we have the same
    // ABI issues as if we introduce a varargs call.
    if (FT->isVarArg() &&
        cast<FunctionType>(APTy->getElementType())->isVarArg() &&
        FT->getNumParams() !=
        cast<FunctionType>(APTy->getElementType())->getNumParams())
      return false;
  }

  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty())
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them.  Check that these extra arguments have attributes
    // that are compatible with being a vararg call argument.
    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
        break;
      Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
      if (PAttrs & Attribute::VarArgsIncompatible)
        return false;
    }

  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary.
  std::vector<Value*> Args;
  Args.reserve(NumActualArgs);
  SmallVector<AttributeWithIndex, 8> attrVec;
  attrVec.reserve(NumCommonArgs);

  // Get any return attributes.
  Attributes RAttrs = CallerPAL.getRetAttributes();

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes.  Wipe out any problematic attributes.
  RAttrs &= ~Attribute::typeIncompatible(NewRetTy);

  // Add the new return attributes.
  if (RAttrs)
    attrVec.push_back(AttributeWithIndex::get(0, RAttrs));

  AI = CS.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);
    if ((*AI)->getType() == ParamTy) {
      Args.push_back(*AI);
    } else {
      Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
          false, ParamTy, false);
      Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy));
    }

    // Add any parameter attributes.
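    // (Attribute slot 0 holds return attributes, so parameter i is at
    // index i + 1.)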
    if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
      attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
  }

  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
    Args.push_back(Constant::getNullValue(FT->getParamType(i)));

  // If we are removing arguments to the function, emit an obnoxious warning.
  if (FT->getNumParams() < NumActualArgs) {
    if (!FT->isVarArg()) {
      errs() << "WARNING: While resolving call to function '"
             << Callee->getName() << "' arguments were dropped!\n";
    } else {
      // Add all of the arguments in their promoted form to the arg list.
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        Type *PTy = getPromotedType((*AI)->getType());
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
          Instruction::CastOps opcode =
            CastInst::getCastOpcode(*AI, false, PTy, false);
          Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
        } else {
          Args.push_back(*AI);
        }

        // Add any parameter attributes.
        if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
          attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
      }
    }
  }

  if (Attributes FnAttrs = CallerPAL.getFnAttributes())
    attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));

  if (NewRetTy->isVoidTy())
    Caller->setName("");   // Void type should not have a name.

  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec);

  Instruction *NC;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
                               II->getUnwindDest(), Args);
    NC->takeName(II);
    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
  } else {
    CallInst *CI = cast<CallInst>(Caller);
    NC = Builder->CreateCall(Callee, Args);
    NC->takeName(CI);
    if (CI->isTailCall())
      cast<CallInst>(NC)->setTailCall();
    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
  }

  // Insert a cast of the return type as necessary.
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    if (!NV->getType()->isVoidTy()) {
      Instruction::CastOps opcode =
        CastInst::getCastOpcode(NC, false, OldRetTy, false);
      NV = NC = CastInst::Create(opcode, NC, OldRetTy);
      NC->setDebugLoc(Caller->getDebugLoc());

      // If this is an invoke instruction, we should insert it after the first
      // non-phi instruction in the normal successor block.
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
        InsertNewInstBefore(NC, *I);
      } else {
        // Otherwise, it's a call, just insert cast right after the call.
        InsertNewInstBefore(NC, *Caller);
      }
      Worklist.AddUsersToWorkList(*Caller);
    } else {
      NV = UndefValue::get(Caller->getType());
    }
  }

  if (!Caller->use_empty())
    ReplaceInstUsesWith(*Caller, NV);

  EraseInstFromFunction(*Caller);
  return true;
}

// transformCallThroughTrampoline - Turn a call to a function created by
// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the
// underlying function.
//
Instruction *
InstCombiner::transformCallThroughTrampoline(CallSite CS,
                                             IntrinsicInst *Tramp) {
  Value *Callee = CS.getCalledValue();
  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  const AttrListPtr &Attrs = CS.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return 0;

  assert(Tramp &&
         "transformCallThroughTrampoline called with incorrect CallSite.");

  Function *NestF =
    cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
  PointerType *NestFPTy = cast<PointerType>(NestF->getType());
  FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());

  const AttrListPtr &NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestIdx = 1;
    Type *NestTy = 0;
    Attributes NestAttr = Attribute::None;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
      if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = NestAttrs.getParamAttributes(NestIdx);
        break;
      }

    if (NestTy) {
      Instruction *Caller = CS.getInstruction();
      std::vector<Value*> NewArgs;
      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);

      SmallVector<AttributeWithIndex, 8> NewAttrs;
      NewAttrs.reserve(Attrs.getNumSlots() + 1);

      // Insert the nest argument into the call argument list, which may
      // mean appending it.  Likewise for attributes.

      // Add any result attributes.
      if (Attributes Attr = Attrs.getRetAttributes())
        NewAttrs.push_back(AttributeWithIndex::get(0, Attr));

      {
        unsigned Idx = 1;
        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
        do {
          if (Idx == NestIdx) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp->getArgOperand(2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
            NewArgs.push_back(NestVal);
            NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
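          // (Original parameters at or after the nest position shift up by
          // one attribute index.)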
          NewArgs.push_back(*I);
          if (Attributes Attr = Attrs.getParamAttributes(Idx))
            NewAttrs.push_back(
              AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));

          ++Idx, ++I;
        } while (1);
      }

      // Add any function attributes.
      if (Attributes Attr = Attrs.getFnAttributes())
        NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.

      std::vector<Type*> NewTypes;
      NewTypes.reserve(FTy->getNumParams()+1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned Idx = 1;
        FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end();

        do {
          if (Idx == NestIdx)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++Idx, ++I;
        } while (1);
      }

      // Replace the trampoline call with a direct call.  Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                               FTy->isVarArg());
      Constant *NewCallee =
        NestF->getType() == PointerType::getUnqual(NewFTy) ?
        NestF : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs);

      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        NewCaller = InvokeInst::Create(NewCallee,
                                       II->getNormalDest(), II->getUnwindDest(),
                                       NewArgs);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewCallee, NewArgs);
        if (cast<CallInst>(Caller)->isTailCall())
          cast<CallInst>(NewCaller)->setTailCall();
        cast<CallInst>(NewCaller)->
          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }

      return NewCaller;
    }
  }

  // Replace the trampoline call with a direct call.  Since there is no 'nest'
  // parameter, there is no need to adjust the argument list.  Let the generic
  // code sort out any function type mismatches.
  Constant *NewCallee =
    NestF->getType() == PTy ? NestF :
                              ConstantExpr::getBitCast(NestF, PTy);
  CS.setCalledFunction(NewCallee);
  return CS.getInstruction();
}