InstCombineCalls.cpp revision b998913ff4aa58c3c342e167c785029cb331078e
1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the visitCall and visitInvoke functions.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InstCombine.h"
15#include "llvm/Support/CallSite.h"
16#include "llvm/Target/TargetData.h"
17#include "llvm/Analysis/MemoryBuiltins.h"
18#include "llvm/Transforms/Utils/BuildLibCalls.h"
19#include "llvm/Transforms/Utils/Local.h"
20using namespace llvm;
21
22/// getPromotedType - Return the specified type promoted as it would be to pass
23/// though a va_arg area.
24static Type *getPromotedType(Type *Ty) {
25  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
26    if (ITy->getBitWidth() < 32)
27      return Type::getInt32Ty(Ty->getContext());
28  }
29  return Ty;
30}
31
32/// reduceToSingleValueType - Given an aggregate type which ultimately holds a
33/// single scalar element, like {{{type}}} or [1 x type], return type.
34static Type *reduceToSingleValueType(Type *T) {
35  while (!T->isSingleValueType()) {
36    if (StructType *STy = dyn_cast<StructType>(T)) {
37      if (STy->getNumElements() == 1)
38        T = STy->getElementType(0);
39      else
40        break;
41    } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
42      if (ATy->getNumElements() == 1)
43        T = ATy->getElementType();
44      else
45        break;
46    } else
47      break;
48  }
49
50  return T;
51}
52
53Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
54  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
55  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD);
56  unsigned MinAlign = std::min(DstAlign, SrcAlign);
57  unsigned CopyAlign = MI->getAlignment();
58
59  if (CopyAlign < MinAlign) {
60    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
61                                             MinAlign, false));
62    return MI;
63  }
64
65  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
66  // load/store.
67  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
68  if (MemOpLength == 0) return 0;
69
70  // Source and destination pointer types are always "i8*" for intrinsic.  See
71  // if the size is something we can handle with a single primitive load/store.
72  // A single load+store correctly handles overlapping memory in the memmove
73  // case.
74  uint64_t Size = MemOpLength->getLimitedValue();
75  assert(Size && "0-sized memory transfering should be removed already.");
76
77  if (Size > 8 || (Size&(Size-1)))
78    return 0;  // If not 1/2/4/8 bytes, exit.
79
80  // Use an integer load+store unless we can find something better.
81  unsigned SrcAddrSp =
82    cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
83  unsigned DstAddrSp =
84    cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
85
86  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
87  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
88  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
89
90  // Memcpy forces the use of i8* for the source and destination.  That means
91  // that if you're using memcpy to move one double around, you'll get a cast
92  // from double* to i8*.  We'd much rather use a double load+store rather than
93  // an i64 load+store, here because this improves the odds that the source or
94  // dest address will be promotable.  See if we can find a better type than the
95  // integer datatype.
96  Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
97  MDNode *CopyMD = 0;
98  if (StrippedDest != MI->getArgOperand(0)) {
99    Type *SrcETy = cast<PointerType>(StrippedDest->getType())
100                                    ->getElementType();
101    if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
102      // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
103      // down through these levels if so.
104      SrcETy = reduceToSingleValueType(SrcETy);
105
106      if (SrcETy->isSingleValueType()) {
107        NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
108        NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
109
110        // If the memcpy has metadata describing the members, see if we can
111        // get the TBAA tag describing our copy.
112        if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
113          if (M->getNumOperands() == 3 &&
114              isa<ConstantInt>(M->getOperand(0)) &&
115              cast<ConstantInt>(M->getOperand(0))->isNullValue() &&
116              isa<ConstantInt>(M->getOperand(1)) &&
117              cast<ConstantInt>(M->getOperand(1))->getValue() == Size &&
118              isa<MDNode>(M->getOperand(2)))
119            CopyMD = cast<MDNode>(M->getOperand(2));
120        }
121      }
122    }
123  }
124
125  // If the memcpy/memmove provides better alignment info than we can
126  // infer, use it.
127  SrcAlign = std::max(SrcAlign, CopyAlign);
128  DstAlign = std::max(DstAlign, CopyAlign);
129
130  Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
131  Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
132  LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
133  L->setAlignment(SrcAlign);
134  if (CopyMD)
135    L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
136  StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
137  S->setAlignment(DstAlign);
138  if (CopyMD)
139    S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
140
141  // Set the size of the copy to 0, it will be deleted on the next iteration.
142  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
143  return MI;
144}
145
146Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
147  unsigned Alignment = getKnownAlignment(MI->getDest(), TD);
148  if (MI->getAlignment() < Alignment) {
149    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
150                                             Alignment, false));
151    return MI;
152  }
153
154  // Extract the length and alignment and fill if they are constant.
155  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
156  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
157  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
158    return 0;
159  uint64_t Len = LenC->getLimitedValue();
160  Alignment = MI->getAlignment();
161  assert(Len && "0-sized memory setting should be removed already.");
162
163  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
164  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
165    Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.
166
167    Value *Dest = MI->getDest();
168    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
169    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
170    Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);
171
172    // Alignment 0 is identity for alignment 1 for memset, but not store.
173    if (Alignment == 0) Alignment = 1;
174
175    // Extract the fill value and store.
176    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
177    StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
178                                        MI->isVolatile());
179    S->setAlignment(Alignment);
180
181    // Set the size of the copy to 0, it will be deleted on the next iteration.
182    MI->setLength(Constant::getNullValue(LenC->getType()));
183    return MI;
184  }
185
186  return 0;
187}
188
189/// visitCallInst - CallInst simplification.  This mostly only handles folding
190/// of intrinsic instructions.  For normal calls, it allows visitCallSite to do
191/// the heavy lifting.
192///
193Instruction *InstCombiner::visitCallInst(CallInst &CI) {
194  if (isFreeCall(&CI, TLI))
195    return visitFree(CI);
196
197  // If the caller function is nounwind, mark the call as nounwind, even if the
198  // callee isn't.
199  if (CI.getParent()->getParent()->doesNotThrow() &&
200      !CI.doesNotThrow()) {
201    CI.setDoesNotThrow();
202    return &CI;
203  }
204
205  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
206  if (!II) return visitCallSite(&CI);
207
208  // Intrinsics cannot occur in an invoke, so handle them here instead of in
209  // visitCallSite.
210  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
211    bool Changed = false;
212
213    // memmove/cpy/set of zero bytes is a noop.
214    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
215      if (NumBytes->isNullValue())
216        return EraseInstFromFunction(CI);
217
218      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
219        if (CI->getZExtValue() == 1) {
220          // Replace the instruction with just byte operations.  We would
221          // transform other cases to loads/stores, but we don't know if
222          // alignment is sufficient.
223        }
224    }
225
226    // No other transformations apply to volatile transfers.
227    if (MI->isVolatile())
228      return 0;
229
230    // If we have a memmove and the source operation is a constant global,
231    // then the source and dest pointers can't alias, so we can change this
232    // into a call to memcpy.
233    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
234      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
235        if (GVSrc->isConstant()) {
236          Module *M = CI.getParent()->getParent()->getParent();
237          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
238          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
239                           CI.getArgOperand(1)->getType(),
240                           CI.getArgOperand(2)->getType() };
241          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
242          Changed = true;
243        }
244    }
245
246    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
247      // memmove(x,x,size) -> noop.
248      if (MTI->getSource() == MTI->getDest())
249        return EraseInstFromFunction(CI);
250    }
251
252    // If we can determine a pointer alignment that is bigger than currently
253    // set, update the alignment.
254    if (isa<MemTransferInst>(MI)) {
255      if (Instruction *I = SimplifyMemTransfer(MI))
256        return I;
257    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
258      if (Instruction *I = SimplifyMemSet(MSI))
259        return I;
260    }
261
262    if (Changed) return II;
263  }
264
265  switch (II->getIntrinsicID()) {
266  default: break;
267  case Intrinsic::objectsize: {
268    uint64_t Size;
269    if (getObjectSize(II->getArgOperand(0), Size, TD, TLI))
270      return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
271    return 0;
272  }
273  case Intrinsic::bswap:
274    // bswap(bswap(x)) -> x
275    if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
276      if (Operand->getIntrinsicID() == Intrinsic::bswap)
277        return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
278
279    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
280    if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
281      if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
282        if (Operand->getIntrinsicID() == Intrinsic::bswap) {
283          unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
284                       TI->getType()->getPrimitiveSizeInBits();
285          Value *CV = ConstantInt::get(Operand->getType(), C);
286          Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV);
287          return new TruncInst(V, TI->getType());
288        }
289    }
290
291    break;
292  case Intrinsic::powi:
293    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
294      // powi(x, 0) -> 1.0
295      if (Power->isZero())
296        return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
297      // powi(x, 1) -> x
298      if (Power->isOne())
299        return ReplaceInstUsesWith(CI, II->getArgOperand(0));
300      // powi(x, -1) -> 1/x
301      if (Power->isAllOnesValue())
302        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
303                                          II->getArgOperand(0));
304    }
305    break;
306  case Intrinsic::cttz: {
307    // If all bits below the first known one are known zero,
308    // this value is constant.
309    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
310    // FIXME: Try to simplify vectors of integers.
311    if (!IT) break;
312    uint32_t BitWidth = IT->getBitWidth();
313    APInt KnownZero(BitWidth, 0);
314    APInt KnownOne(BitWidth, 0);
315    ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
316    unsigned TrailingZeros = KnownOne.countTrailingZeros();
317    APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
318    if ((Mask & KnownZero) == Mask)
319      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
320                                 APInt(BitWidth, TrailingZeros)));
321
322    }
323    break;
324  case Intrinsic::ctlz: {
325    // If all bits above the first known one are known zero,
326    // this value is constant.
327    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
328    // FIXME: Try to simplify vectors of integers.
329    if (!IT) break;
330    uint32_t BitWidth = IT->getBitWidth();
331    APInt KnownZero(BitWidth, 0);
332    APInt KnownOne(BitWidth, 0);
333    ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
334    unsigned LeadingZeros = KnownOne.countLeadingZeros();
335    APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
336    if ((Mask & KnownZero) == Mask)
337      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
338                                 APInt(BitWidth, LeadingZeros)));
339
340    }
341    break;
342  case Intrinsic::uadd_with_overflow: {
343    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
344    IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
345    uint32_t BitWidth = IT->getBitWidth();
346    APInt LHSKnownZero(BitWidth, 0);
347    APInt LHSKnownOne(BitWidth, 0);
348    ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
349    bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
350    bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
351
352    if (LHSKnownNegative || LHSKnownPositive) {
353      APInt RHSKnownZero(BitWidth, 0);
354      APInt RHSKnownOne(BitWidth, 0);
355      ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
356      bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
357      bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
358      if (LHSKnownNegative && RHSKnownNegative) {
359        // The sign bit is set in both cases: this MUST overflow.
360        // Create a simple add instruction, and insert it into the struct.
361        Value *Add = Builder->CreateAdd(LHS, RHS);
362        Add->takeName(&CI);
363        Constant *V[] = {
364          UndefValue::get(LHS->getType()),
365          ConstantInt::getTrue(II->getContext())
366        };
367        StructType *ST = cast<StructType>(II->getType());
368        Constant *Struct = ConstantStruct::get(ST, V);
369        return InsertValueInst::Create(Struct, Add, 0);
370      }
371
372      if (LHSKnownPositive && RHSKnownPositive) {
373        // The sign bit is clear in both cases: this CANNOT overflow.
374        // Create a simple add instruction, and insert it into the struct.
375        Value *Add = Builder->CreateNUWAdd(LHS, RHS);
376        Add->takeName(&CI);
377        Constant *V[] = {
378          UndefValue::get(LHS->getType()),
379          ConstantInt::getFalse(II->getContext())
380        };
381        StructType *ST = cast<StructType>(II->getType());
382        Constant *Struct = ConstantStruct::get(ST, V);
383        return InsertValueInst::Create(Struct, Add, 0);
384      }
385    }
386  }
387  // FALL THROUGH uadd into sadd
388  case Intrinsic::sadd_with_overflow:
389    // Canonicalize constants into the RHS.
390    if (isa<Constant>(II->getArgOperand(0)) &&
391        !isa<Constant>(II->getArgOperand(1))) {
392      Value *LHS = II->getArgOperand(0);
393      II->setArgOperand(0, II->getArgOperand(1));
394      II->setArgOperand(1, LHS);
395      return II;
396    }
397
398    // X + undef -> undef
399    if (isa<UndefValue>(II->getArgOperand(1)))
400      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
401
402    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
403      // X + 0 -> {X, false}
404      if (RHS->isZero()) {
405        Constant *V[] = {
406          UndefValue::get(II->getArgOperand(0)->getType()),
407          ConstantInt::getFalse(II->getContext())
408        };
409        Constant *Struct =
410          ConstantStruct::get(cast<StructType>(II->getType()), V);
411        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
412      }
413    }
414    break;
415  case Intrinsic::usub_with_overflow:
416  case Intrinsic::ssub_with_overflow:
417    // undef - X -> undef
418    // X - undef -> undef
419    if (isa<UndefValue>(II->getArgOperand(0)) ||
420        isa<UndefValue>(II->getArgOperand(1)))
421      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
422
423    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
424      // X - 0 -> {X, false}
425      if (RHS->isZero()) {
426        Constant *V[] = {
427          UndefValue::get(II->getArgOperand(0)->getType()),
428          ConstantInt::getFalse(II->getContext())
429        };
430        Constant *Struct =
431          ConstantStruct::get(cast<StructType>(II->getType()), V);
432        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
433      }
434    }
435    break;
436  case Intrinsic::umul_with_overflow: {
437    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
438    unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();
439
440    APInt LHSKnownZero(BitWidth, 0);
441    APInt LHSKnownOne(BitWidth, 0);
442    ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
443    APInt RHSKnownZero(BitWidth, 0);
444    APInt RHSKnownOne(BitWidth, 0);
445    ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
446
447    // Get the largest possible values for each operand.
448    APInt LHSMax = ~LHSKnownZero;
449    APInt RHSMax = ~RHSKnownZero;
450
451    // If multiplying the maximum values does not overflow then we can turn
452    // this into a plain NUW mul.
453    bool Overflow;
454    LHSMax.umul_ov(RHSMax, Overflow);
455    if (!Overflow) {
456      Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
457      Constant *V[] = {
458        UndefValue::get(LHS->getType()),
459        Builder->getFalse()
460      };
461      Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
462      return InsertValueInst::Create(Struct, Mul, 0);
463    }
464  } // FALL THROUGH
465  case Intrinsic::smul_with_overflow:
466    // Canonicalize constants into the RHS.
467    if (isa<Constant>(II->getArgOperand(0)) &&
468        !isa<Constant>(II->getArgOperand(1))) {
469      Value *LHS = II->getArgOperand(0);
470      II->setArgOperand(0, II->getArgOperand(1));
471      II->setArgOperand(1, LHS);
472      return II;
473    }
474
475    // X * undef -> undef
476    if (isa<UndefValue>(II->getArgOperand(1)))
477      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
478
479    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
480      // X*0 -> {0, false}
481      if (RHSI->isZero())
482        return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
483
484      // X * 1 -> {X, false}
485      if (RHSI->equalsInt(1)) {
486        Constant *V[] = {
487          UndefValue::get(II->getArgOperand(0)->getType()),
488          ConstantInt::getFalse(II->getContext())
489        };
490        Constant *Struct =
491          ConstantStruct::get(cast<StructType>(II->getType()), V);
492        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
493      }
494    }
495    break;
496  case Intrinsic::ppc_altivec_lvx:
497  case Intrinsic::ppc_altivec_lvxl:
498    // Turn PPC lvx -> load if the pointer is known aligned.
499    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
500      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
501                                         PointerType::getUnqual(II->getType()));
502      return new LoadInst(Ptr);
503    }
504    break;
505  case Intrinsic::ppc_altivec_stvx:
506  case Intrinsic::ppc_altivec_stvxl:
507    // Turn stvx -> store if the pointer is known aligned.
508    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
509      Type *OpPtrTy =
510        PointerType::getUnqual(II->getArgOperand(0)->getType());
511      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
512      return new StoreInst(II->getArgOperand(0), Ptr);
513    }
514    break;
515  case Intrinsic::x86_sse_storeu_ps:
516  case Intrinsic::x86_sse2_storeu_pd:
517  case Intrinsic::x86_sse2_storeu_dq:
518    // Turn X86 storeu -> store if the pointer is known aligned.
519    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
520      Type *OpPtrTy =
521        PointerType::getUnqual(II->getArgOperand(1)->getType());
522      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
523      return new StoreInst(II->getArgOperand(1), Ptr);
524    }
525    break;
526
527  case Intrinsic::x86_sse_cvtss2si:
528  case Intrinsic::x86_sse_cvtss2si64:
529  case Intrinsic::x86_sse_cvttss2si:
530  case Intrinsic::x86_sse_cvttss2si64:
531  case Intrinsic::x86_sse2_cvtsd2si:
532  case Intrinsic::x86_sse2_cvtsd2si64:
533  case Intrinsic::x86_sse2_cvttsd2si:
534  case Intrinsic::x86_sse2_cvttsd2si64: {
535    // These intrinsics only demand the 0th element of their input vectors. If
536    // we can simplify the input based on that, do so now.
537    unsigned VWidth =
538      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
539    APInt DemandedElts(VWidth, 1);
540    APInt UndefElts(VWidth, 0);
541    if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
542                                              DemandedElts, UndefElts)) {
543      II->setArgOperand(0, V);
544      return II;
545    }
546    break;
547  }
548
549
550  case Intrinsic::x86_sse41_pmovsxbw:
551  case Intrinsic::x86_sse41_pmovsxwd:
552  case Intrinsic::x86_sse41_pmovsxdq:
553  case Intrinsic::x86_sse41_pmovzxbw:
554  case Intrinsic::x86_sse41_pmovzxwd:
555  case Intrinsic::x86_sse41_pmovzxdq: {
556    // pmov{s|z}x ignores the upper half of their input vectors.
557    unsigned VWidth =
558      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
559    unsigned LowHalfElts = VWidth / 2;
560    APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
561    APInt UndefElts(VWidth, 0);
562    if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
563                                                 InputDemandedElts,
564                                                 UndefElts)) {
565      II->setArgOperand(0, TmpV);
566      return II;
567    }
568    break;
569  }
570
571  case Intrinsic::ppc_altivec_vperm:
572    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
573    if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
574      assert(Mask->getType()->getVectorNumElements() == 16 &&
575             "Bad type for intrinsic!");
576
577      // Check that all of the elements are integer constants or undefs.
578      bool AllEltsOk = true;
579      for (unsigned i = 0; i != 16; ++i) {
580        Constant *Elt = Mask->getAggregateElement(i);
581        if (Elt == 0 ||
582            !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
583          AllEltsOk = false;
584          break;
585        }
586      }
587
588      if (AllEltsOk) {
589        // Cast the input vectors to byte vectors.
590        Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
591                                            Mask->getType());
592        Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
593                                            Mask->getType());
594        Value *Result = UndefValue::get(Op0->getType());
595
596        // Only extract each element once.
597        Value *ExtractedElts[32];
598        memset(ExtractedElts, 0, sizeof(ExtractedElts));
599
600        for (unsigned i = 0; i != 16; ++i) {
601          if (isa<UndefValue>(Mask->getAggregateElement(i)))
602            continue;
603          unsigned Idx =
604            cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
605          Idx &= 31;  // Match the hardware behavior.
606
607          if (ExtractedElts[Idx] == 0) {
608            ExtractedElts[Idx] =
609              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
610                                            Builder->getInt32(Idx&15));
611          }
612
613          // Insert this value into the result vector.
614          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
615                                                Builder->getInt32(i));
616        }
617        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
618      }
619    }
620    break;
621
622  case Intrinsic::arm_neon_vld1:
623  case Intrinsic::arm_neon_vld2:
624  case Intrinsic::arm_neon_vld3:
625  case Intrinsic::arm_neon_vld4:
626  case Intrinsic::arm_neon_vld2lane:
627  case Intrinsic::arm_neon_vld3lane:
628  case Intrinsic::arm_neon_vld4lane:
629  case Intrinsic::arm_neon_vst1:
630  case Intrinsic::arm_neon_vst2:
631  case Intrinsic::arm_neon_vst3:
632  case Intrinsic::arm_neon_vst4:
633  case Intrinsic::arm_neon_vst2lane:
634  case Intrinsic::arm_neon_vst3lane:
635  case Intrinsic::arm_neon_vst4lane: {
636    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD);
637    unsigned AlignArg = II->getNumArgOperands() - 1;
638    ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
639    if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
640      II->setArgOperand(AlignArg,
641                        ConstantInt::get(Type::getInt32Ty(II->getContext()),
642                                         MemAlign, false));
643      return II;
644    }
645    break;
646  }
647
648  case Intrinsic::arm_neon_vmulls:
649  case Intrinsic::arm_neon_vmullu: {
650    Value *Arg0 = II->getArgOperand(0);
651    Value *Arg1 = II->getArgOperand(1);
652
653    // Handle mul by zero first:
654    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
655      return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
656    }
657
658    // Check for constant LHS & RHS - in this case we just simplify.
659    bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu);
660    VectorType *NewVT = cast<VectorType>(II->getType());
661    unsigned NewWidth = NewVT->getElementType()->getIntegerBitWidth();
662    if (ConstantDataVector *CV0 = dyn_cast<ConstantDataVector>(Arg0)) {
663      if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
664        VectorType* VT = cast<VectorType>(CV0->getType());
665        SmallVector<Constant*, 4> NewElems;
666        for (unsigned i = 0; i < VT->getNumElements(); ++i) {
667          APInt CV0E =
668            (cast<ConstantInt>(CV0->getAggregateElement(i)))->getValue();
669          CV0E = Zext ? CV0E.zext(NewWidth) : CV0E.sext(NewWidth);
670          APInt CV1E =
671            (cast<ConstantInt>(CV1->getAggregateElement(i)))->getValue();
672          CV1E = Zext ? CV1E.zext(NewWidth) : CV1E.sext(NewWidth);
673          NewElems.push_back(
674            ConstantInt::get(NewVT->getElementType(), CV0E * CV1E));
675        }
676        return ReplaceInstUsesWith(CI, ConstantVector::get(NewElems));
677      }
678
679      // Couldn't simplify - cannonicalize constant to the RHS.
680      std::swap(Arg0, Arg1);
681    }
682
683    // Handle mul by one:
684    if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
685      if (ConstantInt *Splat =
686            dyn_cast_or_null<ConstantInt>(CV1->getSplatValue())) {
687        if (Splat->isOne()) {
688          if (Zext)
689            return CastInst::CreateZExtOrBitCast(Arg0, II->getType());
690          // else
691          return CastInst::CreateSExtOrBitCast(Arg0, II->getType());
692        }
693      }
694    }
695
696    break;
697  }
698
699  case Intrinsic::stackrestore: {
700    // If the save is right next to the restore, remove the restore.  This can
701    // happen when variable allocas are DCE'd.
702    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
703      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
704        BasicBlock::iterator BI = SS;
705        if (&*++BI == II)
706          return EraseInstFromFunction(CI);
707      }
708    }
709
710    // Scan down this block to see if there is another stack restore in the
711    // same block without an intervening call/alloca.
712    BasicBlock::iterator BI = II;
713    TerminatorInst *TI = II->getParent()->getTerminator();
714    bool CannotRemove = false;
715    for (++BI; &*BI != TI; ++BI) {
716      if (isa<AllocaInst>(BI)) {
717        CannotRemove = true;
718        break;
719      }
720      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
721        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
722          // If there is a stackrestore below this one, remove this one.
723          if (II->getIntrinsicID() == Intrinsic::stackrestore)
724            return EraseInstFromFunction(CI);
725          // Otherwise, ignore the intrinsic.
726        } else {
727          // If we found a non-intrinsic call, we can't remove the stack
728          // restore.
729          CannotRemove = true;
730          break;
731        }
732      }
733    }
734
735    // If the stack restore is in a return, resume, or unwind block and if there
736    // are no allocas or calls between the restore and the return, nuke the
737    // restore.
738    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
739      return EraseInstFromFunction(CI);
740    break;
741  }
742  }
743
744  return visitCallSite(II);
745}
746
747// InvokeInst simplification
748//
749Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
750  return visitCallSite(&II);
751}
752
753/// isSafeToEliminateVarargsCast - If this cast does not affect the value
754/// passed through the varargs area, we can eliminate the use of the cast.
755static bool isSafeToEliminateVarargsCast(const CallSite CS,
756                                         const CastInst * const CI,
757                                         const TargetData * const TD,
758                                         const int ix) {
759  if (!CI->isLosslessCast())
760    return false;
761
762  // The size of ByVal arguments is derived from the type, so we
763  // can't change to a type with a different size.  If the size were
764  // passed explicitly we could avoid this check.
765  if (!CS.isByValArgument(ix))
766    return true;
767
768  Type* SrcTy =
769            cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
770  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
771  if (!SrcTy->isSized() || !DstTy->isSized())
772    return false;
773  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
774    return false;
775  return true;
776}
777
778namespace {
779class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls {
780  InstCombiner *IC;
781protected:
782  void replaceCall(Value *With) {
783    NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
784  }
785  bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
786    if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
787      return true;
788    if (ConstantInt *SizeCI =
789                           dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
790      if (SizeCI->isAllOnesValue())
791        return true;
792      if (isString) {
793        uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
794        // If the length is 0 we don't know how long it is and so we can't
795        // remove the check.
796        if (Len == 0) return false;
797        return SizeCI->getZExtValue() >= Len;
798      }
799      if (ConstantInt *Arg = dyn_cast<ConstantInt>(
800                                                  CI->getArgOperand(SizeArgOp)))
801        return SizeCI->getZExtValue() >= Arg->getZExtValue();
802    }
803    return false;
804  }
805public:
806  InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { }
807  Instruction *NewInstruction;
808};
809} // end anonymous namespace
810
811// Try to fold some different type of calls here.
812// Currently we're only working with the checking functions, memcpy_chk,
813// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
814// strcat_chk and strncat_chk.
815Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
816  if (CI->getCalledFunction() == 0) return 0;
817
818  InstCombineFortifiedLibCalls Simplifier(this);
819  Simplifier.fold(CI, TD, TLI);
820  return Simplifier.NewInstruction;
821}
822
823static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
824  // Strip off at most one level of pointer casts, looking for an alloca.  This
825  // is good enough in practice and simpler than handling any number of casts.
826  Value *Underlying = TrampMem->stripPointerCasts();
827  if (Underlying != TrampMem &&
828      (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem))
829    return 0;
830  if (!isa<AllocaInst>(Underlying))
831    return 0;
832
833  IntrinsicInst *InitTrampoline = 0;
834  for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end();
835       I != E; I++) {
836    IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I);
837    if (!II)
838      return 0;
839    if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
840      if (InitTrampoline)
841        // More than one init_trampoline writes to this value.  Give up.
842        return 0;
843      InitTrampoline = II;
844      continue;
845    }
846    if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
847      // Allow any number of calls to adjust.trampoline.
848      continue;
849    return 0;
850  }
851
852  // No call to init.trampoline found.
853  if (!InitTrampoline)
854    return 0;
855
856  // Check that the alloca is being used in the expected way.
857  if (InitTrampoline->getOperand(0) != TrampMem)
858    return 0;
859
860  return InitTrampoline;
861}
862
863static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
864                                               Value *TrampMem) {
865  // Visit all the previous instructions in the basic block, and try to find a
866  // init.trampoline which has a direct path to the adjust.trampoline.
867  for (BasicBlock::iterator I = AdjustTramp,
868       E = AdjustTramp->getParent()->begin(); I != E; ) {
869    Instruction *Inst = --I;
870    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
871      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
872          II->getOperand(0) == TrampMem)
873        return II;
874    if (Inst->mayWriteToMemory())
875      return 0;
876  }
877  return 0;
878}
879
880// Given a call to llvm.adjust.trampoline, find and return the corresponding
881// call to llvm.init.trampoline if the call to the trampoline can be optimized
882// to a direct call to a function.  Otherwise return NULL.
883//
884static IntrinsicInst *FindInitTrampoline(Value *Callee) {
885  Callee = Callee->stripPointerCasts();
886  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
887  if (!AdjustTramp ||
888      AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
889    return 0;
890
891  Value *TrampMem = AdjustTramp->getOperand(0);
892
893  if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem))
894    return IT;
895  if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem))
896    return IT;
897  return 0;
898}
899
900// visitCallSite - Improvements for call and invoke instructions.
901//
902Instruction *InstCombiner::visitCallSite(CallSite CS) {
903  if (isAllocLikeFn(CS.getInstruction(), TLI))
904    return visitAllocSite(*CS.getInstruction());
905
906  bool Changed = false;
907
908  // If the callee is a pointer to a function, attempt to move any casts to the
909  // arguments of the call/invoke.
910  Value *Callee = CS.getCalledValue();
911  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
912    return 0;
913
914  if (Function *CalleeF = dyn_cast<Function>(Callee))
915    // If the call and callee calling conventions don't match, this call must
916    // be unreachable, as the call is undefined.
917    if (CalleeF->getCallingConv() != CS.getCallingConv() &&
918        // Only do this for calls to a function with a body.  A prototype may
919        // not actually end up matching the implementation's calling conv for a
920        // variety of reasons (e.g. it may be written in assembly).
921        !CalleeF->isDeclaration()) {
922      Instruction *OldCall = CS.getInstruction();
923      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
924                UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
925                                  OldCall);
926      // If OldCall dues not return void then replaceAllUsesWith undef.
927      // This allows ValueHandlers and custom metadata to adjust itself.
928      if (!OldCall->getType()->isVoidTy())
929        ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
930      if (isa<CallInst>(OldCall))
931        return EraseInstFromFunction(*OldCall);
932
933      // We cannot remove an invoke, because it would change the CFG, just
934      // change the callee to a null pointer.
935      cast<InvokeInst>(OldCall)->setCalledFunction(
936                                    Constant::getNullValue(CalleeF->getType()));
937      return 0;
938    }
939
940  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
941    // If CS does not return void then replaceAllUsesWith undef.
942    // This allows ValueHandlers and custom metadata to adjust itself.
943    if (!CS.getInstruction()->getType()->isVoidTy())
944      ReplaceInstUsesWith(*CS.getInstruction(),
945                          UndefValue::get(CS.getInstruction()->getType()));
946
947    if (isa<InvokeInst>(CS.getInstruction())) {
948      // Can't remove an invoke because we cannot change the CFG.
949      return 0;
950    }
951
952    // This instruction is not reachable, just remove it.  We insert a store to
953    // undef so that we know that this code is not reachable, despite the fact
954    // that we can't modify the CFG here.
955    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
956                  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
957                  CS.getInstruction());
958
959    return EraseInstFromFunction(*CS.getInstruction());
960  }
961
962  if (IntrinsicInst *II = FindInitTrampoline(Callee))
963    return transformCallThroughTrampoline(CS, II);
964
965  PointerType *PTy = cast<PointerType>(Callee->getType());
966  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
967  if (FTy->isVarArg()) {
968    int ix = FTy->getNumParams();
969    // See if we can optimize any arguments passed through the varargs area of
970    // the call.
971    for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
972           E = CS.arg_end(); I != E; ++I, ++ix) {
973      CastInst *CI = dyn_cast<CastInst>(*I);
974      if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
975        *I = CI->getOperand(0);
976        Changed = true;
977      }
978    }
979  }
980
981  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
982    // Inline asm calls cannot throw - mark them 'nounwind'.
983    CS.setDoesNotThrow();
984    Changed = true;
985  }
986
987  // Try to optimize the call if possible, we require TargetData for most of
988  // this.  None of these calls are seen as possibly dead so go ahead and
989  // delete the instruction now.
990  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
991    Instruction *I = tryOptimizeCall(CI, TD);
992    // If we changed something return the result, etc. Otherwise let
993    // the fallthrough check.
994    if (I) return EraseInstFromFunction(*I);
995  }
996
997  return Changed ? CS.getInstruction() : 0;
998}
999
1000// transformConstExprCastCall - If the callee is a constexpr cast of a function,
1001// attempt to move the cast to the arguments of the call/invoke.
1002//
1003bool InstCombiner::transformConstExprCastCall(CallSite CS) {
1004  Function *Callee =
1005    dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
1006  if (Callee == 0)
1007    return false;
1008  Instruction *Caller = CS.getInstruction();
1009  const AttrListPtr &CallerPAL = CS.getAttributes();
1010
1011  // Okay, this is a cast from a function to a different type.  Unless doing so
1012  // would cause a type conversion of one of our arguments, change this call to
1013  // be a direct call with arguments casted to the appropriate types.
1014  //
1015  FunctionType *FT = Callee->getFunctionType();
1016  Type *OldRetTy = Caller->getType();
1017  Type *NewRetTy = FT->getReturnType();
1018
1019  if (NewRetTy->isStructTy())
1020    return false; // TODO: Handle multiple return values.
1021
1022  // Check to see if we are changing the return type...
1023  if (OldRetTy != NewRetTy) {
1024    if (Callee->isDeclaration() &&
1025        // Conversion is ok if changing from one pointer type to another or from
1026        // a pointer to an integer of the same size.
1027        !((OldRetTy->isPointerTy() || !TD ||
1028           OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
1029          (NewRetTy->isPointerTy() || !TD ||
1030           NewRetTy == TD->getIntPtrType(Caller->getContext()))))
1031      return false;   // Cannot transform this return value.
1032
1033    if (!Caller->use_empty() &&
1034        // void -> non-void is handled specially
1035        !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
1036      return false;   // Cannot transform this return value.
1037
1038    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
1039      Attributes RAttrs = CallerPAL.getRetAttributes();
1040      if (RAttrs & Attribute::typeIncompatible(NewRetTy))
1041        return false;   // Attribute not compatible with transformed value.
1042    }
1043
1044    // If the callsite is an invoke instruction, and the return value is used by
1045    // a PHI node in a successor, we cannot change the return type of the call
1046    // because there is no place to put the cast instruction (without breaking
1047    // the critical edge).  Bail out in this case.
1048    if (!Caller->use_empty())
1049      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
1050        for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
1051             UI != E; ++UI)
1052          if (PHINode *PN = dyn_cast<PHINode>(*UI))
1053            if (PN->getParent() == II->getNormalDest() ||
1054                PN->getParent() == II->getUnwindDest())
1055              return false;
1056  }
1057
1058  unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
1059  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
1060
1061  CallSite::arg_iterator AI = CS.arg_begin();
1062  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
1063    Type *ParamTy = FT->getParamType(i);
1064    Type *ActTy = (*AI)->getType();
1065
1066    if (!CastInst::isCastable(ActTy, ParamTy))
1067      return false;   // Cannot transform this parameter value.
1068
1069    Attributes Attrs = CallerPAL.getParamAttributes(i + 1);
1070    if (Attrs & Attribute::typeIncompatible(ParamTy))
1071      return false;   // Attribute not compatible with transformed value.
1072
1073    // If the parameter is passed as a byval argument, then we have to have a
1074    // sized type and the sized type has to have the same size as the old type.
1075    if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
1076      PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
1077      if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
1078        return false;
1079
1080      Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
1081      if (TD->getTypeAllocSize(CurElTy) !=
1082          TD->getTypeAllocSize(ParamPTy->getElementType()))
1083        return false;
1084    }
1085
1086    // Converting from one pointer type to another or between a pointer and an
1087    // integer of the same size is safe even if we do not have a body.
1088    bool isConvertible = ActTy == ParamTy ||
1089      (TD && ((ParamTy->isPointerTy() ||
1090      ParamTy == TD->getIntPtrType(Caller->getContext())) &&
1091              (ActTy->isPointerTy() ||
1092              ActTy == TD->getIntPtrType(Caller->getContext()))));
1093    if (Callee->isDeclaration() && !isConvertible) return false;
1094  }
1095
1096  if (Callee->isDeclaration()) {
1097    // Do not delete arguments unless we have a function body.
1098    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
1099      return false;
1100
1101    // If the callee is just a declaration, don't change the varargsness of the
1102    // call.  We don't want to introduce a varargs call where one doesn't
1103    // already exist.
1104    PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
1105    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
1106      return false;
1107
1108    // If both the callee and the cast type are varargs, we still have to make
1109    // sure the number of fixed parameters are the same or we have the same
1110    // ABI issues as if we introduce a varargs call.
1111    if (FT->isVarArg() &&
1112        cast<FunctionType>(APTy->getElementType())->isVarArg() &&
1113        FT->getNumParams() !=
1114        cast<FunctionType>(APTy->getElementType())->getNumParams())
1115      return false;
1116  }
1117
1118  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
1119      !CallerPAL.isEmpty())
1120    // In this case we have more arguments than the new function type, but we
1121    // won't be dropping them.  Check that these extra arguments have attributes
1122    // that are compatible with being a vararg call argument.
1123    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
1124      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
1125        break;
1126      Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
1127      if (PAttrs & Attribute::VarArgsIncompatible)
1128        return false;
1129    }
1130
1131
1132  // Okay, we decided that this is a safe thing to do: go ahead and start
1133  // inserting cast instructions as necessary.
1134  std::vector<Value*> Args;
1135  Args.reserve(NumActualArgs);
1136  SmallVector<AttributeWithIndex, 8> attrVec;
1137  attrVec.reserve(NumCommonArgs);
1138
1139  // Get any return attributes.
1140  Attributes RAttrs = CallerPAL.getRetAttributes();
1141
1142  // If the return value is not being used, the type may not be compatible
1143  // with the existing attributes.  Wipe out any problematic attributes.
1144  RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
1145
1146  // Add the new return attributes.
1147  if (RAttrs)
1148    attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
1149
1150  AI = CS.arg_begin();
1151  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
1152    Type *ParamTy = FT->getParamType(i);
1153    if ((*AI)->getType() == ParamTy) {
1154      Args.push_back(*AI);
1155    } else {
1156      Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
1157          false, ParamTy, false);
1158      Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy));
1159    }
1160
1161    // Add any parameter attributes.
1162    if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
1163      attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
1164  }
1165
1166  // If the function takes more arguments than the call was taking, add them
1167  // now.
1168  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
1169    Args.push_back(Constant::getNullValue(FT->getParamType(i)));
1170
1171  // If we are removing arguments to the function, emit an obnoxious warning.
1172  if (FT->getNumParams() < NumActualArgs) {
1173    if (!FT->isVarArg()) {
1174      errs() << "WARNING: While resolving call to function '"
1175             << Callee->getName() << "' arguments were dropped!\n";
1176    } else {
1177      // Add all of the arguments in their promoted form to the arg list.
1178      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
1179        Type *PTy = getPromotedType((*AI)->getType());
1180        if (PTy != (*AI)->getType()) {
1181          // Must promote to pass through va_arg area!
1182          Instruction::CastOps opcode =
1183            CastInst::getCastOpcode(*AI, false, PTy, false);
1184          Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
1185        } else {
1186          Args.push_back(*AI);
1187        }
1188
1189        // Add any parameter attributes.
1190        if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
1191          attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
1192      }
1193    }
1194  }
1195
1196  if (Attributes FnAttrs =  CallerPAL.getFnAttributes())
1197    attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
1198
1199  if (NewRetTy->isVoidTy())
1200    Caller->setName("");   // Void type should not have a name.
1201
1202  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec);
1203
1204  Instruction *NC;
1205  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
1206    NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
1207                               II->getUnwindDest(), Args);
1208    NC->takeName(II);
1209    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
1210    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
1211  } else {
1212    CallInst *CI = cast<CallInst>(Caller);
1213    NC = Builder->CreateCall(Callee, Args);
1214    NC->takeName(CI);
1215    if (CI->isTailCall())
1216      cast<CallInst>(NC)->setTailCall();
1217    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
1218    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
1219  }
1220
1221  // Insert a cast of the return type as necessary.
1222  Value *NV = NC;
1223  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
1224    if (!NV->getType()->isVoidTy()) {
1225      Instruction::CastOps opcode =
1226        CastInst::getCastOpcode(NC, false, OldRetTy, false);
1227      NV = NC = CastInst::Create(opcode, NC, OldRetTy);
1228      NC->setDebugLoc(Caller->getDebugLoc());
1229
1230      // If this is an invoke instruction, we should insert it after the first
1231      // non-phi, instruction in the normal successor block.
1232      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
1233        BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
1234        InsertNewInstBefore(NC, *I);
1235      } else {
1236        // Otherwise, it's a call, just insert cast right after the call.
1237        InsertNewInstBefore(NC, *Caller);
1238      }
1239      Worklist.AddUsersToWorkList(*Caller);
1240    } else {
1241      NV = UndefValue::get(Caller->getType());
1242    }
1243  }
1244
1245  if (!Caller->use_empty())
1246    ReplaceInstUsesWith(*Caller, NV);
1247
1248  EraseInstFromFunction(*Caller);
1249  return true;
1250}
1251
1252// transformCallThroughTrampoline - Turn a call to a function created by
1253// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the
1254// underlying function.
1255//
1256Instruction *
1257InstCombiner::transformCallThroughTrampoline(CallSite CS,
1258                                             IntrinsicInst *Tramp) {
1259  Value *Callee = CS.getCalledValue();
1260  PointerType *PTy = cast<PointerType>(Callee->getType());
1261  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
1262  const AttrListPtr &Attrs = CS.getAttributes();
1263
1264  // If the call already has the 'nest' attribute somewhere then give up -
1265  // otherwise 'nest' would occur twice after splicing in the chain.
1266  if (Attrs.hasAttrSomewhere(Attribute::Nest))
1267    return 0;
1268
1269  assert(Tramp &&
1270         "transformCallThroughTrampoline called with incorrect CallSite.");
1271
1272  Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
1273  PointerType *NestFPTy = cast<PointerType>(NestF->getType());
1274  FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
1275
1276  const AttrListPtr &NestAttrs = NestF->getAttributes();
1277  if (!NestAttrs.isEmpty()) {
1278    unsigned NestIdx = 1;
1279    Type *NestTy = 0;
1280    Attributes NestAttr = Attribute::None;
1281
1282    // Look for a parameter marked with the 'nest' attribute.
1283    for (FunctionType::param_iterator I = NestFTy->param_begin(),
1284         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
1285      if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
1286        // Record the parameter type and any other attributes.
1287        NestTy = *I;
1288        NestAttr = NestAttrs.getParamAttributes(NestIdx);
1289        break;
1290      }
1291
1292    if (NestTy) {
1293      Instruction *Caller = CS.getInstruction();
1294      std::vector<Value*> NewArgs;
1295      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
1296
1297      SmallVector<AttributeWithIndex, 8> NewAttrs;
1298      NewAttrs.reserve(Attrs.getNumSlots() + 1);
1299
1300      // Insert the nest argument into the call argument list, which may
1301      // mean appending it.  Likewise for attributes.
1302
1303      // Add any result attributes.
1304      if (Attributes Attr = Attrs.getRetAttributes())
1305        NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
1306
1307      {
1308        unsigned Idx = 1;
1309        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
1310        do {
1311          if (Idx == NestIdx) {
1312            // Add the chain argument and attributes.
1313            Value *NestVal = Tramp->getArgOperand(2);
1314            if (NestVal->getType() != NestTy)
1315              NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
1316            NewArgs.push_back(NestVal);
1317            NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
1318          }
1319
1320          if (I == E)
1321            break;
1322
1323          // Add the original argument and attributes.
1324          NewArgs.push_back(*I);
1325          if (Attributes Attr = Attrs.getParamAttributes(Idx))
1326            NewAttrs.push_back
1327              (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
1328
1329          ++Idx, ++I;
1330        } while (1);
1331      }
1332
1333      // Add any function attributes.
1334      if (Attributes Attr = Attrs.getFnAttributes())
1335        NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
1336
1337      // The trampoline may have been bitcast to a bogus type (FTy).
1338      // Handle this by synthesizing a new function type, equal to FTy
1339      // with the chain parameter inserted.
1340
1341      std::vector<Type*> NewTypes;
1342      NewTypes.reserve(FTy->getNumParams()+1);
1343
1344      // Insert the chain's type into the list of parameter types, which may
1345      // mean appending it.
1346      {
1347        unsigned Idx = 1;
1348        FunctionType::param_iterator I = FTy->param_begin(),
1349          E = FTy->param_end();
1350
1351        do {
1352          if (Idx == NestIdx)
1353            // Add the chain's type.
1354            NewTypes.push_back(NestTy);
1355
1356          if (I == E)
1357            break;
1358
1359          // Add the original type.
1360          NewTypes.push_back(*I);
1361
1362          ++Idx, ++I;
1363        } while (1);
1364      }
1365
1366      // Replace the trampoline call with a direct call.  Let the generic
1367      // code sort out any function type mismatches.
1368      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
1369                                                FTy->isVarArg());
1370      Constant *NewCallee =
1371        NestF->getType() == PointerType::getUnqual(NewFTy) ?
1372        NestF : ConstantExpr::getBitCast(NestF,
1373                                         PointerType::getUnqual(NewFTy));
1374      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs);
1375
1376      Instruction *NewCaller;
1377      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
1378        NewCaller = InvokeInst::Create(NewCallee,
1379                                       II->getNormalDest(), II->getUnwindDest(),
1380                                       NewArgs);
1381        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
1382        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
1383      } else {
1384        NewCaller = CallInst::Create(NewCallee, NewArgs);
1385        if (cast<CallInst>(Caller)->isTailCall())
1386          cast<CallInst>(NewCaller)->setTailCall();
1387        cast<CallInst>(NewCaller)->
1388          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
1389        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
1390      }
1391
1392      return NewCaller;
1393    }
1394  }
1395
1396  // Replace the trampoline call with a direct call.  Since there is no 'nest'
1397  // parameter, there is no need to adjust the argument list.  Let the generic
1398  // code sort out any function type mismatches.
1399  Constant *NewCallee =
1400    NestF->getType() == PTy ? NestF :
1401                              ConstantExpr::getBitCast(NestF, PTy);
1402  CS.setCalledFunction(NewCallee);
1403  return CS.getInstruction();
1404}
1405