InstCombineCalls.cpp revision 3854a5d90fee52af1065edbed34521fff6cdc18d
//===- InstCombineCalls.cpp -----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall and visitInvoke functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumSimplified, "Number of library calls simplified");

/// getPromotedType - Return the specified type promoted as it would be to pass
/// through a va_arg area.
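/// For example, i8 and i16 promote to i32, mirroring C's default argument
/// promotions; wider and non-integer types are returned unchanged.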
static Type *getPromotedType(Type *Ty) {
  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}

/// reduceToSingleValueType - Given an aggregate type which ultimately holds a
/// single scalar element, like {{{type}}} or [1 x type], return type.
static Type *reduceToSingleValueType(Type *T) {
  while (!T->isSingleValueType()) {
    if (StructType *STy = dyn_cast<StructType>(T)) {
      if (STy->getNumElements() == 1)
        T = STy->getElementType(0);
      else
        break;
    } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
      if (ATy->getNumElements() == 1)
        T = ATy->getElementType();
      else
        break;
    } else
      break;
  }

  return T;
}

Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD);
  unsigned MinAlign = std::min(DstAlign, SrcAlign);
  unsigned CopyAlign = MI->getAlignment();

  if (CopyAlign < MinAlign) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                             MinAlign, false));
    return MI;
  }

  // If the memcpy/memmove length is 1/2/4/8 bytes, replace it with a
  // load/store pair.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
  if (MemOpLength == 0) return 0;

  // Source and destination pointer types are always "i8*" for the intrinsic.
  // See if the size is something we can handle with a single primitive
  // load/store.  A single load+store correctly handles overlapping memory in
  // the memmove case.
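  // (The whole source is read into a register before the store writes the
  // destination, so an overlap cannot clobber bytes that have yet to be
  // copied.)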
  uint64_t Size = MemOpLength->getLimitedValue();
  assert(Size && "0-sized memory transfer should be removed already.");

  if (Size > 8 || (Size&(Size-1)))
    return 0;  // If not 1/2/4/8 bytes, exit.

  // Use an integer load+store unless we can find something better.
  unsigned SrcAddrSp =
    cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
  unsigned DstAddrSp =
    cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();

  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);

  // Memcpy forces the use of i8* for the source and destination.  That means
  // that if you're using memcpy to move one double around, you'll get a cast
  // from double* to i8*.  We'd much rather use a double load+store than an
  // i64 load+store here, because that improves the odds that the source or
  // dest address will be promotable.  See if we can find a better type than
  // the integer datatype.
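  // For example (illustrative IR), an 8-byte copy whose operands strip back
  // to double* becomes
  //   %v = load double* %src, align 8
  //   store double %v, double* %dst, align 8
  // rather than an i64 load/store through bitcasts of the i8* operands.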
  Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
  MDNode *CopyMD = 0;
  if (StrippedDest != MI->getArgOperand(0)) {
    Type *SrcETy = cast<PointerType>(StrippedDest->getType())
                                    ->getElementType();
    if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
      // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
      // down through these levels if so.
      SrcETy = reduceToSingleValueType(SrcETy);

      if (SrcETy->isSingleValueType()) {
        NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
        NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);

        // If the memcpy has metadata describing the members, see if we can
        // get the TBAA tag describing our copy.
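        // A !tbaa.struct operand list is a sequence of (offset, size, tag)
        // triples; a single triple at offset 0 covering the whole copy gives
        // us a plain !tbaa tag to put on the new load and store.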
        if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
          if (M->getNumOperands() == 3 &&
              M->getOperand(0) &&
              isa<ConstantInt>(M->getOperand(0)) &&
              cast<ConstantInt>(M->getOperand(0))->isNullValue() &&
              M->getOperand(1) &&
              isa<ConstantInt>(M->getOperand(1)) &&
              cast<ConstantInt>(M->getOperand(1))->getValue() == Size &&
              M->getOperand(2) &&
              isa<MDNode>(M->getOperand(2)))
            CopyMD = cast<MDNode>(M->getOperand(2));
        }
      }
    }
  }

  // If the memcpy/memmove provides better alignment info than we can
  // infer, use it.
  SrcAlign = std::max(SrcAlign, CopyAlign);
  DstAlign = std::max(DstAlign, CopyAlign);

  Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
  Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
  LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
  L->setAlignment(SrcAlign);
  if (CopyMD)
    L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
  S->setAlignment(DstAlign);
  if (CopyMD)
    S->setMetadata(LLVMContext::MD_tbaa, CopyMD);

  // Set the size of the copy to 0; it will be deleted on the next iteration.
  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
  return MI;
}

Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
  unsigned Alignment = getKnownAlignment(MI->getDest(), TD);
  if (MI->getAlignment() < Alignment) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                             Alignment, false));
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
    return 0;
  uint64_t Len = LenC->getLimitedValue();
  Alignment = MI->getAlignment();
  assert(Len && "0-sized memory set should be removed already.");

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.

    Value *Dest = MI->getDest();
    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
    Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);

    // An alignment of 0 on a memset means alignment 1, but not on a store, so
    // make the alignment explicit.
    if (Alignment == 0) Alignment = 1;

    // Extract the fill value and store.
    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
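    // Multiplying the zero-extended byte by 0x0101010101010101 replicates it
    // into every byte of the i64; ConstantInt::get then truncates the value
    // to the Len-byte-wide store type.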
    StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
                                        MI->isVolatile());
    S->setAlignment(Alignment);

    // Set the size of the memset to 0; it will be deleted on the next
    // iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return 0;
}

/// visitCallInst - CallInst simplification.  This mostly only handles folding
/// of intrinsic instructions.  For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
  if (isFreeCall(&CI, TLI))
    return visitFree(CI);

  // If the caller function is nounwind, mark the call as nounwind, even if the
  // callee isn't.
  if (CI.getParent()->getParent()->doesNotThrow() &&
      !CI.doesNotThrow()) {
    CI.setDoesNotThrow();
    return &CI;
  }

  CallSite CS(&CI);
  if (Value *V = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
                              TD))
    return ReplaceInstUsesWith(CI, V);

  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
  if (!II) return visitCallSite(&CI);
  // Intrinsics cannot occur in an invoke, so handle them here instead of in
  // visitCallSite.
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
    bool Changed = false;

    // memmove/cpy/set of zero bytes is a noop.
    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
      if (NumBytes->isNullValue())
        return EraseInstFromFunction(CI);

      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
        if (CI->getZExtValue() == 1) {
          // Replace the instruction with just byte operations.  We would
          // transform other cases to loads/stores, but we don't know if
          // alignment is sufficient.
        }
    }

    // No other transformations apply to volatile transfers.
    if (MI->isVolatile())
      return 0;

    // If we have a memmove and the source operand is a constant global,
    // then the source and dest pointers can't alias, so we can change this
    // into a call to memcpy.
    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
        if (GVSrc->isConstant()) {
          Module *M = CI.getParent()->getParent()->getParent();
          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
                           CI.getArgOperand(1)->getType(),
                           CI.getArgOperand(2)->getType() };
          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
          Changed = true;
        }
    }

    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
      // memmove(x,x,size) -> noop.
      if (MTI->getSource() == MTI->getDest())
        return EraseInstFromFunction(CI);
    }

    // If we can determine a pointer alignment that is bigger than currently
    // set, update the alignment.
    if (isa<MemTransferInst>(MI)) {
      if (Instruction *I = SimplifyMemTransfer(MI))
        return I;
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
      if (Instruction *I = SimplifyMemSet(MSI))
        return I;
    }

    if (Changed) return II;
  }

  switch (II->getIntrinsicID()) {
  default: break;
  case Intrinsic::objectsize: {
    uint64_t Size;
    if (getObjectSize(II->getArgOperand(0), Size, TD, TLI))
      return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
    return 0;
  }
  case Intrinsic::bswap: {
    Value *IIOperand = II->getArgOperand(0);
    Value *X = 0;

    // bswap(bswap(x)) -> x
    if (match(IIOperand, m_BSwap(m_Value(X))))
      return ReplaceInstUsesWith(CI, X);

    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
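    // E.g. for X: i32 truncated to i16, the low 16 bits of bswap(X) hold the
    // byte-swapped top 16 bits of X, so the result equals trunc(X >> 16).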
    if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
      unsigned C = X->getType()->getPrimitiveSizeInBits() -
        IIOperand->getType()->getPrimitiveSizeInBits();
      Value *CV = ConstantInt::get(X->getType(), C);
      Value *V = Builder->CreateLShr(X, CV);
      return new TruncInst(V, IIOperand->getType());
    }
    break;
  }

  case Intrinsic::powi:
    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // powi(x, 0) -> 1.0
      if (Power->isZero())
        return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
      // powi(x, 1) -> x
      if (Power->isOne())
        return ReplaceInstUsesWith(CI, II->getArgOperand(0));
      // powi(x, -1) -> 1/x
      if (Power->isAllOnesValue())
        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
                                          II->getArgOperand(0));
    }
    break;
  case Intrinsic::cttz: {
    // If all bits below the first known one are known zero,
    // this value is constant.
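    // E.g. if the low three bits are known zero and bit 3 is known one, the
    // lowest set bit must be bit 3, so cttz is exactly 3.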
    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
    // FIXME: Try to simplify vectors of integers.
    if (!IT) break;
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
    unsigned TrailingZeros = KnownOne.countTrailingZeros();
    APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, TrailingZeros)));
  }
  break;
  case Intrinsic::ctlz: {
    // If all bits above the first known one are known zero,
    // this value is constant.
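    // E.g. if the high three bits are known zero and the next bit down is
    // known one, the highest set bit is fixed, so ctlz is exactly 3.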
    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
    // FIXME: Try to simplify vectors of integers.
    if (!IT) break;
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
    unsigned LeadingZeros = KnownOne.countLeadingZeros();
    APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, LeadingZeros)));
  }
  break;
  case Intrinsic::uadd_with_overflow: {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
    uint32_t BitWidth = IT->getBitWidth();
    APInt LHSKnownZero(BitWidth, 0);
    APInt LHSKnownOne(BitWidth, 0);
    ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
    bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
    bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];

    if (LHSKnownNegative || LHSKnownPositive) {
      APInt RHSKnownZero(BitWidth, 0);
      APInt RHSKnownOne(BitWidth, 0);
      ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
      bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
      bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
      if (LHSKnownNegative && RHSKnownNegative) {
        // The sign bit is set in both cases: this MUST overflow.
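        // (Both operands are >= 2^(BitWidth-1), so the sum is >= 2^BitWidth.)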
        // Create a simple add instruction, and insert it into the struct.
        Value *Add = Builder->CreateAdd(LHS, RHS);
        Add->takeName(&CI);
        Constant *V[] = {
          UndefValue::get(LHS->getType()),
          ConstantInt::getTrue(II->getContext())
        };
        StructType *ST = cast<StructType>(II->getType());
        Constant *Struct = ConstantStruct::get(ST, V);
        return InsertValueInst::Create(Struct, Add, 0);
      }

      if (LHSKnownPositive && RHSKnownPositive) {
        // The sign bit is clear in both cases: this CANNOT overflow.
        // Create a simple add instruction, and insert it into the struct.
        Value *Add = Builder->CreateNUWAdd(LHS, RHS);
        Add->takeName(&CI);
        Constant *V[] = {
          UndefValue::get(LHS->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        StructType *ST = cast<StructType>(II->getType());
        Constant *Struct = ConstantStruct::get(ST, V);
        return InsertValueInst::Create(Struct, Add, 0);
      }
    }
  }
  // FALL THROUGH uadd into sadd
  case Intrinsic::sadd_with_overflow:
    // Canonicalize constants into the RHS.
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }

    // X + undef -> undef
    if (isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X + 0 -> {X, false}
      if (RHS->isZero()) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::usub_with_overflow:
  case Intrinsic::ssub_with_overflow:
    // undef - X -> undef
    // X - undef -> undef
    if (isa<UndefValue>(II->getArgOperand(0)) ||
        isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X - 0 -> {X, false}
      if (RHS->isZero()) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::umul_with_overflow: {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();

    APInt LHSKnownZero(BitWidth, 0);
    APInt LHSKnownOne(BitWidth, 0);
    ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
    APInt RHSKnownZero(BitWidth, 0);
    APInt RHSKnownOne(BitWidth, 0);
    ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);

    // Get the largest possible values for each operand.
    APInt LHSMax = ~LHSKnownZero;
    APInt RHSMax = ~RHSKnownZero;
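    // (~KnownZero sets every bit not known to be zero, which is the largest
    // value consistent with what ComputeMaskedBits proved.)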

    // If multiplying the maximum values does not overflow then we can turn
    // this into a plain NUW mul.
    bool Overflow;
    LHSMax.umul_ov(RHSMax, Overflow);
    if (!Overflow) {
      Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
      Constant *V[] = {
        UndefValue::get(LHS->getType()),
        Builder->getFalse()
      };
      Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
      return InsertValueInst::Create(Struct, Mul, 0);
    }
  } // FALL THROUGH
  case Intrinsic::smul_with_overflow:
    // Canonicalize constants into the RHS.
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }

    // X * undef -> undef
    if (isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X*0 -> {0, false}
      if (RHSI->isZero())
        return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));

      // X * 1 -> {X, false}
      if (RHSI->equalsInt(1)) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                         PointerType::getUnqual(II->getType()));
      return new LoadInst(Ptr);
    }
    break;
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
      Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(0)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(II->getArgOperand(0), Ptr);
    }
    break;
  case Intrinsic::x86_sse_storeu_ps:
  case Intrinsic::x86_sse2_storeu_pd:
  case Intrinsic::x86_sse2_storeu_dq:
    // Turn X86 storeu -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
      Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(1)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
      return new StoreInst(II->getArgOperand(1), Ptr);
    }
    break;

  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64: {
    // These intrinsics only demand the 0th element of their input vectors. If
    // we can simplify the input based on that, do so now.
    unsigned VWidth =
      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    APInt DemandedElts(VWidth, 1);
    APInt UndefElts(VWidth, 0);
    if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
                                              DemandedElts, UndefElts)) {
      II->setArgOperand(0, V);
      return II;
    }
    break;
  }

  case Intrinsic::x86_sse41_pmovsxbw:
  case Intrinsic::x86_sse41_pmovsxwd:
  case Intrinsic::x86_sse41_pmovsxdq:
  case Intrinsic::x86_sse41_pmovzxbw:
  case Intrinsic::x86_sse41_pmovzxwd:
  case Intrinsic::x86_sse41_pmovzxdq: {
    // pmov{s|z}x ignores the upper half of its input vector.
    unsigned VWidth =
      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    unsigned LowHalfElts = VWidth / 2;
    APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
    APInt UndefElts(VWidth, 0);
    if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
                                                 InputDemandedElts,
                                                 UndefElts)) {
      II->setArgOperand(0, TmpV);
      return II;
    }
    break;
  }

  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
      assert(Mask->getType()->getVectorNumElements() == 16 &&
             "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        Constant *Elt = Mask->getAggregateElement(i);
        if (Elt == 0 ||
            !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
                                            Mask->getType());
        Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
                                            Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getAggregateElement(i)))
            continue;
          unsigned Idx =
            cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
          Idx &= 31;  // Match the hardware behavior.
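          // Mask bytes 0-15 select from the first input vector and 16-31 from
          // the second; Idx & 15 is the byte position within that vector.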

          if (ExtractedElts[Idx] == 0) {
            ExtractedElts[Idx] =
              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
                                            Builder->getInt32(Idx&15));
          }

          // Insert this value into the result vector.
          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
                                                Builder->getInt32(i));
        }
        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
      }
    }
    break;

  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD);
    unsigned AlignArg = II->getNumArgOperands() - 1;
    ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
    if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
      II->setArgOperand(AlignArg,
                        ConstantInt::get(Type::getInt32Ty(II->getContext()),
                                         MemAlign, false));
      return II;
    }
    break;
  }

  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu: {
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);

    // Handle mul by zero first:
    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
      return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
    }

    // Check for constant LHS & RHS - in this case we just simplify.
    bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu);
    VectorType *NewVT = cast<VectorType>(II->getType());
    unsigned NewWidth = NewVT->getElementType()->getIntegerBitWidth();
    if (ConstantDataVector *CV0 = dyn_cast<ConstantDataVector>(Arg0)) {
      if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
        VectorType* VT = cast<VectorType>(CV0->getType());
        SmallVector<Constant*, 4> NewElems;
        for (unsigned i = 0; i < VT->getNumElements(); ++i) {
          APInt CV0E =
            (cast<ConstantInt>(CV0->getAggregateElement(i)))->getValue();
          CV0E = Zext ? CV0E.zext(NewWidth) : CV0E.sext(NewWidth);
          APInt CV1E =
            (cast<ConstantInt>(CV1->getAggregateElement(i)))->getValue();
          CV1E = Zext ? CV1E.zext(NewWidth) : CV1E.sext(NewWidth);
          NewElems.push_back(
            ConstantInt::get(NewVT->getElementType(), CV0E * CV1E));
        }
        return ReplaceInstUsesWith(CI, ConstantVector::get(NewElems));
      }

      // Couldn't simplify - canonicalize constant to the RHS.
      std::swap(Arg0, Arg1);
    }

    // Handle mul by one:
    if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
      if (ConstantInt *Splat =
            dyn_cast_or_null<ConstantInt>(CV1->getSplatValue())) {
        if (Splat->isOne()) {
          if (Zext)
            return CastInst::CreateZExtOrBitCast(Arg0, II->getType());
          // else
          return CastInst::CreateSExtOrBitCast(Arg0, II->getType());
        }
      }
    }

    break;
  }

  case Intrinsic::stackrestore: {
    // If the save is right next to the restore, remove the restore.  This can
    // happen when variable allocas are DCE'd.
    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
        BasicBlock::iterator BI = SS;
        if (&*++BI == II)
          return EraseInstFromFunction(CI);
      }
    }

    // Scan down this block to see if there is another stack restore in the
    // same block without an intervening call/alloca.
    BasicBlock::iterator BI = II;
    TerminatorInst *TI = II->getParent()->getTerminator();
    bool CannotRemove = false;
    for (++BI; &*BI != TI; ++BI) {
      if (isa<AllocaInst>(BI)) {
        CannotRemove = true;
        break;
      }
      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
          // If there is a stackrestore below this one, remove this one.
          if (II->getIntrinsicID() == Intrinsic::stackrestore)
            return EraseInstFromFunction(CI);
          // Otherwise, ignore the intrinsic.
        } else {
          // If we found a non-intrinsic call, we can't remove the stack
          // restore.
          CannotRemove = true;
          break;
        }
      }
    }

    // If the stack restore is in a return or resume block and there are no
    // allocas or calls between the restore and the terminator, nuke the
    // restore.
    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
      return EraseInstFromFunction(CI);
    break;
  }
  }

  return visitCallSite(II);
}

// InvokeInst simplification
//
Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
  return visitCallSite(&II);
}

/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
                                         const CastInst * const CI,
                                         const DataLayout * const TD,
                                         const int ix) {
  if (!CI->isLosslessCast())
    return false;

  // The size of ByVal arguments is derived from the type, so we
  // can't change to a type with a different size.  If the size were
  // passed explicitly we could avoid this check.
  if (!CS.isByValArgument(ix))
    return true;

  Type* SrcTy =
            cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
  if (!SrcTy->isSized() || !DstTy->isSized())
    return false;
  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
    return false;
  return true;
}

// Try to fold calls of a few different kinds here.
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) {
  if (CI->getCalledFunction() == 0) return 0;

  if (Value *With = Simplifier->optimizeCall(CI)) {
    ++NumSimplified;
    return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
  }

  return 0;
}

static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
  // Strip off at most one level of pointer casts, looking for an alloca.  This
  // is good enough in practice and simpler than handling any number of casts.
  Value *Underlying = TrampMem->stripPointerCasts();
  if (Underlying != TrampMem &&
      (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem))
    return 0;
  if (!isa<AllocaInst>(Underlying))
    return 0;

  IntrinsicInst *InitTrampoline = 0;
  for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end();
       I != E; I++) {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I);
    if (!II)
      return 0;
    if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
      if (InitTrampoline)
        // More than one init_trampoline writes to this value.  Give up.
        return 0;
      InitTrampoline = II;
      continue;
    }
    if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
      // Allow any number of calls to adjust.trampoline.
      continue;
    return 0;
  }

  // No call to init.trampoline found.
  if (!InitTrampoline)
    return 0;

  // Check that the alloca is being used in the expected way.
  if (InitTrampoline->getOperand(0) != TrampMem)
    return 0;

  return InitTrampoline;
}

static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
                                               Value *TrampMem) {
  // Visit all the previous instructions in the basic block, and try to find an
  // init.trampoline which has a direct path to the adjust.trampoline.
  for (BasicBlock::iterator I = AdjustTramp,
       E = AdjustTramp->getParent()->begin(); I != E; ) {
    Instruction *Inst = --I;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
          II->getOperand(0) == TrampMem)
        return II;
    if (Inst->mayWriteToMemory())
      return 0;
  }
  return 0;
}

// Given a call to llvm.adjust.trampoline, find and return the corresponding
// call to llvm.init.trampoline if the call to the trampoline can be optimized
// to a direct call to a function.  Otherwise return NULL.
//
static IntrinsicInst *FindInitTrampoline(Value *Callee) {
  Callee = Callee->stripPointerCasts();
  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
  if (!AdjustTramp ||
      AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
    return 0;

  Value *TrampMem = AdjustTramp->getOperand(0);

  if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem))
    return IT;
  if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem))
    return IT;
  return 0;
}

// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
  if (isAllocLikeFn(CS.getInstruction(), TLI))
    return visitAllocSite(*CS.getInstruction());

  bool Changed = false;

  // If the callee is a pointer to a function, attempt to move any casts to the
  // arguments of the call/invoke.
  Value *Callee = CS.getCalledValue();
  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
    return 0;

  if (Function *CalleeF = dyn_cast<Function>(Callee))
    // If the call and callee calling conventions don't match, this call must
    // be unreachable, as the call is undefined.
    if (CalleeF->getCallingConv() != CS.getCallingConv() &&
        // Only do this for calls to a function with a body.  A prototype may
        // not actually end up matching the implementation's calling conv for a
        // variety of reasons (e.g. it may be written in assembly).
        !CalleeF->isDeclaration()) {
      Instruction *OldCall = CS.getInstruction();
      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                    UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                    OldCall);
      // If OldCall does not return void then replaceAllUsesWith undef.
      // This allows ValueHandlers and custom metadata to adjust themselves.
      if (!OldCall->getType()->isVoidTy())
        ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
      if (isa<CallInst>(OldCall))
        return EraseInstFromFunction(*OldCall);

      // We cannot remove an invoke because that would change the CFG; just
      // change the callee to a null pointer.
      cast<InvokeInst>(OldCall)->setCalledFunction(
                                    Constant::getNullValue(CalleeF->getType()));
      return 0;
    }

  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
    // If CS does not return void then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust themselves.
    if (!CS.getInstruction()->getType()->isVoidTy())
      ReplaceInstUsesWith(*CS.getInstruction(),
                          UndefValue::get(CS.getInstruction()->getType()));

    if (isa<InvokeInst>(CS.getInstruction())) {
      // Can't remove an invoke because we cannot change the CFG.
      return 0;
    }

    // This instruction is not reachable; just remove it.  We insert a store to
    // undef so that we know that this code is not reachable, despite the fact
    // that we can't modify the CFG here.
    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                  CS.getInstruction());

    return EraseInstFromFunction(*CS.getInstruction());
  }

  if (IntrinsicInst *II = FindInitTrampoline(Callee))
    return transformCallThroughTrampoline(CS, II);

  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  if (FTy->isVarArg()) {
    int ix = FTy->getNumParams();
    // See if we can optimize any arguments passed through the varargs area of
    // the call.
    for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
           E = CS.arg_end(); I != E; ++I, ++ix) {
      CastInst *CI = dyn_cast<CastInst>(*I);
      if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
        *I = CI->getOperand(0);
        Changed = true;
      }
    }
  }

  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
    // Inline asm calls cannot throw - mark them 'nounwind'.
    CS.setDoesNotThrow();
    Changed = true;
  }

  // Try to optimize the call if possible; we require DataLayout for most of
  // this.  None of these calls are seen as possibly dead so go ahead and
  // delete the instruction now.
  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
    Instruction *I = tryOptimizeCall(CI, TD);
    // If we changed something, return the result; otherwise let the
    // fallthrough below decide.
    if (I) return EraseInstFromFunction(*I);
  }

  return Changed ? CS.getInstruction() : 0;
}

// transformConstExprCastCall - If the callee is a constexpr cast of a function,
// attempt to move the cast to the arguments of the call/invoke.
//
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
  Function *Callee =
    dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
  if (Callee == 0)
    return false;
  Instruction *Caller = CS.getInstruction();
  const AttributeSet &CallerPAL = CS.getAttributes();

  // Okay, this is a cast from a function to a different type.  Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments cast to the appropriate types.
  //
  FunctionType *FT = Callee->getFunctionType();
  Type *OldRetTy = Caller->getType();
  Type *NewRetTy = FT->getReturnType();

  if (NewRetTy->isStructTy())
    return false; // TODO: Handle multiple return values.

  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {
    if (Callee->isDeclaration() &&
        // Conversion is ok if changing from one pointer type to another or from
        // a pointer to an integer of the same size.
        !((OldRetTy->isPointerTy() || !TD ||
           OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
          (NewRetTy->isPointerTy() || !TD ||
           NewRetTy == TD->getIntPtrType(Caller->getContext()))))
      return false;   // Cannot transform this return value.

    if (!Caller->use_empty() &&
        // void -> non-void is handled specially
        !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
      return false;   // Cannot transform this return value.

    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
      if (RAttrs.
          hasAttributes(AttributeFuncs::
                        typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
                        AttributeSet::ReturnIndex))
        return false;   // Attribute not compatible with transformed value.
    }

    // If the callsite is an invoke instruction, and the return value is used by
    // a PHI node in a successor, we cannot change the return type of the call
    // because there is no place to put the cast instruction (without breaking
    // the critical edge).  Bail out in this case.
    if (!Caller->use_empty())
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
        for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
             UI != E; ++UI)
          if (PHINode *PN = dyn_cast<PHINode>(*UI))
            if (PN->getParent() == II->getNormalDest() ||
                PN->getParent() == II->getUnwindDest())
              return false;
  }

  unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);

  CallSite::arg_iterator AI = CS.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);
    Type *ActTy = (*AI)->getType();

    if (!CastInst::isCastable(ActTy, ParamTy))
      return false;   // Cannot transform this parameter value.

    if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
          hasAttributes(AttributeFuncs::
                        typeIncompatible(ParamTy, i + 1), i + 1))
      return false;   // Attribute not compatible with transformed value.

    // If the parameter is passed as a byval argument, then we have to have a
    // sized type and the sized type has to have the same size as the old type.
    if (ParamTy != ActTy &&
        CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
                                                         Attribute::ByVal)) {
      PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
      if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
        return false;

      Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
      if (TD->getTypeAllocSize(CurElTy) !=
          TD->getTypeAllocSize(ParamPTy->getElementType()))
        return false;
    }

    // Converting from one pointer type to another or between a pointer and an
    // integer of the same size is safe even if we do not have a body.
    bool isConvertible = ActTy == ParamTy ||
      (TD && ((ParamTy->isPointerTy() ||
      ParamTy == TD->getIntPtrType(Caller->getContext())) &&
              (ActTy->isPointerTy() ||
              ActTy == TD->getIntPtrType(Caller->getContext()))));
    if (Callee->isDeclaration() && !isConvertible) return false;
  }

  if (Callee->isDeclaration()) {
    // Do not delete arguments unless we have a function body.
    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
      return false;

    // If the callee is just a declaration, don't change the varargsness of the
    // call.  We don't want to introduce a varargs call where one doesn't
    // already exist.
    PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
      return false;

    // If both the callee and the cast type are varargs, we still have to make
    // sure the number of fixed parameters is the same or we have the same
    // ABI issues as if we introduce a varargs call.
    if (FT->isVarArg() &&
        cast<FunctionType>(APTy->getElementType())->isVarArg() &&
        FT->getNumParams() !=
        cast<FunctionType>(APTy->getElementType())->getNumParams())
      return false;
  }

  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty())
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them.  Check that these extra arguments have attributes
    // that are compatible with being a vararg call argument.
    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
      unsigned Index = CallerPAL.getSlotIndex(i - 1);
      if (Index <= FT->getNumParams())
        break;

      // Check if it has an attribute that's incompatible with varargs.
      AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1);
      if (PAttrs.hasAttribute(Index, Attribute::StructRet))
        return false;
    }

  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary.
  std::vector<Value*> Args;
  Args.reserve(NumActualArgs);
  SmallVector<AttributeSet, 8> attrVec;
  attrVec.reserve(NumCommonArgs);

  // Get any return attributes.
  AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes.  Wipe out any problematic attributes.
  RAttrs.
    removeAttributes(AttributeFuncs::
                     typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
                     AttributeSet::ReturnIndex);

  // Add the new return attributes.
  if (RAttrs.hasAttributes())
    attrVec.push_back(AttributeSet::get(Caller->getContext(),
                                        AttributeSet::ReturnIndex, RAttrs));

  AI = CS.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);
    if ((*AI)->getType() == ParamTy) {
      Args.push_back(*AI);
    } else {
      Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
          false, ParamTy, false);
      Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy));
    }

    // Add any parameter attributes.
    AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
    if (PAttrs.hasAttributes())
      attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1,
                                          PAttrs));
  }

  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
    Args.push_back(Constant::getNullValue(FT->getParamType(i)));

  // If the call supplies more arguments than the function has fixed
  // parameters, pass the extras through the varargs area when the new type
  // is varargs; otherwise they are dropped.
  if (FT->getNumParams() < NumActualArgs) {
    // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
    if (FT->isVarArg()) {
      // Add all of the arguments in their promoted form to the arg list.
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        Type *PTy = getPromotedType((*AI)->getType());
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
          Instruction::CastOps opcode =
            CastInst::getCastOpcode(*AI, false, PTy, false);
          Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
        } else {
          Args.push_back(*AI);
        }

        // Add any parameter attributes.
        AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
        if (PAttrs.hasAttributes())
          attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1,
                                              PAttrs));
      }
    }
  }

  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
  if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex))
    attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs));

  if (NewRetTy->isVoidTy())
    Caller->setName("");   // Void type should not have a name.

  const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
                                                       attrVec);

  Instruction *NC;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
                               II->getUnwindDest(), Args);
    NC->takeName(II);
    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
  } else {
    CallInst *CI = cast<CallInst>(Caller);
    NC = Builder->CreateCall(Callee, Args);
    NC->takeName(CI);
    if (CI->isTailCall())
      cast<CallInst>(NC)->setTailCall();
    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
  }

  // Insert a cast of the return type as necessary.
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    if (!NV->getType()->isVoidTy()) {
      Instruction::CastOps opcode =
        CastInst::getCastOpcode(NC, false, OldRetTy, false);
      NV = NC = CastInst::Create(opcode, NC, OldRetTy);
      NC->setDebugLoc(Caller->getDebugLoc());

      // If this is an invoke instruction, we should insert it after the first
      // non-PHI instruction in the normal successor block.
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
        InsertNewInstBefore(NC, *I);
      } else {
        // Otherwise, it's a call; just insert the cast right after the call.
        InsertNewInstBefore(NC, *Caller);
      }
      Worklist.AddUsersToWorkList(*Caller);
    } else {
      NV = UndefValue::get(Caller->getType());
    }
  }

  if (!Caller->use_empty())
    ReplaceInstUsesWith(*Caller, NV);

  EraseInstFromFunction(*Caller);
  return true;
}

// transformCallThroughTrampoline - Turn a call to a function created by an
// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the
// underlying function.
//
Instruction *
InstCombiner::transformCallThroughTrampoline(CallSite CS,
                                             IntrinsicInst *Tramp) {
  Value *Callee = CS.getCalledValue();
  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  const AttributeSet &Attrs = CS.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return 0;

  assert(Tramp &&
         "transformCallThroughTrampoline called with incorrect CallSite.");

  Function *NestF =
    cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
  PointerType *NestFPTy = cast<PointerType>(NestF->getType());
  FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());

  const AttributeSet &NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestIdx = 1;
    Type *NestTy = 0;
    AttributeSet NestAttr;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
      if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = NestAttrs.getParamAttributes(NestIdx);
        break;
      }

    if (NestTy) {
      Instruction *Caller = CS.getInstruction();
      std::vector<Value*> NewArgs;
      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);

      SmallVector<AttributeSet, 8> NewAttrs;
      NewAttrs.reserve(Attrs.getNumSlots() + 1);

      // Insert the nest argument into the call argument list, which may
      // mean appending it.  Likewise for attributes.

      // Add any result attributes.
      if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
        NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                             Attrs.getRetAttributes()));

      {
        unsigned Idx = 1;
        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
        do {
          if (Idx == NestIdx) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp->getArgOperand(2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
            NewArgs.push_back(NestVal);
            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                                 NestAttr));
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(*I);
          AttributeSet Attr = Attrs.getParamAttributes(Idx);
          if (Attr.hasAttributes(Idx)) {
            AttrBuilder B(Attr, Idx);
            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                                 Idx + (Idx >= NestIdx), B));
          }

          ++Idx, ++I;
        } while (1);
      }

      // Add any function attributes.
      if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
        NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
                                             Attrs.getFnAttributes()));

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.

      std::vector<Type*> NewTypes;
      NewTypes.reserve(FTy->getNumParams()+1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned Idx = 1;
        FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end();

        do {
          if (Idx == NestIdx)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++Idx, ++I;
        } while (1);
      }

      // Replace the trampoline call with a direct call.  Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                                FTy->isVarArg());
      Constant *NewCallee =
        NestF->getType() == PointerType::getUnqual(NewFTy) ?
        NestF : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
      const AttributeSet &NewPAL =
        AttributeSet::get(FTy->getContext(), NewAttrs);

      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        NewCaller = InvokeInst::Create(NewCallee,
                                       II->getNormalDest(), II->getUnwindDest(),
                                       NewArgs);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewCallee, NewArgs);
        if (cast<CallInst>(Caller)->isTailCall())
          cast<CallInst>(NewCaller)->setTailCall();
        cast<CallInst>(NewCaller)->
          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }

      return NewCaller;
    }
  }

  // Replace the trampoline call with a direct call.  Since there is no 'nest'
  // parameter, there is no need to adjust the argument list.  Let the generic
  // code sort out any function type mismatches.
  Constant *NewCallee =
    NestF->getType() == PTy ? NestF :
                              ConstantExpr::getBitCast(NestF, PTy);
  CS.setCalledFunction(NewCallee);
  return CS.getInstruction();
}