InstCombineCalls.cpp revision 8e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6
1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the visitCall and visitInvoke functions.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InstCombine.h"
15#include "llvm/Support/CallSite.h"
16#include "llvm/Target/TargetData.h"
17#include "llvm/Analysis/MemoryBuiltins.h"
18#include "llvm/Transforms/Utils/BuildLibCalls.h"
19#include "llvm/Transforms/Utils/Local.h"
20using namespace llvm;
21
/// getPromotedType - Return the specified type promoted as it would be to pass
/// through a va_arg area.
static Type *getPromotedType(Type *Ty) {
  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}
31
32
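// SimplifyMemTransfer - Raise the alignment on a memcpy/memmove when the known
// alignment of its operands is better, and turn small, power-of-two sized
// copies into a single load/store pair.  An illustrative sketch of the second
// fold (IR not taken from the original file):
//
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 8, i32 8,
//                                        i1 false)
// becomes
//   %v = load i64* %s1, align 8      ; %s1 = bitcast i8* %s to i64*
//   store i64 %v, i64* %d1, align 8  ; %d1 = bitcast i8* %d to i64*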
33Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
34  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
35  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD);
36  unsigned MinAlign = std::min(DstAlign, SrcAlign);
37  unsigned CopyAlign = MI->getAlignment();
38
39  if (CopyAlign < MinAlign) {
40    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
41                                             MinAlign, false));
42    return MI;
43  }
44
  // If the memcpy/memmove length is a constant 1/2/4/8 bytes, replace it with
  // a single load/store pair.
47  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
48  if (MemOpLength == 0) return 0;
49
  // Source and destination pointer types are always "i8*" for the intrinsic.
  // See if the size is something we can handle with a single primitive
  // load/store.  A single load+store correctly handles overlapping memory in
  // the memmove case.
  uint64_t Size = MemOpLength->getLimitedValue();
  assert(Size && "0-sized memory transferring should be removed already.");
56
57  if (Size > 8 || (Size&(Size-1)))
58    return 0;  // If not 1/2/4/8 bytes, exit.
59
60  // Use an integer load+store unless we can find something better.
61  unsigned SrcAddrSp =
62    cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
63  unsigned DstAddrSp =
64    cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
65
66  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
67  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
68  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
69
  // Memcpy forces the use of i8* for the source and destination.  That means
  // that if you're using memcpy to move one double around, you'll get a cast
  // from double* to i8*.  We'd much rather use a double load+store than an
  // i64 load+store here, because that improves the odds that the source or
  // dest address will be promotable.  See if we can find a better type than
  // the integer datatype.
76  Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
77  if (StrippedDest != MI->getArgOperand(0)) {
78    Type *SrcETy = cast<PointerType>(StrippedDest->getType())
79                                    ->getElementType();
80    if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
81      // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
82      // down through these levels if so.
83      while (!SrcETy->isSingleValueType()) {
84        if (StructType *STy = dyn_cast<StructType>(SrcETy)) {
85          if (STy->getNumElements() == 1)
86            SrcETy = STy->getElementType(0);
87          else
88            break;
89        } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
90          if (ATy->getNumElements() == 1)
91            SrcETy = ATy->getElementType();
92          else
93            break;
94        } else
95          break;
96      }
97
98      if (SrcETy->isSingleValueType()) {
99        NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
100        NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
101      }
102    }
103  }
104
105
106  // If the memcpy/memmove provides better alignment info than we can
107  // infer, use it.
108  SrcAlign = std::max(SrcAlign, CopyAlign);
109  DstAlign = std::max(DstAlign, CopyAlign);
110
111  Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
112  Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
113  LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
114  L->setAlignment(SrcAlign);
115  StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
116  S->setAlignment(DstAlign);
117
  // Set the size of the copy to 0; it will be deleted on the next iteration.
119  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
120  return MI;
121}
122
123Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
124  unsigned Alignment = getKnownAlignment(MI->getDest(), TD);
125  if (MI->getAlignment() < Alignment) {
126    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
127                                             Alignment, false));
128    return MI;
129  }
130
131  // Extract the length and alignment and fill if they are constant.
132  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
133  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
134  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
135    return 0;
136  uint64_t Len = LenC->getLimitedValue();
137  Alignment = MI->getAlignment();
138  assert(Len && "0-sized memory setting should be removed already.");
139
140  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
141  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
142    Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.
143
144    Value *Dest = MI->getDest();
145    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
146    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
147    Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);
148
    // Alignment 0 is equivalent to alignment 1 for memset, but not for store,
    // so make the alignment explicit.
150    if (Alignment == 0) Alignment = 1;
151
152    // Extract the fill value and store.
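    // Multiplying the byte by 0x0101010101010101 replicates it into every
    // byte lane; e.g. (illustrative) 0xAB becomes 0xABABABABABABABAB, which
    // ConstantInt::get then truncates to the i8/i16/i32/i64 store width.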
153    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
154    StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
155                                        MI->isVolatile());
156    S->setAlignment(Alignment);
157
    // Set the size of the memset to 0; it will be deleted on the next
    // iteration.
159    MI->setLength(Constant::getNullValue(LenC->getType()));
160    return MI;
161  }
162
163  return 0;
164}
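
// Illustrative example of the memset fold above (IR not taken from the
// original file):
//   call void @llvm.memset.p0i8.i64(i8* %p, i8 -86, i64 4, i32 4, i1 false)
// becomes
//   store i32 -1431655766, i32* %p1, align 4  ; 0xAAAAAAAA, %p1 = bitcast %p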
165
/// visitCallInst - CallInst simplification.  This mostly handles folding of
/// intrinsic instructions.  For normal calls, it lets visitCallSite do the
/// heavy lifting.
///
170Instruction *InstCombiner::visitCallInst(CallInst &CI) {
171  if (isFreeCall(&CI, TLI))
172    return visitFree(CI);
173
174  // If the caller function is nounwind, mark the call as nounwind, even if the
175  // callee isn't.
176  if (CI.getParent()->getParent()->doesNotThrow() &&
177      !CI.doesNotThrow()) {
178    CI.setDoesNotThrow();
179    return &CI;
180  }
181
182  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
183  if (!II) return visitCallSite(&CI);
184
185  // Intrinsics cannot occur in an invoke, so handle them here instead of in
186  // visitCallSite.
187  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
188    bool Changed = false;
189
190    // memmove/cpy/set of zero bytes is a noop.
191    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
192      if (NumBytes->isNullValue())
193        return EraseInstFromFunction(CI);
194
195      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
196        if (CI->getZExtValue() == 1) {
197          // Replace the instruction with just byte operations.  We would
198          // transform other cases to loads/stores, but we don't know if
199          // alignment is sufficient.
200        }
201    }
202
203    // No other transformations apply to volatile transfers.
204    if (MI->isVolatile())
205      return 0;
206
    // If we have a memmove and the source operand is a constant global,
    // then the source and dest pointers can't alias, so we can change this
    // into a call to memcpy.
210    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
211      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
212        if (GVSrc->isConstant()) {
213          Module *M = CI.getParent()->getParent()->getParent();
214          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
215          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
216                           CI.getArgOperand(1)->getType(),
217                           CI.getArgOperand(2)->getType() };
218          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
219          Changed = true;
220        }
221    }
222
223    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
224      // memmove(x,x,size) -> noop.
225      if (MTI->getSource() == MTI->getDest())
226        return EraseInstFromFunction(CI);
227    }
228
229    // If we can determine a pointer alignment that is bigger than currently
230    // set, update the alignment.
231    if (isa<MemTransferInst>(MI)) {
232      if (Instruction *I = SimplifyMemTransfer(MI))
233        return I;
234    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
235      if (Instruction *I = SimplifyMemSet(MSI))
236        return I;
237    }
238
239    if (Changed) return II;
240  }
241
242  switch (II->getIntrinsicID()) {
243  default: break;
244  case Intrinsic::objectsize: {
245    uint64_t Size;
246    if (getObjectSize(II->getArgOperand(0), Size, TD, TLI))
247      return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
248    return 0;
249  }
250  case Intrinsic::bswap:
251    // bswap(bswap(x)) -> x
252    if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
253      if (Operand->getIntrinsicID() == Intrinsic::bswap)
254        return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
255
256    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
257    if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
258      if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
259        if (Operand->getIntrinsicID() == Intrinsic::bswap) {
260          unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
261                       TI->getType()->getPrimitiveSizeInBits();
262          Value *CV = ConstantInt::get(Operand->getType(), C);
263          Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV);
264          return new TruncInst(V, TI->getType());
265        }
266    }
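    // Concrete illustration (not in the original): with %s = bswap(i64 %x)
    // and %t = trunc i64 %s to i32, C is 64 - 32 = 32, so bswap(%t) becomes
    // trunc(lshr(%x, 32)) - the high 32 bits of %x in their original order.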
267
268    break;
269  case Intrinsic::powi:
270    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
271      // powi(x, 0) -> 1.0
272      if (Power->isZero())
273        return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
274      // powi(x, 1) -> x
275      if (Power->isOne())
276        return ReplaceInstUsesWith(CI, II->getArgOperand(0));
277      // powi(x, -1) -> 1/x
278      if (Power->isAllOnesValue())
279        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
280                                          II->getArgOperand(0));
281    }
282    break;
283  case Intrinsic::cttz: {
284    // If all bits below the first known one are known zero,
285    // this value is constant.
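    // e.g. (illustrative) if %x is known to have the form ???...?1000b, the
    // three low bits are known zero and bit 3 is known one, so cttz(%x)
    // folds to the constant 3.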
286    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
287    // FIXME: Try to simplify vectors of integers.
288    if (!IT) break;
289    uint32_t BitWidth = IT->getBitWidth();
290    APInt KnownZero(BitWidth, 0);
291    APInt KnownOne(BitWidth, 0);
292    ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
293    unsigned TrailingZeros = KnownOne.countTrailingZeros();
294    APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
295    if ((Mask & KnownZero) == Mask)
296      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
297                                 APInt(BitWidth, TrailingZeros)));
298
299    }
300    break;
301  case Intrinsic::ctlz: {
302    // If all bits above the first known one are known zero,
303    // this value is constant.
304    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
305    // FIXME: Try to simplify vectors of integers.
306    if (!IT) break;
307    uint32_t BitWidth = IT->getBitWidth();
308    APInt KnownZero(BitWidth, 0);
309    APInt KnownOne(BitWidth, 0);
310    ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
311    unsigned LeadingZeros = KnownOne.countLeadingZeros();
312    APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
313    if ((Mask & KnownZero) == Mask)
314      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
315                                 APInt(BitWidth, LeadingZeros)));
316
317    }
318    break;
319  case Intrinsic::uadd_with_overflow: {
320    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
321    IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
322    uint32_t BitWidth = IT->getBitWidth();
323    APInt LHSKnownZero(BitWidth, 0);
324    APInt LHSKnownOne(BitWidth, 0);
325    ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
326    bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
327    bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
328
329    if (LHSKnownNegative || LHSKnownPositive) {
330      APInt RHSKnownZero(BitWidth, 0);
331      APInt RHSKnownOne(BitWidth, 0);
332      ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
333      bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
334      bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
335      if (LHSKnownNegative && RHSKnownNegative) {
336        // The sign bit is set in both cases: this MUST overflow.
337        // Create a simple add instruction, and insert it into the struct.
338        Value *Add = Builder->CreateAdd(LHS, RHS);
339        Add->takeName(&CI);
340        Constant *V[] = {
341          UndefValue::get(LHS->getType()),
342          ConstantInt::getTrue(II->getContext())
343        };
344        StructType *ST = cast<StructType>(II->getType());
345        Constant *Struct = ConstantStruct::get(ST, V);
346        return InsertValueInst::Create(Struct, Add, 0);
347      }
348
349      if (LHSKnownPositive && RHSKnownPositive) {
350        // The sign bit is clear in both cases: this CANNOT overflow.
351        // Create a simple add instruction, and insert it into the struct.
352        Value *Add = Builder->CreateNUWAdd(LHS, RHS);
353        Add->takeName(&CI);
354        Constant *V[] = {
355          UndefValue::get(LHS->getType()),
356          ConstantInt::getFalse(II->getContext())
357        };
358        StructType *ST = cast<StructType>(II->getType());
359        Constant *Struct = ConstantStruct::get(ST, V);
360        return InsertValueInst::Create(Struct, Add, 0);
361      }
362    }
363  }
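  // Illustrative result of the known-sign fold above (not in the original):
  // with both i8 operands known negative,
  //   %r = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %x, i8 %y)
  // becomes
  //   %a = add i8 %x, %y
  //   %r = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 %a, 0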
364  // FALL THROUGH uadd into sadd
365  case Intrinsic::sadd_with_overflow:
366    // Canonicalize constants into the RHS.
367    if (isa<Constant>(II->getArgOperand(0)) &&
368        !isa<Constant>(II->getArgOperand(1))) {
369      Value *LHS = II->getArgOperand(0);
370      II->setArgOperand(0, II->getArgOperand(1));
371      II->setArgOperand(1, LHS);
372      return II;
373    }
374
375    // X + undef -> undef
376    if (isa<UndefValue>(II->getArgOperand(1)))
377      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
378
379    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
380      // X + 0 -> {X, false}
381      if (RHS->isZero()) {
382        Constant *V[] = {
383          UndefValue::get(II->getArgOperand(0)->getType()),
384          ConstantInt::getFalse(II->getContext())
385        };
386        Constant *Struct =
387          ConstantStruct::get(cast<StructType>(II->getType()), V);
388        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
389      }
390    }
391    break;
392  case Intrinsic::usub_with_overflow:
393  case Intrinsic::ssub_with_overflow:
394    // undef - X -> undef
395    // X - undef -> undef
396    if (isa<UndefValue>(II->getArgOperand(0)) ||
397        isa<UndefValue>(II->getArgOperand(1)))
398      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
399
400    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
401      // X - 0 -> {X, false}
402      if (RHS->isZero()) {
403        Constant *V[] = {
404          UndefValue::get(II->getArgOperand(0)->getType()),
405          ConstantInt::getFalse(II->getContext())
406        };
407        Constant *Struct =
408          ConstantStruct::get(cast<StructType>(II->getType()), V);
409        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
410      }
411    }
412    break;
413  case Intrinsic::umul_with_overflow: {
414    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
415    unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();
416
417    APInt LHSKnownZero(BitWidth, 0);
418    APInt LHSKnownOne(BitWidth, 0);
419    ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
420    APInt RHSKnownZero(BitWidth, 0);
421    APInt RHSKnownOne(BitWidth, 0);
422    ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
423
424    // Get the largest possible values for each operand.
425    APInt LHSMax = ~LHSKnownZero;
426    APInt RHSMax = ~RHSKnownZero;
427
428    // If multiplying the maximum values does not overflow then we can turn
429    // this into a plain NUW mul.
430    bool Overflow;
431    LHSMax.umul_ov(RHSMax, Overflow);
432    if (!Overflow) {
433      Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
434      Constant *V[] = {
435        UndefValue::get(LHS->getType()),
436        Builder->getFalse()
437      };
438      Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
439      return InsertValueInst::Create(Struct, Mul, 0);
440    }
441  } // FALL THROUGH
442  case Intrinsic::smul_with_overflow:
443    // Canonicalize constants into the RHS.
444    if (isa<Constant>(II->getArgOperand(0)) &&
445        !isa<Constant>(II->getArgOperand(1))) {
446      Value *LHS = II->getArgOperand(0);
447      II->setArgOperand(0, II->getArgOperand(1));
448      II->setArgOperand(1, LHS);
449      return II;
450    }
451
452    // X * undef -> undef
453    if (isa<UndefValue>(II->getArgOperand(1)))
454      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
455
456    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
457      // X*0 -> {0, false}
458      if (RHSI->isZero())
459        return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
460
461      // X * 1 -> {X, false}
462      if (RHSI->equalsInt(1)) {
463        Constant *V[] = {
464          UndefValue::get(II->getArgOperand(0)->getType()),
465          ConstantInt::getFalse(II->getContext())
466        };
467        Constant *Struct =
468          ConstantStruct::get(cast<StructType>(II->getType()), V);
469        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
470      }
471    }
472    break;
473  case Intrinsic::ppc_altivec_lvx:
474  case Intrinsic::ppc_altivec_lvxl:
475    // Turn PPC lvx -> load if the pointer is known aligned.
476    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
477      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
478                                         PointerType::getUnqual(II->getType()));
479      return new LoadInst(Ptr);
480    }
481    break;
482  case Intrinsic::ppc_altivec_stvx:
483  case Intrinsic::ppc_altivec_stvxl:
484    // Turn stvx -> store if the pointer is known aligned.
485    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
486      Type *OpPtrTy =
487        PointerType::getUnqual(II->getArgOperand(0)->getType());
488      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
489      return new StoreInst(II->getArgOperand(0), Ptr);
490    }
491    break;
492  case Intrinsic::x86_sse_storeu_ps:
493  case Intrinsic::x86_sse2_storeu_pd:
494  case Intrinsic::x86_sse2_storeu_dq:
495    // Turn X86 storeu -> store if the pointer is known aligned.
496    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
497      Type *OpPtrTy =
498        PointerType::getUnqual(II->getArgOperand(1)->getType());
499      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
500      return new StoreInst(II->getArgOperand(1), Ptr);
501    }
502    break;
503
504  case Intrinsic::x86_sse_cvtss2si:
505  case Intrinsic::x86_sse_cvtss2si64:
506  case Intrinsic::x86_sse_cvttss2si:
507  case Intrinsic::x86_sse_cvttss2si64:
508  case Intrinsic::x86_sse2_cvtsd2si:
509  case Intrinsic::x86_sse2_cvtsd2si64:
510  case Intrinsic::x86_sse2_cvttsd2si:
511  case Intrinsic::x86_sse2_cvttsd2si64: {
512    // These intrinsics only demand the 0th element of their input vectors. If
513    // we can simplify the input based on that, do so now.
514    unsigned VWidth =
515      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
516    APInt DemandedElts(VWidth, 1);
517    APInt UndefElts(VWidth, 0);
518    if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
519                                              DemandedElts, UndefElts)) {
520      II->setArgOperand(0, V);
521      return II;
522    }
523    break;
524  }
525
526
527  case Intrinsic::x86_sse41_pmovsxbw:
528  case Intrinsic::x86_sse41_pmovsxwd:
529  case Intrinsic::x86_sse41_pmovsxdq:
530  case Intrinsic::x86_sse41_pmovzxbw:
531  case Intrinsic::x86_sse41_pmovzxwd:
532  case Intrinsic::x86_sse41_pmovzxdq: {
    // pmov{s|z}x ignores the upper half of its input vector.
534    unsigned VWidth =
535      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
536    unsigned LowHalfElts = VWidth / 2;
537    APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
538    APInt UndefElts(VWidth, 0);
539    if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
540                                                 InputDemandedElts,
541                                                 UndefElts)) {
542      II->setArgOperand(0, TmpV);
543      return II;
544    }
545    break;
546  }
547
548  case Intrinsic::ppc_altivec_vperm:
549    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
550    if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
551      assert(Mask->getType()->getVectorNumElements() == 16 &&
552             "Bad type for intrinsic!");
553
554      // Check that all of the elements are integer constants or undefs.
555      bool AllEltsOk = true;
556      for (unsigned i = 0; i != 16; ++i) {
557        Constant *Elt = Mask->getAggregateElement(i);
558        if (Elt == 0 ||
559            !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
560          AllEltsOk = false;
561          break;
562        }
563      }
564
565      if (AllEltsOk) {
566        // Cast the input vectors to byte vectors.
567        Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
568                                            Mask->getType());
569        Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
570                                            Mask->getType());
571        Value *Result = UndefValue::get(Op0->getType());
572
573        // Only extract each element once.
574        Value *ExtractedElts[32];
575        memset(ExtractedElts, 0, sizeof(ExtractedElts));
576
577        for (unsigned i = 0; i != 16; ++i) {
578          if (isa<UndefValue>(Mask->getAggregateElement(i)))
579            continue;
580          unsigned Idx =
581            cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
582          Idx &= 31;  // Match the hardware behavior.
583
584          if (ExtractedElts[Idx] == 0) {
585            ExtractedElts[Idx] =
586              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
587                                            Builder->getInt32(Idx&15));
588          }
589
590          // Insert this value into the result vector.
591          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
592                                                Builder->getInt32(i));
593        }
594        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
595      }
596    }
597    break;
598
599  case Intrinsic::arm_neon_vld1:
600  case Intrinsic::arm_neon_vld2:
601  case Intrinsic::arm_neon_vld3:
602  case Intrinsic::arm_neon_vld4:
603  case Intrinsic::arm_neon_vld2lane:
604  case Intrinsic::arm_neon_vld3lane:
605  case Intrinsic::arm_neon_vld4lane:
606  case Intrinsic::arm_neon_vst1:
607  case Intrinsic::arm_neon_vst2:
608  case Intrinsic::arm_neon_vst3:
609  case Intrinsic::arm_neon_vst4:
610  case Intrinsic::arm_neon_vst2lane:
611  case Intrinsic::arm_neon_vst3lane:
612  case Intrinsic::arm_neon_vst4lane: {
613    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD);
614    unsigned AlignArg = II->getNumArgOperands() - 1;
615    ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
616    if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
617      II->setArgOperand(AlignArg,
618                        ConstantInt::get(Type::getInt32Ty(II->getContext()),
619                                         MemAlign, false));
620      return II;
621    }
622    break;
623  }
624
625  case Intrinsic::arm_neon_vmulls:
626  case Intrinsic::arm_neon_vmullu: {
627    Value *Arg0 = II->getArgOperand(0);
628    Value *Arg1 = II->getArgOperand(1);
629
630    // Handle mul by zero first:
631    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
632      return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
633    }
634
635    // Check for constant LHS & RHS - in this case we just simplify.
636    bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu);
637    VectorType *NewVT = cast<VectorType>(II->getType());
638    unsigned NewWidth = NewVT->getElementType()->getIntegerBitWidth();
639    if (ConstantDataVector *CV0 = dyn_cast<ConstantDataVector>(Arg0)) {
640      if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
641        VectorType* VT = cast<VectorType>(CV0->getType());
642        SmallVector<Constant*, 4> NewElems;
643        for (unsigned i = 0; i < VT->getNumElements(); ++i) {
644          APInt CV0E =
645            (cast<ConstantInt>(CV0->getAggregateElement(i)))->getValue();
646          CV0E = Zext ? CV0E.zext(NewWidth) : CV0E.sext(NewWidth);
647          APInt CV1E =
648            (cast<ConstantInt>(CV1->getAggregateElement(i)))->getValue();
649          CV1E = Zext ? CV1E.zext(NewWidth) : CV1E.sext(NewWidth);
650          NewElems.push_back(
651            ConstantInt::get(NewVT->getElementType(), CV0E * CV1E));
652        }
653        return ReplaceInstUsesWith(CI, ConstantVector::get(NewElems));
654      }
655
      // Couldn't simplify - canonicalize constant to the RHS.
657      std::swap(Arg0, Arg1);
658    }
659
660    // Handle mul by one:
661    if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
662      if (ConstantInt *Splat =
663            dyn_cast_or_null<ConstantInt>(CV1->getSplatValue())) {
664        if (Splat->isOne()) {
665          if (Zext)
666            return CastInst::CreateZExtOrBitCast(Arg0, II->getType());
667          // else
668          return CastInst::CreateSExtOrBitCast(Arg0, II->getType());
669        }
670      }
671    }
672
673    break;
674  }
675
676  case Intrinsic::stackrestore: {
677    // If the save is right next to the restore, remove the restore.  This can
678    // happen when variable allocas are DCE'd.
679    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
680      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
681        BasicBlock::iterator BI = SS;
682        if (&*++BI == II)
683          return EraseInstFromFunction(CI);
684      }
685    }
686
687    // Scan down this block to see if there is another stack restore in the
688    // same block without an intervening call/alloca.
689    BasicBlock::iterator BI = II;
690    TerminatorInst *TI = II->getParent()->getTerminator();
691    bool CannotRemove = false;
692    for (++BI; &*BI != TI; ++BI) {
693      if (isa<AllocaInst>(BI)) {
694        CannotRemove = true;
695        break;
696      }
697      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
698        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
699          // If there is a stackrestore below this one, remove this one.
700          if (II->getIntrinsicID() == Intrinsic::stackrestore)
701            return EraseInstFromFunction(CI);
702          // Otherwise, ignore the intrinsic.
703        } else {
704          // If we found a non-intrinsic call, we can't remove the stack
705          // restore.
706          CannotRemove = true;
707          break;
708        }
709      }
710    }
711
    // If the stack restore is in a return or resume block and there are no
    // allocas or calls between the restore and the terminator, nuke the
    // restore.
715    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
716      return EraseInstFromFunction(CI);
717    break;
718  }
719  }
720
721  return visitCallSite(II);
722}
723
724// InvokeInst simplification
725//
726Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
727  return visitCallSite(&II);
728}
729
730/// isSafeToEliminateVarargsCast - If this cast does not affect the value
731/// passed through the varargs area, we can eliminate the use of the cast.
732static bool isSafeToEliminateVarargsCast(const CallSite CS,
733                                         const CastInst * const CI,
734                                         const TargetData * const TD,
735                                         const int ix) {
736  if (!CI->isLosslessCast())
737    return false;
738
739  // The size of ByVal arguments is derived from the type, so we
740  // can't change to a type with a different size.  If the size were
741  // passed explicitly we could avoid this check.
742  if (!CS.isByValArgument(ix))
743    return true;
744
745  Type* SrcTy =
746            cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
747  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
748  if (!SrcTy->isSized() || !DstTy->isSized())
749    return false;
750  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
751    return false;
752  return true;
753}
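
// Illustrative example of the vararg cast elimination above (IR not taken
// from the original file): given
//   %c = bitcast i32* %q to i8*
//   call void (i8*, ...)* @takes_varargs(i8* %fmt, i8* %c)
// the bitcast is lossless and the argument is not byval, so the call can
// pass %q through the varargs area directly.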
754
755namespace {
756class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls {
757  InstCombiner *IC;
758protected:
759  void replaceCall(Value *With) {
760    NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
761  }
762  bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
763    if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
764      return true;
765    if (ConstantInt *SizeCI =
766                           dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
767      if (SizeCI->isAllOnesValue())
768        return true;
769      if (isString) {
770        uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
771        // If the length is 0 we don't know how long it is and so we can't
772        // remove the check.
773        if (Len == 0) return false;
774        return SizeCI->getZExtValue() >= Len;
775      }
776      if (ConstantInt *Arg = dyn_cast<ConstantInt>(
777                                                  CI->getArgOperand(SizeArgOp)))
778        return SizeCI->getZExtValue() >= Arg->getZExtValue();
779    }
780    return false;
781  }
782public:
783  InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { }
784  Instruction *NewInstruction;
785};
786} // end anonymous namespace
787
// Try to fold some different types of calls here.
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
792Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
793  if (CI->getCalledFunction() == 0) return 0;
794
795  InstCombineFortifiedLibCalls Simplifier(this);
796  Simplifier.fold(CI, TD, TLI);
797  return Simplifier.NewInstruction;
798}
799
800static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
801  // Strip off at most one level of pointer casts, looking for an alloca.  This
802  // is good enough in practice and simpler than handling any number of casts.
803  Value *Underlying = TrampMem->stripPointerCasts();
804  if (Underlying != TrampMem &&
805      (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem))
806    return 0;
807  if (!isa<AllocaInst>(Underlying))
808    return 0;
809
810  IntrinsicInst *InitTrampoline = 0;
811  for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end();
812       I != E; I++) {
813    IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I);
814    if (!II)
815      return 0;
816    if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
817      if (InitTrampoline)
818        // More than one init_trampoline writes to this value.  Give up.
819        return 0;
820      InitTrampoline = II;
821      continue;
822    }
823    if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
824      // Allow any number of calls to adjust.trampoline.
825      continue;
826    return 0;
827  }
828
829  // No call to init.trampoline found.
830  if (!InitTrampoline)
831    return 0;
832
833  // Check that the alloca is being used in the expected way.
834  if (InitTrampoline->getOperand(0) != TrampMem)
835    return 0;
836
837  return InitTrampoline;
838}
839
840static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
841                                               Value *TrampMem) {
  // Visit all the previous instructions in the basic block, and try to find an
  // init.trampoline which has a direct path to the adjust.trampoline.
  for (BasicBlock::iterator I = AdjustTramp,
       E = AdjustTramp->getParent()->begin(); I != E; ) {
    Instruction *Inst = --I;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
          II->getOperand(0) == TrampMem)
        return II;
    if (Inst->mayWriteToMemory())
      return 0;
  }
854  return 0;
855}
856
857// Given a call to llvm.adjust.trampoline, find and return the corresponding
858// call to llvm.init.trampoline if the call to the trampoline can be optimized
859// to a direct call to a function.  Otherwise return NULL.
860//
861static IntrinsicInst *FindInitTrampoline(Value *Callee) {
862  Callee = Callee->stripPointerCasts();
863  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
864  if (!AdjustTramp ||
865      AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
866    return 0;
867
868  Value *TrampMem = AdjustTramp->getOperand(0);
869
870  if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem))
871    return IT;
872  if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem))
873    return IT;
874  return 0;
875}
876
877// visitCallSite - Improvements for call and invoke instructions.
878//
879Instruction *InstCombiner::visitCallSite(CallSite CS) {
880  if (isAllocLikeFn(CS.getInstruction(), TLI))
881    return visitAllocSite(*CS.getInstruction());
882
883  bool Changed = false;
884
885  // If the callee is a pointer to a function, attempt to move any casts to the
886  // arguments of the call/invoke.
887  Value *Callee = CS.getCalledValue();
888  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
889    return 0;
890
891  if (Function *CalleeF = dyn_cast<Function>(Callee))
892    // If the call and callee calling conventions don't match, this call must
893    // be unreachable, as the call is undefined.
894    if (CalleeF->getCallingConv() != CS.getCallingConv() &&
895        // Only do this for calls to a function with a body.  A prototype may
896        // not actually end up matching the implementation's calling conv for a
897        // variety of reasons (e.g. it may be written in assembly).
898        !CalleeF->isDeclaration()) {
899      Instruction *OldCall = CS.getInstruction();
900      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
901                UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
902                                  OldCall);
      // If OldCall does not return void then replaceAllUsesWith undef.
      // This allows ValueHandlers and custom metadata to adjust themselves.
905      if (!OldCall->getType()->isVoidTy())
906        ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
907      if (isa<CallInst>(OldCall))
908        return EraseInstFromFunction(*OldCall);
909
      // We cannot remove an invoke because it would change the CFG; just
      // change the callee to a null pointer.
912      cast<InvokeInst>(OldCall)->setCalledFunction(
913                                    Constant::getNullValue(CalleeF->getType()));
914      return 0;
915    }
916
917  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
918    // If CS does not return void then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust themselves.
920    if (!CS.getInstruction()->getType()->isVoidTy())
921      ReplaceInstUsesWith(*CS.getInstruction(),
922                          UndefValue::get(CS.getInstruction()->getType()));
923
924    if (isa<InvokeInst>(CS.getInstruction())) {
925      // Can't remove an invoke because we cannot change the CFG.
926      return 0;
927    }
928
    // This instruction is not reachable; just remove it.  We insert a store
    // to undef so that we know that this code is not reachable, despite the
    // fact that we can't modify the CFG here.
932    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
933                  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
934                  CS.getInstruction());
935
936    return EraseInstFromFunction(*CS.getInstruction());
937  }
938
939  if (IntrinsicInst *II = FindInitTrampoline(Callee))
940    return transformCallThroughTrampoline(CS, II);
941
942  PointerType *PTy = cast<PointerType>(Callee->getType());
943  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
944  if (FTy->isVarArg()) {
945    int ix = FTy->getNumParams();
946    // See if we can optimize any arguments passed through the varargs area of
947    // the call.
948    for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
949           E = CS.arg_end(); I != E; ++I, ++ix) {
950      CastInst *CI = dyn_cast<CastInst>(*I);
951      if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
952        *I = CI->getOperand(0);
953        Changed = true;
954      }
955    }
956  }
957
958  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
959    // Inline asm calls cannot throw - mark them 'nounwind'.
960    CS.setDoesNotThrow();
961    Changed = true;
962  }
963
  // Try to optimize the call if possible; we require TargetData for most of
  // this.  None of these calls are seen as possibly dead, so go ahead and
  // delete the instruction now.
967  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
968    Instruction *I = tryOptimizeCall(CI, TD);
    // If we changed something, return the result.  Otherwise let the
    // fall-through checks run.
971    if (I) return EraseInstFromFunction(*I);
972  }
973
974  return Changed ? CS.getInstruction() : 0;
975}
976
977// transformConstExprCastCall - If the callee is a constexpr cast of a function,
978// attempt to move the cast to the arguments of the call/invoke.
979//
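// Illustrative example (not from the original file): a call through a
// bitcasted callee such as
//   %r = call i32 bitcast (i32 (i8*)* @f to i32 (i32*)*)(i32* %p)
// is rewritten as a direct call with the cast moved onto the argument:
//   %0 = bitcast i32* %p to i8*
//   %r = call i32 @f(i8* %0)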
980bool InstCombiner::transformConstExprCastCall(CallSite CS) {
981  Function *Callee =
982    dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
983  if (Callee == 0)
984    return false;
985  Instruction *Caller = CS.getInstruction();
986  const AttrListPtr &CallerPAL = CS.getAttributes();
987
  // Okay, this is a cast from a function to a different type.  Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments cast to the appropriate types.
  //
992  FunctionType *FT = Callee->getFunctionType();
993  Type *OldRetTy = Caller->getType();
994  Type *NewRetTy = FT->getReturnType();
995
996  if (NewRetTy->isStructTy())
997    return false; // TODO: Handle multiple return values.
998
999  // Check to see if we are changing the return type...
1000  if (OldRetTy != NewRetTy) {
1001    if (Callee->isDeclaration() &&
1002        // Conversion is ok if changing from one pointer type to another or from
1003        // a pointer to an integer of the same size.
1004        !((OldRetTy->isPointerTy() || !TD ||
1005           OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
1006          (NewRetTy->isPointerTy() || !TD ||
1007           NewRetTy == TD->getIntPtrType(Caller->getContext()))))
1008      return false;   // Cannot transform this return value.
1009
1010    if (!Caller->use_empty() &&
1011        // void -> non-void is handled specially
1012        !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
1013      return false;   // Cannot transform this return value.
1014
1015    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
1016      Attributes RAttrs = CallerPAL.getRetAttributes();
1017      if (RAttrs & Attribute::typeIncompatible(NewRetTy))
1018        return false;   // Attribute not compatible with transformed value.
1019    }
1020
1021    // If the callsite is an invoke instruction, and the return value is used by
1022    // a PHI node in a successor, we cannot change the return type of the call
1023    // because there is no place to put the cast instruction (without breaking
1024    // the critical edge).  Bail out in this case.
1025    if (!Caller->use_empty())
1026      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
1027        for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
1028             UI != E; ++UI)
1029          if (PHINode *PN = dyn_cast<PHINode>(*UI))
1030            if (PN->getParent() == II->getNormalDest() ||
1031                PN->getParent() == II->getUnwindDest())
1032              return false;
1033  }
1034
1035  unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
1036  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
1037
1038  CallSite::arg_iterator AI = CS.arg_begin();
1039  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
1040    Type *ParamTy = FT->getParamType(i);
1041    Type *ActTy = (*AI)->getType();
1042
1043    if (!CastInst::isCastable(ActTy, ParamTy))
1044      return false;   // Cannot transform this parameter value.
1045
1046    Attributes Attrs = CallerPAL.getParamAttributes(i + 1);
1047    if (Attrs & Attribute::typeIncompatible(ParamTy))
1048      return false;   // Attribute not compatible with transformed value.
1049
1050    // If the parameter is passed as a byval argument, then we have to have a
1051    // sized type and the sized type has to have the same size as the old type.
1052    if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
1053      PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
1054      if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
1055        return false;
1056
1057      Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
1058      if (TD->getTypeAllocSize(CurElTy) !=
1059          TD->getTypeAllocSize(ParamPTy->getElementType()))
1060        return false;
1061    }
1062
1063    // Converting from one pointer type to another or between a pointer and an
1064    // integer of the same size is safe even if we do not have a body.
1065    bool isConvertible = ActTy == ParamTy ||
1066      (TD && ((ParamTy->isPointerTy() ||
1067      ParamTy == TD->getIntPtrType(Caller->getContext())) &&
1068              (ActTy->isPointerTy() ||
1069              ActTy == TD->getIntPtrType(Caller->getContext()))));
1070    if (Callee->isDeclaration() && !isConvertible) return false;
1071  }
1072
1073  if (Callee->isDeclaration()) {
1074    // Do not delete arguments unless we have a function body.
1075    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
1076      return false;
1077
1078    // If the callee is just a declaration, don't change the varargsness of the
1079    // call.  We don't want to introduce a varargs call where one doesn't
1080    // already exist.
1081    PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
1082    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
1083      return false;
1084
    // If both the callee and the cast type are varargs, we still have to make
    // sure the number of fixed parameters is the same or we have the same
    // ABI issues as if we introduced a varargs call.
1088    if (FT->isVarArg() &&
1089        cast<FunctionType>(APTy->getElementType())->isVarArg() &&
1090        FT->getNumParams() !=
1091        cast<FunctionType>(APTy->getElementType())->getNumParams())
1092      return false;
1093  }
1094
1095  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
1096      !CallerPAL.isEmpty())
1097    // In this case we have more arguments than the new function type, but we
1098    // won't be dropping them.  Check that these extra arguments have attributes
1099    // that are compatible with being a vararg call argument.
1100    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
1101      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
1102        break;
1103      Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
1104      if (PAttrs & Attribute::VarArgsIncompatible)
1105        return false;
1106    }
1107
1108
1109  // Okay, we decided that this is a safe thing to do: go ahead and start
1110  // inserting cast instructions as necessary.
1111  std::vector<Value*> Args;
1112  Args.reserve(NumActualArgs);
1113  SmallVector<AttributeWithIndex, 8> attrVec;
1114  attrVec.reserve(NumCommonArgs);
1115
1116  // Get any return attributes.
1117  Attributes RAttrs = CallerPAL.getRetAttributes();
1118
1119  // If the return value is not being used, the type may not be compatible
1120  // with the existing attributes.  Wipe out any problematic attributes.
1121  RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
1122
1123  // Add the new return attributes.
1124  if (RAttrs)
1125    attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
1126
1127  AI = CS.arg_begin();
1128  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
1129    Type *ParamTy = FT->getParamType(i);
1130    if ((*AI)->getType() == ParamTy) {
1131      Args.push_back(*AI);
1132    } else {
1133      Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
1134          false, ParamTy, false);
1135      Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy));
1136    }
1137
1138    // Add any parameter attributes.
1139    if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
1140      attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
1141  }
1142
  // If the function takes more arguments than the call was passing, add the
  // missing ones now (as null values).
1145  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
1146    Args.push_back(Constant::getNullValue(FT->getParamType(i)));
1147
1148  // If we are removing arguments to the function, emit an obnoxious warning.
1149  if (FT->getNumParams() < NumActualArgs) {
1150    if (!FT->isVarArg()) {
1151      errs() << "WARNING: While resolving call to function '"
1152             << Callee->getName() << "' arguments were dropped!\n";
1153    } else {
1154      // Add all of the arguments in their promoted form to the arg list.
1155      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
1156        Type *PTy = getPromotedType((*AI)->getType());
1157        if (PTy != (*AI)->getType()) {
1158          // Must promote to pass through va_arg area!
1159          Instruction::CastOps opcode =
1160            CastInst::getCastOpcode(*AI, false, PTy, false);
1161          Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
1162        } else {
1163          Args.push_back(*AI);
1164        }
1165
1166        // Add any parameter attributes.
1167        if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
1168          attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
1169      }
1170    }
1171  }
1172
  if (Attributes FnAttrs = CallerPAL.getFnAttributes())
1174    attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
1175
1176  if (NewRetTy->isVoidTy())
1177    Caller->setName("");   // Void type should not have a name.
1178
1179  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec);
1180
1181  Instruction *NC;
1182  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
1183    NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
1184                               II->getUnwindDest(), Args);
1185    NC->takeName(II);
1186    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
1187    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
1188  } else {
1189    CallInst *CI = cast<CallInst>(Caller);
1190    NC = Builder->CreateCall(Callee, Args);
1191    NC->takeName(CI);
1192    if (CI->isTailCall())
1193      cast<CallInst>(NC)->setTailCall();
1194    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
1195    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
1196  }
1197
1198  // Insert a cast of the return type as necessary.
1199  Value *NV = NC;
1200  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
1201    if (!NV->getType()->isVoidTy()) {
1202      Instruction::CastOps opcode =
1203        CastInst::getCastOpcode(NC, false, OldRetTy, false);
1204      NV = NC = CastInst::Create(opcode, NC, OldRetTy);
1205      NC->setDebugLoc(Caller->getDebugLoc());
1206
      // If this is an invoke instruction, we should insert it after the first
      // non-phi instruction in the normal successor block.
1209      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
1210        BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
1211        InsertNewInstBefore(NC, *I);
1212      } else {
        // Otherwise, it's a call; just insert the cast right after the call.
1214        InsertNewInstBefore(NC, *Caller);
1215      }
1216      Worklist.AddUsersToWorkList(*Caller);
1217    } else {
1218      NV = UndefValue::get(Caller->getType());
1219    }
1220  }
1221
1222  if (!Caller->use_empty())
1223    ReplaceInstUsesWith(*Caller, NV);
1224
1225  EraseInstFromFunction(*Caller);
1226  return true;
1227}
1228
// transformCallThroughTrampoline - Turn a call to a function created by the
// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the
// underlying function.
//
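// Illustrative sketch (not from the original file):
//   call void @llvm.init.trampoline(i8* %tramp, i8* %f.i8, i8* %nval)
//   %p = call i8* @llvm.adjust.trampoline(i8* %tramp)
//   %fn = bitcast i8* %p to void (i32)*
//   call void %fn(i32 %x)
// becomes a direct call to the underlying function with %nval spliced into
// the position of its 'nest' parameter, e.g. call void @f(i8* %nval, i32 %x).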
1233Instruction *
1234InstCombiner::transformCallThroughTrampoline(CallSite CS,
1235                                             IntrinsicInst *Tramp) {
1236  Value *Callee = CS.getCalledValue();
1237  PointerType *PTy = cast<PointerType>(Callee->getType());
1238  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
1239  const AttrListPtr &Attrs = CS.getAttributes();
1240
1241  // If the call already has the 'nest' attribute somewhere then give up -
1242  // otherwise 'nest' would occur twice after splicing in the chain.
1243  if (Attrs.hasAttrSomewhere(Attribute::Nest))
1244    return 0;
1245
1246  assert(Tramp &&
1247         "transformCallThroughTrampoline called with incorrect CallSite.");
1248
  Function *NestF =
    cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
1250  PointerType *NestFPTy = cast<PointerType>(NestF->getType());
1251  FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
1252
1253  const AttrListPtr &NestAttrs = NestF->getAttributes();
1254  if (!NestAttrs.isEmpty()) {
1255    unsigned NestIdx = 1;
1256    Type *NestTy = 0;
1257    Attributes NestAttr = Attribute::None;
1258
1259    // Look for a parameter marked with the 'nest' attribute.
1260    for (FunctionType::param_iterator I = NestFTy->param_begin(),
1261         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
1262      if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
1263        // Record the parameter type and any other attributes.
1264        NestTy = *I;
1265        NestAttr = NestAttrs.getParamAttributes(NestIdx);
1266        break;
1267      }
1268
1269    if (NestTy) {
1270      Instruction *Caller = CS.getInstruction();
1271      std::vector<Value*> NewArgs;
1272      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
1273
1274      SmallVector<AttributeWithIndex, 8> NewAttrs;
1275      NewAttrs.reserve(Attrs.getNumSlots() + 1);
1276
1277      // Insert the nest argument into the call argument list, which may
1278      // mean appending it.  Likewise for attributes.
1279
1280      // Add any result attributes.
1281      if (Attributes Attr = Attrs.getRetAttributes())
1282        NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
1283
1284      {
1285        unsigned Idx = 1;
1286        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
1287        do {
1288          if (Idx == NestIdx) {
1289            // Add the chain argument and attributes.
1290            Value *NestVal = Tramp->getArgOperand(2);
1291            if (NestVal->getType() != NestTy)
1292              NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
1293            NewArgs.push_back(NestVal);
1294            NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
1295          }
1296
1297          if (I == E)
1298            break;
1299
1300          // Add the original argument and attributes.
1301          NewArgs.push_back(*I);
1302          if (Attributes Attr = Attrs.getParamAttributes(Idx))
1303            NewAttrs.push_back
1304              (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
1305
1306          ++Idx, ++I;
1307        } while (1);
1308      }
1309
1310      // Add any function attributes.
1311      if (Attributes Attr = Attrs.getFnAttributes())
1312        NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
1313
1314      // The trampoline may have been bitcast to a bogus type (FTy).
1315      // Handle this by synthesizing a new function type, equal to FTy
1316      // with the chain parameter inserted.
1317
1318      std::vector<Type*> NewTypes;
1319      NewTypes.reserve(FTy->getNumParams()+1);
1320
1321      // Insert the chain's type into the list of parameter types, which may
1322      // mean appending it.
1323      {
1324        unsigned Idx = 1;
1325        FunctionType::param_iterator I = FTy->param_begin(),
1326          E = FTy->param_end();
1327
1328        do {
1329          if (Idx == NestIdx)
1330            // Add the chain's type.
1331            NewTypes.push_back(NestTy);
1332
1333          if (I == E)
1334            break;
1335
1336          // Add the original type.
1337          NewTypes.push_back(*I);
1338
1339          ++Idx, ++I;
1340        } while (1);
1341      }
1342
1343      // Replace the trampoline call with a direct call.  Let the generic
1344      // code sort out any function type mismatches.
1345      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
1346                                                FTy->isVarArg());
1347      Constant *NewCallee =
1348        NestF->getType() == PointerType::getUnqual(NewFTy) ?
1349        NestF : ConstantExpr::getBitCast(NestF,
1350                                         PointerType::getUnqual(NewFTy));
1351      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs);
1352
1353      Instruction *NewCaller;
1354      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
1355        NewCaller = InvokeInst::Create(NewCallee,
1356                                       II->getNormalDest(), II->getUnwindDest(),
1357                                       NewArgs);
1358        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
1359        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
1360      } else {
1361        NewCaller = CallInst::Create(NewCallee, NewArgs);
1362        if (cast<CallInst>(Caller)->isTailCall())
1363          cast<CallInst>(NewCaller)->setTailCall();
1364        cast<CallInst>(NewCaller)->
1365          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
1366        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
1367      }
1368
1369      return NewCaller;
1370    }
1371  }
1372
1373  // Replace the trampoline call with a direct call.  Since there is no 'nest'
1374  // parameter, there is no need to adjust the argument list.  Let the generic
1375  // code sort out any function type mismatches.
1376  Constant *NewCallee =
1377    NestF->getType() == PTy ? NestF :
1378                              ConstantExpr::getBitCast(NestF, PTy);
1379  CS.setCalledFunction(NewCallee);
1380  return CS.getInstruction();
1381}
1382