InstCombineCalls.cpp revision 9ee17208115482441953127615231c59a2f4d052
1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the visitCall and visitInvoke functions.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InstCombine.h"
15#include "llvm/IntrinsicInst.h"
16#include "llvm/Support/CallSite.h"
17#include "llvm/Target/TargetData.h"
18#include "llvm/Analysis/MemoryBuiltins.h"
19#include "llvm/Transforms/Utils/BuildLibCalls.h"
20using namespace llvm;
21
22/// getPromotedType - Return the specified type promoted as it would be to pass
23/// through a va_arg area.
24static const Type *getPromotedType(const Type *Ty) {
25  if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
26    if (ITy->getBitWidth() < 32)
27      return Type::getInt32Ty(Ty->getContext());
28  }
29  return Ty;
30}
31
32/// EnforceKnownAlignment - If the specified pointer points to an object that
33/// we control, modify the object's alignment to PrefAlign. This isn't
34/// often possible though. If alignment is important, a more reliable approach
35/// is to simply align all global variables and allocation instructions to
36/// their preferred alignment from the beginning.
37///
38static unsigned EnforceKnownAlignment(Value *V,
39                                      unsigned Align, unsigned PrefAlign) {
40
41  User *U = dyn_cast<User>(V);
42  if (!U) return Align;
43
44  switch (Operator::getOpcode(U)) {
45  default: break;
46  case Instruction::BitCast:
47    return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
48  case Instruction::GetElementPtr: {
49    // If all indexes are zero, it is just the alignment of the base pointer.
50    bool AllZeroOperands = true;
51    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
52      if (!isa<Constant>(*i) ||
53          !cast<Constant>(*i)->isNullValue()) {
54        AllZeroOperands = false;
55        break;
56      }
57
58    if (AllZeroOperands) {
59      // Treat this like a bitcast.
60      return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
61    }
62    break;
63  }
64  }
65
66  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
67    // If there is a large requested alignment and we can, bump up the alignment
68    // of the global.
69    if (!GV->isDeclaration()) {
70      if (GV->getAlignment() >= PrefAlign)
71        Align = GV->getAlignment();
72      else {
73        GV->setAlignment(PrefAlign);
74        Align = PrefAlign;
75      }
76    }
77  } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
78    // If there is a requested alignment and if this is an alloca, round up.
79    if (AI->getAlignment() >= PrefAlign)
80      Align = AI->getAlignment();
81    else {
82      AI->setAlignment(PrefAlign);
83      Align = PrefAlign;
84    }
85  }
86
87  return Align;
88}
89
90/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
91/// we can determine, return it, otherwise return 0.  If PrefAlign is specified,
92/// and it is more than the alignment of the ultimate object, see if we can
93/// increase the alignment of the ultimate object, making this check succeed.
94unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
95                                                  unsigned PrefAlign) {
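  // Without TargetData, fall back to the bit width of 'unsigned' for the
  // known-bits query below.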
96  unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) :
97                      sizeof(PrefAlign) * CHAR_BIT;
98  APInt Mask = APInt::getAllOnesValue(BitWidth);
99  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
100  ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
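  // Low bits known to be zero give a power-of-two lower bound on the
  // pointer's alignment.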
101  unsigned TrailZ = KnownZero.countTrailingOnes();
102  unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
103
104  if (PrefAlign > Align)
105    Align = EnforceKnownAlignment(V, Align, PrefAlign);
106
107  // Otherwise no adjustment is needed.
108  return Align;
109}
110
111Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
112  unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
113  unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
114  unsigned MinAlign = std::min(DstAlign, SrcAlign);
115  unsigned CopyAlign = MI->getAlignment();
116
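  // If we can prove a stronger alignment than the intrinsic currently
  // records, raise the recorded alignment and return the changed intrinsic.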
117  if (CopyAlign < MinAlign) {
118    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
119                                             MinAlign, false));
120    return MI;
121  }
122
123  // If the memcpy/memmove length is 1/2/4/8 bytes then replace it with a
124  // load/store pair.
125  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
126  if (MemOpLength == 0) return 0;
127
128  // Source and destination pointer types are always "i8*" for intrinsic.  See
129  // if the size is something we can handle with a single primitive load/store.
130  // A single load+store correctly handles overlapping memory in the memmove
131  // case.
132  unsigned Size = MemOpLength->getZExtValue();
133  if (Size == 0) return MI;  // Delete this mem transfer.
134
135  if (Size > 8 || (Size&(Size-1)))
136    return 0;  // If not 1/2/4/8 bytes, exit.
137
138  // Use an integer load+store unless we can find something better.
139  unsigned SrcAddrSp =
140    cast<PointerType>(MI->getOperand(2)->getType())->getAddressSpace();
141  unsigned DstAddrSp =
142    cast<PointerType>(MI->getOperand(1)->getType())->getAddressSpace();
143
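  // Size is 1/2/4/8 here, so Size*8 picks the matching integer type
  // (i8, i16, i32 or i64).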
144  const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
145  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
146  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
147
148  // Memcpy forces the use of i8* for the source and destination.  That means
149  // that if you're using memcpy to move one double around, you'll get a cast
150  // from double* to i8*.  We'd much rather use a double load+store than an
151  // i64 load+store here, because this improves the odds that the source or
152  // dest address will be promotable.  See if we can find a better type than the
153  // integer datatype.
154  Value *StrippedDest = MI->getOperand(1)->stripPointerCasts();
155  if (StrippedDest != MI->getOperand(1)) {
156    const Type *SrcETy = cast<PointerType>(StrippedDest->getType())
157                                    ->getElementType();
158    if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
159      // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
160      // down through these levels if so.
161      while (!SrcETy->isSingleValueType()) {
162        if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
163          if (STy->getNumElements() == 1)
164            SrcETy = STy->getElementType(0);
165          else
166            break;
167        } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
168          if (ATy->getNumElements() == 1)
169            SrcETy = ATy->getElementType();
170          else
171            break;
172        } else
173          break;
174      }
175
176      if (SrcETy->isSingleValueType()) {
177        NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
178        NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
179      }
180    }
181  }
182
183
184  // If the memcpy/memmove provides better alignment info than we can
185  // infer, use it.
186  SrcAlign = std::max(SrcAlign, CopyAlign);
187  DstAlign = std::max(DstAlign, CopyAlign);
188
189  Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewSrcPtrTy);
190  Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewDstPtrTy);
191  Instruction *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign);
192  InsertNewInstBefore(L, *MI);
193  InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign),
194                      *MI);
195
196  // Set the size of the copy to 0, it will be deleted on the next iteration.
197  MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
198  return MI;
199}
200
201Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
202  unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
203  if (MI->getAlignment() < Alignment) {
204    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
205                                             Alignment, false));
206    return MI;
207  }
208
209  // Extract the length and alignment and fill if they are constant.
210  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
211  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
212  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
213    return 0;
214  uint64_t Len = LenC->getZExtValue();
215  Alignment = MI->getAlignment();
216
217  // If the length is zero, this is a no-op
218  if (Len == 0) return MI; // memset(d,c,0,a) -> noop
219
220  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
221  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
222    const Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.
223
224    Value *Dest = MI->getDest();
225    Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));
226
227    // Memset treats an alignment of 0 as alignment 1, but a store does not.
228    if (Alignment == 0) Alignment = 1;
229
230    // Extract the fill value and store.
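    // Multiplying the fill byte by 0x0101010101010101 splats it into every
    // byte of a 64-bit value; the ITy constant keeps only the low Len bytes.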
231    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
232    InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill),
233                                      Dest, false, Alignment), *MI);
234
235    // Set the size of the copy to 0, it will be deleted on the next iteration.
236    MI->setLength(Constant::getNullValue(LenC->getType()));
237    return MI;
238  }
239
240  return 0;
241}
242
243/// visitCallInst - CallInst simplification.  This mostly only handles folding
244/// of intrinsic instructions.  For normal calls, it allows visitCallSite to do
245/// the heavy lifting.
246///
247Instruction *InstCombiner::visitCallInst(CallInst &CI) {
248  if (isFreeCall(&CI))
249    return visitFree(CI);
250
251  // If the caller function is nounwind, mark the call as nounwind, even if the
252  // callee isn't.
253  if (CI.getParent()->getParent()->doesNotThrow() &&
254      !CI.doesNotThrow()) {
255    CI.setDoesNotThrow();
256    return &CI;
257  }
258
259  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
260  if (!II) return visitCallSite(&CI);
261
262  // Intrinsics cannot occur in an invoke, so handle them here instead of in
263  // visitCallSite.
264  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
265    bool Changed = false;
266
267    // memmove/cpy/set of zero bytes is a noop.
268    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
269      if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
270
271      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
272        if (CI->getZExtValue() == 1) {
273          // Replace the instruction with just byte operations.  We would
274          // transform other cases to loads/stores, but we don't know if
275          // alignment is sufficient.
276        }
277    }
278
279    // If we have a memmove and the source operand is a constant global,
280    // then the source and dest pointers can't alias, so we can change this
281    // into a call to memcpy.
282    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
283      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
284        if (GVSrc->isConstant()) {
285          Module *M = CI.getParent()->getParent()->getParent();
286          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
287          const Type *Tys[3] = { CI.getOperand(1)->getType(),
288                                 CI.getOperand(2)->getType(),
289                                 CI.getOperand(3)->getType() };
290          CI.setOperand(0,
291                        Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
292          Changed = true;
293        }
294    }
295
296    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
297      // memmove(x,x,size) -> noop.
298      if (MTI->getSource() == MTI->getDest())
299        return EraseInstFromFunction(CI);
300    }
301
302    // If we can determine a pointer alignment that is bigger than currently
303    // set, update the alignment.
304    if (isa<MemTransferInst>(MI)) {
305      if (Instruction *I = SimplifyMemTransfer(MI))
306        return I;
307    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
308      if (Instruction *I = SimplifyMemSet(MSI))
309        return I;
310    }
311
312    if (Changed) return II;
313  }
314
315  switch (II->getIntrinsicID()) {
316  default: break;
317  case Intrinsic::objectsize: {
318    // We need target data for just about everything, so bail out without it.
319    if (!TD) break;
320
321    const Type *ReturnTy = CI.getType();
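    // The second argument chooses what to return when the size is unknown:
    // 1 requests the minimum (0), otherwise the maximum (-1) is returned.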
322    bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
323
324    // Get to the real allocated thing and offset as fast as possible.
325    Value *Op1 = II->getOperand(1)->stripPointerCasts();
326
327    // If we've stripped down to a single global variable that we
328    // can know the size of, then just return that.
329    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) {
330      if (GV->hasDefinitiveInitializer()) {
331        Constant *C = GV->getInitializer();
332        uint64_t GlobalSize = TD->getTypeAllocSize(C->getType());
333        return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, GlobalSize));
334      } else {
335        // Can't determine size of the GV.
336        Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
337        return ReplaceInstUsesWith(CI, RetVal);
338      }
339    } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
340      // Get alloca size.
341      if (AI->getAllocatedType()->isSized()) {
342        uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
343        if (AI->isArrayAllocation()) {
344          const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize());
345          if (!C) break;
346          AllocaSize *= C->getZExtValue();
347        }
348        return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, AllocaSize));
349      }
350    } else if (CallInst *MI = extractMallocCall(Op1)) {
351      const Type* MallocType = getMallocAllocatedType(MI);
352      // Get the malloc'd allocation size.
353      if (MallocType && MallocType->isSized()) {
354        if (Value *NElems = getMallocArraySize(MI, TD, true)) {
355          if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
356            return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy,
357              (NElements->getZExtValue() * TD->getTypeAllocSize(MallocType))));
358        }
359      }
360    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) {
361      // Only handle constant GEPs here.
362      if (CE->getOpcode() != Instruction::GetElementPtr) break;
363      GEPOperator *GEP = cast<GEPOperator>(CE);
364
365      // Make sure we're not a constant offset from an external
366      // global.
367      Value *Operand = GEP->getPointerOperand();
368      Operand = Operand->stripPointerCasts();
369      if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand))
370        if (!GV->hasDefinitiveInitializer()) break;
371
372      // Get what we're pointing to and its size.
373      const PointerType *BaseType =
374        cast<PointerType>(Operand->getType());
375      uint64_t Size = TD->getTypeAllocSize(BaseType->getElementType());
376
377      // Get the current byte offset into the thing. Use the original
378      // operand in case we're looking through a bitcast.
379      SmallVector<Value*, 8> Ops(CE->op_begin()+1, CE->op_end());
380      const PointerType *OffsetType =
381        cast<PointerType>(GEP->getPointerOperand()->getType());
382      uint64_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size());
383
384      if (Size < Offset) {
385        // Out of bounds reference? Negative index normalized to a large
386        // index? Just return "I don't know".
387        Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
388        return ReplaceInstUsesWith(CI, RetVal);
389      }
390
391      Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset);
392      return ReplaceInstUsesWith(CI, RetVal);
393
394    }
395
396    // Do not return "I don't know" here. Later optimization passes could
397    // make it possible to evaluate objectsize to a constant.
398    break;
399  }
400  case Intrinsic::bswap:
401    // bswap(bswap(x)) -> x
402    if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
403      if (Operand->getIntrinsicID() == Intrinsic::bswap)
404        return ReplaceInstUsesWith(CI, Operand->getOperand(1));
405
406    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
407    if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) {
408      if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
409        if (Operand->getIntrinsicID() == Intrinsic::bswap) {
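          // The truncation keeps the low bytes of bswap(x), which are the
          // high bytes of x, so shift x right by the width difference.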
410          unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
411                       TI->getType()->getPrimitiveSizeInBits();
412          Value *CV = ConstantInt::get(Operand->getType(), C);
413          Value *V = Builder->CreateLShr(Operand->getOperand(1), CV);
414          return new TruncInst(V, TI->getType());
415        }
416    }
417
418    break;
419  case Intrinsic::powi:
420    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) {
421      // powi(x, 0) -> 1.0
422      if (Power->isZero())
423        return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
424      // powi(x, 1) -> x
425      if (Power->isOne())
426        return ReplaceInstUsesWith(CI, II->getOperand(1));
427      // powi(x, -1) -> 1/x
428      if (Power->isAllOnesValue())
429        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
430                                          II->getOperand(1));
431    }
432    break;
433  case Intrinsic::cttz: {
434    // If all bits below the first known one are known zero,
435    // this value is constant.
436    const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
437    uint32_t BitWidth = IT->getBitWidth();
438    APInt KnownZero(BitWidth, 0);
439    APInt KnownOne(BitWidth, 0);
440    ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth),
441                      KnownZero, KnownOne);
442    unsigned TrailingZeros = KnownOne.countTrailingZeros();
443    APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
444    if ((Mask & KnownZero) == Mask)
445      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
446                                 APInt(BitWidth, TrailingZeros)));
447
448    }
449    break;
450  case Intrinsic::ctlz: {
451    // If all bits above the first known one are known zero,
452    // this value is constant.
453    const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
454    uint32_t BitWidth = IT->getBitWidth();
455    APInt KnownZero(BitWidth, 0);
456    APInt KnownOne(BitWidth, 0);
457    ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth),
458                      KnownZero, KnownOne);
459    unsigned LeadingZeros = KnownOne.countLeadingZeros();
460    APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
461    if ((Mask & KnownZero) == Mask)
462      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
463                                 APInt(BitWidth, LeadingZeros)));
464
465    }
466    break;
467  case Intrinsic::uadd_with_overflow: {
468    Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
469    const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
470    uint32_t BitWidth = IT->getBitWidth();
471    APInt Mask = APInt::getSignBit(BitWidth);
472    APInt LHSKnownZero(BitWidth, 0);
473    APInt LHSKnownOne(BitWidth, 0);
474    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
475    bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
476    bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
477
478    if (LHSKnownNegative || LHSKnownPositive) {
479      APInt RHSKnownZero(BitWidth, 0);
480      APInt RHSKnownOne(BitWidth, 0);
481      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
482      bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
483      bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
484      if (LHSKnownNegative && RHSKnownNegative) {
485        // The sign bit is set in both cases: this MUST overflow.
486        // Create a simple add instruction, and insert it into the struct.
487        Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI);
488        Worklist.Add(Add);
489        Constant *V[] = {
490          UndefValue::get(LHS->getType()),ConstantInt::getTrue(II->getContext())
491        };
492        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
493        return InsertValueInst::Create(Struct, Add, 0);
494      }
495
496      if (LHSKnownPositive && RHSKnownPositive) {
497        // The sign bit is clear in both cases: this CANNOT overflow.
498        // Create a simple add instruction, and insert it into the struct.
499        Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI);
500        Worklist.Add(Add);
501        Constant *V[] = {
502          UndefValue::get(LHS->getType()),
503          ConstantInt::getFalse(II->getContext())
504        };
505        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
506        return InsertValueInst::Create(Struct, Add, 0);
507      }
508    }
509  }
510  // FALL THROUGH uadd into sadd
511  case Intrinsic::sadd_with_overflow:
512    // Canonicalize constants into the RHS.
513    if (isa<Constant>(II->getOperand(1)) &&
514        !isa<Constant>(II->getOperand(2))) {
515      Value *LHS = II->getOperand(1);
516      II->setOperand(1, II->getOperand(2));
517      II->setOperand(2, LHS);
518      return II;
519    }
520
521    // X + undef -> undef
522    if (isa<UndefValue>(II->getOperand(2)))
523      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
524
525    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
526      // X + 0 -> {X, false}
527      if (RHS->isZero()) {
528        Constant *V[] = {
529          UndefValue::get(II->getOperand(1)->getType()),
530          ConstantInt::getFalse(II->getContext())
531        };
532        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
533        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
534      }
535    }
536    break;
537  case Intrinsic::usub_with_overflow:
538  case Intrinsic::ssub_with_overflow:
539    // undef - X -> undef
540    // X - undef -> undef
541    if (isa<UndefValue>(II->getOperand(1)) ||
542        isa<UndefValue>(II->getOperand(2)))
543      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
544
545    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
546      // X - 0 -> {X, false}
547      if (RHS->isZero()) {
548        Constant *V[] = {
549          UndefValue::get(II->getOperand(1)->getType()),
550          ConstantInt::getFalse(II->getContext())
551        };
552        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
553        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
554      }
555    }
556    break;
557  case Intrinsic::umul_with_overflow:
558  case Intrinsic::smul_with_overflow:
559    // Canonicalize constants into the RHS.
560    if (isa<Constant>(II->getOperand(1)) &&
561        !isa<Constant>(II->getOperand(2))) {
562      Value *LHS = II->getOperand(1);
563      II->setOperand(1, II->getOperand(2));
564      II->setOperand(2, LHS);
565      return II;
566    }
567
568    // X * undef -> undef
569    if (isa<UndefValue>(II->getOperand(2)))
570      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
571
572    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) {
573      // X*0 -> {0, false}
574      if (RHSI->isZero())
575        return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
576
577      // X * 1 -> {X, false}
578      if (RHSI->equalsInt(1)) {
579        Constant *V[] = {
580          UndefValue::get(II->getOperand(1)->getType()),
581          ConstantInt::getFalse(II->getContext())
582        };
583        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
584        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
585      }
586    }
587    break;
588  case Intrinsic::ppc_altivec_lvx:
589  case Intrinsic::ppc_altivec_lvxl:
590  case Intrinsic::x86_sse_loadu_ps:
591  case Intrinsic::x86_sse2_loadu_pd:
592  case Intrinsic::x86_sse2_loadu_dq:
593    // Turn PPC lvx     -> load if the pointer is known aligned.
594    // Turn X86 loadups -> load if the pointer is known aligned.
595    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
596      Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
597                                         PointerType::getUnqual(II->getType()));
598      return new LoadInst(Ptr);
599    }
600    break;
601  case Intrinsic::ppc_altivec_stvx:
602  case Intrinsic::ppc_altivec_stvxl:
603    // Turn stvx -> store if the pointer is known aligned.
604    if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
605      const Type *OpPtrTy =
606        PointerType::getUnqual(II->getOperand(1)->getType());
607      Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
608      return new StoreInst(II->getOperand(1), Ptr);
609    }
610    break;
611  case Intrinsic::x86_sse_storeu_ps:
612  case Intrinsic::x86_sse2_storeu_pd:
613  case Intrinsic::x86_sse2_storeu_dq:
614    // Turn X86 storeu -> store if the pointer is known aligned.
615    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
616      const Type *OpPtrTy =
617        PointerType::getUnqual(II->getOperand(2)->getType());
618      Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
619      return new StoreInst(II->getOperand(2), Ptr);
620    }
621    break;
622
623  case Intrinsic::x86_sse_cvttss2si: {
624    // This intrinsic only demands the 0th element of its input vector.  If
625    // we can simplify the input based on that, do so now.
626    unsigned VWidth =
627      cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
628    APInt DemandedElts(VWidth, 1);
629    APInt UndefElts(VWidth, 0);
630    if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
631                                              UndefElts)) {
632      II->setOperand(1, V);
633      return II;
634    }
635    break;
636  }
637
638  case Intrinsic::ppc_altivec_vperm:
639    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
640    if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
641      assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
642
643      // Check that all of the elements are integer constants or undefs.
644      bool AllEltsOk = true;
645      for (unsigned i = 0; i != 16; ++i) {
646        if (!isa<ConstantInt>(Mask->getOperand(i)) &&
647            !isa<UndefValue>(Mask->getOperand(i))) {
648          AllEltsOk = false;
649          break;
650        }
651      }
652
653      if (AllEltsOk) {
654        // Cast the input vectors to byte vectors.
655        Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
656        Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
657        Value *Result = UndefValue::get(Op0->getType());
658
659        // Only extract each element once.
660        Value *ExtractedElts[32];
661        memset(ExtractedElts, 0, sizeof(ExtractedElts));
662
663        for (unsigned i = 0; i != 16; ++i) {
664          if (isa<UndefValue>(Mask->getOperand(i)))
665            continue;
666          unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
667          Idx &= 31;  // Match the hardware behavior.
668
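          // Indices 0-15 select bytes from the first input vector and
          // indices 16-31 from the second.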
669          if (ExtractedElts[Idx] == 0) {
670            ExtractedElts[Idx] =
671              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
672                  ConstantInt::get(Type::getInt32Ty(II->getContext()),
673                                   Idx&15, false), "tmp");
674          }
675
676          // Insert this value into the result vector.
677          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
678                         ConstantInt::get(Type::getInt32Ty(II->getContext()),
679                                          i, false), "tmp");
680        }
681        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
682      }
683    }
684    break;
685
686  case Intrinsic::stackrestore: {
687    // If the save is right next to the restore, remove the restore.  This can
688    // happen when variable allocas are DCE'd.
689    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
690      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
691        BasicBlock::iterator BI = SS;
692        if (&*++BI == II)
693          return EraseInstFromFunction(CI);
694      }
695    }
696
697    // Scan down this block to see if there is another stack restore in the
698    // same block without an intervening call/alloca.
699    BasicBlock::iterator BI = II;
700    TerminatorInst *TI = II->getParent()->getTerminator();
701    bool CannotRemove = false;
702    for (++BI; &*BI != TI; ++BI) {
703      if (isa<AllocaInst>(BI) || isMalloc(BI)) {
704        CannotRemove = true;
705        break;
706      }
707      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
708        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
709          // If there is a stackrestore below this one, remove this one.
710          if (II->getIntrinsicID() == Intrinsic::stackrestore)
711            return EraseInstFromFunction(CI);
712          // Otherwise, ignore the intrinsic.
713        } else {
714          // If we found a non-intrinsic call, we can't remove the stack
715          // restore.
716          CannotRemove = true;
717          break;
718        }
719      }
720    }
721
722    // If the stack restore is in a return/unwind block and if there are no
723    // allocas or calls between the restore and the return, nuke the restore.
724    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
725      return EraseInstFromFunction(CI);
726    break;
727  }
728  }
729
730  return visitCallSite(II);
731}
732
733// InvokeInst simplification
734//
735Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
736  return visitCallSite(&II);
737}
738
739/// isSafeToEliminateVarargsCast - If this cast does not affect the value
740/// passed through the varargs area, we can eliminate the use of the cast.
741static bool isSafeToEliminateVarargsCast(const CallSite CS,
742                                         const CastInst * const CI,
743                                         const TargetData * const TD,
744                                         const int ix) {
745  if (!CI->isLosslessCast())
746    return false;
747
748  // The size of ByVal arguments is derived from the type, so we
749  // can't change to a type with a different size.  If the size were
750  // passed explicitly we could avoid this check.
751  if (!CS.paramHasAttr(ix, Attribute::ByVal))
752    return true;
753
754  const Type* SrcTy =
755            cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
756  const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
757  if (!SrcTy->isSized() || !DstTy->isSized())
758    return false;
759  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
760    return false;
761  return true;
762}
763
764namespace {
765class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls {
766  InstCombiner *IC;
767protected:
768  void replaceCall(Value *With) {
769    NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
770  }
771  bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
772    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp))) {
773      if (SizeCI->isAllOnesValue())
774        return true;
775      if (isString)
776        return SizeCI->getZExtValue() >=
777               GetStringLength(CI->getOperand(SizeArgOp));
778      if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getOperand(SizeArgOp)))
779        return SizeCI->getZExtValue() >= Arg->getZExtValue();
780    }
781    return false;
782  }
783public:
784  InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { }
785  Instruction *NewInstruction;
786};
787} // end anonymous namespace
788
789// Try to fold some different types of calls here.
790// Currently we're only working with the checking functions, memcpy_chk,
791// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
792// strcat_chk and strncat_chk.
793Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
794  if (CI->getCalledFunction() == 0) return 0;
795
796  InstCombineFortifiedLibCalls Simplifier(this);
797  Simplifier.fold(CI, TD);
798  return Simplifier.NewInstruction;
799}
800
801// visitCallSite - Improvements for call and invoke instructions.
802//
803Instruction *InstCombiner::visitCallSite(CallSite CS) {
804  bool Changed = false;
805
806  // If the callee is a constexpr cast of a function, attempt to move the cast
807  // to the arguments of the call/invoke.
808  if (transformConstExprCastCall(CS)) return 0;
809
810  Value *Callee = CS.getCalledValue();
811
812  if (Function *CalleeF = dyn_cast<Function>(Callee))
813    // If the call and callee calling conventions don't match, this call must
814    // be unreachable, as the call is undefined.
815    if (CalleeF->getCallingConv() != CS.getCallingConv() &&
816        // Only do this for calls to a function with a body.  A prototype may
817        // not actually end up matching the implementation's calling conv for a
818        // variety of reasons (e.g. it may be written in assembly).
819        !CalleeF->isDeclaration()) {
820      Instruction *OldCall = CS.getInstruction();
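      // Signal that this code is unreachable by storing to undef, since we
      // cannot change the CFG here (same trick as for null callees below).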
821      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
822                UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
823                                  OldCall);
824      // If OldCall does not return void then replaceAllUsesWith undef.
825      // This allows ValueHandlers and custom metadata to adjust themselves.
826      if (!OldCall->getType()->isVoidTy())
827        OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
828      if (isa<CallInst>(OldCall))
829        return EraseInstFromFunction(*OldCall);
830
831      // We cannot remove an invoke, because it would change the CFG; just
832      // change the callee to a null pointer instead.
833      cast<InvokeInst>(OldCall)->setCalledFunction(
834                                    Constant::getNullValue(CalleeF->getType()));
835      return 0;
836    }
837
838  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
839    // This instruction is not reachable, just remove it.  We insert a store to
840    // undef so that we know that this code is not reachable, despite the fact
841    // that we can't modify the CFG here.
842    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
843               UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
844                  CS.getInstruction());
845
846    // If CS does not return void then replaceAllUsesWith undef.
847    // This allows ValueHandlers and custom metadata to adjust themselves.
848    if (!CS.getInstruction()->getType()->isVoidTy())
849      CS.getInstruction()->
850        replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
851
852    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
853      // Don't break the CFG; insert a dummy cond branch instead.
854      BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
855                         ConstantInt::getTrue(Callee->getContext()), II);
856    }
857    return EraseInstFromFunction(*CS.getInstruction());
858  }
859
860  if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
861    if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
862      if (In->getIntrinsicID() == Intrinsic::init_trampoline)
863        return transformCallThroughTrampoline(CS);
864
865  const PointerType *PTy = cast<PointerType>(Callee->getType());
866  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
867  if (FTy->isVarArg()) {
868    int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
869    // See if we can optimize any arguments passed through the varargs area of
870    // the call.
871    for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
872           E = CS.arg_end(); I != E; ++I, ++ix) {
873      CastInst *CI = dyn_cast<CastInst>(*I);
874      if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
875        *I = CI->getOperand(0);
876        Changed = true;
877      }
878    }
879  }
880
881  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
882    // Inline asm calls cannot throw - mark them 'nounwind'.
883    CS.setDoesNotThrow();
884    Changed = true;
885  }
886
887  // Try to optimize the call if possible; we require TargetData for most of
888  // this.  None of these calls is seen as possibly dead, so go ahead and
889  // delete the instruction now.
890  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
891    Instruction *I = tryOptimizeCall(CI, TD);
892    // If we changed something, return the result; otherwise fall through
893    // to the final check below.
894    if (I) return EraseInstFromFunction(*I);
895  }
896
897  return Changed ? CS.getInstruction() : 0;
898}
899
900// transformConstExprCastCall - If the callee is a constexpr cast of a function,
901// attempt to move the cast to the arguments of the call/invoke.
902//
903bool InstCombiner::transformConstExprCastCall(CallSite CS) {
904  if (!isa<ConstantExpr>(CS.getCalledValue())) return false;
905  ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue());
906  if (CE->getOpcode() != Instruction::BitCast ||
907      !isa<Function>(CE->getOperand(0)))
908    return false;
909  Function *Callee = cast<Function>(CE->getOperand(0));
910  Instruction *Caller = CS.getInstruction();
911  const AttrListPtr &CallerPAL = CS.getAttributes();
912
913  // Okay, this is a cast from a function to a different type.  Unless doing so
914  // would cause a type conversion of one of our arguments, change this call to
915  // be a direct call with arguments cast to the appropriate types.
916  //
917  const FunctionType *FT = Callee->getFunctionType();
918  const Type *OldRetTy = Caller->getType();
919  const Type *NewRetTy = FT->getReturnType();
920
921  if (NewRetTy->isStructTy())
922    return false; // TODO: Handle multiple return values.
923
924  // Check to see if we are changing the return type...
925  if (OldRetTy != NewRetTy) {
926    if (Callee->isDeclaration() &&
927        // Conversion is ok if changing from one pointer type to another or from
928        // a pointer to an integer of the same size.
929        !((OldRetTy->isPointerTy() || !TD ||
930           OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
931          (NewRetTy->isPointerTy() || !TD ||
932           NewRetTy == TD->getIntPtrType(Caller->getContext()))))
933      return false;   // Cannot transform this return value.
934
935    if (!Caller->use_empty() &&
936        // void -> non-void is handled specially
937        !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))
938      return false;   // Cannot transform this return value.
939
940    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
941      Attributes RAttrs = CallerPAL.getRetAttributes();
942      if (RAttrs & Attribute::typeIncompatible(NewRetTy))
943        return false;   // Attribute not compatible with transformed value.
944    }
945
946    // If the callsite is an invoke instruction, and the return value is used by
947    // a PHI node in a successor, we cannot change the return type of the call
948    // because there is no place to put the cast instruction (without breaking
949    // the critical edge).  Bail out in this case.
950    if (!Caller->use_empty())
951      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
952        for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
953             UI != E; ++UI)
954          if (PHINode *PN = dyn_cast<PHINode>(*UI))
955            if (PN->getParent() == II->getNormalDest() ||
956                PN->getParent() == II->getUnwindDest())
957              return false;
958  }
959
960  unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
961  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
962
963  CallSite::arg_iterator AI = CS.arg_begin();
964  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
965    const Type *ParamTy = FT->getParamType(i);
966    const Type *ActTy = (*AI)->getType();
967
968    if (!CastInst::isCastable(ActTy, ParamTy))
969      return false;   // Cannot transform this parameter value.
970
971    if (CallerPAL.getParamAttributes(i + 1)
972        & Attribute::typeIncompatible(ParamTy))
973      return false;   // Attribute not compatible with transformed value.
974
975    // Converting from one pointer type to another or between a pointer and an
976    // integer of the same size is safe even if we do not have a body.
977    bool isConvertible = ActTy == ParamTy ||
978      (TD && ((ParamTy->isPointerTy() ||
979      ParamTy == TD->getIntPtrType(Caller->getContext())) &&
980              (ActTy->isPointerTy() ||
981              ActTy == TD->getIntPtrType(Caller->getContext()))));
982    if (Callee->isDeclaration() && !isConvertible) return false;
983  }
984
985  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
986      Callee->isDeclaration())
987    return false;   // Do not delete arguments unless we have a function body.
988
989  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
990      !CallerPAL.isEmpty())
991    // In this case we have more arguments than the new function type, but we
992    // won't be dropping them.  Check that these extra arguments have attributes
993    // that are compatible with being a vararg call argument.
994    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
995      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
996        break;
997      Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
998      if (PAttrs & Attribute::VarArgsIncompatible)
999        return false;
1000    }
1001
1002  // Okay, we decided that this is a safe thing to do: go ahead and start
1003  // inserting cast instructions as necessary...
1004  std::vector<Value*> Args;
1005  Args.reserve(NumActualArgs);
1006  SmallVector<AttributeWithIndex, 8> attrVec;
1007  attrVec.reserve(NumCommonArgs);
1008
1009  // Get any return attributes.
1010  Attributes RAttrs = CallerPAL.getRetAttributes();
1011
1012  // If the return value is not being used, the type may not be compatible
1013  // with the existing attributes.  Wipe out any problematic attributes.
1014  RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
1015
1016  // Add the new return attributes.
1017  if (RAttrs)
1018    attrVec.push_back(AttributeWithIndex::get(0, RAttrs));
1019
1020  AI = CS.arg_begin();
1021  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
1022    const Type *ParamTy = FT->getParamType(i);
1023    if ((*AI)->getType() == ParamTy) {
1024      Args.push_back(*AI);
1025    } else {
1026      Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
1027          false, ParamTy, false);
1028      Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
1029    }
1030
1031    // Add any parameter attributes.
1032    if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
1033      attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
1034  }
1035
1036  // If the function takes more arguments than the call was taking, add them
1037  // now.
1038  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
1039    Args.push_back(Constant::getNullValue(FT->getParamType(i)));
1040
1041  // If we are removing arguments to the function, emit an obnoxious warning.
1042  if (FT->getNumParams() < NumActualArgs) {
1043    if (!FT->isVarArg()) {
1044      errs() << "WARNING: While resolving call to function '"
1045             << Callee->getName() << "' arguments were dropped!\n";
1046    } else {
1047      // Add all of the arguments in their promoted form to the arg list.
1048      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
1049        const Type *PTy = getPromotedType((*AI)->getType());
1050        if (PTy != (*AI)->getType()) {
1051          // Must promote to pass through va_arg area!
1052          Instruction::CastOps opcode =
1053            CastInst::getCastOpcode(*AI, false, PTy, false);
1054          Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
1055        } else {
1056          Args.push_back(*AI);
1057        }
1058
1059        // Add any parameter attributes.
1060        if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
1061          attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
1062      }
1063    }
1064  }
1065
1066  if (Attributes FnAttrs = CallerPAL.getFnAttributes())
1067    attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
1068
1069  if (NewRetTy->isVoidTy())
1070    Caller->setName("");   // Void type should not have a name.
1071
1072  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
1073                                                     attrVec.end());
1074
1075  Instruction *NC;
1076  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
1077    NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
1078                            Args.begin(), Args.end(),
1079                            Caller->getName(), Caller);
1080    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
1081    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
1082  } else {
1083    NC = CallInst::Create(Callee, Args.begin(), Args.end(),
1084                          Caller->getName(), Caller);
1085    CallInst *CI = cast<CallInst>(Caller);
1086    if (CI->isTailCall())
1087      cast<CallInst>(NC)->setTailCall();
1088    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
1089    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
1090  }
1091
1092  // Insert a cast of the return type as necessary.
1093  Value *NV = NC;
1094  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
1095    if (!NV->getType()->isVoidTy()) {
1096      Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
1097                                                            OldRetTy, false);
1098      NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
1099
1100      // If this is an invoke instruction, we should insert it after the first
1101      // non-phi instruction in the normal successor block.
1102      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
1103        BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
1104        InsertNewInstBefore(NC, *I);
1105      } else {
1106        // Otherwise, it's a call; just insert the cast right after it.
1107        InsertNewInstBefore(NC, *Caller);
1108      }
1109      Worklist.AddUsersToWorkList(*Caller);
1110    } else {
1111      NV = UndefValue::get(Caller->getType());
1112    }
1113  }
1114
1115
1116  if (!Caller->use_empty())
1117    Caller->replaceAllUsesWith(NV);
1118
1119  EraseInstFromFunction(*Caller);
1120  return true;
1121}
1122
1123// transformCallThroughTrampoline - Turn a call to a function created by the
1124// init_trampoline intrinsic into a direct call to the underlying function.
1125//
1126Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
1127  Value *Callee = CS.getCalledValue();
1128  const PointerType *PTy = cast<PointerType>(Callee->getType());
1129  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
1130  const AttrListPtr &Attrs = CS.getAttributes();
1131
1132  // If the call already has the 'nest' attribute somewhere then give up -
1133  // otherwise 'nest' would occur twice after splicing in the chain.
1134  if (Attrs.hasAttrSomewhere(Attribute::Nest))
1135    return 0;
1136
1137  IntrinsicInst *Tramp =
1138    cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
1139
1140  Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
1141  const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
1142  const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
1143
1144  const AttrListPtr &NestAttrs = NestF->getAttributes();
1145  if (!NestAttrs.isEmpty()) {
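    // Parameter attribute slots are numbered from 1; slot 0 holds the
    // return attributes.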
1146    unsigned NestIdx = 1;
1147    const Type *NestTy = 0;
1148    Attributes NestAttr = Attribute::None;
1149
1150    // Look for a parameter marked with the 'nest' attribute.
1151    for (FunctionType::param_iterator I = NestFTy->param_begin(),
1152         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
1153      if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) {
1154        // Record the parameter type and any other attributes.
1155        NestTy = *I;
1156        NestAttr = NestAttrs.getParamAttributes(NestIdx);
1157        break;
1158      }
1159
1160    if (NestTy) {
1161      Instruction *Caller = CS.getInstruction();
1162      std::vector<Value*> NewArgs;
1163      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
1164
1165      SmallVector<AttributeWithIndex, 8> NewAttrs;
1166      NewAttrs.reserve(Attrs.getNumSlots() + 1);
1167
1168      // Insert the nest argument into the call argument list, which may
1169      // mean appending it.  Likewise for attributes.
1170
1171      // Add any result attributes.
1172      if (Attributes Attr = Attrs.getRetAttributes())
1173        NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
1174
1175      {
1176        unsigned Idx = 1;
1177        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
1178        do {
1179          if (Idx == NestIdx) {
1180            // Add the chain argument and attributes.
1181            Value *NestVal = Tramp->getOperand(3);
1182            if (NestVal->getType() != NestTy)
1183              NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
1184            NewArgs.push_back(NestVal);
1185            NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
1186          }
1187
1188          if (I == E)
1189            break;
1190
1191          // Add the original argument and attributes.
1192          NewArgs.push_back(*I);
1193          if (Attributes Attr = Attrs.getParamAttributes(Idx))
1194            NewAttrs.push_back
1195              (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
1196
1197          ++Idx, ++I;
1198        } while (1);
1199      }
1200
1201      // Add any function attributes.
1202      if (Attributes Attr = Attrs.getFnAttributes())
1203        NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
1204
1205      // The trampoline may have been bitcast to a bogus type (FTy).
1206      // Handle this by synthesizing a new function type, equal to FTy
1207      // with the chain parameter inserted.
1208
1209      std::vector<const Type*> NewTypes;
1210      NewTypes.reserve(FTy->getNumParams()+1);
1211
1212      // Insert the chain's type into the list of parameter types, which may
1213      // mean appending it.
1214      {
1215        unsigned Idx = 1;
1216        FunctionType::param_iterator I = FTy->param_begin(),
1217          E = FTy->param_end();
1218
1219        do {
1220          if (Idx == NestIdx)
1221            // Add the chain's type.
1222            NewTypes.push_back(NestTy);
1223
1224          if (I == E)
1225            break;
1226
1227          // Add the original type.
1228          NewTypes.push_back(*I);
1229
1230          ++Idx, ++I;
1231        } while (1);
1232      }
1233
1234      // Replace the trampoline call with a direct call.  Let the generic
1235      // code sort out any function type mismatches.
1236      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
1237                                                FTy->isVarArg());
1238      Constant *NewCallee =
1239        NestF->getType() == PointerType::getUnqual(NewFTy) ?
1240        NestF : ConstantExpr::getBitCast(NestF,
1241                                         PointerType::getUnqual(NewFTy));
1242      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
1243                                                   NewAttrs.end());
1244
1245      Instruction *NewCaller;
1246      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
1247        NewCaller = InvokeInst::Create(NewCallee,
1248                                       II->getNormalDest(), II->getUnwindDest(),
1249                                       NewArgs.begin(), NewArgs.end(),
1250                                       Caller->getName(), Caller);
1251        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
1252        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
1253      } else {
1254        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
1255                                     Caller->getName(), Caller);
1256        if (cast<CallInst>(Caller)->isTailCall())
1257          cast<CallInst>(NewCaller)->setTailCall();
1258        cast<CallInst>(NewCaller)->
1259          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
1260        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
1261      }
1262      if (!Caller->getType()->isVoidTy())
1263        Caller->replaceAllUsesWith(NewCaller);
1264      Caller->eraseFromParent();
1265      Worklist.Remove(Caller);
1266      return 0;
1267    }
1268  }
1269
1270  // Replace the trampoline call with a direct call.  Since there is no 'nest'
1271  // parameter, there is no need to adjust the argument list.  Let the generic
1272  // code sort out any function type mismatches.
1273  Constant *NewCallee =
1274    NestF->getType() == PTy ? NestF :
1275                              ConstantExpr::getBitCast(NestF, PTy);
1276  CS.setCalledFunction(NewCallee);
1277  return CS.getInstruction();
1278}
1279
1280