CGBuiltin.cpp revision 06b6c589a5fff8e5476fe2b4cd6a660af71bfddd
1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This contains code to emit Builtin calls as LLVM code.
11//
12//===----------------------------------------------------------------------===//
13
14#include "TargetInfo.h"
15#include "CodeGenFunction.h"
16#include "CodeGenModule.h"
17#include "CGObjCRuntime.h"
18#include "clang/Basic/TargetInfo.h"
19#include "clang/AST/APValue.h"
20#include "clang/AST/ASTContext.h"
21#include "clang/AST/Decl.h"
22#include "clang/Basic/TargetBuiltins.h"
23#include "llvm/Intrinsics.h"
24#include "llvm/Target/TargetData.h"
25using namespace clang;
26using namespace CodeGen;
27using namespace llvm;
28
29static void EmitMemoryBarrier(CodeGenFunction &CGF,
30                              bool LoadLoad, bool LoadStore,
31                              bool StoreLoad, bool StoreStore,
32                              bool Device) {
33  Value *True = llvm::ConstantInt::getTrue(CGF.getLLVMContext());
34  Value *False = llvm::ConstantInt::getFalse(CGF.getLLVMContext());
35  Value *C[5] = { LoadLoad ? True : False,
36                  LoadStore ? True : False,
37                  StoreLoad ? True : False,
38                  StoreStore  ? True : False,
39                  Device ? True : False };
40  CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::memory_barrier),
41                         C, C + 5);
42}
43
44static Value *EmitCastToInt(CodeGenFunction &CGF,
45                            const llvm::Type *ToType, Value *Val) {
46  if (Val->getType()->isPointerTy()) {
47    return CGF.Builder.CreatePtrToInt(Val, ToType);
48  }
49  assert(Val->getType()->isIntegerTy() &&
50         "Used a non-integer and non-pointer type with atomic builtin");
51  assert(Val->getType()->getScalarSizeInBits() <=
52         ToType->getScalarSizeInBits() && "Integer type too small");
53  return CGF.Builder.CreateSExtOrBitCast(Val, ToType);
54}
55
56static Value *EmitCastFromInt(CodeGenFunction &CGF, QualType ToQualType,
57                              Value *Val) {
58  const llvm::Type *ToType = CGF.ConvertType(ToQualType);
59  if (ToType->isPointerTy()) {
60    return CGF.Builder.CreateIntToPtr(Val, ToType);
61  }
62  assert(Val->getType()->isIntegerTy() &&
63         "Used a non-integer and non-pointer type with atomic builtin");
64  assert(Val->getType()->getScalarSizeInBits() >=
65         ToType->getScalarSizeInBits() && "Integer type too small");
66  return CGF.Builder.CreateTruncOrBitCast(Val, ToType);
67}
68
69// The atomic builtins are also full memory barriers. This is a utility for
70// wrapping a call to the builtins with memory barriers.
71static Value *EmitCallWithBarrier(CodeGenFunction &CGF, Value *Fn,
72                                  Value **ArgBegin, Value **ArgEnd) {
73  // FIXME: We need a target hook for whether this applies to device memory or
74  // not.
75  bool Device = true;
76
77  // Create barriers both before and after the call.
78  EmitMemoryBarrier(CGF, true, true, true, true, Device);
79  Value *Result = CGF.Builder.CreateCall(Fn, ArgBegin, ArgEnd);
80  EmitMemoryBarrier(CGF, true, true, true, true, Device);
81  return Result;
82}
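// Illustrative note (not literal compiler output): for a 32-bit __sync
// builtin this helper produces IR roughly of the shape
//   call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
//   %old = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %ptr, i32 %val)
//   call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)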
83
84/// Utility to insert an atomic instruction based on Intrinsic::ID

85/// and the expression node.
86static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
87                               Intrinsic::ID Id, const CallExpr *E) {
88  const llvm::Type *ValueType =
89    llvm::IntegerType::get(CGF.getLLVMContext(),
90                           CGF.getContext().getTypeSize(E->getType()));
91  const llvm::Type *PtrType = ValueType->getPointerTo();
92  const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
93  Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes, 2);
94
95  Value *Args[2] = { CGF.Builder.CreateBitCast(CGF.EmitScalarExpr(E->getArg(0)),
96                                               PtrType),
97                     EmitCastToInt(CGF, ValueType,
98                                   CGF.EmitScalarExpr(E->getArg(1))) };
99  return RValue::get(EmitCastFromInt(CGF, E->getType(),
100                                     EmitCallWithBarrier(CGF, AtomF, Args,
101                                                         Args + 2)));
102}
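// For example, __sync_fetch_and_add(&x, 1) on an int reaches this path with
// Id == Intrinsic::atomic_load_add: the pointer operand is bitcast to i32*,
// the value operand is widened/bitcast to i32, and the barrier-wrapped
// intrinsic's result (the prior contents of the location) is cast back to the
// builtin's result type.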
103
104/// Utility to insert an atomic instruction based on Intrinsic::ID and
105/// the expression node, where the return value is the result of the
106/// operation.
107static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
108                                   Intrinsic::ID Id, const CallExpr *E,
109                                   Instruction::BinaryOps Op) {
110  const llvm::Type *ValueType =
111    llvm::IntegerType::get(CGF.getLLVMContext(),
112                           CGF.getContext().getTypeSize(E->getType()));
113  const llvm::Type *PtrType = ValueType->getPointerTo();
114  const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
115  Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes, 2);
116
117  Value *Args[2] = { CGF.Builder.CreateBitCast(CGF.EmitScalarExpr(E->getArg(0)),
118                                               PtrType),
119                     EmitCastToInt(CGF, ValueType,
120                                   CGF.EmitScalarExpr(E->getArg(1))) };
121  Value *Result = EmitCallWithBarrier(CGF, AtomF, Args, Args + 2);
122  return RValue::get(EmitCastFromInt(CGF, E->getType(),
123                                     CGF.Builder.CreateBinOp(Op, Result,
124                                                             Args[1])));
125}
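// Unlike EmitBinaryAtomic, this returns the *new* value: the intrinsic still
// yields the old contents, so the operation is re-applied in IR. For example,
// __sync_add_and_fetch(&x, n) becomes "old = atomic.load.add(&x, n); old + n".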
126
127/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
128/// which must be a scalar floating point type.
129static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
130  const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
131  assert(ValTyP && "isn't scalar fp type!");
132
133  StringRef FnName;
134  switch (ValTyP->getKind()) {
135  default: assert(0 && "Isn't a scalar fp type!");
136  case BuiltinType::Float:      FnName = "fabsf"; break;
137  case BuiltinType::Double:     FnName = "fabs"; break;
138  case BuiltinType::LongDouble: FnName = "fabsl"; break;
139  }
140
141  // The prototype is something that takes and returns whatever V's type is.
142  std::vector<const llvm::Type*> Args;
143  Args.push_back(V->getType());
144  llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), Args, false);
145  llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);
146
147  return CGF.Builder.CreateCall(Fn, V, "abs");
148}
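// Note that this emits a direct call to the libm symbol (e.g. "fabsf" for a
// float argument) chosen from the builtin float kind, with a prototype
// derived from V's own LLVM type, rather than going through an intrinsic.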
149
150RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
151                                        unsigned BuiltinID, const CallExpr *E) {
152  // See if we can constant fold this builtin.  If so, don't emit it at all.
153  Expr::EvalResult Result;
154  if (E->Evaluate(Result, CGM.getContext())) {
155    if (Result.Val.isInt())
156      return RValue::get(llvm::ConstantInt::get(VMContext,
157                                                Result.Val.getInt()));
158    else if (Result.Val.isFloat())
159      return RValue::get(ConstantFP::get(VMContext, Result.Val.getFloat()));
160  }
161
162  switch (BuiltinID) {
163  default: break;  // Handle intrinsics and libm functions below.
164  case Builtin::BI__builtin___CFStringMakeConstantString:
165  case Builtin::BI__builtin___NSStringMakeConstantString:
166    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
167  case Builtin::BI__builtin_stdarg_start:
168  case Builtin::BI__builtin_va_start:
169  case Builtin::BI__builtin_va_end: {
170    Value *ArgValue = EmitVAListRef(E->getArg(0));
171    const llvm::Type *DestType = llvm::Type::getInt8PtrTy(VMContext);
172    if (ArgValue->getType() != DestType)
173      ArgValue = Builder.CreateBitCast(ArgValue, DestType,
174                                       ArgValue->getName().data());
175
176    Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
177      Intrinsic::vaend : Intrinsic::vastart;
178    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
179  }
180  case Builtin::BI__builtin_va_copy: {
181    Value *DstPtr = EmitVAListRef(E->getArg(0));
182    Value *SrcPtr = EmitVAListRef(E->getArg(1));
183
184    const llvm::Type *Type = llvm::Type::getInt8PtrTy(VMContext);
185
186    DstPtr = Builder.CreateBitCast(DstPtr, Type);
187    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
188    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
189                                           DstPtr, SrcPtr));
190  }
191  case Builtin::BI__builtin_abs: {
192    Value *ArgValue = EmitScalarExpr(E->getArg(0));
193
194    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
195    Value *CmpResult =
196    Builder.CreateICmpSGE(ArgValue,
197                          llvm::Constant::getNullValue(ArgValue->getType()),
198                                                            "abscond");
199    Value *Result =
200      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
201
202    return RValue::get(Result);
203  }
204  case Builtin::BI__builtin_ctz:
205  case Builtin::BI__builtin_ctzl:
206  case Builtin::BI__builtin_ctzll: {
207    Value *ArgValue = EmitScalarExpr(E->getArg(0));
208
209    const llvm::Type *ArgType = ArgValue->getType();
210    Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1);
211
212    const llvm::Type *ResultType = ConvertType(E->getType());
213    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
214    if (Result->getType() != ResultType)
215      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
216                                     "cast");
217    return RValue::get(Result);
218  }
219  case Builtin::BI__builtin_clz:
220  case Builtin::BI__builtin_clzl:
221  case Builtin::BI__builtin_clzll: {
222    Value *ArgValue = EmitScalarExpr(E->getArg(0));
223
224    const llvm::Type *ArgType = ArgValue->getType();
225    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, &ArgType, 1);
226
227    const llvm::Type *ResultType = ConvertType(E->getType());
228    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
229    if (Result->getType() != ResultType)
230      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
231                                     "cast");
232    return RValue::get(Result);
233  }
234  case Builtin::BI__builtin_ffs:
235  case Builtin::BI__builtin_ffsl:
236  case Builtin::BI__builtin_ffsll: {
237    // ffs(x) -> x ? cttz(x) + 1 : 0
238    Value *ArgValue = EmitScalarExpr(E->getArg(0));
239
240    const llvm::Type *ArgType = ArgValue->getType();
241    Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1);
242
243    const llvm::Type *ResultType = ConvertType(E->getType());
244    Value *Tmp = Builder.CreateAdd(Builder.CreateCall(F, ArgValue, "tmp"),
245                                   llvm::ConstantInt::get(ArgType, 1), "tmp");
246    Value *Zero = llvm::Constant::getNullValue(ArgType);
247    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
248    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
249    if (Result->getType() != ResultType)
250      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
251                                     "cast");
252    return RValue::get(Result);
253  }
254  case Builtin::BI__builtin_parity:
255  case Builtin::BI__builtin_parityl:
256  case Builtin::BI__builtin_parityll: {
257    // parity(x) -> ctpop(x) & 1
258    Value *ArgValue = EmitScalarExpr(E->getArg(0));
259
260    const llvm::Type *ArgType = ArgValue->getType();
261    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, &ArgType, 1);
262
263    const llvm::Type *ResultType = ConvertType(E->getType());
264    Value *Tmp = Builder.CreateCall(F, ArgValue, "tmp");
265    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1),
266                                      "tmp");
267    if (Result->getType() != ResultType)
268      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
269                                     "cast");
270    return RValue::get(Result);
271  }
272  case Builtin::BI__builtin_popcount:
273  case Builtin::BI__builtin_popcountl:
274  case Builtin::BI__builtin_popcountll: {
275    Value *ArgValue = EmitScalarExpr(E->getArg(0));
276
277    const llvm::Type *ArgType = ArgValue->getType();
278    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, &ArgType, 1);
279
280    const llvm::Type *ResultType = ConvertType(E->getType());
281    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
282    if (Result->getType() != ResultType)
283      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
284                                     "cast");
285    return RValue::get(Result);
286  }
287  case Builtin::BI__builtin_expect: {
288    // FIXME: pass expect through to LLVM
289    if (E->getArg(1)->HasSideEffects(getContext()))
290      (void)EmitScalarExpr(E->getArg(1));
291    return RValue::get(EmitScalarExpr(E->getArg(0)));
292  }
293  case Builtin::BI__builtin_bswap32:
294  case Builtin::BI__builtin_bswap64: {
295    Value *ArgValue = EmitScalarExpr(E->getArg(0));
296    const llvm::Type *ArgType = ArgValue->getType();
297    Value *F = CGM.getIntrinsic(Intrinsic::bswap, &ArgType, 1);
298    return RValue::get(Builder.CreateCall(F, ArgValue, "tmp"));
299  }
300  case Builtin::BI__builtin_object_size: {
301    // We pass this builtin onto the optimizer so that it can
302    // figure out the object size in more complex cases.
303    const llvm::Type *ResType[] = {
304      ConvertType(E->getType())
305    };
306
307    // LLVM only supports 0 and 2, make sure that we pass along that
308    // as a boolean.
309    Value *Ty = EmitScalarExpr(E->getArg(1));
310    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
311    assert(CI);
312    uint64_t val = CI->getZExtValue();
313    CI = ConstantInt::get(llvm::Type::getInt1Ty(VMContext), (val & 0x2) >> 1);
314
315    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType, 1);
316    return RValue::get(Builder.CreateCall2(F,
317                                           EmitScalarExpr(E->getArg(0)),
318                                           CI));
319  }
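  // Illustrative mapping for the case above (per "(val & 0x2) >> 1"):
  // source-level __builtin_object_size types 0 and 1 pass false to
  // @llvm.objectsize (maximum remaining size), while types 2 and 3 pass true
  // (minimum size).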
320  case Builtin::BI__builtin_prefetch: {
321    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
322    // FIXME: Technically these constants should be of type 'int', yes?
323    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
324      llvm::ConstantInt::get(Int32Ty, 0);
325    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
326      llvm::ConstantInt::get(Int32Ty, 3);
327    Value *F = CGM.getIntrinsic(Intrinsic::prefetch, 0, 0);
328    return RValue::get(Builder.CreateCall3(F, Address, RW, Locality));
329  }
330  case Builtin::BI__builtin_trap: {
331    Value *F = CGM.getIntrinsic(Intrinsic::trap, 0, 0);
332    return RValue::get(Builder.CreateCall(F));
333  }
334  case Builtin::BI__builtin_unreachable: {
335    if (CatchUndefined && HaveInsertPoint())
336      EmitBranch(getTrapBB());
337    Value *V = Builder.CreateUnreachable();
338    Builder.ClearInsertionPoint();
339    return RValue::get(V);
340  }
341
342  case Builtin::BI__builtin_powi:
343  case Builtin::BI__builtin_powif:
344  case Builtin::BI__builtin_powil: {
345    Value *Base = EmitScalarExpr(E->getArg(0));
346    Value *Exponent = EmitScalarExpr(E->getArg(1));
347    const llvm::Type *ArgType = Base->getType();
348    Value *F = CGM.getIntrinsic(Intrinsic::powi, &ArgType, 1);
349    return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp"));
350  }
351
352  case Builtin::BI__builtin_isgreater:
353  case Builtin::BI__builtin_isgreaterequal:
354  case Builtin::BI__builtin_isless:
355  case Builtin::BI__builtin_islessequal:
356  case Builtin::BI__builtin_islessgreater:
357  case Builtin::BI__builtin_isunordered: {
358    // Ordered comparisons: we know the arguments to these are matching scalar
359    // floating point values.
360    Value *LHS = EmitScalarExpr(E->getArg(0));
361    Value *RHS = EmitScalarExpr(E->getArg(1));
362
363    switch (BuiltinID) {
364    default: assert(0 && "Unknown ordered comparison");
365    case Builtin::BI__builtin_isgreater:
366      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
367      break;
368    case Builtin::BI__builtin_isgreaterequal:
369      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
370      break;
371    case Builtin::BI__builtin_isless:
372      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
373      break;
374    case Builtin::BI__builtin_islessequal:
375      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
376      break;
377    case Builtin::BI__builtin_islessgreater:
378      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
379      break;
380    case Builtin::BI__builtin_isunordered:
381      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
382      break;
383    }
384    // ZExt bool to int type.
385    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()),
386                                          "tmp"));
387  }
388  case Builtin::BI__builtin_isnan: {
389    Value *V = EmitScalarExpr(E->getArg(0));
390    V = Builder.CreateFCmpUNO(V, V, "cmp");
391    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp"));
392  }
393
394  case Builtin::BI__builtin_isinf: {
395    // isinf(x) --> fabs(x) == infinity
396    Value *V = EmitScalarExpr(E->getArg(0));
397    V = EmitFAbs(*this, V, E->getArg(0)->getType());
398
399    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
400    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp"));
401  }
402
403  // TODO: BI__builtin_isinf_sign
404  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0
405
406  case Builtin::BI__builtin_isnormal: {
407    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
408    Value *V = EmitScalarExpr(E->getArg(0));
409    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
410
411    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
412    Value *IsLessThanInf =
413      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
414    APFloat Smallest = APFloat::getSmallestNormalized(
415                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
416    Value *IsNormal =
417      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
418                            "isnormal");
419    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
420    V = Builder.CreateAnd(V, IsNormal, "and");
421    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
422  }
423
424  case Builtin::BI__builtin_isfinite: {
425    // isfinite(x) --> x == x && fabs(x) != infinity
426    Value *V = EmitScalarExpr(E->getArg(0));
427    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
428
429    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
430    Value *IsNotInf =
431      Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
432
433    V = Builder.CreateAnd(Eq, IsNotInf, "and");
434    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
435  }
436
437  case Builtin::BI__builtin_fpclassify: {
438    Value *V = EmitScalarExpr(E->getArg(5));
439    const llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
440
441    // Create Result
442    BasicBlock *Begin = Builder.GetInsertBlock();
443    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
444    Builder.SetInsertPoint(End);
445    PHINode *Result =
446      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()),
447                        "fpclassify_result");
448
449    // if (V==0) return FP_ZERO
450    Builder.SetInsertPoint(Begin);
451    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
452                                          "iszero");
453    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
454    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
455    Builder.CreateCondBr(IsZero, End, NotZero);
456    Result->addIncoming(ZeroLiteral, Begin);
457
458    // if (V != V) return FP_NAN
459    Builder.SetInsertPoint(NotZero);
460    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
461    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
462    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
463    Builder.CreateCondBr(IsNan, End, NotNan);
464    Result->addIncoming(NanLiteral, NotZero);
465
466    // if (fabs(V) == infinity) return FP_INFINITY
467    Builder.SetInsertPoint(NotNan);
468    Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
469    Value *IsInf =
470      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
471                            "isinf");
472    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
473    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
474    Builder.CreateCondBr(IsInf, End, NotInf);
475    Result->addIncoming(InfLiteral, NotNan);
476
477    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
478    Builder.SetInsertPoint(NotInf);
479    APFloat Smallest = APFloat::getSmallestNormalized(
480        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
481    Value *IsNormal =
482      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
483                            "isnormal");
484    Value *NormalResult =
485      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
486                           EmitScalarExpr(E->getArg(3)));
487    Builder.CreateBr(End);
488    Result->addIncoming(NormalResult, NotInf);
489
490    // return Result
491    Builder.SetInsertPoint(End);
492    return RValue::get(Result);
493  }
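  // The case above builds a chain of blocks
  //   entry -> fpclassify_not_zero -> fpclassify_not_nan -> fpclassify_not_inf
  // where each test branches straight to fpclassify_end on success, and the
  // PHI there collects FP_ZERO, FP_NAN, FP_INFINITE, or the normal/subnormal
  // select result.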
494
495  case Builtin::BIalloca:
496  case Builtin::BI__builtin_alloca: {
497    Value *Size = EmitScalarExpr(E->getArg(0));
498    return RValue::get(Builder.CreateAlloca(llvm::Type::getInt8Ty(VMContext), Size, "tmp"));
499  }
500  case Builtin::BIbzero:
501  case Builtin::BI__builtin_bzero: {
502    Value *Address = EmitScalarExpr(E->getArg(0));
503    Value *SizeVal = EmitScalarExpr(E->getArg(1));
504    Builder.CreateCall5(CGM.getMemSetFn(Address->getType(), SizeVal->getType()),
505                   Address,
506                   llvm::ConstantInt::get(llvm::Type::getInt8Ty(VMContext), 0),
507                   SizeVal,
508                   llvm::ConstantInt::get(Int32Ty, 1),
509                   llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
510    return RValue::get(Address);
511  }
512  case Builtin::BImemcpy:
513  case Builtin::BI__builtin_memcpy: {
514    Value *Address = EmitScalarExpr(E->getArg(0));
515    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
516    Value *SizeVal = EmitScalarExpr(E->getArg(2));
517    Builder.CreateCall5(CGM.getMemCpyFn(Address->getType(), SrcAddr->getType(),
518                                        SizeVal->getType()),
519                  Address, SrcAddr, SizeVal,
520                  llvm::ConstantInt::get(Int32Ty, 1),
521                  llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
522    return RValue::get(Address);
523  }
524
525  case Builtin::BI__builtin_objc_memmove_collectable: {
526    Value *Address = EmitScalarExpr(E->getArg(0));
527    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
528    Value *SizeVal = EmitScalarExpr(E->getArg(2));
529    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
530                                                  Address, SrcAddr, SizeVal);
531    return RValue::get(Address);
532  }
533
534  case Builtin::BImemmove:
535  case Builtin::BI__builtin_memmove: {
536    Value *Address = EmitScalarExpr(E->getArg(0));
537    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
538    Value *SizeVal = EmitScalarExpr(E->getArg(2));
539    Builder.CreateCall5(CGM.getMemMoveFn(Address->getType(), SrcAddr->getType(),
540                                         SizeVal->getType()),
541                  Address, SrcAddr, SizeVal,
542                  llvm::ConstantInt::get(Int32Ty, 1),
543                  llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
544    return RValue::get(Address);
545  }
546  case Builtin::BImemset:
547  case Builtin::BI__builtin_memset: {
548    Value *Address = EmitScalarExpr(E->getArg(0));
549    Value *SizeVal = EmitScalarExpr(E->getArg(2));
550    Builder.CreateCall5(CGM.getMemSetFn(Address->getType(), SizeVal->getType()),
551                  Address,
552                  Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
553                                      llvm::Type::getInt8Ty(VMContext)),
554                  SizeVal,
555                  llvm::ConstantInt::get(Int32Ty, 1),
556                  llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
557    return RValue::get(Address);
558  }
559  case Builtin::BI__builtin_dwarf_cfa: {
560    // The offset in bytes from the first argument to the CFA.
561    //
562    // Why on earth is this in the frontend?  Is there any reason at
563    // all that the backend can't reasonably determine this while
564    // lowering llvm.eh.dwarf.cfa()?
565    //
566    // TODO: If there's a satisfactory reason, add a target hook for
567    // this instead of hard-coding 0, which is correct for most targets.
568    int32_t Offset = 0;
569
570    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa, 0, 0);
571    return RValue::get(Builder.CreateCall(F,
572                                      llvm::ConstantInt::get(Int32Ty, Offset)));
573  }
574  case Builtin::BI__builtin_return_address: {
575    Value *Depth = EmitScalarExpr(E->getArg(0));
576    Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp");
577    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress, 0, 0);
578    return RValue::get(Builder.CreateCall(F, Depth));
579  }
580  case Builtin::BI__builtin_frame_address: {
581    Value *Depth = EmitScalarExpr(E->getArg(0));
582    Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp");
583    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress, 0, 0);
584    return RValue::get(Builder.CreateCall(F, Depth));
585  }
586  case Builtin::BI__builtin_extract_return_addr: {
587    Value *Address = EmitScalarExpr(E->getArg(0));
588    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
589    return RValue::get(Result);
590  }
591  case Builtin::BI__builtin_frob_return_addr: {
592    Value *Address = EmitScalarExpr(E->getArg(0));
593    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
594    return RValue::get(Result);
595  }
596  case Builtin::BI__builtin_dwarf_sp_column: {
597    const llvm::IntegerType *Ty
598      = cast<llvm::IntegerType>(ConvertType(E->getType()));
599    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
600    if (Column == -1) {
601      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
602      return RValue::get(llvm::UndefValue::get(Ty));
603    }
604    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
605  }
606  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
607    Value *Address = EmitScalarExpr(E->getArg(0));
608    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
609      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
610    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
611  }
612  case Builtin::BI__builtin_eh_return: {
613    Value *Int = EmitScalarExpr(E->getArg(0));
614    Value *Ptr = EmitScalarExpr(E->getArg(1));
615
616    const llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
617    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
618           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
619    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
620                                  ? Intrinsic::eh_return_i32
621                                  : Intrinsic::eh_return_i64,
622                                0, 0);
623    Builder.CreateCall2(F, Int, Ptr);
624    Value *V = Builder.CreateUnreachable();
625    Builder.ClearInsertionPoint();
626    return RValue::get(V);
627  }
628  case Builtin::BI__builtin_unwind_init: {
629    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init, 0, 0);
630    return RValue::get(Builder.CreateCall(F));
631  }
632  case Builtin::BI__builtin_extend_pointer: {
633    // Extends a pointer to the size of an _Unwind_Word, which is
634    // uint64_t on all platforms.  Generally this gets poked into a
635    // register and eventually used as an address, so if the
636    // addressing registers are wider than pointers and the platform
637    // doesn't implicitly ignore high-order bits when doing
638    // addressing, we need to make sure we zext / sext based on
639    // the platform's expectations.
640    //
641    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
642
643    LLVMContext &C = CGM.getLLVMContext();
644
645    // Cast the pointer to intptr_t.
646    Value *Ptr = EmitScalarExpr(E->getArg(0));
647    const llvm::IntegerType *IntPtrTy = CGM.getTargetData().getIntPtrType(C);
648    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
649
650    // If that's 64 bits, we're done.
651    if (IntPtrTy->getBitWidth() == 64)
652      return RValue::get(Result);
653
654    // Otherwise, ask the codegen data what to do.
655    if (getTargetHooks().extendPointerWithSExt())
656      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
657    else
658      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
659  }
660  case Builtin::BI__builtin_setjmp: {
661    // Buffer is a void**.
662    Value *Buf = EmitScalarExpr(E->getArg(0));
663
664    // Store the frame pointer to the setjmp buffer.
665    Value *FrameAddr =
666      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
667                         ConstantInt::get(Int32Ty, 0));
668    Builder.CreateStore(FrameAddr, Buf);
669
670    // Store the stack pointer to the setjmp buffer.
671    Value *StackAddr =
672      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
673    Value *StackSaveSlot =
674      Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
675    Builder.CreateStore(StackAddr, StackSaveSlot);
676
677    // Call LLVM's EH setjmp, which is lightweight.
678    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
679    Buf = Builder.CreateBitCast(Buf, llvm::Type::getInt8PtrTy(VMContext));
680    return RValue::get(Builder.CreateCall(F, Buf));
681  }
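  // Buffer layout used by the case above (in void* slots): slot 0 receives
  // the frame address and slot 2 the saved stack pointer; slot 1 is left
  // untouched here, and the buffer is then passed as i8* to
  // llvm.eh.sjlj.setjmp.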
682  case Builtin::BI__builtin_longjmp: {
683    Value *Buf = EmitScalarExpr(E->getArg(0));
684    Buf = Builder.CreateBitCast(Buf, llvm::Type::getInt8PtrTy(VMContext));
685
686    // Call LLVM's EH longjmp, which is lightweight.
687    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
688
689    // longjmp doesn't return; mark this as unreachable
690    Value *V = Builder.CreateUnreachable();
691    Builder.ClearInsertionPoint();
692    return RValue::get(V);
693  }
694  case Builtin::BI__sync_fetch_and_add:
695  case Builtin::BI__sync_fetch_and_sub:
696  case Builtin::BI__sync_fetch_and_or:
697  case Builtin::BI__sync_fetch_and_and:
698  case Builtin::BI__sync_fetch_and_xor:
699  case Builtin::BI__sync_add_and_fetch:
700  case Builtin::BI__sync_sub_and_fetch:
701  case Builtin::BI__sync_and_and_fetch:
702  case Builtin::BI__sync_or_and_fetch:
703  case Builtin::BI__sync_xor_and_fetch:
704  case Builtin::BI__sync_val_compare_and_swap:
705  case Builtin::BI__sync_bool_compare_and_swap:
706  case Builtin::BI__sync_lock_test_and_set:
707  case Builtin::BI__sync_lock_release:
708    assert(0 && "Shouldn't make it through sema");
709  case Builtin::BI__sync_fetch_and_add_1:
710  case Builtin::BI__sync_fetch_and_add_2:
711  case Builtin::BI__sync_fetch_and_add_4:
712  case Builtin::BI__sync_fetch_and_add_8:
713  case Builtin::BI__sync_fetch_and_add_16:
714    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_add, E);
715  case Builtin::BI__sync_fetch_and_sub_1:
716  case Builtin::BI__sync_fetch_and_sub_2:
717  case Builtin::BI__sync_fetch_and_sub_4:
718  case Builtin::BI__sync_fetch_and_sub_8:
719  case Builtin::BI__sync_fetch_and_sub_16:
720    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_sub, E);
721  case Builtin::BI__sync_fetch_and_or_1:
722  case Builtin::BI__sync_fetch_and_or_2:
723  case Builtin::BI__sync_fetch_and_or_4:
724  case Builtin::BI__sync_fetch_and_or_8:
725  case Builtin::BI__sync_fetch_and_or_16:
726    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_or, E);
727  case Builtin::BI__sync_fetch_and_and_1:
728  case Builtin::BI__sync_fetch_and_and_2:
729  case Builtin::BI__sync_fetch_and_and_4:
730  case Builtin::BI__sync_fetch_and_and_8:
731  case Builtin::BI__sync_fetch_and_and_16:
732    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_and, E);
733  case Builtin::BI__sync_fetch_and_xor_1:
734  case Builtin::BI__sync_fetch_and_xor_2:
735  case Builtin::BI__sync_fetch_and_xor_4:
736  case Builtin::BI__sync_fetch_and_xor_8:
737  case Builtin::BI__sync_fetch_and_xor_16:
738    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_xor, E);
739
740  // Clang extensions: not overloaded yet.
741  case Builtin::BI__sync_fetch_and_min:
742    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_min, E);
743  case Builtin::BI__sync_fetch_and_max:
744    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_max, E);
745  case Builtin::BI__sync_fetch_and_umin:
746    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umin, E);
747  case Builtin::BI__sync_fetch_and_umax:
748    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umax, E);
749
750  case Builtin::BI__sync_add_and_fetch_1:
751  case Builtin::BI__sync_add_and_fetch_2:
752  case Builtin::BI__sync_add_and_fetch_4:
753  case Builtin::BI__sync_add_and_fetch_8:
754  case Builtin::BI__sync_add_and_fetch_16:
755    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_add, E,
756                                llvm::Instruction::Add);
757  case Builtin::BI__sync_sub_and_fetch_1:
758  case Builtin::BI__sync_sub_and_fetch_2:
759  case Builtin::BI__sync_sub_and_fetch_4:
760  case Builtin::BI__sync_sub_and_fetch_8:
761  case Builtin::BI__sync_sub_and_fetch_16:
762    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_sub, E,
763                                llvm::Instruction::Sub);
764  case Builtin::BI__sync_and_and_fetch_1:
765  case Builtin::BI__sync_and_and_fetch_2:
766  case Builtin::BI__sync_and_and_fetch_4:
767  case Builtin::BI__sync_and_and_fetch_8:
768  case Builtin::BI__sync_and_and_fetch_16:
769    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_and, E,
770                                llvm::Instruction::And);
771  case Builtin::BI__sync_or_and_fetch_1:
772  case Builtin::BI__sync_or_and_fetch_2:
773  case Builtin::BI__sync_or_and_fetch_4:
774  case Builtin::BI__sync_or_and_fetch_8:
775  case Builtin::BI__sync_or_and_fetch_16:
776    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_or, E,
777                                llvm::Instruction::Or);
778  case Builtin::BI__sync_xor_and_fetch_1:
779  case Builtin::BI__sync_xor_and_fetch_2:
780  case Builtin::BI__sync_xor_and_fetch_4:
781  case Builtin::BI__sync_xor_and_fetch_8:
782  case Builtin::BI__sync_xor_and_fetch_16:
783    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_xor, E,
784                                llvm::Instruction::Xor);
785
786  case Builtin::BI__sync_val_compare_and_swap_1:
787  case Builtin::BI__sync_val_compare_and_swap_2:
788  case Builtin::BI__sync_val_compare_and_swap_4:
789  case Builtin::BI__sync_val_compare_and_swap_8:
790  case Builtin::BI__sync_val_compare_and_swap_16: {
791    const llvm::Type *ValueType =
792      llvm::IntegerType::get(getLLVMContext(),
793                             getContext().getTypeSize(E->getType()));
794    const llvm::Type *PtrType = ValueType->getPointerTo();
795    const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
796    Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap,
797                                    IntrinsicTypes, 2);
798
799    Value *Args[3] = { Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
800                                             PtrType),
801                       EmitCastToInt(*this, ValueType,
802                                     EmitScalarExpr(E->getArg(1))),
803                       EmitCastToInt(*this, ValueType,
804                                     EmitScalarExpr(E->getArg(2))) };
805    return RValue::get(EmitCastFromInt(*this, E->getType(),
806                                       EmitCallWithBarrier(*this, AtomF, Args,
807                                                           Args + 3)));
808  }
809
810  case Builtin::BI__sync_bool_compare_and_swap_1:
811  case Builtin::BI__sync_bool_compare_and_swap_2:
812  case Builtin::BI__sync_bool_compare_and_swap_4:
813  case Builtin::BI__sync_bool_compare_and_swap_8:
814  case Builtin::BI__sync_bool_compare_and_swap_16: {
815    const llvm::Type *ValueType =
816      llvm::IntegerType::get(
817        getLLVMContext(),
818        getContext().getTypeSize(E->getArg(1)->getType()));
819    const llvm::Type *PtrType = ValueType->getPointerTo();
820    const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
821    Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap,
822                                    IntrinsicTypes, 2);
823
824    Value *Args[3] = { Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
825                                             PtrType),
826                       EmitCastToInt(*this, ValueType,
827                                     EmitScalarExpr(E->getArg(1))),
828                       EmitCastToInt(*this, ValueType,
829                                     EmitScalarExpr(E->getArg(2))) };
830    Value *OldVal = Args[1];
831    Value *PrevVal = EmitCallWithBarrier(*this, AtomF, Args, Args + 3);
832    Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
833    // zext bool to int.
834    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
835  }
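  // For example, __sync_bool_compare_and_swap(&x, old, new) compares the value
  // returned by the barrier-wrapped cmp-and-swap intrinsic against the
  // expected "old" operand and yields that comparison, zero-extended to the
  // builtin's int return type.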
836
837  case Builtin::BI__sync_lock_test_and_set_1:
838  case Builtin::BI__sync_lock_test_and_set_2:
839  case Builtin::BI__sync_lock_test_and_set_4:
840  case Builtin::BI__sync_lock_test_and_set_8:
841  case Builtin::BI__sync_lock_test_and_set_16:
842    return EmitBinaryAtomic(*this, Intrinsic::atomic_swap, E);
843
844  case Builtin::BI__sync_lock_release_1:
845  case Builtin::BI__sync_lock_release_2:
846  case Builtin::BI__sync_lock_release_4:
847  case Builtin::BI__sync_lock_release_8:
848  case Builtin::BI__sync_lock_release_16: {
849    Value *Ptr = EmitScalarExpr(E->getArg(0));
850    const llvm::Type *ElTy =
851      cast<llvm::PointerType>(Ptr->getType())->getElementType();
852    llvm::StoreInst *Store =
853      Builder.CreateStore(llvm::Constant::getNullValue(ElTy), Ptr);
854    Store->setVolatile(true);
855    return RValue::get(0);
856  }
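  // Note that __sync_lock_release is emitted as a plain volatile store of zero
  // to the pointee type; no explicit memory barrier is added here.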
857
858  case Builtin::BI__sync_synchronize: {
859    // We assume, like gcc appears to, that this only applies to cached memory.
860    EmitMemoryBarrier(*this, true, true, true, true, false);
861    return RValue::get(0);
862  }
863
864  case Builtin::BI__builtin_llvm_memory_barrier: {
865    Value *C[5] = {
866      EmitScalarExpr(E->getArg(0)),
867      EmitScalarExpr(E->getArg(1)),
868      EmitScalarExpr(E->getArg(2)),
869      EmitScalarExpr(E->getArg(3)),
870      EmitScalarExpr(E->getArg(4))
871    };
872    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::memory_barrier), C, C + 5);
873    return RValue::get(0);
874  }
875
876    // Library functions with special handling.
877  case Builtin::BIsqrt:
878  case Builtin::BIsqrtf:
879  case Builtin::BIsqrtl: {
880    // TODO: there is currently no set of optimizer flags
881    // sufficient for us to rewrite sqrt to @llvm.sqrt.
882    // -fmath-errno=0 is not good enough; we need finiteness.
883    // We could probably precondition the call with an ult
884    // against 0, but is that worth the complexity?
885    break;
886  }
887
888  case Builtin::BIpow:
889  case Builtin::BIpowf:
890  case Builtin::BIpowl: {
891    // Rewrite pow to intrinsic if allowed.
892    if (!FD->hasAttr<ConstAttr>())
893      break;
894    Value *Base = EmitScalarExpr(E->getArg(0));
895    Value *Exponent = EmitScalarExpr(E->getArg(1));
896    const llvm::Type *ArgType = Base->getType();
897    Value *F = CGM.getIntrinsic(Intrinsic::pow, &ArgType, 1);
898    return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp"));
899  }
900
901  case Builtin::BI__builtin_signbit:
902  case Builtin::BI__builtin_signbitf:
903  case Builtin::BI__builtin_signbitl: {
904    LLVMContext &C = CGM.getLLVMContext();
905
906    Value *Arg = EmitScalarExpr(E->getArg(0));
907    const llvm::Type *ArgTy = Arg->getType();
908    if (ArgTy->isPPC_FP128Ty())
909      break; // FIXME: I'm not sure what the right implementation is here.
910    int ArgWidth = ArgTy->getPrimitiveSizeInBits();
911    const llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
912    Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
913    Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
914    Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
915    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
916  }
917  }
918
919  // If this is an alias for a libm function (e.g. __builtin_sin) turn it into
920  // that function.
921  if (getContext().BuiltinInfo.isLibFunction(BuiltinID) ||
922      getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
923    return EmitCall(E->getCallee()->getType(),
924                    CGM.getBuiltinLibFunction(FD, BuiltinID),
925                    ReturnValueSlot(),
926                    E->arg_begin(), E->arg_end());
927
928  // See if we have a target specific intrinsic.
929  const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
930  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
931  if (const char *Prefix =
932      llvm::Triple::getArchTypePrefix(Target.getTriple().getArch()))
933    IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
934
935  if (IntrinsicID != Intrinsic::not_intrinsic) {
936    SmallVector<Value*, 16> Args;
937
938    Function *F = CGM.getIntrinsic(IntrinsicID);
939    const llvm::FunctionType *FTy = F->getFunctionType();
940
941    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
942      Value *ArgValue = EmitScalarExpr(E->getArg(i));
943
944      // If the intrinsic arg type is different from the builtin arg type
945      // we need to do a bit cast.
946      const llvm::Type *PTy = FTy->getParamType(i);
947      if (PTy != ArgValue->getType()) {
948        assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
949               "Must be able to losslessly bit cast to param");
950        ArgValue = Builder.CreateBitCast(ArgValue, PTy);
951      }
952
953      Args.push_back(ArgValue);
954    }
955
956    Value *V = Builder.CreateCall(F, Args.data(), Args.data() + Args.size());
957    QualType BuiltinRetType = E->getType();
958
959    const llvm::Type *RetTy = llvm::Type::getVoidTy(VMContext);
960    if (!BuiltinRetType->isVoidType()) RetTy = ConvertType(BuiltinRetType);
961
962    if (RetTy != V->getType()) {
963      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
964             "Must be able to losslessly bit cast result type");
965      V = Builder.CreateBitCast(V, RetTy);
966    }
967
968    return RValue::get(V);
969  }
970
971  // See if we have a target specific builtin that needs to be lowered.
972  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
973    return RValue::get(V);
974
975  ErrorUnsupported(E, "builtin function");
976
977  // Unknown builtin, for now just dump it out and return undef.
978  if (hasAggregateLLVMType(E->getType()))
979    return RValue::getAggregate(CreateMemTemp(E->getType()));
980  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
981}
982
983Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
984                                              const CallExpr *E) {
985  switch (Target.getTriple().getArch()) {
986  case llvm::Triple::arm:
987  case llvm::Triple::thumb:
988    return EmitARMBuiltinExpr(BuiltinID, E);
989  case llvm::Triple::x86:
990  case llvm::Triple::x86_64:
991    return EmitX86BuiltinExpr(BuiltinID, E);
992  case llvm::Triple::ppc:
993  case llvm::Triple::ppc64:
994    return EmitPPCBuiltinExpr(BuiltinID, E);
995  default:
996    return 0;
997  }
998}
999
1000const llvm::VectorType *GetNeonType(LLVMContext &C, unsigned type, bool q) {
1001  switch (type) {
1002    default: break;
1003    case 0:
1004    case 5: return llvm::VectorType::get(llvm::Type::getInt8Ty(C), 8 << (int)q);
1005    case 6:
1006    case 7:
1007    case 1: return llvm::VectorType::get(llvm::Type::getInt16Ty(C),4 << (int)q);
1008    case 2: return llvm::VectorType::get(llvm::Type::getInt32Ty(C),2 << (int)q);
1009    case 3: return llvm::VectorType::get(llvm::Type::getInt64Ty(C),1 << (int)q);
1010    case 4: return llvm::VectorType::get(llvm::Type::getFloatTy(C),2 << (int)q);
1011  };
1012  return 0;
1013}
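// Encoding used by the NEON builtins' type argument (low 3 bits), per the
// switch above: 0/5 select 8-bit elements, 1/6/7 select 16-bit elements,
// 2 selects i32, 3 selects i64, and 4 selects float; "q" doubles the lane
// count from the 64-bit to the 128-bit register width (e.g. 8 x i8 becomes
// 16 x i8).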
1014
1015Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C, bool widen) {
1016  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
1017  if (widen)
1018    nElts <<= 1;
1019  SmallVector<Constant*, 16> Indices(nElts, C);
1020  Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1021  return Builder.CreateShuffleVector(V, V, SV, "lane");
1022}
1023
1024Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
1025                                     const char *name, bool splat,
1026                                     unsigned shift, bool rightshift) {
1027  unsigned j = 0;
1028  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1029       ai != ae; ++ai, ++j)
1030    if (shift > 0 && shift == j)
1031      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
1032    else
1033      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
1034
1035  if (splat) {
1036    Ops[j-1] = EmitNeonSplat(Ops[j-1], cast<Constant>(Ops[j]));
1037    Ops.resize(j);
1038  }
1039  return Builder.CreateCall(F, Ops.begin(), Ops.end(), name);
1040}
1041
1042Value *CodeGenFunction::EmitNeonShiftVector(Value *V, const llvm::Type *Ty,
1043                                            bool neg) {
1044  ConstantInt *CI = cast<ConstantInt>(V);
1045  int SV = CI->getSExtValue();
1046
1047  const llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1048  llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
1049  SmallVector<llvm::Constant*, 16> CV(VTy->getNumElements(), C);
1050  return llvm::ConstantVector::get(CV.begin(), CV.size());
1051}
1052
1053/// GetPointeeAlignment - Given an expression with a pointer type, find the
1054/// alignment of the type referenced by the pointer.  Skip over implicit
1055/// casts.
1056static Value *GetPointeeAlignment(CodeGenFunction &CGF, const Expr *Addr) {
1057  unsigned Align = 1;
1058  // Check if the type is a pointer.  The implicit cast operand might not be.
1059  while (Addr->getType()->isPointerType()) {
1060    QualType PtTy = Addr->getType()->getPointeeType();
1061    unsigned NewA = CGF.getContext().getTypeAlignInChars(PtTy).getQuantity();
1062    if (NewA > Align)
1063      Align = NewA;
1064
1065    // If the address is an implicit cast, repeat with the cast operand.
1066    if (const ImplicitCastExpr *CastAddr = dyn_cast<ImplicitCastExpr>(Addr)) {
1067      Addr = CastAddr->getSubExpr();
1068      continue;
1069    }
1070    break;
1071  }
1072  return llvm::ConstantInt::get(CGF.Int32Ty, Align);
1073}
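// For example, an "int *" argument yields an i32 constant 4 on typical
// targets; walking through implicit casts lets the alignment of the original
// pointee win over a less-aligned pointer type introduced by conversion.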
1074
1075Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
1076                                           const CallExpr *E) {
1077  if (BuiltinID == ARM::BI__clear_cache) {
1078    const FunctionDecl *FD = E->getDirectCallee();
1079    Value *a = EmitScalarExpr(E->getArg(0));
1080    Value *b = EmitScalarExpr(E->getArg(1));
1081    const llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
1082    const llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
1083    llvm::StringRef Name = FD->getName();
1084    return Builder.CreateCall2(CGM.CreateRuntimeFunction(FTy, Name),
1085                               a, b);
1086  }
1087
1088  llvm::SmallVector<Value*, 4> Ops;
1089  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
1090    Ops.push_back(EmitScalarExpr(E->getArg(i)));
1091
1092  llvm::APSInt Result;
1093  const Expr *Arg = E->getArg(E->getNumArgs()-1);
1094  if (!Arg->isIntegerConstantExpr(Result, getContext()))
1095    return 0;
1096
1097  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
1098      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
1099    // Determine the overloaded type of this builtin.
1100    const llvm::Type *Ty;
1101    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
1102      Ty = llvm::Type::getFloatTy(VMContext);
1103    else
1104      Ty = llvm::Type::getDoubleTy(VMContext);
1105
1106    // Determine whether this is an unsigned conversion or not.
1107    bool usgn = Result.getZExtValue() == 1;
1108    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
1109
1110    // Call the appropriate intrinsic.
1111    Function *F = CGM.getIntrinsic(Int, &Ty, 1);
1112    return Builder.CreateCall(F, Ops.begin(), Ops.end(), "vcvtr");
1113  }
1114
1115  // Determine the type of this overloaded NEON intrinsic.
1116  unsigned type = Result.getZExtValue();
1117  bool usgn = type & 0x08;
1118  bool quad = type & 0x10;
1119  bool poly = (type & 0x7) == 5 || (type & 0x7) == 6;
1120  bool splat = false;
1121
1122  const llvm::VectorType *VTy = GetNeonType(VMContext, type & 0x7, quad);
1123  const llvm::Type *Ty = VTy;
1124  if (!Ty)
1125    return 0;
1126
1127  unsigned Int;
1128  switch (BuiltinID) {
1129  default: return 0;
1130  case ARM::BI__builtin_neon_vaba_v:
1131  case ARM::BI__builtin_neon_vabaq_v:
1132    Int = usgn ? Intrinsic::arm_neon_vabau : Intrinsic::arm_neon_vabas;
1133    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vaba");
1134  case ARM::BI__builtin_neon_vabal_v:
1135    Int = usgn ? Intrinsic::arm_neon_vabalu : Intrinsic::arm_neon_vabals;
1136    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabal");
1137  case ARM::BI__builtin_neon_vabd_v:
1138  case ARM::BI__builtin_neon_vabdq_v:
1139    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
1140    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabd");
1141  case ARM::BI__builtin_neon_vabdl_v:
1142    Int = usgn ? Intrinsic::arm_neon_vabdlu : Intrinsic::arm_neon_vabdls;
1143    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabdl");
1144  case ARM::BI__builtin_neon_vabs_v:
1145  case ARM::BI__builtin_neon_vabsq_v:
1146    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, &Ty, 1),
1147                        Ops, "vabs");
1148  case ARM::BI__builtin_neon_vaddhn_v:
1149    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, &Ty, 1),
1150                        Ops, "vaddhn");
1151  case ARM::BI__builtin_neon_vaddl_v:
1152    Int = usgn ? Intrinsic::arm_neon_vaddlu : Intrinsic::arm_neon_vaddls;
1153    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vaddl");
1154  case ARM::BI__builtin_neon_vaddw_v:
1155    Int = usgn ? Intrinsic::arm_neon_vaddwu : Intrinsic::arm_neon_vaddws;
1156    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vaddw");
1157  case ARM::BI__builtin_neon_vcale_v:
1158    std::swap(Ops[0], Ops[1]);
1159  case ARM::BI__builtin_neon_vcage_v: {
1160    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged, &Ty, 1);
1161    return EmitNeonCall(F, Ops, "vcage");
1162  }
1163  case ARM::BI__builtin_neon_vcaleq_v:
1164    std::swap(Ops[0], Ops[1]);
1165  case ARM::BI__builtin_neon_vcageq_v: {
1166    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq, &Ty, 1);
1167    return EmitNeonCall(F, Ops, "vcage");
1168  }
1169  case ARM::BI__builtin_neon_vcalt_v:
1170    std::swap(Ops[0], Ops[1]);
1171  case ARM::BI__builtin_neon_vcagt_v: {
1172    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd, &Ty, 1);
1173    return EmitNeonCall(F, Ops, "vcagt");
1174  }
1175  case ARM::BI__builtin_neon_vcaltq_v:
1176    std::swap(Ops[0], Ops[1]);
1177  case ARM::BI__builtin_neon_vcagtq_v: {
1178    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq, &Ty, 1);
1179    return EmitNeonCall(F, Ops, "vcagt");
1180  }
1181  case ARM::BI__builtin_neon_vcls_v:
1182  case ARM::BI__builtin_neon_vclsq_v: {
1183    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, &Ty, 1);
1184    return EmitNeonCall(F, Ops, "vcls");
1185  }
1186  case ARM::BI__builtin_neon_vclz_v:
1187  case ARM::BI__builtin_neon_vclzq_v: {
1188    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, &Ty, 1);
1189    return EmitNeonCall(F, Ops, "vclz");
1190  }
1191  case ARM::BI__builtin_neon_vcnt_v:
1192  case ARM::BI__builtin_neon_vcntq_v: {
1193    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, &Ty, 1);
1194    return EmitNeonCall(F, Ops, "vcnt");
1195  }
1196  // FIXME: intrinsics for f16<->f32 convert missing from ARM target.
1197  case ARM::BI__builtin_neon_vcvt_f32_v:
1198  case ARM::BI__builtin_neon_vcvtq_f32_v: {
1199    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1200    Ty = GetNeonType(VMContext, 4, quad);
1201    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1202                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1203  }
1204  case ARM::BI__builtin_neon_vcvt_s32_v:
1205  case ARM::BI__builtin_neon_vcvt_u32_v:
1206  case ARM::BI__builtin_neon_vcvtq_s32_v:
1207  case ARM::BI__builtin_neon_vcvtq_u32_v: {
1208    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(VMContext, 4, quad));
1209    return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1210                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1211  }
1212  case ARM::BI__builtin_neon_vcvt_n_f32_v:
1213  case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
1214    const llvm::Type *Tys[2] = { GetNeonType(VMContext, 4, quad), Ty };
1215    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp : Intrinsic::arm_neon_vcvtfxs2fp;
1216    Function *F = CGM.getIntrinsic(Int, Tys, 2);
1217    return EmitNeonCall(F, Ops, "vcvt_n");
1218  }
1219  case ARM::BI__builtin_neon_vcvt_n_s32_v:
1220  case ARM::BI__builtin_neon_vcvt_n_u32_v:
1221  case ARM::BI__builtin_neon_vcvtq_n_s32_v:
1222  case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
1223    const llvm::Type *Tys[2] = { Ty, GetNeonType(VMContext, 4, quad) };
1224    Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu : Intrinsic::arm_neon_vcvtfp2fxs;
1225    Function *F = CGM.getIntrinsic(Int, Tys, 2);
1226    return EmitNeonCall(F, Ops, "vcvt_n");
1227  }
1228  case ARM::BI__builtin_neon_vdup_lane_v:
1229    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1230    return EmitNeonSplat(Ops[0], cast<Constant>(Ops[1]));
1231  case ARM::BI__builtin_neon_vdupq_lane_v:
1232    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1233    return EmitNeonSplat(Ops[0], cast<Constant>(Ops[1]), true);
1234  case ARM::BI__builtin_neon_vext_v:
1235  case ARM::BI__builtin_neon_vextq_v: {
1236    ConstantInt *C = dyn_cast<ConstantInt>(Ops[2]);
1237    int CV = C->getSExtValue();
1238    SmallVector<Constant*, 16> Indices;
1239    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1240      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
1241
1242    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1243    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1244    Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1245    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
1246  }
1247  case ARM::BI__builtin_neon_vget_lane_i8:
1248  case ARM::BI__builtin_neon_vget_lane_i16:
1249  case ARM::BI__builtin_neon_vget_lane_i32:
1250  case ARM::BI__builtin_neon_vget_lane_i64:
1251  case ARM::BI__builtin_neon_vget_lane_f32:
1252  case ARM::BI__builtin_neon_vgetq_lane_i8:
1253  case ARM::BI__builtin_neon_vgetq_lane_i16:
1254  case ARM::BI__builtin_neon_vgetq_lane_i32:
1255  case ARM::BI__builtin_neon_vgetq_lane_i64:
1256  case ARM::BI__builtin_neon_vgetq_lane_f32:
1257    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
1258                                        "vget_lane");
1259  case ARM::BI__builtin_neon_vhadd_v:
1260  case ARM::BI__builtin_neon_vhaddq_v:
1261    Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
1262    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhadd");
1263  case ARM::BI__builtin_neon_vhsub_v:
1264  case ARM::BI__builtin_neon_vhsubq_v:
1265    Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
1266    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhsub");
1267  case ARM::BI__builtin_neon_vld1_v:
1268  case ARM::BI__builtin_neon_vld1q_v:
1269    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1270    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, &Ty, 1),
1271                        Ops, "vld1");
1272  case ARM::BI__builtin_neon_vld1_lane_v:
1273  case ARM::BI__builtin_neon_vld1q_lane_v:
1274    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1275    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1276    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1277    Ops[0] = Builder.CreateLoad(Ops[0]);
1278    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
1279  case ARM::BI__builtin_neon_vld1_dup_v:
1280  case ARM::BI__builtin_neon_vld1q_dup_v: {
1281    Value *V = UndefValue::get(Ty);
1282    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1283    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1284    Ops[0] = Builder.CreateLoad(Ops[0]);
1285    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1286    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
1287    return EmitNeonSplat(Ops[0], CI);
1288  }
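      // The vldN builtins return a struct of N vectors; call the intrinsic with
      // the source pointer and alignment, then store the aggregate result through
      // the pointer passed as the builtin's first argument.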
1289  case ARM::BI__builtin_neon_vld2_v:
1290  case ARM::BI__builtin_neon_vld2q_v: {
1291    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, &Ty, 1);
1292    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1293    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
1294    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1295    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1296    return Builder.CreateStore(Ops[1], Ops[0]);
1297  }
1298  case ARM::BI__builtin_neon_vld3_v:
1299  case ARM::BI__builtin_neon_vld3q_v: {
1300    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, &Ty, 1);
1301    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1302    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
1303    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1304    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1305    return Builder.CreateStore(Ops[1], Ops[0]);
1306  }
1307  case ARM::BI__builtin_neon_vld4_v:
1308  case ARM::BI__builtin_neon_vld4q_v: {
1309    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, &Ty, 1);
1310    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1311    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
1312    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1313    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1314    return Builder.CreateStore(Ops[1], Ops[0]);
1315  }
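      // The vldN_lane builtins additionally take the current vectors and the lane
      // index; bitcast those vectors to the right type, append the alignment, and
      // store the returned aggregate back through the result pointer.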
1316  case ARM::BI__builtin_neon_vld2_lane_v:
1317  case ARM::BI__builtin_neon_vld2q_lane_v: {
1318    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, &Ty, 1);
1319    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1320    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1321    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1322    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld2_lane");
1323    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1324    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1325    return Builder.CreateStore(Ops[1], Ops[0]);
1326  }
1327  case ARM::BI__builtin_neon_vld3_lane_v:
1328  case ARM::BI__builtin_neon_vld3q_lane_v: {
1329    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, &Ty, 1);
1330    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1331    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1332    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
1333    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1334    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld3_lane");
1335    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1336    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1337    return Builder.CreateStore(Ops[1], Ops[0]);
1338  }
1339  case ARM::BI__builtin_neon_vld4_lane_v:
1340  case ARM::BI__builtin_neon_vld4q_lane_v: {
1341    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, &Ty, 1);
1342    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1343    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1344    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
1345    Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
1346    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1347    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld4_lane");
1348    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1349    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1350    return Builder.CreateStore(Ops[1], Ops[0]);
1351  }
1352  case ARM::BI__builtin_neon_vld2_dup_v:
1353  case ARM::BI__builtin_neon_vld3_dup_v:
1354  case ARM::BI__builtin_neon_vld4_dup_v: {
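        // Lower vldN_dup by loading a single lane with the matching vldNlane
        // intrinsic (undef vectors, lane 0) and then splatting lane 0 across each
        // vector of the result.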
1355    switch (BuiltinID) {
1356    case ARM::BI__builtin_neon_vld2_dup_v:
1357      Int = Intrinsic::arm_neon_vld2lane;
1358      break;
1359    case ARM::BI__builtin_neon_vld3_dup_v:
1360      Int = Intrinsic::arm_neon_vld3lane;
1361      break;
1362    case ARM::BI__builtin_neon_vld4_dup_v:
1363      Int = Intrinsic::arm_neon_vld4lane;
1364      break;
1365    default: assert(0 && "unknown vld_dup intrinsic?");
1366    }
1367    Function *F = CGM.getIntrinsic(Int, &Ty, 1);
1368    const llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
1369
1370    SmallVector<Value*, 6> Args;
1371    Args.push_back(Ops[1]);
1372    Args.append(STy->getNumElements(), UndefValue::get(Ty));
1373
1374    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1375    Args.push_back(CI);
1376    Args.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1377
1378    Ops[1] = Builder.CreateCall(F, Args.begin(), Args.end(), "vld_dup");
1379    // splat lane 0 to all elts in each vector of the result.
1380    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1381      Value *Val = Builder.CreateExtractValue(Ops[1], i);
1382      Value *Elt = Builder.CreateBitCast(Val, Ty);
1383      Elt = EmitNeonSplat(Elt, CI);
1384      Elt = Builder.CreateBitCast(Elt, Val->getType());
1385      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
1386    }
1387    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1388    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1389    return Builder.CreateStore(Ops[1], Ops[0]);
1390  }
1391  case ARM::BI__builtin_neon_vmax_v:
1392  case ARM::BI__builtin_neon_vmaxq_v:
1393    Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
1394    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmax");
1395  case ARM::BI__builtin_neon_vmin_v:
1396  case ARM::BI__builtin_neon_vminq_v:
1397    Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
1398    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmin");
1399  case ARM::BI__builtin_neon_vmlal_lane_v:
1400    splat = true;
1401  case ARM::BI__builtin_neon_vmlal_v:
1402    Int = usgn ? Intrinsic::arm_neon_vmlalu : Intrinsic::arm_neon_vmlals;
1403    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlal", splat);
1404  case ARM::BI__builtin_neon_vmlsl_lane_v:
1405    splat = true;
1406  case ARM::BI__builtin_neon_vmlsl_v:
1407    Int = usgn ? Intrinsic::arm_neon_vmlslu : Intrinsic::arm_neon_vmlsls;
1408    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlsl", splat);
1409  case ARM::BI__builtin_neon_vmovl_v:
1410    if (usgn)
1411      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
1412    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
1413  case ARM::BI__builtin_neon_vmovn_v:
1414    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmovn, &Ty, 1),
1415                        Ops, "vmovn");
1416  case ARM::BI__builtin_neon_vmull_lane_v:
1417    splat = true;
1418  case ARM::BI__builtin_neon_vmull_v:
1419    Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
1420    Int = poly ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
1421    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmull", splat);
1422  case ARM::BI__builtin_neon_vpadal_v:
1423  case ARM::BI__builtin_neon_vpadalq_v:
1424    Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
1425    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpadal");
1426  case ARM::BI__builtin_neon_vpadd_v:
1427    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, &Ty, 1),
1428                        Ops, "vpadd");
1429  case ARM::BI__builtin_neon_vpaddl_v:
1430  case ARM::BI__builtin_neon_vpaddlq_v:
1431    Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
1432    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpaddl");
1433  case ARM::BI__builtin_neon_vpmax_v:
1434    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
1435    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmax");
1436  case ARM::BI__builtin_neon_vpmin_v:
1437    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
1438    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmin");
1439  case ARM::BI__builtin_neon_vqabs_v:
1440  case ARM::BI__builtin_neon_vqabsq_v:
1441    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, &Ty, 1),
1442                        Ops, "vqabs");
1443  case ARM::BI__builtin_neon_vqadd_v:
1444  case ARM::BI__builtin_neon_vqaddq_v:
1445    Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
1446    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqadd");
1447  case ARM::BI__builtin_neon_vqdmlal_lane_v:
1448    splat = true;
1449  case ARM::BI__builtin_neon_vqdmlal_v:
1450    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, &Ty, 1),
1451                        Ops, "vqdmlal", splat);
1452  case ARM::BI__builtin_neon_vqdmlsl_lane_v:
1453    splat = true;
1454  case ARM::BI__builtin_neon_vqdmlsl_v:
1455    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, &Ty, 1),
1456                        Ops, "vqdmlsl", splat);
1457  case ARM::BI__builtin_neon_vqdmulh_lane_v:
1458  case ARM::BI__builtin_neon_vqdmulhq_lane_v:
1459    splat = true;
1460  case ARM::BI__builtin_neon_vqdmulh_v:
1461  case ARM::BI__builtin_neon_vqdmulhq_v:
1462    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, &Ty, 1),
1463                        Ops, "vqdmulh", splat);
1464  case ARM::BI__builtin_neon_vqdmull_lane_v:
1465    splat = true;
1466  case ARM::BI__builtin_neon_vqdmull_v:
1467    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, &Ty, 1),
1468                        Ops, "vqdmull", splat);
1469  case ARM::BI__builtin_neon_vqmovn_v:
1470    Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
1471    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqmovn");
1472  case ARM::BI__builtin_neon_vqmovun_v:
1473    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, &Ty, 1),
1474                        Ops, "vqmovun");
1475  case ARM::BI__builtin_neon_vqneg_v:
1476    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, &Ty, 1),
1477                        Ops, "vqneg");
1478  case ARM::BI__builtin_neon_vqrdmulh_lane_v:
1479  case ARM::BI__builtin_neon_vqrdmulhq_lane_v:
1480    splat = true;
1481  case ARM::BI__builtin_neon_vqrdmulh_v:
1482  case ARM::BI__builtin_neon_vqrdmulhq_v:
1483    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, &Ty, 1),
1484                        Ops, "vqrdmulh", splat);
1485  case ARM::BI__builtin_neon_vqrshl_v:
1486  case ARM::BI__builtin_neon_vqrshlq_v:
1487    Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
1488    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqrshl");
1489  case ARM::BI__builtin_neon_vqrshrn_n_v:
1490    Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
1491    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqrshrn_n", false,
1492                        1, true);
1493  case ARM::BI__builtin_neon_vqrshrun_n_v:
1494    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, &Ty, 1),
1495                        Ops, "vqrshrun_n", false, 1, true);
1496  case ARM::BI__builtin_neon_vqshl_v:
1497  case ARM::BI__builtin_neon_vqshlq_v:
1498    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
1499    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshl");
1500  case ARM::BI__builtin_neon_vqshl_n_v:
1501  case ARM::BI__builtin_neon_vqshlq_n_v:
1502    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
1503    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshl_n", false,
1504                        1, false);
1505  case ARM::BI__builtin_neon_vqshlu_n_v:
1506  case ARM::BI__builtin_neon_vqshluq_n_v:
1507    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, &Ty, 1),
1508                        Ops, "vqshlu_n", false, 1, false);
1509  case ARM::BI__builtin_neon_vqshrn_n_v:
1510    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
1511    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshrn_n", false,
1512                        1, true);
1513  case ARM::BI__builtin_neon_vqshrun_n_v:
1514    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, &Ty, 1),
1515                        Ops, "vqshrun_n", false, 1, true);
1516  case ARM::BI__builtin_neon_vqsub_v:
1517  case ARM::BI__builtin_neon_vqsubq_v:
1518    Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
1519    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqsub");
1520  case ARM::BI__builtin_neon_vraddhn_v:
1521    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, &Ty, 1),
1522                        Ops, "vraddhn");
1523  case ARM::BI__builtin_neon_vrecpe_v:
1524  case ARM::BI__builtin_neon_vrecpeq_v:
1525    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, &Ty, 1),
1526                        Ops, "vrecpe");
1527  case ARM::BI__builtin_neon_vrecps_v:
1528  case ARM::BI__builtin_neon_vrecpsq_v:
1529    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, &Ty, 1),
1530                        Ops, "vrecps");
1531  case ARM::BI__builtin_neon_vrhadd_v:
1532  case ARM::BI__builtin_neon_vrhaddq_v:
1533    Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
1534    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrhadd");
1535  case ARM::BI__builtin_neon_vrshl_v:
1536  case ARM::BI__builtin_neon_vrshlq_v:
1537    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1538    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrshl");
1539  case ARM::BI__builtin_neon_vrshrn_n_v:
1540    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, &Ty, 1),
1541                        Ops, "vrshrn_n", false, 1, true);
1542  case ARM::BI__builtin_neon_vrshr_n_v:
1543  case ARM::BI__builtin_neon_vrshrq_n_v:
1544    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1545    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrshr_n", false,
1546                        1, true);
1547  case ARM::BI__builtin_neon_vrsqrte_v:
1548  case ARM::BI__builtin_neon_vrsqrteq_v:
1549    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, &Ty, 1),
1550                        Ops, "vrsqrte");
1551  case ARM::BI__builtin_neon_vrsqrts_v:
1552  case ARM::BI__builtin_neon_vrsqrtsq_v:
1553    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, &Ty, 1),
1554                        Ops, "vrsqrts");
1555  case ARM::BI__builtin_neon_vrsra_n_v:
1556  case ARM::BI__builtin_neon_vrsraq_n_v:
1557    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1558    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1559    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
1560    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1561    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, &Ty, 1), Ops[1], Ops[2]);
1562    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
1563  case ARM::BI__builtin_neon_vrsubhn_v:
1564    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, &Ty, 1),
1565                        Ops, "vrsubhn");
1566  case ARM::BI__builtin_neon_vset_lane_i8:
1567  case ARM::BI__builtin_neon_vset_lane_i16:
1568  case ARM::BI__builtin_neon_vset_lane_i32:
1569  case ARM::BI__builtin_neon_vset_lane_i64:
1570  case ARM::BI__builtin_neon_vset_lane_f32:
1571  case ARM::BI__builtin_neon_vsetq_lane_i8:
1572  case ARM::BI__builtin_neon_vsetq_lane_i16:
1573  case ARM::BI__builtin_neon_vsetq_lane_i32:
1574  case ARM::BI__builtin_neon_vsetq_lane_i64:
1575  case ARM::BI__builtin_neon_vsetq_lane_f32:
1576    Ops.push_back(EmitScalarExpr(E->getArg(2)));
1577    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
1578  case ARM::BI__builtin_neon_vshl_v:
1579  case ARM::BI__builtin_neon_vshlq_v:
1580    Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
1581    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vshl");
1582  case ARM::BI__builtin_neon_vshll_n_v:
1583    Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
1584    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vshll", false, 1);
1585  case ARM::BI__builtin_neon_vshl_n_v:
1586  case ARM::BI__builtin_neon_vshlq_n_v:
1587    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1588    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], "vshl_n");
1589  case ARM::BI__builtin_neon_vshrn_n_v:
1590    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, &Ty, 1),
1591                        Ops, "vshrn_n", false, 1, true);
1592  case ARM::BI__builtin_neon_vshr_n_v:
1593  case ARM::BI__builtin_neon_vshrq_n_v:
1594    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1595    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1596    if (usgn)
1597      return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
1598    else
1599      return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
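      // vsri_n falls through to share the vshiftins lowering with vsli_n; the
      // reused 'poly' flag tells EmitNeonShiftVector to build the shift-amount
      // vector for the right-insert form.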
1600  case ARM::BI__builtin_neon_vsri_n_v:
1601  case ARM::BI__builtin_neon_vsriq_n_v:
1602    poly = true;
1603  case ARM::BI__builtin_neon_vsli_n_v:
1604  case ARM::BI__builtin_neon_vsliq_n_v:
1605    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, poly);
1606    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, &Ty, 1),
1607                        Ops, "vsli_n");
1608  case ARM::BI__builtin_neon_vsra_n_v:
1609  case ARM::BI__builtin_neon_vsraq_n_v:
1610    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1611    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1612    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
1613    if (usgn)
1614      Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
1615    else
1616      Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
1617    return Builder.CreateAdd(Ops[0], Ops[1]);
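      // The vstN builtins pass the pointee alignment as an extra trailing operand
      // to the corresponding arm_neon_vstN / arm_neon_vstNlane intrinsics.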
1618  case ARM::BI__builtin_neon_vst1_v:
1619  case ARM::BI__builtin_neon_vst1q_v:
1620    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1621    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, &Ty, 1),
1622                        Ops, "");
1623  case ARM::BI__builtin_neon_vst1_lane_v:
1624  case ARM::BI__builtin_neon_vst1q_lane_v:
1625    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1626    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
1627    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1628    return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
1629  case ARM::BI__builtin_neon_vst2_v:
1630  case ARM::BI__builtin_neon_vst2q_v:
1631    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1632    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, &Ty, 1),
1633                        Ops, "");
1634  case ARM::BI__builtin_neon_vst2_lane_v:
1635  case ARM::BI__builtin_neon_vst2q_lane_v:
1636    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1637    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, &Ty, 1),
1638                        Ops, "");
1639  case ARM::BI__builtin_neon_vst3_v:
1640  case ARM::BI__builtin_neon_vst3q_v:
1641    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1642    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, &Ty, 1),
1643                        Ops, "");
1644  case ARM::BI__builtin_neon_vst3_lane_v:
1645  case ARM::BI__builtin_neon_vst3q_lane_v:
1646    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1647    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, &Ty, 1),
1648                        Ops, "");
1649  case ARM::BI__builtin_neon_vst4_v:
1650  case ARM::BI__builtin_neon_vst4q_v:
1651    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1652    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, &Ty, 1),
1653                        Ops, "");
1654  case ARM::BI__builtin_neon_vst4_lane_v:
1655  case ARM::BI__builtin_neon_vst4q_lane_v:
1656    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1657    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, &Ty, 1),
1658                        Ops, "");
1659  case ARM::BI__builtin_neon_vsubhn_v:
1660    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, &Ty, 1),
1661                        Ops, "vsubhn");
1662  case ARM::BI__builtin_neon_vsubl_v:
1663    Int = usgn ? Intrinsic::arm_neon_vsublu : Intrinsic::arm_neon_vsubls;
1664    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vsubl");
1665  case ARM::BI__builtin_neon_vsubw_v:
1666    Int = usgn ? Intrinsic::arm_neon_vsubwu : Intrinsic::arm_neon_vsubws;
1667    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vsubw");
1668  case ARM::BI__builtin_neon_vtbl1_v:
1669    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
1670                        Ops, "vtbl1");
1671  case ARM::BI__builtin_neon_vtbl2_v:
1672    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
1673                        Ops, "vtbl2");
1674  case ARM::BI__builtin_neon_vtbl3_v:
1675    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
1676                        Ops, "vtbl3");
1677  case ARM::BI__builtin_neon_vtbl4_v:
1678    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
1679                        Ops, "vtbl4");
1680  case ARM::BI__builtin_neon_vtbx1_v:
1681    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
1682                        Ops, "vtbx1");
1683  case ARM::BI__builtin_neon_vtbx2_v:
1684    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
1685                        Ops, "vtbx2");
1686  case ARM::BI__builtin_neon_vtbx3_v:
1687    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
1688                        Ops, "vtbx3");
1689  case ARM::BI__builtin_neon_vtbx4_v:
1690    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
1691                        Ops, "vtbx4");
1692  case ARM::BI__builtin_neon_vtst_v:
1693  case ARM::BI__builtin_neon_vtstq_v: {
1694    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1695    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1696    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
1697    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
1698                                ConstantAggregateZero::get(Ty));
1699    return Builder.CreateSExt(Ops[0], Ty, "vtst");
1700  }
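      // vtrn/vuzp/vzip produce two result vectors: build each one with a
      // shufflevector over the two inputs and store it through consecutive
      // elements of the result pointer.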
1701  case ARM::BI__builtin_neon_vtrn_v:
1702  case ARM::BI__builtin_neon_vtrnq_v: {
1703    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1704    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1705    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1706    Value *SV;
1707
1708    for (unsigned vi = 0; vi != 2; ++vi) {
1709      SmallVector<Constant*, 16> Indices;
1710      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1711        Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
1712        Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
1713      }
1714      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1715      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1716      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
1717      SV = Builder.CreateStore(SV, Addr);
1718    }
1719    return SV;
1720  }
1721  case ARM::BI__builtin_neon_vuzp_v:
1722  case ARM::BI__builtin_neon_vuzpq_v: {
1723    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1724    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1725    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1726    Value *SV;
1727
1728    for (unsigned vi = 0; vi != 2; ++vi) {
1729      SmallVector<Constant*, 16> Indices;
1730      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1731        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
1732
1733      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1734      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1735      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
1736      SV = Builder.CreateStore(SV, Addr);
1737    }
1738    return SV;
1739  }
1740  case ARM::BI__builtin_neon_vzip_v:
1741  case ARM::BI__builtin_neon_vzipq_v: {
1742    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1743    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1744    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1745    Value *SV;
1746
1747    for (unsigned vi = 0; vi != 2; ++vi) {
1748      SmallVector<Constant*, 16> Indices;
1749      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1750        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
1751        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
1752      }
1753      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1754      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1755      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
1756      SV = Builder.CreateStore(SV, Addr);
1757    }
1758    return SV;
1759  }
1760  }
1761}
1762
1763Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1764                                           const CallExpr *E) {
1765
1766  llvm::SmallVector<Value*, 4> Ops;
1767
1768  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
1769    Ops.push_back(EmitScalarExpr(E->getArg(i)));
1770
1771  switch (BuiltinID) {
1772  default: return 0;
1773  case X86::BI__builtin_ia32_pslldi128:
1774  case X86::BI__builtin_ia32_psllqi128:
1775  case X86::BI__builtin_ia32_psllwi128:
1776  case X86::BI__builtin_ia32_psradi128:
1777  case X86::BI__builtin_ia32_psrawi128:
1778  case X86::BI__builtin_ia32_psrldi128:
1779  case X86::BI__builtin_ia32_psrlqi128:
1780  case X86::BI__builtin_ia32_psrlwi128: {
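        // The SSE2 shift intrinsics take the count as a vector: zero-extend the
        // scalar count to i64, insert it into element 0 of a v2i64, and bitcast to
        // the type of the value being shifted.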
1781    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
1782    const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
1783    llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
1784    Ops[1] = Builder.CreateInsertElement(llvm::UndefValue::get(Ty),
1785                                         Ops[1], Zero, "insert");
1786    Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType(), "bitcast");
1787    const char *name = 0;
1788    Intrinsic::ID ID = Intrinsic::not_intrinsic;
1789
1790    switch (BuiltinID) {
1791    default: assert(0 && "Unsupported shift intrinsic!");
1792    case X86::BI__builtin_ia32_pslldi128:
1793      name = "pslldi";
1794      ID = Intrinsic::x86_sse2_psll_d;
1795      break;
1796    case X86::BI__builtin_ia32_psllqi128:
1797      name = "psllqi";
1798      ID = Intrinsic::x86_sse2_psll_q;
1799      break;
1800    case X86::BI__builtin_ia32_psllwi128:
1801      name = "psllwi";
1802      ID = Intrinsic::x86_sse2_psll_w;
1803      break;
1804    case X86::BI__builtin_ia32_psradi128:
1805      name = "psradi";
1806      ID = Intrinsic::x86_sse2_psra_d;
1807      break;
1808    case X86::BI__builtin_ia32_psrawi128:
1809      name = "psrawi";
1810      ID = Intrinsic::x86_sse2_psra_w;
1811      break;
1812    case X86::BI__builtin_ia32_psrldi128:
1813      name = "psrldi";
1814      ID = Intrinsic::x86_sse2_psrl_d;
1815      break;
1816    case X86::BI__builtin_ia32_psrlqi128:
1817      name = "psrlqi";
1818      ID = Intrinsic::x86_sse2_psrl_q;
1819      break;
1820    case X86::BI__builtin_ia32_psrlwi128:
1821      name = "psrlwi";
1822      ID = Intrinsic::x86_sse2_psrl_w;
1823      break;
1824    }
1825    llvm::Function *F = CGM.getIntrinsic(ID);
1826    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
1827  }
1828  case X86::BI__builtin_ia32_pslldi:
1829  case X86::BI__builtin_ia32_psllqi:
1830  case X86::BI__builtin_ia32_psllwi:
1831  case X86::BI__builtin_ia32_psradi:
1832  case X86::BI__builtin_ia32_psrawi:
1833  case X86::BI__builtin_ia32_psrldi:
1834  case X86::BI__builtin_ia32_psrlqi:
1835  case X86::BI__builtin_ia32_psrlwi: {
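        // The MMX shift intrinsics also take the count as a vector: zero-extend
        // the scalar count to i64 and bitcast it to a 1 x i64 vector.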
1836    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
1837    const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 1);
1838    Ops[1] = Builder.CreateBitCast(Ops[1], Ty, "bitcast");
1839    const char *name = 0;
1840    Intrinsic::ID ID = Intrinsic::not_intrinsic;
1841
1842    switch (BuiltinID) {
1843    default: assert(0 && "Unsupported shift intrinsic!");
1844    case X86::BI__builtin_ia32_pslldi:
1845      name = "pslldi";
1846      ID = Intrinsic::x86_mmx_psll_d;
1847      break;
1848    case X86::BI__builtin_ia32_psllqi:
1849      name = "psllqi";
1850      ID = Intrinsic::x86_mmx_psll_q;
1851      break;
1852    case X86::BI__builtin_ia32_psllwi:
1853      name = "psllwi";
1854      ID = Intrinsic::x86_mmx_psll_w;
1855      break;
1856    case X86::BI__builtin_ia32_psradi:
1857      name = "psradi";
1858      ID = Intrinsic::x86_mmx_psra_d;
1859      break;
1860    case X86::BI__builtin_ia32_psrawi:
1861      name = "psrawi";
1862      ID = Intrinsic::x86_mmx_psra_w;
1863      break;
1864    case X86::BI__builtin_ia32_psrldi:
1865      name = "psrldi";
1866      ID = Intrinsic::x86_mmx_psrl_d;
1867      break;
1868    case X86::BI__builtin_ia32_psrlqi:
1869      name = "psrlqi";
1870      ID = Intrinsic::x86_mmx_psrl_q;
1871      break;
1872    case X86::BI__builtin_ia32_psrlwi:
1873      name = "psrlwi";
1874      ID = Intrinsic::x86_mmx_psrl_w;
1875      break;
1876    }
1877    llvm::Function *F = CGM.getIntrinsic(ID);
1878    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
1879  }
1880  case X86::BI__builtin_ia32_cmpps: {
1881    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps);
1882    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpps");
1883  }
1884  case X86::BI__builtin_ia32_cmpss: {
1885    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ss);
1886    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpss");
1887  }
1888  case X86::BI__builtin_ia32_ldmxcsr: {
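        // The intrinsic reads the new MXCSR value from memory, so spill the i32
        // argument to a temporary alloca and pass its address as an i8*.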
1889    const llvm::Type *PtrTy = llvm::Type::getInt8PtrTy(VMContext);
1890    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
1891    Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp");
1892    Builder.CreateStore(Ops[0], Tmp);
1893    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
1894                              Builder.CreateBitCast(Tmp, PtrTy));
1895  }
1896  case X86::BI__builtin_ia32_stmxcsr: {
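        // The intrinsic stores MXCSR to memory; have it write into a temporary
        // alloca and load the result back as an i32.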
1897    const llvm::Type *PtrTy = llvm::Type::getInt8PtrTy(VMContext);
1898    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
1899    Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp");
1900    One = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
1901                             Builder.CreateBitCast(Tmp, PtrTy));
1902    return Builder.CreateLoad(Tmp, "stmxcsr");
1903  }
1904  case X86::BI__builtin_ia32_cmppd: {
1905    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_pd);
1906    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmppd");
1907  }
1908  case X86::BI__builtin_ia32_cmpsd: {
1909    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_sd);
1910    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpsd");
1911  }
1912  case X86::BI__builtin_ia32_storehps:
1913  case X86::BI__builtin_ia32_storelps: {
1914    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
1915    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
1916
1917    // cast val to v2i64
1918    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
1919
1920    // extract (0, 1)
1921    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
1922    llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
1923    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
1924
1925    // cast pointer to i64 & store
1926    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
1927    return Builder.CreateStore(Ops[1], Ops[0]);
1928  }
1929  case X86::BI__builtin_ia32_palignr: {
1930    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1931
1932    // If palignr is shifting the pair of input vectors less than 9 bytes,
1933    // emit a shuffle instruction.
1934    if (shiftVal <= 8) {
1935      llvm::SmallVector<llvm::Constant*, 8> Indices;
1936      for (unsigned i = 0; i != 8; ++i)
1937        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
1938
1939      Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1940      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
1941    }
1942
1943    // If palignr is shifting the pair of input vectors more than 8 but less
1944    // than 16 bytes, emit a logical right shift of the destination.
1945    if (shiftVal < 16) {
1946      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
1947      const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);
1948
1949      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
1950      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);
1951
1952      // Shift the destination right by (shiftVal - 8) bytes with psrl.q.
1953      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
1954      return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
1955    }
1956
1957    // If palignr is shifting the pair of vectors more than 16 bytes, emit zero.
1958    return llvm::Constant::getNullValue(ConvertType(E->getType()));
1959  }
1960  case X86::BI__builtin_ia32_palignr128: {
1961    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1962
1963    // If palignr is shifting the pair of input vectors less than 17 bytes,
1964    // emit a shuffle instruction.
1965    if (shiftVal <= 16) {
1966      llvm::SmallVector<llvm::Constant*, 16> Indices;
1967      for (unsigned i = 0; i != 16; ++i)
1968        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
1969
1970      Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1971      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
1972    }
1973
1974    // If palignr is shifting the pair of input vectors more than 16 but less
1975    // than 32 bytes, emit a logical right shift of the destination.
1976    if (shiftVal < 32) {
1977      const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
1978
1979      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
1980      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
1981
1982      // Shift the destination right by (shiftVal - 16) bytes with psrl.dq.
1983      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
1984      return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
1985    }
1986
1987    // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
1988    return llvm::Constant::getNullValue(ConvertType(E->getType()));
1989  }
1990  }
1991}
1992
1993Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
1994                                           const CallExpr *E) {
1995  llvm::SmallVector<Value*, 4> Ops;
1996
1997  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
1998    Ops.push_back(EmitScalarExpr(E->getArg(i)));
1999
2000  Intrinsic::ID ID = Intrinsic::not_intrinsic;
2001
2002  switch (BuiltinID) {
2003  default: return 0;
2004
2005  // vec_ld, vec_lvsl, vec_lvsr
2006  case PPC::BI__builtin_altivec_lvx:
2007  case PPC::BI__builtin_altivec_lvxl:
2008  case PPC::BI__builtin_altivec_lvebx:
2009  case PPC::BI__builtin_altivec_lvehx:
2010  case PPC::BI__builtin_altivec_lvewx:
2011  case PPC::BI__builtin_altivec_lvsl:
2012  case PPC::BI__builtin_altivec_lvsr:
2013  {
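        // These builtins take (offset, pointer); fold them into a single i8*
        // address with a GEP and drop the now-redundant pointer operand before
        // calling the intrinsic.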
2014    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::Type::getInt8PtrTy(VMContext));
2015
2016    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0], "tmp");
2017    Ops.pop_back();
2018
2019    switch (BuiltinID) {
2020    default: assert(0 && "Unsupported ld/lvsl/lvsr intrinsic!");
2021    case PPC::BI__builtin_altivec_lvx:
2022      ID = Intrinsic::ppc_altivec_lvx;
2023      break;
2024    case PPC::BI__builtin_altivec_lvxl:
2025      ID = Intrinsic::ppc_altivec_lvxl;
2026      break;
2027    case PPC::BI__builtin_altivec_lvebx:
2028      ID = Intrinsic::ppc_altivec_lvebx;
2029      break;
2030    case PPC::BI__builtin_altivec_lvehx:
2031      ID = Intrinsic::ppc_altivec_lvehx;
2032      break;
2033    case PPC::BI__builtin_altivec_lvewx:
2034      ID = Intrinsic::ppc_altivec_lvewx;
2035      break;
2036    case PPC::BI__builtin_altivec_lvsl:
2037      ID = Intrinsic::ppc_altivec_lvsl;
2038      break;
2039    case PPC::BI__builtin_altivec_lvsr:
2040      ID = Intrinsic::ppc_altivec_lvsr;
2041      break;
2042    }
2043    llvm::Function *F = CGM.getIntrinsic(ID);
2044    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "");
2045  }
2046
2047  // vec_st
2048  case PPC::BI__builtin_altivec_stvx:
2049  case PPC::BI__builtin_altivec_stvxl:
2050  case PPC::BI__builtin_altivec_stvebx:
2051  case PPC::BI__builtin_altivec_stvehx:
2052  case PPC::BI__builtin_altivec_stvewx:
2053  {
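        // The store builtins take (value, offset, pointer); compute the effective
        // i8* address the same way and drop the trailing pointer operand.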
2054    Ops[2] = Builder.CreateBitCast(Ops[2], llvm::Type::getInt8PtrTy(VMContext));
2055    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1], "tmp");
2056    Ops.pop_back();
2057
2058    switch (BuiltinID) {
2059    default: assert(0 && "Unsupported st intrinsic!");
2060    case PPC::BI__builtin_altivec_stvx:
2061      ID = Intrinsic::ppc_altivec_stvx;
2062      break;
2063    case PPC::BI__builtin_altivec_stvxl:
2064      ID = Intrinsic::ppc_altivec_stvxl;
2065      break;
2066    case PPC::BI__builtin_altivec_stvebx:
2067      ID = Intrinsic::ppc_altivec_stvebx;
2068      break;
2069    case PPC::BI__builtin_altivec_stvehx:
2070      ID = Intrinsic::ppc_altivec_stvehx;
2071      break;
2072    case PPC::BI__builtin_altivec_stvewx:
2073      ID = Intrinsic::ppc_altivec_stvewx;
2074      break;
2075    }
2076    llvm::Function *F = CGM.getIntrinsic(ID);
2077    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "");
2078  }
2079  }
2080  return 0;
2081}
2082