CGBuiltin.cpp revision bf346e95f1d60f37fb37d89c288e1daa7839fc01
1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This contains code to emit Builtin calls as LLVM code.
11//
12//===----------------------------------------------------------------------===//
13
14#include "TargetInfo.h"
15#include "CodeGenFunction.h"
16#include "CodeGenModule.h"
17#include "CGObjCRuntime.h"
18#include "clang/Basic/TargetInfo.h"
19#include "clang/AST/APValue.h"
20#include "clang/AST/ASTContext.h"
21#include "clang/AST/Decl.h"
22#include "clang/Basic/TargetBuiltins.h"
23#include "llvm/Intrinsics.h"
24#include "llvm/Target/TargetData.h"
25using namespace clang;
26using namespace CodeGen;
27using namespace llvm;
28
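// Emit a call to the llvm.memory.barrier intrinsic.  The five i1 arguments
// select which orderings the barrier enforces (load-load, load-store,
// store-load, store-store) and whether it also applies to device memory.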
29static void EmitMemoryBarrier(CodeGenFunction &CGF,
30                              bool LoadLoad, bool LoadStore,
31                              bool StoreLoad, bool StoreStore,
32                              bool Device) {
33  Value *True = llvm::ConstantInt::getTrue(CGF.getLLVMContext());
34  Value *False = llvm::ConstantInt::getFalse(CGF.getLLVMContext());
35  Value *C[5] = { LoadLoad ? True : False,
36                  LoadStore ? True : False,
37                  StoreLoad ? True : False,
38                  StoreStore  ? True : False,
39                  Device ? True : False };
40  CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::memory_barrier),
41                         C, C + 5);
42}
43
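// The overloaded atomic intrinsics only operate on integer values, so the two
// helpers below round-trip pointer operands through ptrtoint/inttoptr and
// extend or truncate integer operands to the width the intrinsic expects.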
44static Value *EmitCastToInt(CodeGenFunction &CGF,
45                            const llvm::Type *ToType, Value *Val) {
46  if (Val->getType()->isPointerTy()) {
47    return CGF.Builder.CreatePtrToInt(Val, ToType);
48  }
49  assert(Val->getType()->isIntegerTy() &&
50         "Used a non-integer and non-pointer type with atomic builtin");
51  assert(Val->getType()->getScalarSizeInBits() <=
52         ToType->getScalarSizeInBits() && "Integer type too small");
53  return CGF.Builder.CreateSExtOrBitCast(Val, ToType);
54}
55
56static Value *EmitCastFromInt(CodeGenFunction &CGF, QualType ToQualType,
57                              Value *Val) {
58  const llvm::Type *ToType = CGF.ConvertType(ToQualType);
59  if (ToType->isPointerTy()) {
60    return CGF.Builder.CreateIntToPtr(Val, ToType);
61  }
62  assert(Val->getType()->isIntegerTy() &&
63         "Used a non-integer and non-pointer type with atomic builtin");
64  assert(Val->getType()->getScalarSizeInBits() >=
65         ToType->getScalarSizeInBits() && "Integer type too small");
66  return CGF.Builder.CreateTruncOrBitCast(Val, ToType);
67}
68
69// The atomic builtins are also full memory barriers. This is a utility for
70// wrapping a call to the builtins with memory barriers.
71static Value *EmitCallWithBarrier(CodeGenFunction &CGF, Value *Fn,
72                                  Value **ArgBegin, Value **ArgEnd) {
73  // FIXME: We need a target hook for whether this applies to device memory or
74  // not.
75  bool Device = true;
76
77  // Create barriers both before and after the call.
78  EmitMemoryBarrier(CGF, true, true, true, true, Device);
79  Value *Result = CGF.Builder.CreateCall(Fn, ArgBegin, ArgEnd);
80  EmitMemoryBarrier(CGF, true, true, true, true, Device);
81  return Result;
82}
83
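// For example, __sync_fetch_and_add(p, v) is emitted roughly as
//   barrier; old = llvm.atomic.load.add(p, v); barrier
// with the operands first cast to an integer of the access width and the old
// value cast back to the expression's type.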
/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
86static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
87                               Intrinsic::ID Id, const CallExpr *E) {
88  const llvm::Type *ValueType =
89    llvm::IntegerType::get(CGF.getLLVMContext(),
90                           CGF.getContext().getTypeSize(E->getType()));
91  const llvm::Type *PtrType = ValueType->getPointerTo();
92  const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
93  Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes, 2);
94
95  Value *Args[2] = { CGF.Builder.CreateBitCast(CGF.EmitScalarExpr(E->getArg(0)),
96                                               PtrType),
97                     EmitCastToInt(CGF, ValueType,
98                                   CGF.EmitScalarExpr(E->getArg(1))) };
99  return RValue::get(EmitCastFromInt(CGF, E->getType(),
100                                     EmitCallWithBarrier(CGF, AtomF, Args,
101                                                         Args + 2)));
102}
103
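// The *_and_fetch forms want the value after the operation, so the helper
// below re-applies the operation to the value the intrinsic returned; e.g.
// __sync_add_and_fetch(p, v) yields atomic.load.add(p, v) + v.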
/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
107static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
108                                   Intrinsic::ID Id, const CallExpr *E,
109                                   Instruction::BinaryOps Op) {
110  const llvm::Type *ValueType =
111    llvm::IntegerType::get(CGF.getLLVMContext(),
112                           CGF.getContext().getTypeSize(E->getType()));
113  const llvm::Type *PtrType = ValueType->getPointerTo();
114  const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
115  Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes, 2);
116
117  Value *Args[2] = { CGF.Builder.CreateBitCast(CGF.EmitScalarExpr(E->getArg(0)),
118                                               PtrType),
119                     EmitCastToInt(CGF, ValueType,
120                                   CGF.EmitScalarExpr(E->getArg(1))) };
121  Value *Result = EmitCallWithBarrier(CGF, AtomF, Args, Args + 2);
122  return RValue::get(EmitCastFromInt(CGF, E->getType(),
123                                     CGF.Builder.CreateBinOp(Op, Result,
124                                                             Args[1])));
125}
126
127/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
128/// which must be a scalar floating point type.
129static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
130  const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
131  assert(ValTyP && "isn't scalar fp type!");
132
133  StringRef FnName;
134  switch (ValTyP->getKind()) {
135  default: assert(0 && "Isn't a scalar fp type!");
136  case BuiltinType::Float:      FnName = "fabsf"; break;
137  case BuiltinType::Double:     FnName = "fabs"; break;
138  case BuiltinType::LongDouble: FnName = "fabsl"; break;
139  }
140
141  // The prototype is something that takes and returns whatever V's type is.
142  std::vector<const llvm::Type*> Args;
143  Args.push_back(V->getType());
144  llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), Args, false);
145  llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);
146
147  return CGF.Builder.CreateCall(Fn, V, "abs");
148}
149
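// EmitBuiltinExpr handles a builtin call in several stages: try to constant
// fold it, handle builtins with dedicated lowerings in the switch below, fall
// back to a plain library call for libm-style builtins, then try a matching
// target intrinsic, and finally hand off to the target-specific
// EmitTargetBuiltinExpr hook before reporting the builtin as unsupported.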
150RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
151                                        unsigned BuiltinID, const CallExpr *E) {
152  // See if we can constant fold this builtin.  If so, don't emit it at all.
153  Expr::EvalResult Result;
154  if (E->Evaluate(Result, CGM.getContext())) {
155    if (Result.Val.isInt())
156      return RValue::get(llvm::ConstantInt::get(VMContext,
157                                                Result.Val.getInt()));
158    else if (Result.Val.isFloat())
159      return RValue::get(ConstantFP::get(VMContext, Result.Val.getFloat()));
160  }
161
162  switch (BuiltinID) {
163  default: break;  // Handle intrinsics and libm functions below.
164  case Builtin::BI__builtin___CFStringMakeConstantString:
165  case Builtin::BI__builtin___NSStringMakeConstantString:
166    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
167  case Builtin::BI__builtin_stdarg_start:
168  case Builtin::BI__builtin_va_start:
169  case Builtin::BI__builtin_va_end: {
170    Value *ArgValue = EmitVAListRef(E->getArg(0));
171    const llvm::Type *DestType = llvm::Type::getInt8PtrTy(VMContext);
172    if (ArgValue->getType() != DestType)
173      ArgValue = Builder.CreateBitCast(ArgValue, DestType,
174                                       ArgValue->getName().data());
175
176    Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
177      Intrinsic::vaend : Intrinsic::vastart;
178    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
179  }
180  case Builtin::BI__builtin_va_copy: {
181    Value *DstPtr = EmitVAListRef(E->getArg(0));
182    Value *SrcPtr = EmitVAListRef(E->getArg(1));
183
184    const llvm::Type *Type = llvm::Type::getInt8PtrTy(VMContext);
185
186    DstPtr = Builder.CreateBitCast(DstPtr, Type);
187    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
188    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
189                                           DstPtr, SrcPtr));
190  }
191  case Builtin::BI__builtin_abs: {
192    Value *ArgValue = EmitScalarExpr(E->getArg(0));
193
194    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
      Builder.CreateICmpSGE(ArgValue,
                            llvm::Constant::getNullValue(ArgValue->getType()),
                            "abscond");
199    Value *Result =
200      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
201
202    return RValue::get(Result);
203  }
204  case Builtin::BI__builtin_ctz:
205  case Builtin::BI__builtin_ctzl:
206  case Builtin::BI__builtin_ctzll: {
207    Value *ArgValue = EmitScalarExpr(E->getArg(0));
208
209    const llvm::Type *ArgType = ArgValue->getType();
210    Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1);
211
212    const llvm::Type *ResultType = ConvertType(E->getType());
213    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
214    if (Result->getType() != ResultType)
215      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
216                                     "cast");
217    return RValue::get(Result);
218  }
219  case Builtin::BI__builtin_clz:
220  case Builtin::BI__builtin_clzl:
221  case Builtin::BI__builtin_clzll: {
222    Value *ArgValue = EmitScalarExpr(E->getArg(0));
223
224    const llvm::Type *ArgType = ArgValue->getType();
225    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, &ArgType, 1);
226
227    const llvm::Type *ResultType = ConvertType(E->getType());
228    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
229    if (Result->getType() != ResultType)
230      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
231                                     "cast");
232    return RValue::get(Result);
233  }
234  case Builtin::BI__builtin_ffs:
235  case Builtin::BI__builtin_ffsl:
236  case Builtin::BI__builtin_ffsll: {
237    // ffs(x) -> x ? cttz(x) + 1 : 0
238    Value *ArgValue = EmitScalarExpr(E->getArg(0));
239
240    const llvm::Type *ArgType = ArgValue->getType();
241    Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1);
242
243    const llvm::Type *ResultType = ConvertType(E->getType());
244    Value *Tmp = Builder.CreateAdd(Builder.CreateCall(F, ArgValue, "tmp"),
245                                   llvm::ConstantInt::get(ArgType, 1), "tmp");
246    Value *Zero = llvm::Constant::getNullValue(ArgType);
247    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
248    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
249    if (Result->getType() != ResultType)
250      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
251                                     "cast");
252    return RValue::get(Result);
253  }
254  case Builtin::BI__builtin_parity:
255  case Builtin::BI__builtin_parityl:
256  case Builtin::BI__builtin_parityll: {
257    // parity(x) -> ctpop(x) & 1
258    Value *ArgValue = EmitScalarExpr(E->getArg(0));
259
260    const llvm::Type *ArgType = ArgValue->getType();
261    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, &ArgType, 1);
262
263    const llvm::Type *ResultType = ConvertType(E->getType());
264    Value *Tmp = Builder.CreateCall(F, ArgValue, "tmp");
265    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1),
266                                      "tmp");
267    if (Result->getType() != ResultType)
268      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
269                                     "cast");
270    return RValue::get(Result);
271  }
272  case Builtin::BI__builtin_popcount:
273  case Builtin::BI__builtin_popcountl:
274  case Builtin::BI__builtin_popcountll: {
275    Value *ArgValue = EmitScalarExpr(E->getArg(0));
276
277    const llvm::Type *ArgType = ArgValue->getType();
278    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, &ArgType, 1);
279
280    const llvm::Type *ResultType = ConvertType(E->getType());
281    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
282    if (Result->getType() != ResultType)
283      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
284                                     "cast");
285    return RValue::get(Result);
286  }
287  case Builtin::BI__builtin_expect: {
288    // FIXME: pass expect through to LLVM
289    if (E->getArg(1)->HasSideEffects(getContext()))
290      (void)EmitScalarExpr(E->getArg(1));
291    return RValue::get(EmitScalarExpr(E->getArg(0)));
292  }
293  case Builtin::BI__builtin_bswap32:
294  case Builtin::BI__builtin_bswap64: {
295    Value *ArgValue = EmitScalarExpr(E->getArg(0));
296    const llvm::Type *ArgType = ArgValue->getType();
297    Value *F = CGM.getIntrinsic(Intrinsic::bswap, &ArgType, 1);
298    return RValue::get(Builder.CreateCall(F, ArgValue, "tmp"));
299  }
300  case Builtin::BI__builtin_object_size: {
    // We pass this builtin on to the optimizer so that it can
    // figure out the object size in more complex cases.
303    const llvm::Type *ResType[] = {
304      ConvertType(E->getType())
305    };
306
    // LLVM's llvm.objectsize only distinguishes the "maximum" and "minimum"
    // forms, so extract bit 1 of the type argument and pass it along as a
    // boolean.
309    Value *Ty = EmitScalarExpr(E->getArg(1));
310    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
311    assert(CI);
312    uint64_t val = CI->getZExtValue();
313    CI = ConstantInt::get(llvm::Type::getInt1Ty(VMContext), (val & 0x2) >> 1);
314
315    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType, 1);
316    return RValue::get(Builder.CreateCall2(F,
317                                           EmitScalarExpr(E->getArg(0)),
318                                           CI));
319  }
320  case Builtin::BI__builtin_prefetch: {
321    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
323    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
324      llvm::ConstantInt::get(Int32Ty, 0);
325    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
326      llvm::ConstantInt::get(Int32Ty, 3);
327    Value *F = CGM.getIntrinsic(Intrinsic::prefetch, 0, 0);
328    return RValue::get(Builder.CreateCall3(F, Address, RW, Locality));
329  }
330  case Builtin::BI__builtin_trap: {
331    Value *F = CGM.getIntrinsic(Intrinsic::trap, 0, 0);
332    return RValue::get(Builder.CreateCall(F));
333  }
334  case Builtin::BI__builtin_unreachable: {
335    if (CatchUndefined && HaveInsertPoint())
336      EmitBranch(getTrapBB());
337    Value *V = Builder.CreateUnreachable();
338    Builder.ClearInsertionPoint();
339    return RValue::get(V);
340  }
341
342  case Builtin::BI__builtin_powi:
343  case Builtin::BI__builtin_powif:
344  case Builtin::BI__builtin_powil: {
345    Value *Base = EmitScalarExpr(E->getArg(0));
346    Value *Exponent = EmitScalarExpr(E->getArg(1));
347    const llvm::Type *ArgType = Base->getType();
348    Value *F = CGM.getIntrinsic(Intrinsic::powi, &ArgType, 1);
349    return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp"));
350  }
351
352  case Builtin::BI__builtin_isgreater:
353  case Builtin::BI__builtin_isgreaterequal:
354  case Builtin::BI__builtin_isless:
355  case Builtin::BI__builtin_islessequal:
356  case Builtin::BI__builtin_islessgreater:
357  case Builtin::BI__builtin_isunordered: {
358    // Ordered comparisons: we know the arguments to these are matching scalar
359    // floating point values.
360    Value *LHS = EmitScalarExpr(E->getArg(0));
361    Value *RHS = EmitScalarExpr(E->getArg(1));
362
363    switch (BuiltinID) {
364    default: assert(0 && "Unknown ordered comparison");
365    case Builtin::BI__builtin_isgreater:
366      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
367      break;
368    case Builtin::BI__builtin_isgreaterequal:
369      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
370      break;
371    case Builtin::BI__builtin_isless:
372      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
373      break;
374    case Builtin::BI__builtin_islessequal:
375      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
376      break;
377    case Builtin::BI__builtin_islessgreater:
378      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
379      break;
380    case Builtin::BI__builtin_isunordered:
381      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
382      break;
383    }
384    // ZExt bool to int type.
385    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()),
386                                          "tmp"));
387  }
388  case Builtin::BI__builtin_isnan: {
389    Value *V = EmitScalarExpr(E->getArg(0));
390    V = Builder.CreateFCmpUNO(V, V, "cmp");
391    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp"));
392  }
393
394  case Builtin::BI__builtin_isinf: {
395    // isinf(x) --> fabs(x) == infinity
396    Value *V = EmitScalarExpr(E->getArg(0));
397    V = EmitFAbs(*this, V, E->getArg(0)->getType());
398
399    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
400    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp"));
401  }
402
403  // TODO: BI__builtin_isinf_sign
404  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0
405
406  case Builtin::BI__builtin_isnormal: {
407    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
408    Value *V = EmitScalarExpr(E->getArg(0));
409    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
410
411    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
412    Value *IsLessThanInf =
413      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
414    APFloat Smallest = APFloat::getSmallestNormalized(
415                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
416    Value *IsNormal =
417      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
418                            "isnormal");
419    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
420    V = Builder.CreateAnd(V, IsNormal, "and");
421    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
422  }
423
424  case Builtin::BI__builtin_isfinite: {
    // isfinite(x) --> x == x && fabs(x) != infinity
426    Value *V = EmitScalarExpr(E->getArg(0));
427    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
428
429    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
430    Value *IsNotInf =
431      Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
432
433    V = Builder.CreateAnd(Eq, IsNotInf, "and");
434    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
435  }
436
437  case Builtin::BI__builtin_fpclassify: {
438    Value *V = EmitScalarExpr(E->getArg(5));
439    const llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
440
441    // Create Result
442    BasicBlock *Begin = Builder.GetInsertBlock();
443    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
444    Builder.SetInsertPoint(End);
445    PHINode *Result =
446      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()),
447                        "fpclassify_result");
448
449    // if (V==0) return FP_ZERO
450    Builder.SetInsertPoint(Begin);
451    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
452                                          "iszero");
453    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
454    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
455    Builder.CreateCondBr(IsZero, End, NotZero);
456    Result->addIncoming(ZeroLiteral, Begin);
457
458    // if (V != V) return FP_NAN
459    Builder.SetInsertPoint(NotZero);
460    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
461    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
462    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
463    Builder.CreateCondBr(IsNan, End, NotNan);
464    Result->addIncoming(NanLiteral, NotZero);
465
466    // if (fabs(V) == infinity) return FP_INFINITY
467    Builder.SetInsertPoint(NotNan);
468    Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
469    Value *IsInf =
470      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
471                            "isinf");
472    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
473    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
474    Builder.CreateCondBr(IsInf, End, NotInf);
475    Result->addIncoming(InfLiteral, NotNan);
476
477    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
478    Builder.SetInsertPoint(NotInf);
479    APFloat Smallest = APFloat::getSmallestNormalized(
480        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
481    Value *IsNormal =
482      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
483                            "isnormal");
484    Value *NormalResult =
485      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
486                           EmitScalarExpr(E->getArg(3)));
487    Builder.CreateBr(End);
488    Result->addIncoming(NormalResult, NotInf);
489
490    // return Result
491    Builder.SetInsertPoint(End);
492    return RValue::get(Result);
493  }
494
495  case Builtin::BIalloca:
496  case Builtin::BI__builtin_alloca: {
497    Value *Size = EmitScalarExpr(E->getArg(0));
    return RValue::get(Builder.CreateAlloca(llvm::Type::getInt8Ty(VMContext),
                                            Size, "tmp"));
499  }
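  // The mem* builtins below are lowered to the llvm.memset/memcpy/memmove
  // intrinsics; the trailing arguments are the alignment (1, i.e. no known
  // alignment) and the is-volatile flag (false).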
500  case Builtin::BIbzero:
501  case Builtin::BI__builtin_bzero: {
502    Value *Address = EmitScalarExpr(E->getArg(0));
503    Value *SizeVal = EmitScalarExpr(E->getArg(1));
504    Builder.CreateCall5(CGM.getMemSetFn(Address->getType(), SizeVal->getType()),
505                   Address,
506                   llvm::ConstantInt::get(llvm::Type::getInt8Ty(VMContext), 0),
507                   SizeVal,
508                   llvm::ConstantInt::get(Int32Ty, 1),
509                   llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
510    return RValue::get(Address);
511  }
512  case Builtin::BImemcpy:
513  case Builtin::BI__builtin_memcpy: {
514    Value *Address = EmitScalarExpr(E->getArg(0));
515    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
516    Value *SizeVal = EmitScalarExpr(E->getArg(2));
517    Builder.CreateCall5(CGM.getMemCpyFn(Address->getType(), SrcAddr->getType(),
518                                        SizeVal->getType()),
519                  Address, SrcAddr, SizeVal,
520                  llvm::ConstantInt::get(Int32Ty, 1),
521                  llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
522    return RValue::get(Address);
523  }
524
525  case Builtin::BI__builtin_objc_memmove_collectable: {
526    Value *Address = EmitScalarExpr(E->getArg(0));
527    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
528    Value *SizeVal = EmitScalarExpr(E->getArg(2));
529    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
530                                                  Address, SrcAddr, SizeVal);
531    return RValue::get(Address);
532  }
533
534  case Builtin::BImemmove:
535  case Builtin::BI__builtin_memmove: {
536    Value *Address = EmitScalarExpr(E->getArg(0));
537    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
538    Value *SizeVal = EmitScalarExpr(E->getArg(2));
539    Builder.CreateCall5(CGM.getMemMoveFn(Address->getType(), SrcAddr->getType(),
540                                         SizeVal->getType()),
541                  Address, SrcAddr, SizeVal,
542                  llvm::ConstantInt::get(Int32Ty, 1),
543                  llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
544    return RValue::get(Address);
545  }
546  case Builtin::BImemset:
547  case Builtin::BI__builtin_memset: {
548    Value *Address = EmitScalarExpr(E->getArg(0));
549    Value *SizeVal = EmitScalarExpr(E->getArg(2));
550    Builder.CreateCall5(CGM.getMemSetFn(Address->getType(), SizeVal->getType()),
551                  Address,
552                  Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
553                                      llvm::Type::getInt8Ty(VMContext)),
554                  SizeVal,
555                  llvm::ConstantInt::get(Int32Ty, 1),
556                  llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
557    return RValue::get(Address);
558  }
559  case Builtin::BI__builtin_dwarf_cfa: {
560    // The offset in bytes from the first argument to the CFA.
561    //
562    // Why on earth is this in the frontend?  Is there any reason at
563    // all that the backend can't reasonably determine this while
564    // lowering llvm.eh.dwarf.cfa()?
565    //
566    // TODO: If there's a satisfactory reason, add a target hook for
567    // this instead of hard-coding 0, which is correct for most targets.
568    int32_t Offset = 0;
569
570    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa, 0, 0);
571    return RValue::get(Builder.CreateCall(F,
572                                      llvm::ConstantInt::get(Int32Ty, Offset)));
573  }
574  case Builtin::BI__builtin_return_address: {
575    Value *Depth = EmitScalarExpr(E->getArg(0));
576    Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp");
577    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress, 0, 0);
578    return RValue::get(Builder.CreateCall(F, Depth));
579  }
580  case Builtin::BI__builtin_frame_address: {
581    Value *Depth = EmitScalarExpr(E->getArg(0));
582    Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp");
583    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress, 0, 0);
584    return RValue::get(Builder.CreateCall(F, Depth));
585  }
586  case Builtin::BI__builtin_extract_return_addr: {
587    Value *Address = EmitScalarExpr(E->getArg(0));
588    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
589    return RValue::get(Result);
590  }
591  case Builtin::BI__builtin_frob_return_addr: {
592    Value *Address = EmitScalarExpr(E->getArg(0));
593    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
594    return RValue::get(Result);
595  }
596  case Builtin::BI__builtin_dwarf_sp_column: {
597    const llvm::IntegerType *Ty
598      = cast<llvm::IntegerType>(ConvertType(E->getType()));
599    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
600    if (Column == -1) {
601      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
602      return RValue::get(llvm::UndefValue::get(Ty));
603    }
604    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
605  }
606  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
607    Value *Address = EmitScalarExpr(E->getArg(0));
608    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
609      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
610    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
611  }
612  case Builtin::BI__builtin_eh_return: {
613    Value *Int = EmitScalarExpr(E->getArg(0));
614    Value *Ptr = EmitScalarExpr(E->getArg(1));
615
616    const llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
617    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
618           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
619    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
620                                  ? Intrinsic::eh_return_i32
621                                  : Intrinsic::eh_return_i64,
622                                0, 0);
623    Builder.CreateCall2(F, Int, Ptr);
624    Value *V = Builder.CreateUnreachable();
625    Builder.ClearInsertionPoint();
626    return RValue::get(V);
627  }
628  case Builtin::BI__builtin_unwind_init: {
629    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init, 0, 0);
630    return RValue::get(Builder.CreateCall(F));
631  }
632  case Builtin::BI__builtin_extend_pointer: {
633    // Extends a pointer to the size of an _Unwind_Word, which is
634    // uint64_t on all platforms.  Generally this gets poked into a
635    // register and eventually used as an address, so if the
636    // addressing registers are wider than pointers and the platform
637    // doesn't implicitly ignore high-order bits when doing
638    // addressing, we need to make sure we zext / sext based on
639    // the platform's expectations.
640    //
641    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
642
643    LLVMContext &C = CGM.getLLVMContext();
644
645    // Cast the pointer to intptr_t.
646    Value *Ptr = EmitScalarExpr(E->getArg(0));
647    const llvm::IntegerType *IntPtrTy = CGM.getTargetData().getIntPtrType(C);
648    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
649
650    // If that's 64 bits, we're done.
651    if (IntPtrTy->getBitWidth() == 64)
652      return RValue::get(Result);
653
654    // Otherwise, ask the codegen data what to do.
655    if (getTargetHooks().extendPointerWithSExt())
656      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
657    else
658      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
659  }
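  // __builtin_setjmp/__builtin_longjmp are lowered onto LLVM's lightweight
  // SJLJ intrinsics.  The jump buffer is treated as an array of pointers:
  // slot 0 holds the frame pointer and slot 2 holds the stack pointer; the
  // slot in between is apparently left for the setjmp lowering itself to fill
  // in with the address that llvm.eh.sjlj.longjmp resumes at.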
660  case Builtin::BI__builtin_setjmp: {
661    // Buffer is a void**.
662    Value *Buf = EmitScalarExpr(E->getArg(0));
663
664    // Store the frame pointer to the setjmp buffer.
665    Value *FrameAddr =
666      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
667                         ConstantInt::get(Int32Ty, 0));
668    Builder.CreateStore(FrameAddr, Buf);
669
670    // Store the stack pointer to the setjmp buffer.
671    Value *StackAddr =
672      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
673    Value *StackSaveSlot =
674      Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
675    Builder.CreateStore(StackAddr, StackSaveSlot);
676
677    // Call LLVM's EH setjmp, which is lightweight.
678    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
679    Buf = Builder.CreateBitCast(Buf, llvm::Type::getInt8PtrTy(VMContext));
680    return RValue::get(Builder.CreateCall(F, Buf));
681  }
682  case Builtin::BI__builtin_longjmp: {
683    Value *Buf = EmitScalarExpr(E->getArg(0));
684    Buf = Builder.CreateBitCast(Buf, llvm::Type::getInt8PtrTy(VMContext));
685
686    // Call LLVM's EH longjmp, which is lightweight.
687    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
688
689    // longjmp doesn't return; mark this as unreachable
690    Value *V = Builder.CreateUnreachable();
691    Builder.ClearInsertionPoint();
692    return RValue::get(V);
693  }
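  // The unsuffixed __sync_* builtins are rewritten by Sema to the
  // size-suffixed _1/_2/_4/_8/_16 forms, so only those are expected here;
  // each one expands to the corresponding overloaded llvm.atomic.* intrinsic
  // wrapped in memory barriers (see EmitBinaryAtomic above).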
694  case Builtin::BI__sync_fetch_and_add:
695  case Builtin::BI__sync_fetch_and_sub:
696  case Builtin::BI__sync_fetch_and_or:
697  case Builtin::BI__sync_fetch_and_and:
698  case Builtin::BI__sync_fetch_and_xor:
699  case Builtin::BI__sync_add_and_fetch:
700  case Builtin::BI__sync_sub_and_fetch:
701  case Builtin::BI__sync_and_and_fetch:
702  case Builtin::BI__sync_or_and_fetch:
703  case Builtin::BI__sync_xor_and_fetch:
704  case Builtin::BI__sync_val_compare_and_swap:
705  case Builtin::BI__sync_bool_compare_and_swap:
706  case Builtin::BI__sync_lock_test_and_set:
707  case Builtin::BI__sync_lock_release:
708    assert(0 && "Shouldn't make it through sema");
709  case Builtin::BI__sync_fetch_and_add_1:
710  case Builtin::BI__sync_fetch_and_add_2:
711  case Builtin::BI__sync_fetch_and_add_4:
712  case Builtin::BI__sync_fetch_and_add_8:
713  case Builtin::BI__sync_fetch_and_add_16:
714    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_add, E);
715  case Builtin::BI__sync_fetch_and_sub_1:
716  case Builtin::BI__sync_fetch_and_sub_2:
717  case Builtin::BI__sync_fetch_and_sub_4:
718  case Builtin::BI__sync_fetch_and_sub_8:
719  case Builtin::BI__sync_fetch_and_sub_16:
720    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_sub, E);
721  case Builtin::BI__sync_fetch_and_or_1:
722  case Builtin::BI__sync_fetch_and_or_2:
723  case Builtin::BI__sync_fetch_and_or_4:
724  case Builtin::BI__sync_fetch_and_or_8:
725  case Builtin::BI__sync_fetch_and_or_16:
726    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_or, E);
727  case Builtin::BI__sync_fetch_and_and_1:
728  case Builtin::BI__sync_fetch_and_and_2:
729  case Builtin::BI__sync_fetch_and_and_4:
730  case Builtin::BI__sync_fetch_and_and_8:
731  case Builtin::BI__sync_fetch_and_and_16:
732    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_and, E);
733  case Builtin::BI__sync_fetch_and_xor_1:
734  case Builtin::BI__sync_fetch_and_xor_2:
735  case Builtin::BI__sync_fetch_and_xor_4:
736  case Builtin::BI__sync_fetch_and_xor_8:
737  case Builtin::BI__sync_fetch_and_xor_16:
738    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_xor, E);
739
740  // Clang extensions: not overloaded yet.
741  case Builtin::BI__sync_fetch_and_min:
742    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_min, E);
743  case Builtin::BI__sync_fetch_and_max:
744    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_max, E);
745  case Builtin::BI__sync_fetch_and_umin:
746    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umin, E);
747  case Builtin::BI__sync_fetch_and_umax:
748    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umax, E);
749
750  case Builtin::BI__sync_add_and_fetch_1:
751  case Builtin::BI__sync_add_and_fetch_2:
752  case Builtin::BI__sync_add_and_fetch_4:
753  case Builtin::BI__sync_add_and_fetch_8:
754  case Builtin::BI__sync_add_and_fetch_16:
755    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_add, E,
756                                llvm::Instruction::Add);
757  case Builtin::BI__sync_sub_and_fetch_1:
758  case Builtin::BI__sync_sub_and_fetch_2:
759  case Builtin::BI__sync_sub_and_fetch_4:
760  case Builtin::BI__sync_sub_and_fetch_8:
761  case Builtin::BI__sync_sub_and_fetch_16:
762    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_sub, E,
763                                llvm::Instruction::Sub);
764  case Builtin::BI__sync_and_and_fetch_1:
765  case Builtin::BI__sync_and_and_fetch_2:
766  case Builtin::BI__sync_and_and_fetch_4:
767  case Builtin::BI__sync_and_and_fetch_8:
768  case Builtin::BI__sync_and_and_fetch_16:
769    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_and, E,
770                                llvm::Instruction::And);
771  case Builtin::BI__sync_or_and_fetch_1:
772  case Builtin::BI__sync_or_and_fetch_2:
773  case Builtin::BI__sync_or_and_fetch_4:
774  case Builtin::BI__sync_or_and_fetch_8:
775  case Builtin::BI__sync_or_and_fetch_16:
776    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_or, E,
777                                llvm::Instruction::Or);
778  case Builtin::BI__sync_xor_and_fetch_1:
779  case Builtin::BI__sync_xor_and_fetch_2:
780  case Builtin::BI__sync_xor_and_fetch_4:
781  case Builtin::BI__sync_xor_and_fetch_8:
782  case Builtin::BI__sync_xor_and_fetch_16:
783    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_xor, E,
784                                llvm::Instruction::Xor);
785
786  case Builtin::BI__sync_val_compare_and_swap_1:
787  case Builtin::BI__sync_val_compare_and_swap_2:
788  case Builtin::BI__sync_val_compare_and_swap_4:
789  case Builtin::BI__sync_val_compare_and_swap_8:
790  case Builtin::BI__sync_val_compare_and_swap_16: {
    const llvm::Type *ValueType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(E->getType()));
    const llvm::Type *PtrType = ValueType->getPointerTo();
    const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
    Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap,
                                    IntrinsicTypes, 2);

    Value *Args[3] = { Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
                                             PtrType),
                       EmitCastToInt(*this, ValueType,
                                     EmitScalarExpr(E->getArg(1))),
                       EmitCastToInt(*this, ValueType,
                                     EmitScalarExpr(E->getArg(2))) };
    return RValue::get(EmitCastFromInt(*this, E->getType(),
                                       EmitCallWithBarrier(*this, AtomF, Args,
                                                           Args + 3)));
808  }
809
810  case Builtin::BI__sync_bool_compare_and_swap_1:
811  case Builtin::BI__sync_bool_compare_and_swap_2:
812  case Builtin::BI__sync_bool_compare_and_swap_4:
813  case Builtin::BI__sync_bool_compare_and_swap_8:
814  case Builtin::BI__sync_bool_compare_and_swap_16: {
    const llvm::Type *ValueType =
      llvm::IntegerType::get(
        getLLVMContext(),
        getContext().getTypeSize(E->getArg(1)->getType()));
    const llvm::Type *PtrType = ValueType->getPointerTo();
    const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
    Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap,
                                    IntrinsicTypes, 2);

    Value *Args[3] = { Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
                                             PtrType),
                       EmitCastToInt(*this, ValueType,
                                     EmitScalarExpr(E->getArg(1))),
                       EmitCastToInt(*this, ValueType,
                                     EmitScalarExpr(E->getArg(2))) };
830    Value *OldVal = Args[1];
831    Value *PrevVal = EmitCallWithBarrier(*this, AtomF, Args, Args + 3);
832    Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
833    // zext bool to int.
834    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
835  }
836
837  case Builtin::BI__sync_lock_test_and_set_1:
838  case Builtin::BI__sync_lock_test_and_set_2:
839  case Builtin::BI__sync_lock_test_and_set_4:
840  case Builtin::BI__sync_lock_test_and_set_8:
841  case Builtin::BI__sync_lock_test_and_set_16:
842    return EmitBinaryAtomic(*this, Intrinsic::atomic_swap, E);
843
844  case Builtin::BI__sync_lock_release_1:
845  case Builtin::BI__sync_lock_release_2:
846  case Builtin::BI__sync_lock_release_4:
847  case Builtin::BI__sync_lock_release_8:
848  case Builtin::BI__sync_lock_release_16: {
849    Value *Ptr = EmitScalarExpr(E->getArg(0));
850    const llvm::Type *ElTy =
851      cast<llvm::PointerType>(Ptr->getType())->getElementType();
852    llvm::StoreInst *Store =
853      Builder.CreateStore(llvm::Constant::getNullValue(ElTy), Ptr);
854    Store->setVolatile(true);
855    return RValue::get(0);
856  }
857
858  case Builtin::BI__sync_synchronize: {
    // We assume, like gcc appears to, that this only applies to cached memory.
860    EmitMemoryBarrier(*this, true, true, true, true, false);
861    return RValue::get(0);
862  }
863
864  case Builtin::BI__builtin_llvm_memory_barrier: {
865    Value *C[5] = {
866      EmitScalarExpr(E->getArg(0)),
867      EmitScalarExpr(E->getArg(1)),
868      EmitScalarExpr(E->getArg(2)),
869      EmitScalarExpr(E->getArg(3)),
870      EmitScalarExpr(E->getArg(4))
871    };
872    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::memory_barrier), C, C + 5);
873    return RValue::get(0);
874  }
875
876    // Library functions with special handling.
877  case Builtin::BIsqrt:
878  case Builtin::BIsqrtf:
879  case Builtin::BIsqrtl: {
880    // TODO: there is currently no set of optimizer flags
881    // sufficient for us to rewrite sqrt to @llvm.sqrt.
882    // -fmath-errno=0 is not good enough; we need finiteness.
883    // We could probably precondition the call with an ult
884    // against 0, but is that worth the complexity?
885    break;
886  }
887
888  case Builtin::BIpow:
889  case Builtin::BIpowf:
890  case Builtin::BIpowl: {
    // Rewrite pow to the llvm.pow intrinsic if allowed.
892    if (!FD->hasAttr<ConstAttr>())
893      break;
894    Value *Base = EmitScalarExpr(E->getArg(0));
895    Value *Exponent = EmitScalarExpr(E->getArg(1));
896    const llvm::Type *ArgType = Base->getType();
897    Value *F = CGM.getIntrinsic(Intrinsic::pow, &ArgType, 1);
898    return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp"));
899  }
900
901  case Builtin::BI__builtin_signbit:
902  case Builtin::BI__builtin_signbitf:
903  case Builtin::BI__builtin_signbitl: {
904    LLVMContext &C = CGM.getLLVMContext();
905
906    Value *Arg = EmitScalarExpr(E->getArg(0));
907    const llvm::Type *ArgTy = Arg->getType();
908    if (ArgTy->isPPC_FP128Ty())
909      break; // FIXME: I'm not sure what the right implementation is here.
910    int ArgWidth = ArgTy->getPrimitiveSizeInBits();
911    const llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
912    Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
913    Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
914    Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
915    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
916  }
917  }
918
919  // If this is an alias for a libm function (e.g. __builtin_sin) turn it into
920  // that function.
921  if (getContext().BuiltinInfo.isLibFunction(BuiltinID) ||
922      getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
923    return EmitCall(E->getCallee()->getType(),
924                    CGM.getBuiltinLibFunction(FD, BuiltinID),
925                    ReturnValueSlot(),
926                    E->arg_begin(), E->arg_end());
927
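  // See if the builtin name maps directly onto an LLVM intrinsic.  The lookup
  // below matches the builtin's name against intrinsics that declare a
  // corresponding GCCBuiltin name for this target's arch prefix (e.g. the
  // __builtin_ia32_* builtins on x86), bitcasting arguments and the result
  // where the types differ only in representation.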
928  // See if we have a target specific intrinsic.
929  const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
930  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
931  if (const char *Prefix =
932      llvm::Triple::getArchTypePrefix(Target.getTriple().getArch()))
933    IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
934
935  if (IntrinsicID != Intrinsic::not_intrinsic) {
936    SmallVector<Value*, 16> Args;
937
938    Function *F = CGM.getIntrinsic(IntrinsicID);
939    const llvm::FunctionType *FTy = F->getFunctionType();
940
941    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
942      Value *ArgValue = EmitScalarExpr(E->getArg(i));
943
944      // If the intrinsic arg type is different from the builtin arg type
945      // we need to do a bit cast.
946      const llvm::Type *PTy = FTy->getParamType(i);
947      if (PTy != ArgValue->getType()) {
        assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
               "Must be able to losslessly bit cast to param");
950        ArgValue = Builder.CreateBitCast(ArgValue, PTy);
951      }
952
953      Args.push_back(ArgValue);
954    }
955
956    Value *V = Builder.CreateCall(F, Args.data(), Args.data() + Args.size());
957    QualType BuiltinRetType = E->getType();
958
959    const llvm::Type *RetTy = llvm::Type::getVoidTy(VMContext);
960    if (!BuiltinRetType->isVoidType()) RetTy = ConvertType(BuiltinRetType);
961
962    if (RetTy != V->getType()) {
963      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
964             "Must be able to losslessly bit cast result type");
965      V = Builder.CreateBitCast(V, RetTy);
966    }
967
968    return RValue::get(V);
969  }
970
971  // See if we have a target specific builtin that needs to be lowered.
972  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
973    return RValue::get(V);
974
975  ErrorUnsupported(E, "builtin function");
976
977  // Unknown builtin, for now just dump it out and return undef.
978  if (hasAggregateLLVMType(E->getType()))
979    return RValue::getAggregate(CreateMemTemp(E->getType()));
980  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
981}
982
983Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
984                                              const CallExpr *E) {
985  switch (Target.getTriple().getArch()) {
986  case llvm::Triple::arm:
987  case llvm::Triple::thumb:
988    return EmitARMBuiltinExpr(BuiltinID, E);
989  case llvm::Triple::x86:
990  case llvm::Triple::x86_64:
991    return EmitX86BuiltinExpr(BuiltinID, E);
992  case llvm::Triple::ppc:
993  case llvm::Triple::ppc64:
994    return EmitPPCBuiltinExpr(BuiltinID, E);
995  default:
996    return 0;
997  }
998}
999
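// GetNeonType maps the low three bits of the NEON type-code constant passed
// as the last builtin argument onto an LLVM vector type: 0/5 -> i8,
// 1/6/7 -> i16, 2 -> i32, 3 -> i64, 4 -> float, with 'q' selecting the
// 128-bit variant by doubling the element count.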
1000const llvm::VectorType *GetNeonType(LLVMContext &C, unsigned type, bool q) {
1001  switch (type) {
1002    default: break;
1003    case 0:
1004    case 5: return llvm::VectorType::get(llvm::Type::getInt8Ty(C), 8 << (int)q);
1005    case 6:
1006    case 7:
1007    case 1: return llvm::VectorType::get(llvm::Type::getInt16Ty(C),4 << (int)q);
1008    case 2: return llvm::VectorType::get(llvm::Type::getInt32Ty(C),2 << (int)q);
1009    case 3: return llvm::VectorType::get(llvm::Type::getInt64Ty(C),1 << (int)q);
1010    case 4: return llvm::VectorType::get(llvm::Type::getFloatTy(C),2 << (int)q);
  }
1012  return 0;
1013}
1014
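// EmitNeonSplat broadcasts a single lane across a vector by shuffling the
// value with itself using a constant index vector in which every element is
// the requested lane; 'widen' doubles the element count for the cases that
// produce a quad-register result from a double-register input.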
1015Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C, bool widen) {
1016  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
1017  if (widen)
1018    nElts <<= 1;
1019  SmallVector<Constant*, 16> Indices(nElts, C);
1020  Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1021  return Builder.CreateShuffleVector(V, V, SV, "lane");
1022}
1023
1024Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
1025                                     const char *name, bool splat,
1026                                     unsigned shift, bool rightshift) {
1027  unsigned j = 0;
1028  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1029       ai != ae; ++ai, ++j)
1030    if (shift > 0 && shift == j)
1031      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
1032    else
1033      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
1034
1035  if (splat) {
1036    Ops[j-1] = EmitNeonSplat(Ops[j-1], cast<Constant>(Ops[j]));
1037    Ops.resize(j);
1038  }
1039  return Builder.CreateCall(F, Ops.begin(), Ops.end(), name);
1040}
1041
1042Value *CodeGenFunction::EmitNeonShiftVector(Value *V, const llvm::Type *Ty,
1043                                            bool neg) {
1044  ConstantInt *CI = cast<ConstantInt>(V);
1045  int SV = CI->getSExtValue();
1046
1047  const llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1048  llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
1049  SmallVector<llvm::Constant*, 16> CV(VTy->getNumElements(), C);
1050  return llvm::ConstantVector::get(CV.begin(), CV.size());
1051}
1052
1053/// GetPointeeAlignment - Given an expression with a pointer type, find the
1054/// alignment of the type referenced by the pointer.  Skip over implicit
1055/// casts.
1056static Value *GetPointeeAlignment(CodeGenFunction &CGF, const Expr *Addr) {
1057  unsigned Align = 1;
1058  // Check if the type is a pointer.  The implicit cast operand might not be.
1059  while (Addr->getType()->isPointerType()) {
1060    QualType PtTy = Addr->getType()->getPointeeType();
1061    unsigned NewA = CGF.getContext().getTypeAlignInChars(PtTy).getQuantity();
1062    if (NewA > Align)
1063      Align = NewA;
1064
1065    // If the address is an implicit cast, repeat with the cast operand.
1066    if (const ImplicitCastExpr *CastAddr = dyn_cast<ImplicitCastExpr>(Addr)) {
1067      Addr = CastAddr->getSubExpr();
1068      continue;
1069    }
1070    break;
1071  }
1072  return llvm::ConstantInt::get(CGF.Int32Ty, Align);
1073}
1074
1075Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
1076                                           const CallExpr *E) {
1077  if (BuiltinID == ARM::BI__clear_cache) {
1078    const FunctionDecl *FD = E->getDirectCallee();
1079    Value *a = EmitScalarExpr(E->getArg(0));
1080    Value *b = EmitScalarExpr(E->getArg(1));
1081    const llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
1082    const llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
1083    llvm::StringRef Name = FD->getName();
1084    return Builder.CreateCall2(CGM.CreateRuntimeFunction(FTy, Name),
1085                               a, b);
1086  }
1087
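  // The remaining ARM builtins take a trailing integer-constant argument; for
  // the overloaded NEON builtins it encodes the element type in its low three
  // bits, signedness in bit 3, and the 128-bit ('q') form in bit 4.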
1088  llvm::SmallVector<Value*, 4> Ops;
1089  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
1090    Ops.push_back(EmitScalarExpr(E->getArg(i)));
1091
1092  llvm::APSInt Result;
1093  const Expr *Arg = E->getArg(E->getNumArgs()-1);
1094  if (!Arg->isIntegerConstantExpr(Result, getContext()))
1095    return 0;
1096
1097  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
1098      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
1099    // Determine the overloaded type of this builtin.
1100    const llvm::Type *Ty;
1101    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
1102      Ty = llvm::Type::getFloatTy(VMContext);
1103    else
1104      Ty = llvm::Type::getDoubleTy(VMContext);
1105
1106    // Determine whether this is an unsigned conversion or not.
1107    bool usgn = Result.getZExtValue() == 1;
1108    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
1109
1110    // Call the appropriate intrinsic.
1111    Function *F = CGM.getIntrinsic(Int, &Ty, 1);
1112    return Builder.CreateCall(F, Ops.begin(), Ops.end(), "vcvtr");
1113  }
1114
1115  // Determine the type of this overloaded NEON intrinsic.
1116  unsigned type = Result.getZExtValue();
1117  bool usgn = type & 0x08;
1118  bool quad = type & 0x10;
1119  bool poly = (type & 0x7) == 5 || (type & 0x7) == 6;
1120  bool splat = false;
1121
1122  const llvm::VectorType *VTy = GetNeonType(VMContext, type & 0x7, quad);
1123  const llvm::Type *Ty = VTy;
1124  if (!Ty)
1125    return 0;
1126
1127  unsigned Int;
1128  switch (BuiltinID) {
1129  default: return 0;
1130  case ARM::BI__builtin_neon_vaba_v:
1131  case ARM::BI__builtin_neon_vabaq_v:
1132    Int = usgn ? Intrinsic::arm_neon_vabau : Intrinsic::arm_neon_vabas;
1133    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vaba");
1134  case ARM::BI__builtin_neon_vabal_v:
1135    Int = usgn ? Intrinsic::arm_neon_vabalu : Intrinsic::arm_neon_vabals;
1136    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabal");
1137  case ARM::BI__builtin_neon_vabd_v:
1138  case ARM::BI__builtin_neon_vabdq_v:
1139    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
1140    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabd");
1141  case ARM::BI__builtin_neon_vabdl_v:
1142    Int = usgn ? Intrinsic::arm_neon_vabdlu : Intrinsic::arm_neon_vabdls;
1143    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabdl");
1144  case ARM::BI__builtin_neon_vabs_v:
1145  case ARM::BI__builtin_neon_vabsq_v:
1146    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, &Ty, 1),
1147                        Ops, "vabs");
1148  case ARM::BI__builtin_neon_vaddhn_v:
1149    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, &Ty, 1),
1150                        Ops, "vaddhn");
1151  case ARM::BI__builtin_neon_vaddl_v:
1152    if (usgn) {
1153      Ops[0] = Builder.CreateZExt(Ops[0], Ty);
1154      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1155    } else {
1156      Ops[0] = Builder.CreateSExt(Ops[0], Ty);
1157      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1158    }
1159    return Builder.CreateAdd(Ops[0], Ops[1], "vaddl");
1160  case ARM::BI__builtin_neon_vaddw_v:
1161    if (usgn)
1162      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1163    else
1164      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1165    return Builder.CreateAdd(Ops[0], Ops[1], "vaddw");
1166  case ARM::BI__builtin_neon_vcale_v:
1167    std::swap(Ops[0], Ops[1]);
1168  case ARM::BI__builtin_neon_vcage_v: {
1169    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged, &Ty, 1);
1170    return EmitNeonCall(F, Ops, "vcage");
1171  }
1172  case ARM::BI__builtin_neon_vcaleq_v:
1173    std::swap(Ops[0], Ops[1]);
1174  case ARM::BI__builtin_neon_vcageq_v: {
1175    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq, &Ty, 1);
1176    return EmitNeonCall(F, Ops, "vcage");
1177  }
1178  case ARM::BI__builtin_neon_vcalt_v:
1179    std::swap(Ops[0], Ops[1]);
1180  case ARM::BI__builtin_neon_vcagt_v: {
1181    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd, &Ty, 1);
1182    return EmitNeonCall(F, Ops, "vcagt");
1183  }
1184  case ARM::BI__builtin_neon_vcaltq_v:
1185    std::swap(Ops[0], Ops[1]);
1186  case ARM::BI__builtin_neon_vcagtq_v: {
1187    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq, &Ty, 1);
1188    return EmitNeonCall(F, Ops, "vcagt");
1189  }
1190  case ARM::BI__builtin_neon_vcls_v:
1191  case ARM::BI__builtin_neon_vclsq_v: {
1192    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, &Ty, 1);
1193    return EmitNeonCall(F, Ops, "vcls");
1194  }
1195  case ARM::BI__builtin_neon_vclz_v:
1196  case ARM::BI__builtin_neon_vclzq_v: {
1197    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, &Ty, 1);
1198    return EmitNeonCall(F, Ops, "vclz");
1199  }
1200  case ARM::BI__builtin_neon_vcnt_v:
1201  case ARM::BI__builtin_neon_vcntq_v: {
1202    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, &Ty, 1);
1203    return EmitNeonCall(F, Ops, "vcnt");
1204  }
1205  // FIXME: intrinsics for f16<->f32 convert missing from ARM target.
1206  case ARM::BI__builtin_neon_vcvt_f32_v:
1207  case ARM::BI__builtin_neon_vcvtq_f32_v: {
1208    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1209    Ty = GetNeonType(VMContext, 4, quad);
1210    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1211                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1212  }
1213  case ARM::BI__builtin_neon_vcvt_s32_v:
1214  case ARM::BI__builtin_neon_vcvt_u32_v:
1215  case ARM::BI__builtin_neon_vcvtq_s32_v:
1216  case ARM::BI__builtin_neon_vcvtq_u32_v: {
1217    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(VMContext, 4, quad));
1218    return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1219                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1220  }
1221  case ARM::BI__builtin_neon_vcvt_n_f32_v:
1222  case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
1223    const llvm::Type *Tys[2] = { GetNeonType(VMContext, 4, quad), Ty };
1224    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp : Intrinsic::arm_neon_vcvtfxs2fp;
1225    Function *F = CGM.getIntrinsic(Int, Tys, 2);
1226    return EmitNeonCall(F, Ops, "vcvt_n");
1227  }
1228  case ARM::BI__builtin_neon_vcvt_n_s32_v:
1229  case ARM::BI__builtin_neon_vcvt_n_u32_v:
1230  case ARM::BI__builtin_neon_vcvtq_n_s32_v:
1231  case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
1232    const llvm::Type *Tys[2] = { Ty, GetNeonType(VMContext, 4, quad) };
1233    Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu : Intrinsic::arm_neon_vcvtfp2fxs;
1234    Function *F = CGM.getIntrinsic(Int, Tys, 2);
1235    return EmitNeonCall(F, Ops, "vcvt_n");
1236  }
1237  case ARM::BI__builtin_neon_vdup_lane_v:
1238    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1239    return EmitNeonSplat(Ops[0], cast<Constant>(Ops[1]));
1240  case ARM::BI__builtin_neon_vdupq_lane_v:
1241    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1242    return EmitNeonSplat(Ops[0], cast<Constant>(Ops[1]), true);
1243  case ARM::BI__builtin_neon_vext_v:
1244  case ARM::BI__builtin_neon_vextq_v: {
    ConstantInt *C = cast<ConstantInt>(Ops[2]);
1246    int CV = C->getSExtValue();
1247    SmallVector<Constant*, 16> Indices;
1248    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1249      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
1250
1251    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1252    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1253    Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1254    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
1255  }
1256  case ARM::BI__builtin_neon_vget_lane_i8:
1257  case ARM::BI__builtin_neon_vget_lane_i16:
1258  case ARM::BI__builtin_neon_vget_lane_i32:
1259  case ARM::BI__builtin_neon_vget_lane_i64:
1260  case ARM::BI__builtin_neon_vget_lane_f32:
1261  case ARM::BI__builtin_neon_vgetq_lane_i8:
1262  case ARM::BI__builtin_neon_vgetq_lane_i16:
1263  case ARM::BI__builtin_neon_vgetq_lane_i32:
1264  case ARM::BI__builtin_neon_vgetq_lane_i64:
1265  case ARM::BI__builtin_neon_vgetq_lane_f32:
1266    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
1267                                        "vget_lane");
1268  case ARM::BI__builtin_neon_vhadd_v:
1269  case ARM::BI__builtin_neon_vhaddq_v:
1270    Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
1271    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhadd");
1272  case ARM::BI__builtin_neon_vhsub_v:
1273  case ARM::BI__builtin_neon_vhsubq_v:
1274    Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
1275    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhsub");
1276  case ARM::BI__builtin_neon_vld1_v:
1277  case ARM::BI__builtin_neon_vld1q_v:
1278    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1279    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, &Ty, 1),
1280                        Ops, "vld1");
1281  case ARM::BI__builtin_neon_vld1_lane_v:
1282  case ARM::BI__builtin_neon_vld1q_lane_v:
1283    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1284    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1285    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1286    Ops[0] = Builder.CreateLoad(Ops[0]);
1287    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
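  // vld1_dup loads a single scalar element and replicates it across the whole
  // vector: load, insert into lane 0 of an undef vector, then splat lane 0 via
  // EmitNeonSplat (presumably a shufflevector with an all-zero mask).  A hedged
  // sketch for a <4 x i16> result (names illustrative):
  //   %elt = load i16* %ptr
  //   %ins = insertelement <4 x i16> undef, i16 %elt, i32 0
  //   %dup = shufflevector <4 x i16> %ins, <4 x i16> undef, <4 x i32> zeroinitializer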
1288  case ARM::BI__builtin_neon_vld1_dup_v:
1289  case ARM::BI__builtin_neon_vld1q_dup_v: {
1290    Value *V = UndefValue::get(Ty);
1291    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1292    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1293    Ops[0] = Builder.CreateLoad(Ops[0]);
1294    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1295    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
1296    return EmitNeonSplat(Ops[0], CI);
1297  }
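  // For the multiple-structure loads below, the intrinsic returns a struct of
  // N vectors and the builtin's first argument is a pointer to the aggregate
  // result, so the call result is stored back through Ops[0] after bitcasting
  // that pointer to the struct type.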
1298  case ARM::BI__builtin_neon_vld2_v:
1299  case ARM::BI__builtin_neon_vld2q_v: {
1300    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, &Ty, 1);
1301    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1302    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
1303    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1304    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1305    return Builder.CreateStore(Ops[1], Ops[0]);
1306  }
1307  case ARM::BI__builtin_neon_vld3_v:
1308  case ARM::BI__builtin_neon_vld3q_v: {
1309    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, &Ty, 1);
1310    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1311    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
1312    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1313    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1314    return Builder.CreateStore(Ops[1], Ops[0]);
1315  }
1316  case ARM::BI__builtin_neon_vld4_v:
1317  case ARM::BI__builtin_neon_vld4q_v: {
1318    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, &Ty, 1);
1319    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1320    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
1321    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1322    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1323    return Builder.CreateStore(Ops[1], Ops[0]);
1324  }
1325  case ARM::BI__builtin_neon_vld2_lane_v:
1326  case ARM::BI__builtin_neon_vld2q_lane_v: {
1327    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, &Ty, 1);
1328    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1329    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1330    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1331    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld2_lane");
1332    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1333    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1334    return Builder.CreateStore(Ops[1], Ops[0]);
1335  }
1336  case ARM::BI__builtin_neon_vld3_lane_v:
1337  case ARM::BI__builtin_neon_vld3q_lane_v: {
1338    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, &Ty, 1);
1339    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1340    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1341    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
1342    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1343    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld3_lane");
1344    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1345    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1346    return Builder.CreateStore(Ops[1], Ops[0]);
1347  }
1348  case ARM::BI__builtin_neon_vld4_lane_v:
1349  case ARM::BI__builtin_neon_vld4q_lane_v: {
1350    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, &Ty, 1);
1351    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1352    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1353    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
1354    Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
1355    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1356    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld4_lane");
1357    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1358    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1359    return Builder.CreateStore(Ops[1], Ops[0]);
1360  }
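  // vldN_dup is emulated with the corresponding lane intrinsic: load lane 0
  // into otherwise-undef vectors, then splat lane 0 of each returned vector
  // across all of its elements.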
1361  case ARM::BI__builtin_neon_vld2_dup_v:
1362  case ARM::BI__builtin_neon_vld3_dup_v:
1363  case ARM::BI__builtin_neon_vld4_dup_v: {
1364    switch (BuiltinID) {
1365    case ARM::BI__builtin_neon_vld2_dup_v:
1366      Int = Intrinsic::arm_neon_vld2lane;
1367      break;
1368    case ARM::BI__builtin_neon_vld3_dup_v:
1369      Int = Intrinsic::arm_neon_vld3lane;
1370      break;
1371    case ARM::BI__builtin_neon_vld4_dup_v:
1372      Int = Intrinsic::arm_neon_vld4lane;
1373      break;
1374    default: assert(0 && "unknown vld_dup intrinsic?");
1375    }
1376    Function *F = CGM.getIntrinsic(Int, &Ty, 1);
1377    const llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
1378
1379    SmallVector<Value*, 6> Args;
1380    Args.push_back(Ops[1]);
1381    Args.append(STy->getNumElements(), UndefValue::get(Ty));
1382
1383    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1384    Args.push_back(CI);
1385    Args.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1386
1387    Ops[1] = Builder.CreateCall(F, Args.begin(), Args.end(), "vld_dup");
1388    // splat lane 0 to all elts in each vector of the result.
1389    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1390      Value *Val = Builder.CreateExtractValue(Ops[1], i);
1391      Value *Elt = Builder.CreateBitCast(Val, Ty);
1392      Elt = EmitNeonSplat(Elt, CI);
1393      Elt = Builder.CreateBitCast(Elt, Val->getType());
1394      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
1395    }
1396    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1397    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1398    return Builder.CreateStore(Ops[1], Ops[0]);
1399  }
1400  case ARM::BI__builtin_neon_vmax_v:
1401  case ARM::BI__builtin_neon_vmaxq_v:
1402    Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
1403    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmax");
1404  case ARM::BI__builtin_neon_vmin_v:
1405  case ARM::BI__builtin_neon_vminq_v:
1406    Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
1407    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmin");
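  // The widening multiply-accumulate builtins are expanded inline.  The _lane
  // forms first splat the selected lane of the second multiplicand and fall
  // through to the plain forms, which extend both multiplicands to the wide
  // result type, multiply, and then add to (or subtract from) the accumulator.
  // A hedged sketch for a signed vmlal of <4 x i16> into <4 x i32> (names
  // illustrative):
  //   %w1 = sext <4 x i16> %b to <4 x i32>
  //   %w2 = sext <4 x i16> %c to <4 x i32>
  //   %m  = mul <4 x i32> %w1, %w2
  //   %vmlal = add <4 x i32> %acc, %m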
1408  case ARM::BI__builtin_neon_vmlal_lane_v:
1409    Ops[2] = EmitNeonSplat(Ops[2], cast<Constant>(Ops[3]));
1410  case ARM::BI__builtin_neon_vmlal_v:
1411    if (usgn) {
1412      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1413      Ops[2] = Builder.CreateZExt(Ops[2], Ty);
1414    } else {
1415      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1416      Ops[2] = Builder.CreateSExt(Ops[2], Ty);
1417    }
1418    Ops[1] = Builder.CreateMul(Ops[1], Ops[2]);
1419    return Builder.CreateAdd(Ops[0], Ops[1], "vmlal");
1420  case ARM::BI__builtin_neon_vmlsl_lane_v:
1421    Ops[2] = EmitNeonSplat(Ops[2], cast<Constant>(Ops[3]));
1422  case ARM::BI__builtin_neon_vmlsl_v:
1423    if (usgn) {
1424      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1425      Ops[2] = Builder.CreateZExt(Ops[2], Ty);
1426    } else {
1427      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1428      Ops[2] = Builder.CreateSExt(Ops[2], Ty);
1429    }
1430    Ops[1] = Builder.CreateMul(Ops[1], Ops[2]);
1431    return Builder.CreateSub(Ops[0], Ops[1], "vmlsl");
1432  case ARM::BI__builtin_neon_vmovl_v:
1433    if (usgn)
1434      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
1435    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
1436  case ARM::BI__builtin_neon_vmovn_v:
1437    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
1438  case ARM::BI__builtin_neon_vmull_lane_v:
1439    Ops[1] = EmitNeonSplat(Ops[1], cast<Constant>(Ops[2]));
1440  case ARM::BI__builtin_neon_vmull_v:
1441    if (poly)
1442      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmullp, &Ty, 1),
1443                          Ops, "vmull");
1444    if (usgn) {
1445      Ops[0] = Builder.CreateZExt(Ops[0], Ty);
1446      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1447    } else {
1448      Ops[0] = Builder.CreateSExt(Ops[0], Ty);
1449      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1450    }
1451    return Builder.CreateMul(Ops[0], Ops[1], "vmull");
1452  case ARM::BI__builtin_neon_vpadal_v:
1453  case ARM::BI__builtin_neon_vpadalq_v:
1454    Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
1455    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpadal");
1456  case ARM::BI__builtin_neon_vpadd_v:
1457    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, &Ty, 1),
1458                        Ops, "vpadd");
1459  case ARM::BI__builtin_neon_vpaddl_v:
1460  case ARM::BI__builtin_neon_vpaddlq_v:
1461    Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
1462    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpaddl");
1463  case ARM::BI__builtin_neon_vpmax_v:
1464    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
1465    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmax");
1466  case ARM::BI__builtin_neon_vpmin_v:
1467    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
1468    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmin");
1469  case ARM::BI__builtin_neon_vqabs_v:
1470  case ARM::BI__builtin_neon_vqabsq_v:
1471    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, &Ty, 1),
1472                        Ops, "vqabs");
1473  case ARM::BI__builtin_neon_vqadd_v:
1474  case ARM::BI__builtin_neon_vqaddq_v:
1475    Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
1476    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqadd");
1477  case ARM::BI__builtin_neon_vqdmlal_lane_v:
1478    splat = true;
1479  case ARM::BI__builtin_neon_vqdmlal_v:
1480    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, &Ty, 1),
1481                        Ops, "vqdmlal", splat);
1482  case ARM::BI__builtin_neon_vqdmlsl_lane_v:
1483    splat = true;
1484  case ARM::BI__builtin_neon_vqdmlsl_v:
1485    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, &Ty, 1),
1486                        Ops, "vqdmlsl", splat);
1487  case ARM::BI__builtin_neon_vqdmulh_lane_v:
1488  case ARM::BI__builtin_neon_vqdmulhq_lane_v:
1489    splat = true;
1490  case ARM::BI__builtin_neon_vqdmulh_v:
1491  case ARM::BI__builtin_neon_vqdmulhq_v:
1492    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, &Ty, 1),
1493                        Ops, "vqdmulh", splat);
1494  case ARM::BI__builtin_neon_vqdmull_lane_v:
1495    splat = true;
1496  case ARM::BI__builtin_neon_vqdmull_v:
1497    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, &Ty, 1),
1498                        Ops, "vqdmull", splat);
1499  case ARM::BI__builtin_neon_vqmovn_v:
1500    Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
1501    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqmovn");
1502  case ARM::BI__builtin_neon_vqmovun_v:
1503    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, &Ty, 1),
1504                        Ops, "vqmovun");
1505  case ARM::BI__builtin_neon_vqneg_v:
1506    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, &Ty, 1),
1507                        Ops, "vqneg");
1508  case ARM::BI__builtin_neon_vqrdmulh_lane_v:
1509  case ARM::BI__builtin_neon_vqrdmulhq_lane_v:
1510    splat = true;
1511  case ARM::BI__builtin_neon_vqrdmulh_v:
1512  case ARM::BI__builtin_neon_vqrdmulhq_v:
1513    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, &Ty, 1),
1514                        Ops, "vqrdmulh", splat);
1515  case ARM::BI__builtin_neon_vqrshl_v:
1516  case ARM::BI__builtin_neon_vqrshlq_v:
1517    Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
1518    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqrshl");
1519  case ARM::BI__builtin_neon_vqrshrn_n_v:
1520    Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
1521    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqrshrn_n", false,
1522                        1, true);
1523  case ARM::BI__builtin_neon_vqrshrun_n_v:
1524    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, &Ty, 1),
1525                        Ops, "vqrshrun_n", false, 1, true);
1526  case ARM::BI__builtin_neon_vqshl_v:
1527  case ARM::BI__builtin_neon_vqshlq_v:
1528    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
1529    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshl");
1530  case ARM::BI__builtin_neon_vqshl_n_v:
1531  case ARM::BI__builtin_neon_vqshlq_n_v:
1532    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
1533    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshl_n", false,
1534                        1, false);
1535  case ARM::BI__builtin_neon_vqshlu_n_v:
1536  case ARM::BI__builtin_neon_vqshluq_n_v:
1537    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, &Ty, 1),
1538                        Ops, "vqshlu_n", false, 1, false);
1539  case ARM::BI__builtin_neon_vqshrn_n_v:
1540    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
1541    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshrn_n", false,
1542                        1, true);
1543  case ARM::BI__builtin_neon_vqshrun_n_v:
1544    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, &Ty, 1),
1545                        Ops, "vqshrun_n", false, 1, true);
1546  case ARM::BI__builtin_neon_vqsub_v:
1547  case ARM::BI__builtin_neon_vqsubq_v:
1548    Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
1549    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqsub");
1550  case ARM::BI__builtin_neon_vraddhn_v:
1551    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, &Ty, 1),
1552                        Ops, "vraddhn");
1553  case ARM::BI__builtin_neon_vrecpe_v:
1554  case ARM::BI__builtin_neon_vrecpeq_v:
1555    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, &Ty, 1),
1556                        Ops, "vrecpe");
1557  case ARM::BI__builtin_neon_vrecps_v:
1558  case ARM::BI__builtin_neon_vrecpsq_v:
1559    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, &Ty, 1),
1560                        Ops, "vrecps");
1561  case ARM::BI__builtin_neon_vrhadd_v:
1562  case ARM::BI__builtin_neon_vrhaddq_v:
1563    Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
1564    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrhadd");
1565  case ARM::BI__builtin_neon_vrshl_v:
1566  case ARM::BI__builtin_neon_vrshlq_v:
1567    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1568    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrshl");
1569  case ARM::BI__builtin_neon_vrshrn_n_v:
1570    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, &Ty, 1),
1571                        Ops, "vrshrn_n", false, 1, true);
1572  case ARM::BI__builtin_neon_vrshr_n_v:
1573  case ARM::BI__builtin_neon_vrshrq_n_v:
1574    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1575    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrshr_n", false,
1576                        1, true);
1577  case ARM::BI__builtin_neon_vrsqrte_v:
1578  case ARM::BI__builtin_neon_vrsqrteq_v:
1579    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, &Ty, 1),
1580                        Ops, "vrsqrte");
1581  case ARM::BI__builtin_neon_vrsqrts_v:
1582  case ARM::BI__builtin_neon_vrsqrtsq_v:
1583    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, &Ty, 1),
1584                        Ops, "vrsqrts");
1585  case ARM::BI__builtin_neon_vrsra_n_v:
1586  case ARM::BI__builtin_neon_vrsraq_n_v:
1587    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1588    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1589    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
1590    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1591    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, &Ty, 1), Ops[1], Ops[2]);
1592    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
1593  case ARM::BI__builtin_neon_vrsubhn_v:
1594    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, &Ty, 1),
1595                        Ops, "vrsubhn");
1596  case ARM::BI__builtin_neon_vset_lane_i8:
1597  case ARM::BI__builtin_neon_vset_lane_i16:
1598  case ARM::BI__builtin_neon_vset_lane_i32:
1599  case ARM::BI__builtin_neon_vset_lane_i64:
1600  case ARM::BI__builtin_neon_vset_lane_f32:
1601  case ARM::BI__builtin_neon_vsetq_lane_i8:
1602  case ARM::BI__builtin_neon_vsetq_lane_i16:
1603  case ARM::BI__builtin_neon_vsetq_lane_i32:
1604  case ARM::BI__builtin_neon_vsetq_lane_i64:
1605  case ARM::BI__builtin_neon_vsetq_lane_f32:
1606    Ops.push_back(EmitScalarExpr(E->getArg(2)));
1607    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
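  // The variable shifts (vshl, vqshl, vrshl, ...) keep using ARM-specific
  // intrinsics because a per-lane negative count shifts right and some forms
  // saturate or round, which plain IR shifts cannot express.  The simple
  // shift-by-immediate builtins (vshl_n, vshr_n) below are emitted as native
  // shl/lshr/ashr instructions instead.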
1608  case ARM::BI__builtin_neon_vshl_v:
1609  case ARM::BI__builtin_neon_vshlq_v:
1610    Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
1611    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vshl");
1612  case ARM::BI__builtin_neon_vshll_n_v:
1613    Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
1614    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vshll", false, 1);
1615  case ARM::BI__builtin_neon_vshl_n_v:
1616  case ARM::BI__builtin_neon_vshlq_n_v:
1617    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1618    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], "vshl_n");
1619  case ARM::BI__builtin_neon_vshrn_n_v:
1620    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, &Ty, 1),
1621                        Ops, "vshrn_n", false, 1, true);
1622  case ARM::BI__builtin_neon_vshr_n_v:
1623  case ARM::BI__builtin_neon_vshrq_n_v:
1624    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1625    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1626    if (usgn)
1627      return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
1628    else
1629      return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
1630  case ARM::BI__builtin_neon_vsri_n_v:
1631  case ARM::BI__builtin_neon_vsriq_n_v:
1632    poly = true;
1633  case ARM::BI__builtin_neon_vsli_n_v:
1634  case ARM::BI__builtin_neon_vsliq_n_v:
1635    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, poly);
1636    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, &Ty, 1),
1637                        Ops, "vsli_n");
1638  case ARM::BI__builtin_neon_vsra_n_v:
1639  case ARM::BI__builtin_neon_vsraq_n_v:
1640    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1641    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1642    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
1643    if (usgn)
1644      Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
1645    else
1646      Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
1647    return Builder.CreateAdd(Ops[0], Ops[1]);
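  // The vstN store builtins below just append the pointee alignment as a
  // trailing operand and call the matching arm.neon.vstN[lane] intrinsic.
  // The exception is vst1_lane, which is expanded to an extractelement plus
  // an ordinary store.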
1648  case ARM::BI__builtin_neon_vst1_v:
1649  case ARM::BI__builtin_neon_vst1q_v:
1650    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1651    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, &Ty, 1),
1652                        Ops, "");
1653  case ARM::BI__builtin_neon_vst1_lane_v:
1654  case ARM::BI__builtin_neon_vst1q_lane_v:
1655    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1656    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
1657    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1658    return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
1659  case ARM::BI__builtin_neon_vst2_v:
1660  case ARM::BI__builtin_neon_vst2q_v:
1661    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1662    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, &Ty, 1),
1663                        Ops, "");
1664  case ARM::BI__builtin_neon_vst2_lane_v:
1665  case ARM::BI__builtin_neon_vst2q_lane_v:
1666    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1667    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, &Ty, 1),
1668                        Ops, "");
1669  case ARM::BI__builtin_neon_vst3_v:
1670  case ARM::BI__builtin_neon_vst3q_v:
1671    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1672    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, &Ty, 1),
1673                        Ops, "");
1674  case ARM::BI__builtin_neon_vst3_lane_v:
1675  case ARM::BI__builtin_neon_vst3q_lane_v:
1676    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1677    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, &Ty, 1),
1678                        Ops, "");
1679  case ARM::BI__builtin_neon_vst4_v:
1680  case ARM::BI__builtin_neon_vst4q_v:
1681    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1682    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, &Ty, 1),
1683                        Ops, "");
1684  case ARM::BI__builtin_neon_vst4_lane_v:
1685  case ARM::BI__builtin_neon_vst4q_lane_v:
1686    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1687    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, &Ty, 1),
1688                        Ops, "");
1689  case ARM::BI__builtin_neon_vsubhn_v:
1690    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, &Ty, 1),
1691                        Ops, "vsubhn");
1692  case ARM::BI__builtin_neon_vsubl_v:
1693    if (usgn) {
1694      Ops[0] = Builder.CreateZExt(Ops[0], Ty);
1695      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1696    } else {
1697      Ops[0] = Builder.CreateSExt(Ops[0], Ty);
1698      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1699    }
1700    return Builder.CreateSub(Ops[0], Ops[1], "vsubl");
1701  case ARM::BI__builtin_neon_vsubw_v:
1702    if (usgn)
1703      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1704    else
1705      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1706    return Builder.CreateSub(Ops[0], Ops[1], "vsubw");
1707  case ARM::BI__builtin_neon_vtbl1_v:
1708    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
1709                        Ops, "vtbl1");
1710  case ARM::BI__builtin_neon_vtbl2_v:
1711    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
1712                        Ops, "vtbl2");
1713  case ARM::BI__builtin_neon_vtbl3_v:
1714    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
1715                        Ops, "vtbl3");
1716  case ARM::BI__builtin_neon_vtbl4_v:
1717    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
1718                        Ops, "vtbl4");
1719  case ARM::BI__builtin_neon_vtbx1_v:
1720    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
1721                        Ops, "vtbx1");
1722  case ARM::BI__builtin_neon_vtbx2_v:
1723    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
1724                        Ops, "vtbx2");
1725  case ARM::BI__builtin_neon_vtbx3_v:
1726    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
1727                        Ops, "vtbx3");
1728  case ARM::BI__builtin_neon_vtbx4_v:
1729    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
1730                        Ops, "vtbx4");
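  // vtst (bitwise test) needs no intrinsic: AND the operands, compare the
  // result against zero, and sign-extend the i1 lanes back to the element
  // type to form the usual all-ones/all-zeros mask.  A hedged sketch for
  // <8 x i8> operands (names illustrative):
  //   %and  = and <8 x i8> %a, %b
  //   %cmp  = icmp ne <8 x i8> %and, zeroinitializer
  //   %vtst = sext <8 x i1> %cmp to <8 x i8>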
1731  case ARM::BI__builtin_neon_vtst_v:
1732  case ARM::BI__builtin_neon_vtstq_v: {
1733    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1734    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1735    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
1736    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
1737                                ConstantAggregateZero::get(Ty));
1738    return Builder.CreateSExt(Ops[0], Ty, "vtst");
1739  }
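  // vtrn/vuzp/vzip each produce two result vectors.  Ops[0] points at the
  // two-vector result; each half is built with a shufflevector of the inputs
  // and stored to consecutive elements of that pointer, so the value returned
  // is simply the second store.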
1740  case ARM::BI__builtin_neon_vtrn_v:
1741  case ARM::BI__builtin_neon_vtrnq_v: {
1742    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1743    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1744    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1745    Value *SV;
1746
1747    for (unsigned vi = 0; vi != 2; ++vi) {
1748      SmallVector<Constant*, 16> Indices;
1749      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1750        Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
1751        Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
1752      }
1753      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1754      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1755      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
1756      SV = Builder.CreateStore(SV, Addr);
1757    }
1758    return SV;
1759  }
1760  case ARM::BI__builtin_neon_vuzp_v:
1761  case ARM::BI__builtin_neon_vuzpq_v: {
1762    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1763    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1764    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1765    Value *SV;
1766
1767    for (unsigned vi = 0; vi != 2; ++vi) {
1768      SmallVector<Constant*, 16> Indices;
1769      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1770        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
1771
1772      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1773      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1774      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
1775      SV = Builder.CreateStore(SV, Addr);
1776    }
1777    return SV;
1778  }
1779  case ARM::BI__builtin_neon_vzip_v:
1780  case ARM::BI__builtin_neon_vzipq_v: {
1781    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1782    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1783    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1784    Value *SV;
1785
1786    for (unsigned vi = 0; vi != 2; ++vi) {
1787      SmallVector<Constant*, 16> Indices;
1788      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1789        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
1790        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
1791      }
1792      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1793      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1794      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
1795      SV = Builder.CreateStore(SV, Addr);
1796    }
1797    return SV;
1798  }
1799  }
1800}
1801
1802Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1803                                           const CallExpr *E) {
1804
1805  llvm::SmallVector<Value*, 4> Ops;
1806
1807  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
1808    Ops.push_back(EmitScalarExpr(E->getArg(i)));
1809
1810  switch (BuiltinID) {
1811  default: return 0;
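  // The SSE2 shift-by-immediate builtins take the count as a plain integer,
  // but the underlying intrinsics expect it in the low element of a vector:
  // zero-extend the count to i64, insert it into element 0 of a v2i64, and
  // bitcast to the operand's vector type before calling the intrinsic.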
1812  case X86::BI__builtin_ia32_pslldi128:
1813  case X86::BI__builtin_ia32_psllqi128:
1814  case X86::BI__builtin_ia32_psllwi128:
1815  case X86::BI__builtin_ia32_psradi128:
1816  case X86::BI__builtin_ia32_psrawi128:
1817  case X86::BI__builtin_ia32_psrldi128:
1818  case X86::BI__builtin_ia32_psrlqi128:
1819  case X86::BI__builtin_ia32_psrlwi128: {
1820    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
1821    const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
1822    llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
1823    Ops[1] = Builder.CreateInsertElement(llvm::UndefValue::get(Ty),
1824                                         Ops[1], Zero, "insert");
1825    Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType(), "bitcast");
1826    const char *name = 0;
1827    Intrinsic::ID ID = Intrinsic::not_intrinsic;
1828
1829    switch (BuiltinID) {
1830    default: assert(0 && "Unsupported shift intrinsic!");
1831    case X86::BI__builtin_ia32_pslldi128:
1832      name = "pslldi";
1833      ID = Intrinsic::x86_sse2_psll_d;
1834      break;
1835    case X86::BI__builtin_ia32_psllqi128:
1836      name = "psllqi";
1837      ID = Intrinsic::x86_sse2_psll_q;
1838      break;
1839    case X86::BI__builtin_ia32_psllwi128:
1840      name = "psllwi";
1841      ID = Intrinsic::x86_sse2_psll_w;
1842      break;
1843    case X86::BI__builtin_ia32_psradi128:
1844      name = "psradi";
1845      ID = Intrinsic::x86_sse2_psra_d;
1846      break;
1847    case X86::BI__builtin_ia32_psrawi128:
1848      name = "psrawi";
1849      ID = Intrinsic::x86_sse2_psra_w;
1850      break;
1851    case X86::BI__builtin_ia32_psrldi128:
1852      name = "psrldi";
1853      ID = Intrinsic::x86_sse2_psrl_d;
1854      break;
1855    case X86::BI__builtin_ia32_psrlqi128:
1856      name = "psrlqi";
1857      ID = Intrinsic::x86_sse2_psrl_q;
1858      break;
1859    case X86::BI__builtin_ia32_psrlwi128:
1860      name = "psrlwi";
1861      ID = Intrinsic::x86_sse2_psrl_w;
1862      break;
1863    }
1864    llvm::Function *F = CGM.getIntrinsic(ID);
1865    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
1866  }
1867  case X86::BI__builtin_ia32_pslldi:
1868  case X86::BI__builtin_ia32_psllqi:
1869  case X86::BI__builtin_ia32_psllwi:
1870  case X86::BI__builtin_ia32_psradi:
1871  case X86::BI__builtin_ia32_psrawi:
1872  case X86::BI__builtin_ia32_psrldi:
1873  case X86::BI__builtin_ia32_psrlqi:
1874  case X86::BI__builtin_ia32_psrlwi: {
1875    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
1876    const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 1);
1877    Ops[1] = Builder.CreateBitCast(Ops[1], Ty, "bitcast");
1878    const char *name = 0;
1879    Intrinsic::ID ID = Intrinsic::not_intrinsic;
1880
1881    switch (BuiltinID) {
1882    default: assert(0 && "Unsupported shift intrinsic!");
1883    case X86::BI__builtin_ia32_pslldi:
1884      name = "pslldi";
1885      ID = Intrinsic::x86_mmx_psll_d;
1886      break;
1887    case X86::BI__builtin_ia32_psllqi:
1888      name = "psllqi";
1889      ID = Intrinsic::x86_mmx_psll_q;
1890      break;
1891    case X86::BI__builtin_ia32_psllwi:
1892      name = "psllwi";
1893      ID = Intrinsic::x86_mmx_psll_w;
1894      break;
1895    case X86::BI__builtin_ia32_psradi:
1896      name = "psradi";
1897      ID = Intrinsic::x86_mmx_psra_d;
1898      break;
1899    case X86::BI__builtin_ia32_psrawi:
1900      name = "psrawi";
1901      ID = Intrinsic::x86_mmx_psra_w;
1902      break;
1903    case X86::BI__builtin_ia32_psrldi:
1904      name = "psrldi";
1905      ID = Intrinsic::x86_mmx_psrl_d;
1906      break;
1907    case X86::BI__builtin_ia32_psrlqi:
1908      name = "psrlqi";
1909      ID = Intrinsic::x86_mmx_psrl_q;
1910      break;
1911    case X86::BI__builtin_ia32_psrlwi:
1912      name = "psrlwi";
1913      ID = Intrinsic::x86_mmx_psrl_w;
1914      break;
1915    }
1916    llvm::Function *F = CGM.getIntrinsic(ID);
1917    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
1918  }
1919  case X86::BI__builtin_ia32_cmpps: {
1920    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps);
1921    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpps");
1922  }
1923  case X86::BI__builtin_ia32_cmpss: {
1924    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ss);
1925    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpss");
1926  }
1927  case X86::BI__builtin_ia32_ldmxcsr: {
1928    const llvm::Type *PtrTy = llvm::Type::getInt8PtrTy(VMContext);
1929    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
1930    Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp");
1931    Builder.CreateStore(Ops[0], Tmp);
1932    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
1933                              Builder.CreateBitCast(Tmp, PtrTy));
1934  }
1935  case X86::BI__builtin_ia32_stmxcsr: {
1936    const llvm::Type *PtrTy = llvm::Type::getInt8PtrTy(VMContext);
1937    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
1938    Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp");
1939    One = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
1940                             Builder.CreateBitCast(Tmp, PtrTy));
1941    return Builder.CreateLoad(Tmp, "stmxcsr");
1942  }
1943  case X86::BI__builtin_ia32_cmppd: {
1944    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_pd);
1945    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmppd");
1946  }
1947  case X86::BI__builtin_ia32_cmpsd: {
1948    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_sd);
1949    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpsd");
1950  }
1951  case X86::BI__builtin_ia32_storehps:
1952  case X86::BI__builtin_ia32_storelps: {
1953    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
1954    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
1955
1956    // cast val to v2i64
1957    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
1958
1959    // extract element 0 (storelps) or element 1 (storehps)
1960    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
1961    llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
1962    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
1963
1964    // cast pointer to i64 & store
1965    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
1966    return Builder.CreateStore(Ops[1], Ops[0]);
1967  }
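  // palignr extracts a byte-aligned window from the concatenation of its two
  // inputs.  Depending on the immediate it is emitted as a shufflevector, as
  // a logical right shift of the destination operand, or as a constant zero;
  // the comments in each branch below give the exact ranges.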
1968  case X86::BI__builtin_ia32_palignr: {
1969    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
1970
1971    // If palignr is shifting the pair of input vectors by 8 bytes or less,
1972    // emit a shuffle instruction.
1973    if (shiftVal <= 8) {
1974      llvm::SmallVector<llvm::Constant*, 8> Indices;
1975      for (unsigned i = 0; i != 8; ++i)
1976        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
1977
1978      Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1979      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
1980    }
1981
1982    // If palignr is shifting the pair of input vectors more than 8 but less
1983    // than 16 bytes, emit a logical right shift of the destination.
1984    if (shiftVal < 16) {
1985      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
1986      const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);
1987
1988      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
1989      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);
1990
1991      // emit the shift as a 64-bit MMX logical right shift
1992      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
1993      return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
1994    }
1995
1996    // If palignr is shifting the pair of vectors by 16 bytes or more, emit zero.
1997    return llvm::Constant::getNullValue(ConvertType(E->getType()));
1998  }
1999  case X86::BI__builtin_ia32_palignr128: {
2000    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2001
2002    // If palignr is shifting the pair of input vectors by 16 bytes or less,
2003    // emit a shuffle instruction.
2004    if (shiftVal <= 16) {
2005      llvm::SmallVector<llvm::Constant*, 16> Indices;
2006      for (unsigned i = 0; i != 16; ++i)
2007        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
2008
2009      Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
2010      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2011    }
2012
2013    // If palignr is shifting the pair of input vectors more than 16 but less
2014    // than 32 bytes, emit a logical right shift of the destination.
2015    if (shiftVal < 32) {
2016      const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2017
2018      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2019      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
2020
2021      // emit the shift as a full-width SSE2 logical right shift
2022      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
2023      return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
2024    }
2025
2026    // If palignr is shifting the pair of vectors by 32 bytes or more, emit zero.
2027    return llvm::Constant::getNullValue(ConvertType(E->getType()));
2028  }
2029  }
2030}
2031
2032Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
2033                                           const CallExpr *E) {
2034  llvm::SmallVector<Value*, 4> Ops;
2035
2036  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
2037    Ops.push_back(EmitScalarExpr(E->getArg(i)));
2038
2039  Intrinsic::ID ID = Intrinsic::not_intrinsic;
2040
2041  switch (BuiltinID) {
2042  default: return 0;
2043
2044  // vec_ld, vec_lvsl, vec_lvsr
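  // These builtins take (offset, pointer).  The pointer is cast to i8* and
  // indexed by the offset with a GEP to form the effective address; that
  // address replaces the offset operand, the original pointer operand is
  // popped off, and the matching ppc.altivec load intrinsic is called with
  // the single address argument.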
2045  case PPC::BI__builtin_altivec_lvx:
2046  case PPC::BI__builtin_altivec_lvxl:
2047  case PPC::BI__builtin_altivec_lvebx:
2048  case PPC::BI__builtin_altivec_lvehx:
2049  case PPC::BI__builtin_altivec_lvewx:
2050  case PPC::BI__builtin_altivec_lvsl:
2051  case PPC::BI__builtin_altivec_lvsr:
2052  {
2053    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::Type::getInt8PtrTy(VMContext));
2054
2055    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0], "tmp");
2056    Ops.pop_back();
2057
2058    switch (BuiltinID) {
2059    default: assert(0 && "Unsupported ld/lvsl/lvsr intrinsic!");
2060    case PPC::BI__builtin_altivec_lvx:
2061      ID = Intrinsic::ppc_altivec_lvx;
2062      break;
2063    case PPC::BI__builtin_altivec_lvxl:
2064      ID = Intrinsic::ppc_altivec_lvxl;
2065      break;
2066    case PPC::BI__builtin_altivec_lvebx:
2067      ID = Intrinsic::ppc_altivec_lvebx;
2068      break;
2069    case PPC::BI__builtin_altivec_lvehx:
2070      ID = Intrinsic::ppc_altivec_lvehx;
2071      break;
2072    case PPC::BI__builtin_altivec_lvewx:
2073      ID = Intrinsic::ppc_altivec_lvewx;
2074      break;
2075    case PPC::BI__builtin_altivec_lvsl:
2076      ID = Intrinsic::ppc_altivec_lvsl;
2077      break;
2078    case PPC::BI__builtin_altivec_lvsr:
2079      ID = Intrinsic::ppc_altivec_lvsr;
2080      break;
2081    }
2082    llvm::Function *F = CGM.getIntrinsic(ID);
2083    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "");
2084  }
2085
2086  // vec_st
2087  case PPC::BI__builtin_altivec_stvx:
2088  case PPC::BI__builtin_altivec_stvxl:
2089  case PPC::BI__builtin_altivec_stvebx:
2090  case PPC::BI__builtin_altivec_stvehx:
2091  case PPC::BI__builtin_altivec_stvewx:
2092  {
2093    Ops[2] = Builder.CreateBitCast(Ops[2], llvm::Type::getInt8PtrTy(VMContext));
2094    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1], "tmp");
2095    Ops.pop_back();
2096
2097    switch (BuiltinID) {
2098    default: assert(0 && "Unsupported st intrinsic!");
2099    case PPC::BI__builtin_altivec_stvx:
2100      ID = Intrinsic::ppc_altivec_stvx;
2101      break;
2102    case PPC::BI__builtin_altivec_stvxl:
2103      ID = Intrinsic::ppc_altivec_stvxl;
2104      break;
2105    case PPC::BI__builtin_altivec_stvebx:
2106      ID = Intrinsic::ppc_altivec_stvebx;
2107      break;
2108    case PPC::BI__builtin_altivec_stvehx:
2109      ID = Intrinsic::ppc_altivec_stvehx;
2110      break;
2111    case PPC::BI__builtin_altivec_stvewx:
2112      ID = Intrinsic::ppc_altivec_stvewx;
2113      break;
2114    }
2115    llvm::Function *F = CGM.getIntrinsic(ID);
2116    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "");
2117  }
2118  }
2119  return 0;
2120}
2121