CGBuiltin.cpp revision bf3bc5d37eff88b578d5990f507a87db33f30a6e
1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This contains code to emit Builtin calls as LLVM code.
11//
12//===----------------------------------------------------------------------===//
13
14#include "CodeGenFunction.h"
15#include "CGObjCRuntime.h"
16#include "CodeGenModule.h"
17#include "TargetInfo.h"
18#include "clang/AST/ASTContext.h"
19#include "clang/AST/Decl.h"
20#include "clang/Basic/TargetBuiltins.h"
21#include "clang/Basic/TargetInfo.h"
22#include "clang/CodeGen/CGFunctionInfo.h"
23#include "llvm/IR/DataLayout.h"
24#include "llvm/IR/Intrinsics.h"
25
26using namespace clang;
27using namespace CodeGen;
28using namespace llvm;
29
30/// getBuiltinLibFunction - Given a builtin id for a function like
31/// "__builtin_fabsf", return a Function* for "fabsf".
32llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
33                                                  unsigned BuiltinID) {
34  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
35
36  // Get the name, skip over the __builtin_ prefix (if necessary).
37  StringRef Name;
38  GlobalDecl D(FD);
39
40  // If the builtin has been declared explicitly with an assembler label,
41  // use the mangled name. This differs from the plain label on platforms
42  // that prefix labels.
43  if (FD->hasAttr<AsmLabelAttr>())
44    Name = getMangledName(D);
45  else
46    Name = Context.BuiltinInfo.GetName(BuiltinID) + 10; // Skip the "__builtin_" prefix (10 chars).
47
48  llvm::FunctionType *Ty =
49    cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
50
51  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
52}
53
54/// Emit the conversions required to turn the given value into an
55/// integer of the given size.
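/// For example (illustrative), when a __sync_* builtin operates on a pointer,
/// the pointer operand is converted here with ptrtoint so the atomicrmw below
/// can be emitted on an integer of the same width.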
56static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
57                        QualType T, llvm::IntegerType *IntType) {
58  V = CGF.EmitToMemory(V, T);
59
60  if (V->getType()->isPointerTy())
61    return CGF.Builder.CreatePtrToInt(V, IntType);
62
63  assert(V->getType() == IntType);
64  return V;
65}
66
67static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
68                          QualType T, llvm::Type *ResultType) {
69  V = CGF.EmitFromMemory(V, T);
70
71  if (ResultType->isPointerTy())
72    return CGF.Builder.CreateIntToPtr(V, ResultType);
73
74  assert(V->getType() == ResultType);
75  return V;
76}
77
78/// Utility to insert an atomic instruction based on Intrinsic::ID
79/// and the expression node.
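/// For example (illustrative), a call such as __sync_fetch_and_add(&i, 1) on
/// an 'int' is lowered through this helper to roughly:
///   %old = atomicrmw add i32* %i, i32 1 seq_cst
/// and %old (the value before the operation) is returned.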
80static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
81                               llvm::AtomicRMWInst::BinOp Kind,
82                               const CallExpr *E) {
83  QualType T = E->getType();
84  assert(E->getArg(0)->getType()->isPointerType());
85  assert(CGF.getContext().hasSameUnqualifiedType(T,
86                                  E->getArg(0)->getType()->getPointeeType()));
87  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
88
89  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
90  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
91
92  llvm::IntegerType *IntType =
93    llvm::IntegerType::get(CGF.getLLVMContext(),
94                           CGF.getContext().getTypeSize(T));
95  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
96
97  llvm::Value *Args[2];
98  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
99  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
100  llvm::Type *ValueType = Args[1]->getType();
101  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
102
103  llvm::Value *Result =
104      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
105                                  llvm::SequentiallyConsistent);
106  Result = EmitFromInt(CGF, Result, T, ValueType);
107  return RValue::get(Result);
108}
109
110/// Utility to insert an atomic instruction based on Intrinsic::ID and
111/// the expression node, where the return value is the result of the
112/// operation.
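/// For example (illustrative), __sync_add_and_fetch(&i, 1) on an 'int'
/// becomes roughly:
///   %old = atomicrmw add i32* %i, i32 1 seq_cst
///   %new = add i32 %old, 1      ; the post-operation value is returned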
113static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
114                                   llvm::AtomicRMWInst::BinOp Kind,
115                                   const CallExpr *E,
116                                   Instruction::BinaryOps Op) {
117  QualType T = E->getType();
118  assert(E->getArg(0)->getType()->isPointerType());
119  assert(CGF.getContext().hasSameUnqualifiedType(T,
120                                  E->getArg(0)->getType()->getPointeeType()));
121  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
122
123  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
124  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
125
126  llvm::IntegerType *IntType =
127    llvm::IntegerType::get(CGF.getLLVMContext(),
128                           CGF.getContext().getTypeSize(T));
129  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
130
131  llvm::Value *Args[2];
132  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
133  llvm::Type *ValueType = Args[1]->getType();
134  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
135  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
136
137  llvm::Value *Result =
138      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
139                                  llvm::SequentiallyConsistent);
140  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
141  Result = EmitFromInt(CGF, Result, T, ValueType);
142  return RValue::get(Result);
143}
144
145/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
146/// which must be a scalar floating point type.
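/// For example (illustrative), a 'float' operand produces a call to fabsf;
/// the result feeds the isinf/isnormal/isfinite/fpclassify lowerings below.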
147static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
148  const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
149  assert(ValTyP && "isn't scalar fp type!");
150
151  StringRef FnName;
152  switch (ValTyP->getKind()) {
153  default: llvm_unreachable("Isn't a scalar fp type!");
154  case BuiltinType::Float:      FnName = "fabsf"; break;
155  case BuiltinType::Double:     FnName = "fabs"; break;
156  case BuiltinType::LongDouble: FnName = "fabsl"; break;
157  }
158
159  // The prototype is something that takes and returns whatever V's type is.
160  llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), V->getType(),
161                                                   false);
162  llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);
163
164  return CGF.EmitNounwindRuntimeCall(Fn, V, "abs");
165}
166
167static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
168                              const CallExpr *E, llvm::Value *calleeValue) {
169  return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E->getLocStart(),
170                      ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn);
171}
172
173/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
174/// depending on IntrinsicID.
175///
176/// \arg CGF The current codegen function.
177/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
178/// \arg X The first argument to the llvm.*.with.overflow.*.
179/// \arg Y The second argument to the llvm.*.with.overflow.*.
180/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
181/// \returns The result (i.e. sum/product) returned by the intrinsic.
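/// For example (illustrative), with 32-bit unsigned operands this emits
/// roughly:
///   %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
/// where element 0 is the sum and element 1 is the carry/overflow bit.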
182static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
183                                          const llvm::Intrinsic::ID IntrinsicID,
184                                          llvm::Value *X, llvm::Value *Y,
185                                          llvm::Value *&Carry) {
186  // Make sure we have integers of the same width.
187  assert(X->getType() == Y->getType() &&
188         "Arguments must be the same type. (Did you forget to make sure both "
189         "arguments have the same integer width?)");
190
191  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
192  llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y);
193  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
194  return CGF.Builder.CreateExtractValue(Tmp, 0);
195}
196
197RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
198                                        unsigned BuiltinID, const CallExpr *E) {
199  // See if we can constant fold this builtin.  If so, don't emit it at all.
200  Expr::EvalResult Result;
201  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
202      !Result.hasSideEffects()) {
203    if (Result.Val.isInt())
204      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
205                                                Result.Val.getInt()));
206    if (Result.Val.isFloat())
207      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
208                                               Result.Val.getFloat()));
209  }
210
211  switch (BuiltinID) {
212  default: break;  // Handle intrinsics and libm functions below.
213  case Builtin::BI__builtin___CFStringMakeConstantString:
214  case Builtin::BI__builtin___NSStringMakeConstantString:
215    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
216  case Builtin::BI__builtin_stdarg_start:
217  case Builtin::BI__builtin_va_start:
218  case Builtin::BI__builtin_va_end: {
219    Value *ArgValue = EmitVAListRef(E->getArg(0));
220    llvm::Type *DestType = Int8PtrTy;
221    if (ArgValue->getType() != DestType)
222      ArgValue = Builder.CreateBitCast(ArgValue, DestType,
223                                       ArgValue->getName().data());
224
225    Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
226      Intrinsic::vaend : Intrinsic::vastart;
227    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
228  }
229  case Builtin::BI__builtin_va_copy: {
230    Value *DstPtr = EmitVAListRef(E->getArg(0));
231    Value *SrcPtr = EmitVAListRef(E->getArg(1));
232
233    llvm::Type *Type = Int8PtrTy;
234
235    DstPtr = Builder.CreateBitCast(DstPtr, Type);
236    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
237    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
238                                           DstPtr, SrcPtr));
239  }
240  case Builtin::BI__builtin_abs:
241  case Builtin::BI__builtin_labs:
242  case Builtin::BI__builtin_llabs: {
243    Value *ArgValue = EmitScalarExpr(E->getArg(0));
244
245    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
246    Value *CmpResult =
247    Builder.CreateICmpSGE(ArgValue,
248                          llvm::Constant::getNullValue(ArgValue->getType()),
249                                                            "abscond");
250    Value *Result =
251      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
252
253    return RValue::get(Result);
254  }
255
256  case Builtin::BI__builtin_conj:
257  case Builtin::BI__builtin_conjf:
258  case Builtin::BI__builtin_conjl: {
259    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
260    Value *Real = ComplexVal.first;
261    Value *Imag = ComplexVal.second;
262    Value *Zero =
263      Imag->getType()->isFPOrFPVectorTy()
264        ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
265        : llvm::Constant::getNullValue(Imag->getType());
266
267    Imag = Builder.CreateFSub(Zero, Imag, "sub");
268    return RValue::getComplex(std::make_pair(Real, Imag));
269  }
270  case Builtin::BI__builtin_creal:
271  case Builtin::BI__builtin_crealf:
272  case Builtin::BI__builtin_creall:
273  case Builtin::BIcreal:
274  case Builtin::BIcrealf:
275  case Builtin::BIcreall: {
276    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
277    return RValue::get(ComplexVal.first);
278  }
279
280  case Builtin::BI__builtin_cimag:
281  case Builtin::BI__builtin_cimagf:
282  case Builtin::BI__builtin_cimagl:
283  case Builtin::BIcimag:
284  case Builtin::BIcimagf:
285  case Builtin::BIcimagl: {
286    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
287    return RValue::get(ComplexVal.second);
288  }
289
290  case Builtin::BI__builtin_ctzs:
291  case Builtin::BI__builtin_ctz:
292  case Builtin::BI__builtin_ctzl:
293  case Builtin::BI__builtin_ctzll: {
294    Value *ArgValue = EmitScalarExpr(E->getArg(0));
295
296    llvm::Type *ArgType = ArgValue->getType();
297    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
298
299    llvm::Type *ResultType = ConvertType(E->getType());
300    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
301    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
302    if (Result->getType() != ResultType)
303      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
304                                     "cast");
305    return RValue::get(Result);
306  }
307  case Builtin::BI__builtin_clzs:
308  case Builtin::BI__builtin_clz:
309  case Builtin::BI__builtin_clzl:
310  case Builtin::BI__builtin_clzll: {
311    Value *ArgValue = EmitScalarExpr(E->getArg(0));
312
313    llvm::Type *ArgType = ArgValue->getType();
314    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
315
316    llvm::Type *ResultType = ConvertType(E->getType());
317    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
318    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
319    if (Result->getType() != ResultType)
320      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
321                                     "cast");
322    return RValue::get(Result);
323  }
324  case Builtin::BI__builtin_ffs:
325  case Builtin::BI__builtin_ffsl:
326  case Builtin::BI__builtin_ffsll: {
327    // ffs(x) -> x ? cttz(x) + 1 : 0
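    // e.g. ffs(0x8) == 4 (lowest set bit is bit 3, reported 1-based), and
    // ffs(0) == 0.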
328    Value *ArgValue = EmitScalarExpr(E->getArg(0));
329
330    llvm::Type *ArgType = ArgValue->getType();
331    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
332
333    llvm::Type *ResultType = ConvertType(E->getType());
334    Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue,
335                                                       Builder.getTrue()),
336                                   llvm::ConstantInt::get(ArgType, 1));
337    Value *Zero = llvm::Constant::getNullValue(ArgType);
338    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
339    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
340    if (Result->getType() != ResultType)
341      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
342                                     "cast");
343    return RValue::get(Result);
344  }
345  case Builtin::BI__builtin_parity:
346  case Builtin::BI__builtin_parityl:
347  case Builtin::BI__builtin_parityll: {
348    // parity(x) -> ctpop(x) & 1
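    // e.g. parity(0b1011) == 1, since three bits are set.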
349    Value *ArgValue = EmitScalarExpr(E->getArg(0));
350
351    llvm::Type *ArgType = ArgValue->getType();
352    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
353
354    llvm::Type *ResultType = ConvertType(E->getType());
355    Value *Tmp = Builder.CreateCall(F, ArgValue);
356    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
357    if (Result->getType() != ResultType)
358      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
359                                     "cast");
360    return RValue::get(Result);
361  }
362  case Builtin::BI__builtin_popcount:
363  case Builtin::BI__builtin_popcountl:
364  case Builtin::BI__builtin_popcountll: {
365    Value *ArgValue = EmitScalarExpr(E->getArg(0));
366
367    llvm::Type *ArgType = ArgValue->getType();
368    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
369
370    llvm::Type *ResultType = ConvertType(E->getType());
371    Value *Result = Builder.CreateCall(F, ArgValue);
372    if (Result->getType() != ResultType)
373      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
374                                     "cast");
375    return RValue::get(Result);
376  }
377  case Builtin::BI__builtin_expect: {
378    Value *ArgValue = EmitScalarExpr(E->getArg(0));
379    llvm::Type *ArgType = ArgValue->getType();
380
381    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
382    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
383
384    Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue,
385                                        "expval");
386    return RValue::get(Result);
387  }
388  case Builtin::BI__builtin_bswap16:
389  case Builtin::BI__builtin_bswap32:
390  case Builtin::BI__builtin_bswap64: {
391    Value *ArgValue = EmitScalarExpr(E->getArg(0));
392    llvm::Type *ArgType = ArgValue->getType();
393    Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
394    return RValue::get(Builder.CreateCall(F, ArgValue));
395  }
396  case Builtin::BI__builtin_object_size: {
397    // We rely on constant folding to deal with expressions with side effects.
398    assert(!E->getArg(0)->HasSideEffects(getContext()) &&
399           "should have been constant folded");
400
401    // We pass this builtin onto the optimizer so that it can
402    // figure out the object size in more complex cases.
403    llvm::Type *ResType = ConvertType(E->getType());
404
405    // LLVM only supports 0 and 2; make sure we pass that along as a
406    // boolean.
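    // (Types 0 and 1 map to i1 false, i.e. an upper bound; types 2 and 3 map
    // to i1 true, i.e. a minimum / lower bound.)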
407    Value *Ty = EmitScalarExpr(E->getArg(1));
408    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
409    assert(CI);
410    uint64_t val = CI->getZExtValue();
411    CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);
412    // FIXME: Get right address space.
413    llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) };
414    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
415    return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI));
416  }
417  case Builtin::BI__builtin_prefetch: {
418    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
419    // FIXME: Technically these constants should be of type 'int', yes?
420    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
421      llvm::ConstantInt::get(Int32Ty, 0);
422    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
423      llvm::ConstantInt::get(Int32Ty, 3);
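    // llvm.prefetch takes (address, rw, locality, cache type): rw is 0 for a
    // read or 1 for a write, locality ranges from 0 (none) to 3 (high), and
    // the constant 1 below selects the data cache.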
424    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
425    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
426    return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data));
427  }
428  case Builtin::BI__builtin_readcyclecounter: {
429    Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
430    return RValue::get(Builder.CreateCall(F));
431  }
432  case Builtin::BI__builtin_trap: {
433    Value *F = CGM.getIntrinsic(Intrinsic::trap);
434    return RValue::get(Builder.CreateCall(F));
435  }
436  case Builtin::BI__debugbreak: {
437    Value *F = CGM.getIntrinsic(Intrinsic::debugtrap);
438    return RValue::get(Builder.CreateCall(F));
439  }
440  case Builtin::BI__builtin_unreachable: {
441    if (SanOpts->Unreachable)
442      EmitCheck(Builder.getFalse(), "builtin_unreachable",
443                EmitCheckSourceLocation(E->getExprLoc()),
444                ArrayRef<llvm::Value *>(), CRK_Unrecoverable);
445    else
446      Builder.CreateUnreachable();
447
448    // We do need to preserve an insertion point.
449    EmitBlock(createBasicBlock("unreachable.cont"));
450
451    return RValue::get(0);
452  }
453
454  case Builtin::BI__builtin_powi:
455  case Builtin::BI__builtin_powif:
456  case Builtin::BI__builtin_powil: {
457    Value *Base = EmitScalarExpr(E->getArg(0));
458    Value *Exponent = EmitScalarExpr(E->getArg(1));
459    llvm::Type *ArgType = Base->getType();
460    Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
461    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
462  }
463
464  case Builtin::BI__builtin_isgreater:
465  case Builtin::BI__builtin_isgreaterequal:
466  case Builtin::BI__builtin_isless:
467  case Builtin::BI__builtin_islessequal:
468  case Builtin::BI__builtin_islessgreater:
469  case Builtin::BI__builtin_isunordered: {
470    // Ordered comparisons: we know the arguments to these are matching scalar
471    // floating point values.
472    Value *LHS = EmitScalarExpr(E->getArg(0));
473    Value *RHS = EmitScalarExpr(E->getArg(1));
474
475    switch (BuiltinID) {
476    default: llvm_unreachable("Unknown ordered comparison");
477    case Builtin::BI__builtin_isgreater:
478      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
479      break;
480    case Builtin::BI__builtin_isgreaterequal:
481      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
482      break;
483    case Builtin::BI__builtin_isless:
484      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
485      break;
486    case Builtin::BI__builtin_islessequal:
487      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
488      break;
489    case Builtin::BI__builtin_islessgreater:
490      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
491      break;
492    case Builtin::BI__builtin_isunordered:
493      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
494      break;
495    }
496    // ZExt bool to int type.
497    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
498  }
499  case Builtin::BI__builtin_isnan: {
500    Value *V = EmitScalarExpr(E->getArg(0));
501    V = Builder.CreateFCmpUNO(V, V, "cmp");
502    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
503  }
504
505  case Builtin::BI__builtin_isinf: {
506    // isinf(x) --> fabs(x) == infinity
507    Value *V = EmitScalarExpr(E->getArg(0));
508    V = EmitFAbs(*this, V, E->getArg(0)->getType());
509
510    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
511    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
512  }
513
514  // TODO: BI__builtin_isinf_sign
515  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0
516
517  case Builtin::BI__builtin_isnormal: {
518    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
519    Value *V = EmitScalarExpr(E->getArg(0));
520    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
521
522    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
523    Value *IsLessThanInf =
524      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
525    APFloat Smallest = APFloat::getSmallestNormalized(
526                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
527    Value *IsNormal =
528      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
529                            "isnormal");
530    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
531    V = Builder.CreateAnd(V, IsNormal, "and");
532    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
533  }
534
535  case Builtin::BI__builtin_isfinite: {
536    // isfinite(x) --> x == x && fabs(x) != infinity;
537    Value *V = EmitScalarExpr(E->getArg(0));
538    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
539
540    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
541    Value *IsNotInf =
542      Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
543
544    V = Builder.CreateAnd(Eq, IsNotInf, "and");
545    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
546  }
547
548  case Builtin::BI__builtin_fpclassify: {
549    Value *V = EmitScalarExpr(E->getArg(5));
550    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
551
552    // Create Result
553    BasicBlock *Begin = Builder.GetInsertBlock();
554    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
555    Builder.SetInsertPoint(End);
556    PHINode *Result =
557      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
558                        "fpclassify_result");
559
560    // if (V==0) return FP_ZERO
561    Builder.SetInsertPoint(Begin);
562    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
563                                          "iszero");
564    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
565    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
566    Builder.CreateCondBr(IsZero, End, NotZero);
567    Result->addIncoming(ZeroLiteral, Begin);
568
569    // if (V != V) return FP_NAN
570    Builder.SetInsertPoint(NotZero);
571    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
572    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
573    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
574    Builder.CreateCondBr(IsNan, End, NotNan);
575    Result->addIncoming(NanLiteral, NotZero);
576
577    // if (fabs(V) == infinity) return FP_INFINITY
578    Builder.SetInsertPoint(NotNan);
579    Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
580    Value *IsInf =
581      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
582                            "isinf");
583    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
584    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
585    Builder.CreateCondBr(IsInf, End, NotInf);
586    Result->addIncoming(InfLiteral, NotNan);
587
588    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
589    Builder.SetInsertPoint(NotInf);
590    APFloat Smallest = APFloat::getSmallestNormalized(
591        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
592    Value *IsNormal =
593      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
594                            "isnormal");
595    Value *NormalResult =
596      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
597                           EmitScalarExpr(E->getArg(3)));
598    Builder.CreateBr(End);
599    Result->addIncoming(NormalResult, NotInf);
600
601    // return Result
602    Builder.SetInsertPoint(End);
603    return RValue::get(Result);
604  }
605
606  case Builtin::BIalloca:
607  case Builtin::BI__builtin_alloca: {
608    Value *Size = EmitScalarExpr(E->getArg(0));
609    return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
610  }
611  case Builtin::BIbzero:
612  case Builtin::BI__builtin_bzero: {
613    std::pair<llvm::Value*, unsigned> Dest =
614        EmitPointerWithAlignment(E->getArg(0));
615    Value *SizeVal = EmitScalarExpr(E->getArg(1));
616    Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal,
617                         Dest.second, false);
618    return RValue::get(Dest.first);
619  }
620  case Builtin::BImemcpy:
621  case Builtin::BI__builtin_memcpy: {
622    std::pair<llvm::Value*, unsigned> Dest =
623        EmitPointerWithAlignment(E->getArg(0));
624    std::pair<llvm::Value*, unsigned> Src =
625        EmitPointerWithAlignment(E->getArg(1));
626    Value *SizeVal = EmitScalarExpr(E->getArg(2));
627    unsigned Align = std::min(Dest.second, Src.second);
628    Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false);
629    return RValue::get(Dest.first);
630  }
631
632  case Builtin::BI__builtin___memcpy_chk: {
633    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
634    llvm::APSInt Size, DstSize;
635    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
636        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
637      break;
638    if (Size.ugt(DstSize))
639      break;
640    std::pair<llvm::Value*, unsigned> Dest =
641        EmitPointerWithAlignment(E->getArg(0));
642    std::pair<llvm::Value*, unsigned> Src =
643        EmitPointerWithAlignment(E->getArg(1));
644    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
645    unsigned Align = std::min(Dest.second, Src.second);
646    Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false);
647    return RValue::get(Dest.first);
648  }
649
650  case Builtin::BI__builtin_objc_memmove_collectable: {
651    Value *Address = EmitScalarExpr(E->getArg(0));
652    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
653    Value *SizeVal = EmitScalarExpr(E->getArg(2));
654    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
655                                                  Address, SrcAddr, SizeVal);
656    return RValue::get(Address);
657  }
658
659  case Builtin::BI__builtin___memmove_chk: {
660    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
661    llvm::APSInt Size, DstSize;
662    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
663        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
664      break;
665    if (Size.ugt(DstSize))
666      break;
667    std::pair<llvm::Value*, unsigned> Dest =
668        EmitPointerWithAlignment(E->getArg(0));
669    std::pair<llvm::Value*, unsigned> Src =
670        EmitPointerWithAlignment(E->getArg(1));
671    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
672    unsigned Align = std::min(Dest.second, Src.second);
673    Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
674    return RValue::get(Dest.first);
675  }
676
677  case Builtin::BImemmove:
678  case Builtin::BI__builtin_memmove: {
679    std::pair<llvm::Value*, unsigned> Dest =
680        EmitPointerWithAlignment(E->getArg(0));
681    std::pair<llvm::Value*, unsigned> Src =
682        EmitPointerWithAlignment(E->getArg(1));
683    Value *SizeVal = EmitScalarExpr(E->getArg(2));
684    unsigned Align = std::min(Dest.second, Src.second);
685    Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
686    return RValue::get(Dest.first);
687  }
688  case Builtin::BImemset:
689  case Builtin::BI__builtin_memset: {
690    std::pair<llvm::Value*, unsigned> Dest =
691        EmitPointerWithAlignment(E->getArg(0));
692    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
693                                         Builder.getInt8Ty());
694    Value *SizeVal = EmitScalarExpr(E->getArg(2));
695    Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
696    return RValue::get(Dest.first);
697  }
698  case Builtin::BI__builtin___memset_chk: {
699    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
700    llvm::APSInt Size, DstSize;
701    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
702        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
703      break;
704    if (Size.ugt(DstSize))
705      break;
706    std::pair<llvm::Value*, unsigned> Dest =
707        EmitPointerWithAlignment(E->getArg(0));
708    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
709                                         Builder.getInt8Ty());
710    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
711    Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
712    return RValue::get(Dest.first);
713  }
714  case Builtin::BI__builtin_dwarf_cfa: {
715    // The offset in bytes from the first argument to the CFA.
716    //
717    // Why on earth is this in the frontend?  Is there any reason at
718    // all that the backend can't reasonably determine this while
719    // lowering llvm.eh.dwarf.cfa()?
720    //
721    // TODO: If there's a satisfactory reason, add a target hook for
722    // this instead of hard-coding 0, which is correct for most targets.
723    int32_t Offset = 0;
724
725    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
726    return RValue::get(Builder.CreateCall(F,
727                                      llvm::ConstantInt::get(Int32Ty, Offset)));
728  }
729  case Builtin::BI__builtin_return_address: {
730    Value *Depth = EmitScalarExpr(E->getArg(0));
731    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
732    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
733    return RValue::get(Builder.CreateCall(F, Depth));
734  }
735  case Builtin::BI__builtin_frame_address: {
736    Value *Depth = EmitScalarExpr(E->getArg(0));
737    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
738    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
739    return RValue::get(Builder.CreateCall(F, Depth));
740  }
741  case Builtin::BI__builtin_extract_return_addr: {
742    Value *Address = EmitScalarExpr(E->getArg(0));
743    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
744    return RValue::get(Result);
745  }
746  case Builtin::BI__builtin_frob_return_addr: {
747    Value *Address = EmitScalarExpr(E->getArg(0));
748    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
749    return RValue::get(Result);
750  }
751  case Builtin::BI__builtin_dwarf_sp_column: {
752    llvm::IntegerType *Ty
753      = cast<llvm::IntegerType>(ConvertType(E->getType()));
754    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
755    if (Column == -1) {
756      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
757      return RValue::get(llvm::UndefValue::get(Ty));
758    }
759    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
760  }
761  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
762    Value *Address = EmitScalarExpr(E->getArg(0));
763    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
764      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
765    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
766  }
767  case Builtin::BI__builtin_eh_return: {
768    Value *Int = EmitScalarExpr(E->getArg(0));
769    Value *Ptr = EmitScalarExpr(E->getArg(1));
770
771    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
772    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
773           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
774    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
775                                  ? Intrinsic::eh_return_i32
776                                  : Intrinsic::eh_return_i64);
777    Builder.CreateCall2(F, Int, Ptr);
778    Builder.CreateUnreachable();
779
780    // We do need to preserve an insertion point.
781    EmitBlock(createBasicBlock("builtin_eh_return.cont"));
782
783    return RValue::get(0);
784  }
785  case Builtin::BI__builtin_unwind_init: {
786    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
787    return RValue::get(Builder.CreateCall(F));
788  }
789  case Builtin::BI__builtin_extend_pointer: {
790    // Extends a pointer to the size of an _Unwind_Word, which is
791    // uint64_t on all platforms.  Generally this gets poked into a
792    // register and eventually used as an address, so if the
793    // addressing registers are wider than pointers and the platform
794    // doesn't implicitly ignore high-order bits when doing
795    // addressing, we need to make sure we zext / sext based on
796    // the platform's expectations.
797    //
798    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
799
800    // Cast the pointer to intptr_t.
801    Value *Ptr = EmitScalarExpr(E->getArg(0));
802    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
803
804    // If that's 64 bits, we're done.
805    if (IntPtrTy->getBitWidth() == 64)
806      return RValue::get(Result);
807
808    // Otherwise, ask the target hooks what to do.
809    if (getTargetHooks().extendPointerWithSExt())
810      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
811    else
812      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
813  }
814  case Builtin::BI__builtin_setjmp: {
815    // Buffer is a void**.
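    // (Layout used below, for illustration: slot 0 holds the frame address
    // and slot 2 the stack pointer; the remaining slots are managed by the
    // EH setjmp/longjmp intrinsics themselves.)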
816    Value *Buf = EmitScalarExpr(E->getArg(0));
817
818    // Store the frame pointer to the setjmp buffer.
819    Value *FrameAddr =
820      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
821                         ConstantInt::get(Int32Ty, 0));
822    Builder.CreateStore(FrameAddr, Buf);
823
824    // Store the stack pointer to the setjmp buffer.
825    Value *StackAddr =
826      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
827    Value *StackSaveSlot =
828      Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
829    Builder.CreateStore(StackAddr, StackSaveSlot);
830
831    // Call LLVM's EH setjmp, which is lightweight.
832    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
833    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
834    return RValue::get(Builder.CreateCall(F, Buf));
835  }
836  case Builtin::BI__builtin_longjmp: {
837    Value *Buf = EmitScalarExpr(E->getArg(0));
838    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
839
840    // Call LLVM's EH longjmp, which is lightweight.
841    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
842
843    // longjmp doesn't return; mark this as unreachable.
844    Builder.CreateUnreachable();
845
846    // We do need to preserve an insertion point.
847    EmitBlock(createBasicBlock("longjmp.cont"));
848
849    return RValue::get(0);
850  }
851  case Builtin::BI__sync_fetch_and_add:
852  case Builtin::BI__sync_fetch_and_sub:
853  case Builtin::BI__sync_fetch_and_or:
854  case Builtin::BI__sync_fetch_and_and:
855  case Builtin::BI__sync_fetch_and_xor:
856  case Builtin::BI__sync_add_and_fetch:
857  case Builtin::BI__sync_sub_and_fetch:
858  case Builtin::BI__sync_and_and_fetch:
859  case Builtin::BI__sync_or_and_fetch:
860  case Builtin::BI__sync_xor_and_fetch:
861  case Builtin::BI__sync_val_compare_and_swap:
862  case Builtin::BI__sync_bool_compare_and_swap:
863  case Builtin::BI__sync_lock_test_and_set:
864  case Builtin::BI__sync_lock_release:
865  case Builtin::BI__sync_swap:
866    llvm_unreachable("Shouldn't make it through sema");
867  case Builtin::BI__sync_fetch_and_add_1:
868  case Builtin::BI__sync_fetch_and_add_2:
869  case Builtin::BI__sync_fetch_and_add_4:
870  case Builtin::BI__sync_fetch_and_add_8:
871  case Builtin::BI__sync_fetch_and_add_16:
872    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
873  case Builtin::BI__sync_fetch_and_sub_1:
874  case Builtin::BI__sync_fetch_and_sub_2:
875  case Builtin::BI__sync_fetch_and_sub_4:
876  case Builtin::BI__sync_fetch_and_sub_8:
877  case Builtin::BI__sync_fetch_and_sub_16:
878    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
879  case Builtin::BI__sync_fetch_and_or_1:
880  case Builtin::BI__sync_fetch_and_or_2:
881  case Builtin::BI__sync_fetch_and_or_4:
882  case Builtin::BI__sync_fetch_and_or_8:
883  case Builtin::BI__sync_fetch_and_or_16:
884    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
885  case Builtin::BI__sync_fetch_and_and_1:
886  case Builtin::BI__sync_fetch_and_and_2:
887  case Builtin::BI__sync_fetch_and_and_4:
888  case Builtin::BI__sync_fetch_and_and_8:
889  case Builtin::BI__sync_fetch_and_and_16:
890    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
891  case Builtin::BI__sync_fetch_and_xor_1:
892  case Builtin::BI__sync_fetch_and_xor_2:
893  case Builtin::BI__sync_fetch_and_xor_4:
894  case Builtin::BI__sync_fetch_and_xor_8:
895  case Builtin::BI__sync_fetch_and_xor_16:
896    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
897
898  // Clang extensions: not overloaded yet.
899  case Builtin::BI__sync_fetch_and_min:
900    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
901  case Builtin::BI__sync_fetch_and_max:
902    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
903  case Builtin::BI__sync_fetch_and_umin:
904    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
905  case Builtin::BI__sync_fetch_and_umax:
906    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
907
908  case Builtin::BI__sync_add_and_fetch_1:
909  case Builtin::BI__sync_add_and_fetch_2:
910  case Builtin::BI__sync_add_and_fetch_4:
911  case Builtin::BI__sync_add_and_fetch_8:
912  case Builtin::BI__sync_add_and_fetch_16:
913    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
914                                llvm::Instruction::Add);
915  case Builtin::BI__sync_sub_and_fetch_1:
916  case Builtin::BI__sync_sub_and_fetch_2:
917  case Builtin::BI__sync_sub_and_fetch_4:
918  case Builtin::BI__sync_sub_and_fetch_8:
919  case Builtin::BI__sync_sub_and_fetch_16:
920    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
921                                llvm::Instruction::Sub);
922  case Builtin::BI__sync_and_and_fetch_1:
923  case Builtin::BI__sync_and_and_fetch_2:
924  case Builtin::BI__sync_and_and_fetch_4:
925  case Builtin::BI__sync_and_and_fetch_8:
926  case Builtin::BI__sync_and_and_fetch_16:
927    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
928                                llvm::Instruction::And);
929  case Builtin::BI__sync_or_and_fetch_1:
930  case Builtin::BI__sync_or_and_fetch_2:
931  case Builtin::BI__sync_or_and_fetch_4:
932  case Builtin::BI__sync_or_and_fetch_8:
933  case Builtin::BI__sync_or_and_fetch_16:
934    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
935                                llvm::Instruction::Or);
936  case Builtin::BI__sync_xor_and_fetch_1:
937  case Builtin::BI__sync_xor_and_fetch_2:
938  case Builtin::BI__sync_xor_and_fetch_4:
939  case Builtin::BI__sync_xor_and_fetch_8:
940  case Builtin::BI__sync_xor_and_fetch_16:
941    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
942                                llvm::Instruction::Xor);
943
944  case Builtin::BI__sync_val_compare_and_swap_1:
945  case Builtin::BI__sync_val_compare_and_swap_2:
946  case Builtin::BI__sync_val_compare_and_swap_4:
947  case Builtin::BI__sync_val_compare_and_swap_8:
948  case Builtin::BI__sync_val_compare_and_swap_16: {
949    QualType T = E->getType();
950    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
951    unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
952
953    llvm::IntegerType *IntType =
954      llvm::IntegerType::get(getLLVMContext(),
955                             getContext().getTypeSize(T));
956    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
957
958    Value *Args[3];
959    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
960    Args[1] = EmitScalarExpr(E->getArg(1));
961    llvm::Type *ValueType = Args[1]->getType();
962    Args[1] = EmitToInt(*this, Args[1], T, IntType);
963    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
964
965    Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
966                                                llvm::SequentiallyConsistent);
967    Result = EmitFromInt(*this, Result, T, ValueType);
968    return RValue::get(Result);
969  }
970
971  case Builtin::BI__sync_bool_compare_and_swap_1:
972  case Builtin::BI__sync_bool_compare_and_swap_2:
973  case Builtin::BI__sync_bool_compare_and_swap_4:
974  case Builtin::BI__sync_bool_compare_and_swap_8:
975  case Builtin::BI__sync_bool_compare_and_swap_16: {
976    QualType T = E->getArg(1)->getType();
977    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
978    unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
979
980    llvm::IntegerType *IntType =
981      llvm::IntegerType::get(getLLVMContext(),
982                             getContext().getTypeSize(T));
983    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
984
985    Value *Args[3];
986    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
987    Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
988    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
989
990    Value *OldVal = Args[1];
991    Value *PrevVal = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
992                                                 llvm::SequentiallyConsistent);
993    Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
994    // zext bool to int.
995    Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
996    return RValue::get(Result);
997  }
998
999  case Builtin::BI__sync_swap_1:
1000  case Builtin::BI__sync_swap_2:
1001  case Builtin::BI__sync_swap_4:
1002  case Builtin::BI__sync_swap_8:
1003  case Builtin::BI__sync_swap_16:
1004    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1005
1006  case Builtin::BI__sync_lock_test_and_set_1:
1007  case Builtin::BI__sync_lock_test_and_set_2:
1008  case Builtin::BI__sync_lock_test_and_set_4:
1009  case Builtin::BI__sync_lock_test_and_set_8:
1010  case Builtin::BI__sync_lock_test_and_set_16:
1011    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1012
1013  case Builtin::BI__sync_lock_release_1:
1014  case Builtin::BI__sync_lock_release_2:
1015  case Builtin::BI__sync_lock_release_4:
1016  case Builtin::BI__sync_lock_release_8:
1017  case Builtin::BI__sync_lock_release_16: {
1018    Value *Ptr = EmitScalarExpr(E->getArg(0));
1019    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1020    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1021    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1022                                             StoreSize.getQuantity() * 8);
1023    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1024    llvm::StoreInst *Store =
1025      Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
1026    Store->setAlignment(StoreSize.getQuantity());
1027    Store->setAtomic(llvm::Release);
1028    return RValue::get(0);
1029  }
1030
1031  case Builtin::BI__sync_synchronize: {
1032    // We assume this is supposed to correspond to a C++0x-style
1033    // sequentially-consistent fence (i.e. this is only usable for
1034    // synchronization, not device I/O or anything like that). This intrinsic
1035    // is really badly designed in the sense that in theory, there isn't
1036    // any way to safely use it... but in practice, it mostly works
1037    // to use it with non-atomic loads and stores to get acquire/release
1038    // semantics.
1039    Builder.CreateFence(llvm::SequentiallyConsistent);
1040    return RValue::get(0);
1041  }
1042
1043  case Builtin::BI__c11_atomic_is_lock_free:
1044  case Builtin::BI__atomic_is_lock_free: {
1045    // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1046    // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1047    // _Atomic(T) is always properly-aligned.
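    // For example (illustrative), __c11_atomic_is_lock_free(sizeof(_Atomic(int)))
    // is emitted as a call to __atomic_is_lock_free(sizeof(_Atomic(int)), NULL),
    // letting the runtime decide based on size alone, while the non-__c11 form
    // also forwards the object pointer.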
1048    const char *LibCallName = "__atomic_is_lock_free";
1049    CallArgList Args;
1050    Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1051             getContext().getSizeType());
1052    if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1053      Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1054               getContext().VoidPtrTy);
1055    else
1056      Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1057               getContext().VoidPtrTy);
1058    const CGFunctionInfo &FuncInfo =
1059        CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
1060                                               FunctionType::ExtInfo(),
1061                                               RequiredArgs::All);
1062    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1063    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1064    return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
1065  }
1066
1067  case Builtin::BI__atomic_test_and_set: {
1068    // Look at the argument type to determine whether this is a volatile
1069    // operation. The parameter type is always volatile.
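    // For example (illustrative), __atomic_test_and_set(p, __ATOMIC_SEQ_CST)
    // becomes roughly:
    //   %old = atomicrmw xchg i8* %p, i8 1 seq_cst
    // and the result is '%old != 0'.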
1070    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1071    bool Volatile =
1072        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1073
1074    Value *Ptr = EmitScalarExpr(E->getArg(0));
1075    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1076    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1077    Value *NewVal = Builder.getInt8(1);
1078    Value *Order = EmitScalarExpr(E->getArg(1));
1079    if (isa<llvm::ConstantInt>(Order)) {
1080      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1081      AtomicRMWInst *Result = 0;
1082      switch (ord) {
1083      case 0:  // memory_order_relaxed
1084      default: // invalid order
1085        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1086                                         Ptr, NewVal,
1087                                         llvm::Monotonic);
1088        break;
1089      case 1:  // memory_order_consume
1090      case 2:  // memory_order_acquire
1091        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1092                                         Ptr, NewVal,
1093                                         llvm::Acquire);
1094        break;
1095      case 3:  // memory_order_release
1096        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1097                                         Ptr, NewVal,
1098                                         llvm::Release);
1099        break;
1100      case 4:  // memory_order_acq_rel
1101        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1102                                         Ptr, NewVal,
1103                                         llvm::AcquireRelease);
1104        break;
1105      case 5:  // memory_order_seq_cst
1106        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1107                                         Ptr, NewVal,
1108                                         llvm::SequentiallyConsistent);
1109        break;
1110      }
1111      Result->setVolatile(Volatile);
1112      return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1113    }
1114
1115    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1116
1117    llvm::BasicBlock *BBs[5] = {
1118      createBasicBlock("monotonic", CurFn),
1119      createBasicBlock("acquire", CurFn),
1120      createBasicBlock("release", CurFn),
1121      createBasicBlock("acqrel", CurFn),
1122      createBasicBlock("seqcst", CurFn)
1123    };
1124    llvm::AtomicOrdering Orders[5] = {
1125      llvm::Monotonic, llvm::Acquire, llvm::Release,
1126      llvm::AcquireRelease, llvm::SequentiallyConsistent
1127    };
1128
1129    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1130    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1131
1132    Builder.SetInsertPoint(ContBB);
1133    PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1134
1135    for (unsigned i = 0; i < 5; ++i) {
1136      Builder.SetInsertPoint(BBs[i]);
1137      AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1138                                                   Ptr, NewVal, Orders[i]);
1139      RMW->setVolatile(Volatile);
1140      Result->addIncoming(RMW, BBs[i]);
1141      Builder.CreateBr(ContBB);
1142    }
1143
1144    SI->addCase(Builder.getInt32(0), BBs[0]);
1145    SI->addCase(Builder.getInt32(1), BBs[1]);
1146    SI->addCase(Builder.getInt32(2), BBs[1]);
1147    SI->addCase(Builder.getInt32(3), BBs[2]);
1148    SI->addCase(Builder.getInt32(4), BBs[3]);
1149    SI->addCase(Builder.getInt32(5), BBs[4]);
1150
1151    Builder.SetInsertPoint(ContBB);
1152    return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1153  }
1154
1155  case Builtin::BI__atomic_clear: {
1156    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1157    bool Volatile =
1158        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1159
1160    Value *Ptr = EmitScalarExpr(E->getArg(0));
1161    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1162    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1163    Value *NewVal = Builder.getInt8(0);
1164    Value *Order = EmitScalarExpr(E->getArg(1));
1165    if (isa<llvm::ConstantInt>(Order)) {
1166      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1167      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1168      Store->setAlignment(1);
1169      switch (ord) {
1170      case 0:  // memory_order_relaxed
1171      default: // invalid order
1172        Store->setOrdering(llvm::Monotonic);
1173        break;
1174      case 3:  // memory_order_release
1175        Store->setOrdering(llvm::Release);
1176        break;
1177      case 5:  // memory_order_seq_cst
1178        Store->setOrdering(llvm::SequentiallyConsistent);
1179        break;
1180      }
1181      return RValue::get(0);
1182    }
1183
1184    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1185
1186    llvm::BasicBlock *BBs[3] = {
1187      createBasicBlock("monotonic", CurFn),
1188      createBasicBlock("release", CurFn),
1189      createBasicBlock("seqcst", CurFn)
1190    };
1191    llvm::AtomicOrdering Orders[3] = {
1192      llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
1193    };
1194
1195    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1196    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1197
1198    for (unsigned i = 0; i < 3; ++i) {
1199      Builder.SetInsertPoint(BBs[i]);
1200      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1201      Store->setAlignment(1);
1202      Store->setOrdering(Orders[i]);
1203      Builder.CreateBr(ContBB);
1204    }
1205
1206    SI->addCase(Builder.getInt32(0), BBs[0]);
1207    SI->addCase(Builder.getInt32(3), BBs[1]);
1208    SI->addCase(Builder.getInt32(5), BBs[2]);
1209
1210    Builder.SetInsertPoint(ContBB);
1211    return RValue::get(0);
1212  }
1213
1214  case Builtin::BI__atomic_thread_fence:
1215  case Builtin::BI__atomic_signal_fence:
1216  case Builtin::BI__c11_atomic_thread_fence:
1217  case Builtin::BI__c11_atomic_signal_fence: {
1218    llvm::SynchronizationScope Scope;
1219    if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1220        BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1221      Scope = llvm::SingleThread;
1222    else
1223      Scope = llvm::CrossThread;
1224    Value *Order = EmitScalarExpr(E->getArg(0));
1225    if (isa<llvm::ConstantInt>(Order)) {
1226      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1227      switch (ord) {
1228      case 0:  // memory_order_relaxed
1229      default: // invalid order
1230        break;
1231      case 1:  // memory_order_consume
1232      case 2:  // memory_order_acquire
1233        Builder.CreateFence(llvm::Acquire, Scope);
1234        break;
1235      case 3:  // memory_order_release
1236        Builder.CreateFence(llvm::Release, Scope);
1237        break;
1238      case 4:  // memory_order_acq_rel
1239        Builder.CreateFence(llvm::AcquireRelease, Scope);
1240        break;
1241      case 5:  // memory_order_seq_cst
1242        Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1243        break;
1244      }
1245      return RValue::get(0);
1246    }
1247
1248    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1249    AcquireBB = createBasicBlock("acquire", CurFn);
1250    ReleaseBB = createBasicBlock("release", CurFn);
1251    AcqRelBB = createBasicBlock("acqrel", CurFn);
1252    SeqCstBB = createBasicBlock("seqcst", CurFn);
1253    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1254
1255    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1256    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1257
1258    Builder.SetInsertPoint(AcquireBB);
1259    Builder.CreateFence(llvm::Acquire, Scope);
1260    Builder.CreateBr(ContBB);
1261    SI->addCase(Builder.getInt32(1), AcquireBB);
1262    SI->addCase(Builder.getInt32(2), AcquireBB);
1263
1264    Builder.SetInsertPoint(ReleaseBB);
1265    Builder.CreateFence(llvm::Release, Scope);
1266    Builder.CreateBr(ContBB);
1267    SI->addCase(Builder.getInt32(3), ReleaseBB);
1268
1269    Builder.SetInsertPoint(AcqRelBB);
1270    Builder.CreateFence(llvm::AcquireRelease, Scope);
1271    Builder.CreateBr(ContBB);
1272    SI->addCase(Builder.getInt32(4), AcqRelBB);
1273
1274    Builder.SetInsertPoint(SeqCstBB);
1275    Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1276    Builder.CreateBr(ContBB);
1277    SI->addCase(Builder.getInt32(5), SeqCstBB);
1278
1279    Builder.SetInsertPoint(ContBB);
1280    return RValue::get(0);
1281  }
1282
1283    // Library functions with special handling.
1284  case Builtin::BIsqrt:
1285  case Builtin::BIsqrtf:
1286  case Builtin::BIsqrtl: {
1287    // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1288    // in finite- or unsafe-math mode (the intrinsic has different semantics
1289    // for handling negative numbers compared to the library function, so
1290    // -fmath-errno=0 is not enough).
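    // For example (illustrative), under -ffast-math a call sqrt(x) with
    // 'double' x becomes roughly:
    //   %r = call double @llvm.sqrt.f64(double %x)
    // otherwise we break out and emit the plain library call below.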
1291    if (!FD->hasAttr<ConstAttr>())
1292      break;
1293    if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1294          CGM.getCodeGenOpts().NoNaNsFPMath))
1295      break;
1296    Value *Arg0 = EmitScalarExpr(E->getArg(0));
1297    llvm::Type *ArgType = Arg0->getType();
1298    Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1299    return RValue::get(Builder.CreateCall(F, Arg0));
1300  }
1301
1302  case Builtin::BIpow:
1303  case Builtin::BIpowf:
1304  case Builtin::BIpowl: {
1305    // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1306    if (!FD->hasAttr<ConstAttr>())
1307      break;
1308    Value *Base = EmitScalarExpr(E->getArg(0));
1309    Value *Exponent = EmitScalarExpr(E->getArg(1));
1310    llvm::Type *ArgType = Base->getType();
1311    Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1312    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
1314  }
1315
1316  case Builtin::BIfma:
1317  case Builtin::BIfmaf:
1318  case Builtin::BIfmal:
1319  case Builtin::BI__builtin_fma:
1320  case Builtin::BI__builtin_fmaf:
1321  case Builtin::BI__builtin_fmal: {
1322    // Rewrite fma to intrinsic.
1323    Value *FirstArg = EmitScalarExpr(E->getArg(0));
1324    llvm::Type *ArgType = FirstArg->getType();
1325    Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1326    return RValue::get(Builder.CreateCall3(F, FirstArg,
1327                                              EmitScalarExpr(E->getArg(1)),
1328                                              EmitScalarExpr(E->getArg(2))));
1329  }
1330
1331  case Builtin::BI__builtin_signbit:
1332  case Builtin::BI__builtin_signbitf:
1333  case Builtin::BI__builtin_signbitl: {
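    // The approach, in brief: bitcast the floating-point value to an integer
    // of the same width, test its sign bit with a signed compare against zero,
    // and zero-extend the i1 result to the builtin's return type.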
1334    LLVMContext &C = CGM.getLLVMContext();
1335
1336    Value *Arg = EmitScalarExpr(E->getArg(0));
1337    llvm::Type *ArgTy = Arg->getType();
1338    if (ArgTy->isPPC_FP128Ty())
1339      break; // FIXME: I'm not sure what the right implementation is here.
1340    int ArgWidth = ArgTy->getPrimitiveSizeInBits();
1341    llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
1342    Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
1343    Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
1344    Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
1345    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
1346  }
1347  case Builtin::BI__builtin_annotation: {
1348    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1349    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1350                                      AnnVal->getType());
1351
1352    // Get the annotation string, go through casts. Sema requires this to be a
1353    // non-wide string literal, potentially cast, so the cast<> is safe.
1354    const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1355    StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1356    return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1357  }
1358  case Builtin::BI__builtin_addcb:
1359  case Builtin::BI__builtin_addcs:
1360  case Builtin::BI__builtin_addc:
1361  case Builtin::BI__builtin_addcl:
1362  case Builtin::BI__builtin_addcll:
1363  case Builtin::BI__builtin_subcb:
1364  case Builtin::BI__builtin_subcs:
1365  case Builtin::BI__builtin_subc:
1366  case Builtin::BI__builtin_subcl:
1367  case Builtin::BI__builtin_subcll: {
1368
1369    // We translate all of these builtins from expressions of the form:
1370    //   int x = ..., y = ..., carryin = ..., carryout, result;
1371    //   result = __builtin_addc(x, y, carryin, &carryout);
1372    //
1373    // to LLVM IR of the form:
1374    //
1375    //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1376    //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1377    //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1378    //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1379    //                                                       i32 %carryin)
1380    //   %result = extractvalue {i32, i1} %tmp2, 0
1381    //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1382    //   %tmp3 = or i1 %carry1, %carry2
1383    //   %tmp4 = zext i1 %tmp3 to i32
1384    //   store i32 %tmp4, i32* %carryout
1385
1386    // Scalarize our inputs.
1387    llvm::Value *X = EmitScalarExpr(E->getArg(0));
1388    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1389    llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1390    std::pair<llvm::Value*, unsigned> CarryOutPtr =
1391      EmitPointerWithAlignment(E->getArg(3));
1392
1393    // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1394    llvm::Intrinsic::ID IntrinsicId;
1395    switch (BuiltinID) {
1396    default: llvm_unreachable("Unknown multiprecision builtin id.");
1397    case Builtin::BI__builtin_addcb:
1398    case Builtin::BI__builtin_addcs:
1399    case Builtin::BI__builtin_addc:
1400    case Builtin::BI__builtin_addcl:
1401    case Builtin::BI__builtin_addcll:
1402      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1403      break;
1404    case Builtin::BI__builtin_subcb:
1405    case Builtin::BI__builtin_subcs:
1406    case Builtin::BI__builtin_subc:
1407    case Builtin::BI__builtin_subcl:
1408    case Builtin::BI__builtin_subcll:
1409      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1410      break;
1411    }
1412
1413    // Construct our resulting LLVM IR expression.
1414    llvm::Value *Carry1;
1415    llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1416                                              X, Y, Carry1);
1417    llvm::Value *Carry2;
1418    llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1419                                              Sum1, Carryin, Carry2);
1420    llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1421                                               X->getType());
1422    llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut,
1423                                                         CarryOutPtr.first);
1424    CarryOutStore->setAlignment(CarryOutPtr.second);
1425    return RValue::get(Sum2);
1426  }
1427  case Builtin::BI__builtin_uadd_overflow:
1428  case Builtin::BI__builtin_uaddl_overflow:
1429  case Builtin::BI__builtin_uaddll_overflow:
1430  case Builtin::BI__builtin_usub_overflow:
1431  case Builtin::BI__builtin_usubl_overflow:
1432  case Builtin::BI__builtin_usubll_overflow:
1433  case Builtin::BI__builtin_umul_overflow:
1434  case Builtin::BI__builtin_umull_overflow:
1435  case Builtin::BI__builtin_umulll_overflow:
1436  case Builtin::BI__builtin_sadd_overflow:
1437  case Builtin::BI__builtin_saddl_overflow:
1438  case Builtin::BI__builtin_saddll_overflow:
1439  case Builtin::BI__builtin_ssub_overflow:
1440  case Builtin::BI__builtin_ssubl_overflow:
1441  case Builtin::BI__builtin_ssubll_overflow:
1442  case Builtin::BI__builtin_smul_overflow:
1443  case Builtin::BI__builtin_smull_overflow:
1444  case Builtin::BI__builtin_smulll_overflow: {
1445
1446    // We translate all of these builtins directly to the relevant llvm IR node.
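    //
    // For example (a rough sketch; the actual value names and integer widths
    // depend on the builtin), __builtin_sadd_overflow(x, y, &sum) lowers to
    // approximately:
    //
    //   %pair  = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
    //   %sum   = extractvalue {i32, i1} %pair, 0
    //   %carry = extractvalue {i32, i1} %pair, 1
    //   store i32 %sum, i32* %sumout
    //
    // with the i1 overflow flag becoming the builtin's boolean result.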
1447
1448    // Scalarize our inputs.
1449    llvm::Value *X = EmitScalarExpr(E->getArg(0));
1450    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1451    std::pair<llvm::Value *, unsigned> SumOutPtr =
1452      EmitPointerWithAlignment(E->getArg(2));
1453
1454    // Decide which of the overflow intrinsics we are lowering to:
1455    llvm::Intrinsic::ID IntrinsicId;
1456    switch (BuiltinID) {
1457    default: llvm_unreachable("Unknown security overflow builtin id.");
1458    case Builtin::BI__builtin_uadd_overflow:
1459    case Builtin::BI__builtin_uaddl_overflow:
1460    case Builtin::BI__builtin_uaddll_overflow:
1461      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1462      break;
1463    case Builtin::BI__builtin_usub_overflow:
1464    case Builtin::BI__builtin_usubl_overflow:
1465    case Builtin::BI__builtin_usubll_overflow:
1466      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1467      break;
1468    case Builtin::BI__builtin_umul_overflow:
1469    case Builtin::BI__builtin_umull_overflow:
1470    case Builtin::BI__builtin_umulll_overflow:
1471      IntrinsicId = llvm::Intrinsic::umul_with_overflow;
1472      break;
1473    case Builtin::BI__builtin_sadd_overflow:
1474    case Builtin::BI__builtin_saddl_overflow:
1475    case Builtin::BI__builtin_saddll_overflow:
1476      IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
1477      break;
1478    case Builtin::BI__builtin_ssub_overflow:
1479    case Builtin::BI__builtin_ssubl_overflow:
1480    case Builtin::BI__builtin_ssubll_overflow:
1481      IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
1482      break;
1483    case Builtin::BI__builtin_smul_overflow:
1484    case Builtin::BI__builtin_smull_overflow:
1485    case Builtin::BI__builtin_smulll_overflow:
1486      IntrinsicId = llvm::Intrinsic::smul_with_overflow;
1487      break;
1488    }
1489
1490
1491    llvm::Value *Carry;
1492    llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
1493    llvm::StoreInst *SumOutStore = Builder.CreateStore(Sum, SumOutPtr.first);
1494    SumOutStore->setAlignment(SumOutPtr.second);
1495
1496    return RValue::get(Carry);
1497  }
1498  case Builtin::BI__builtin_addressof:
1499    return RValue::get(EmitLValue(E->getArg(0)).getAddress());
1500  case Builtin::BI__noop:
1501    return RValue::get(0);
1502  }
1503
1504  // If this is an alias for a lib function (e.g. __builtin_sin), emit
1505  // the call using the normal call path, but using the unmangled
1506  // version of the function name.
1507  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
1508    return emitLibraryCall(*this, FD, E,
1509                           CGM.getBuiltinLibFunction(FD, BuiltinID));
1510
1511  // If this is a predefined lib function (e.g. malloc), emit the call
1512  // using exactly the normal call path.
1513  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
1514    return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
1515
1516  // See if we have a target specific intrinsic.
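  // (The prefix is the architecture name, e.g. "x86"; builtins that do not
  // resolve to a named LLVM intrinsic this way fall through to
  // EmitTargetBuiltinExpr further down.)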
1517  const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
1518  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
1519  if (const char *Prefix =
1520      llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()))
1521    IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
1522
1523  if (IntrinsicID != Intrinsic::not_intrinsic) {
1524    SmallVector<Value*, 16> Args;
1525
1526    // Find out if any arguments are required to be integer constant
1527    // expressions.
1528    unsigned ICEArguments = 0;
1529    ASTContext::GetBuiltinTypeError Error;
1530    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
1531    assert(Error == ASTContext::GE_None && "Should not codegen an error");
1532
1533    Function *F = CGM.getIntrinsic(IntrinsicID);
1534    llvm::FunctionType *FTy = F->getFunctionType();
1535
1536    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
1537      Value *ArgValue;
1538      // If this is a normal argument, just emit it as a scalar.
1539      if ((ICEArguments & (1 << i)) == 0) {
1540        ArgValue = EmitScalarExpr(E->getArg(i));
1541      } else {
1542        // If this is required to be a constant, constant fold it so that we
1543        // know that the generated intrinsic gets a ConstantInt.
1544        llvm::APSInt Result;
1545        bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
1546        assert(IsConst && "Constant arg isn't actually constant?");
1547        (void)IsConst;
1548        ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
1549      }
1550
1551      // If the intrinsic arg type is different from the builtin arg type,
1552      // we need to do a bit cast.
1553      llvm::Type *PTy = FTy->getParamType(i);
1554      if (PTy != ArgValue->getType()) {
1555        assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
1556               "Must be able to losslessly bit cast to param");
1557        ArgValue = Builder.CreateBitCast(ArgValue, PTy);
1558      }
1559
1560      Args.push_back(ArgValue);
1561    }
1562
1563    Value *V = Builder.CreateCall(F, Args);
1564    QualType BuiltinRetType = E->getType();
1565
1566    llvm::Type *RetTy = VoidTy;
1567    if (!BuiltinRetType->isVoidType())
1568      RetTy = ConvertType(BuiltinRetType);
1569
1570    if (RetTy != V->getType()) {
1571      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
1572             "Must be able to losslessly bit cast result type");
1573      V = Builder.CreateBitCast(V, RetTy);
1574    }
1575
1576    return RValue::get(V);
1577  }
1578
1579  // See if we have a target specific builtin that needs to be lowered.
1580  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
1581    return RValue::get(V);
1582
1583  ErrorUnsupported(E, "builtin function");
1584
1585  // Unknown builtin, for now just dump it out and return undef.
1586  return GetUndefRValue(E->getType());
1587}
1588
1589Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
1590                                              const CallExpr *E) {
1591  switch (getTarget().getTriple().getArch()) {
1592  case llvm::Triple::aarch64:
1593    return EmitAArch64BuiltinExpr(BuiltinID, E);
1594  case llvm::Triple::arm:
1595  case llvm::Triple::thumb:
1596    return EmitARMBuiltinExpr(BuiltinID, E);
1597  case llvm::Triple::x86:
1598  case llvm::Triple::x86_64:
1599    return EmitX86BuiltinExpr(BuiltinID, E);
1600  case llvm::Triple::ppc:
1601  case llvm::Triple::ppc64:
1602  case llvm::Triple::ppc64le:
1603    return EmitPPCBuiltinExpr(BuiltinID, E);
1604  default:
1605    return 0;
1606  }
1607}
1608
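/// GetNeonType - Map a NeonTypeFlags descriptor onto the corresponding LLVM
/// vector type.  For example, Int32 becomes <2 x i32>, or <4 x i32> when the
/// quad flag is set; when V1Ty is true a one-element vector is returned
/// instead.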
1609static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
1610                                     NeonTypeFlags TypeFlags,
1611                                     bool V1Ty=false) {
1612  int IsQuad = TypeFlags.isQuad();
1613  switch (TypeFlags.getEltType()) {
1614  case NeonTypeFlags::Int8:
1615  case NeonTypeFlags::Poly8:
1616    return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
1617  case NeonTypeFlags::Int16:
1618  case NeonTypeFlags::Poly16:
1619  case NeonTypeFlags::Float16:
1620    return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
1621  case NeonTypeFlags::Int32:
1622    return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
1623  case NeonTypeFlags::Int64:
1624    return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
1625  case NeonTypeFlags::Float32:
1626    return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
1627  case NeonTypeFlags::Float64:
1628    return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
1629  }
1630  llvm_unreachable("Unknown vector element type!");
1631}
1632
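/// EmitNeonSplat - Broadcast the lane selected by the constant C across every
/// element of V, using a shufflevector whose mask is a splat of C.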
1633Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
1634  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
1635  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
1636  return Builder.CreateShuffleVector(V, V, SV, "lane");
1637}
1638
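/// EmitNeonCall - Emit a call to the NEON intrinsic F, bitcasting each operand
/// in Ops to the parameter type the intrinsic expects.  If 'shift' is
/// non-zero, the operand at that index is instead expanded into a constant
/// shift-amount vector (negated when 'rightshift' is set).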
1639Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
1640                                     const char *name,
1641                                     unsigned shift, bool rightshift) {
1642  unsigned j = 0;
1643  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1644       ai != ae; ++ai, ++j)
1645    if (shift > 0 && shift == j)
1646      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
1647    else
1648      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
1649
1650  return Builder.CreateCall(F, Ops, name);
1651}
1652
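/// EmitNeonShiftVector - Build a constant vector whose elements all hold the
/// scalar shift amount taken from V, negating the amount first when 'neg' is
/// set.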
1653Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
1654                                            bool neg) {
1655  int SV = cast<ConstantInt>(V)->getSExtValue();
1656
1657  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1658  llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
1659  return llvm::ConstantVector::getSplat(VTy->getNumElements(), C);
1660}
1661
1662/// \brief Right-shift a vector by a constant.
1663Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
1664                                          llvm::Type *Ty, bool usgn,
1665                                          const char *name) {
1666  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1667
1668  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
1669  int EltSize = VTy->getScalarSizeInBits();
1670
1671  Vec = Builder.CreateBitCast(Vec, Ty);
1672
1673  // lshr/ashr are undefined when the shift amount is equal to the vector
1674  // element size.
1675  if (ShiftAmt == EltSize) {
1676    if (usgn) {
1677      // Right-shifting an unsigned value by its size yields 0.
1678      llvm::Constant *Zero = ConstantInt::get(VTy->getElementType(), 0);
1679      return llvm::ConstantVector::getSplat(VTy->getNumElements(), Zero);
1680    } else {
1681      // Right-shifting a signed value by its size is equivalent
1682      // to a shift of size-1.
1683      --ShiftAmt;
1684      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
1685    }
1686  }
1687
1688  Shift = EmitNeonShiftVector(Shift, Ty, false);
1689  if (usgn)
1690    return Builder.CreateLShr(Vec, Shift, name);
1691  else
1692    return Builder.CreateAShr(Vec, Shift, name);
1693}
1694
1695/// EmitPointerWithAlignment - Given an expression with a pointer type, emit
1696/// the pointer value along with the alignment of the type it points to,
1697/// skipping over implicit casts.
1698std::pair<llvm::Value*, unsigned>
1699CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) {
1700  assert(Addr->getType()->isPointerType());
1701  Addr = Addr->IgnoreParens();
1702  if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Addr)) {
1703    if ((ICE->getCastKind() == CK_BitCast || ICE->getCastKind() == CK_NoOp) &&
1704        ICE->getSubExpr()->getType()->isPointerType()) {
1705      std::pair<llvm::Value*, unsigned> Ptr =
1706          EmitPointerWithAlignment(ICE->getSubExpr());
1707      Ptr.first = Builder.CreateBitCast(Ptr.first,
1708                                        ConvertType(Addr->getType()));
1709      return Ptr;
1710    } else if (ICE->getCastKind() == CK_ArrayToPointerDecay) {
1711      LValue LV = EmitLValue(ICE->getSubExpr());
1712      unsigned Align = LV.getAlignment().getQuantity();
1713      if (!Align) {
1714        // FIXME: Once LValues are fixed to always set alignment,
1715        // zap this code.
1716        QualType PtTy = ICE->getSubExpr()->getType();
1717        if (!PtTy->isIncompleteType())
1718          Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
1719        else
1720          Align = 1;
1721      }
1722      return std::make_pair(LV.getAddress(), Align);
1723    }
1724  }
1725  if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(Addr)) {
1726    if (UO->getOpcode() == UO_AddrOf) {
1727      LValue LV = EmitLValue(UO->getSubExpr());
1728      unsigned Align = LV.getAlignment().getQuantity();
1729      if (!Align) {
1730        // FIXME: Once LValues are fixed to always set alignment,
1731        // zap this code.
1732        QualType PtTy = UO->getSubExpr()->getType();
1733        if (!PtTy->isIncompleteType())
1734          Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
1735        else
1736          Align = 1;
1737      }
1738      return std::make_pair(LV.getAddress(), Align);
1739    }
1740  }
1741
1742  unsigned Align = 1;
1743  QualType PtTy = Addr->getType()->getPointeeType();
1744  if (!PtTy->isIncompleteType())
1745    Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
1746
1747  return std::make_pair(EmitScalarExpr(Addr), Align);
1748}
1749
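/// EmitAArch64ScalarBuiltinExpr - Lower an AArch64 scalar NEON builtin, or
/// return null if it is not handled here.  The switch below records, per
/// builtin, the intrinsic to emit and how that intrinsic is overloaded:
///   AcrossVec      - the scalar result is computed across a whole input
///                    vector (ExtendEle additionally widens the result
///                    element type);
///   OverloadInt    - overload on a one-element vector of the return type;
///   OverloadCmpInt - overload on one-element vectors of both the return
///                    type and the operand type (used for the comparisons);
///   OverloadWideInt / OverloadNarrowInt - overload on the widened or
///                    narrowed form of the last operand's one-element
///                    vector type.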
1750static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
1751                                           unsigned BuiltinID,
1752                                           const CallExpr *E) {
1753  unsigned int Int = 0;
1754  // Scalar result generated across vectors
1755  bool AcrossVec = false;
1756  // Extend element of one-element vector
1757  bool ExtendEle = false;
1758  bool OverloadInt = false;
1759  bool OverloadCmpInt = false;
1760  bool OverloadWideInt = false;
1761  bool OverloadNarrowInt = false;
1762  const char *s = NULL;
1763
1764  SmallVector<Value *, 4> Ops;
1765  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
1766    Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
1767  }
1768
1769  // AArch64 scalar builtins are not overloaded: they do not have an extra
1770  // argument specifying the vector type, so each case is handled separately.
1771  switch (BuiltinID) {
1772  default: break;
1773  case AArch64::BI__builtin_neon_vget_lane_i8:
1774  case AArch64::BI__builtin_neon_vget_lane_i16:
1775  case AArch64::BI__builtin_neon_vget_lane_i32:
1776  case AArch64::BI__builtin_neon_vget_lane_i64:
1777  case AArch64::BI__builtin_neon_vget_lane_f32:
1778  case AArch64::BI__builtin_neon_vget_lane_f64:
1779  case AArch64::BI__builtin_neon_vgetq_lane_i8:
1780  case AArch64::BI__builtin_neon_vgetq_lane_i16:
1781  case AArch64::BI__builtin_neon_vgetq_lane_i32:
1782  case AArch64::BI__builtin_neon_vgetq_lane_i64:
1783  case AArch64::BI__builtin_neon_vgetq_lane_f32:
1784  case AArch64::BI__builtin_neon_vgetq_lane_f64:
1785    return CGF.EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E);
1786  case AArch64::BI__builtin_neon_vset_lane_i8:
1787  case AArch64::BI__builtin_neon_vset_lane_i16:
1788  case AArch64::BI__builtin_neon_vset_lane_i32:
1789  case AArch64::BI__builtin_neon_vset_lane_i64:
1790  case AArch64::BI__builtin_neon_vset_lane_f32:
1791  case AArch64::BI__builtin_neon_vset_lane_f64:
1792  case AArch64::BI__builtin_neon_vsetq_lane_i8:
1793  case AArch64::BI__builtin_neon_vsetq_lane_i16:
1794  case AArch64::BI__builtin_neon_vsetq_lane_i32:
1795  case AArch64::BI__builtin_neon_vsetq_lane_i64:
1796  case AArch64::BI__builtin_neon_vsetq_lane_f32:
1797  case AArch64::BI__builtin_neon_vsetq_lane_f64:
1798    return CGF.EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E);
1799  // Crypto
1800  case AArch64::BI__builtin_neon_vsha1h_u32:
1801    Int = Intrinsic::arm_neon_sha1h;
1802    s = "sha1h"; OverloadInt = true; break;
1803  case AArch64::BI__builtin_neon_vsha1cq_u32:
1804    Int = Intrinsic::aarch64_neon_sha1c;
1805    s = "sha1c"; break;
1806  case AArch64::BI__builtin_neon_vsha1pq_u32:
1807    Int = Intrinsic::aarch64_neon_sha1p;
1808    s = "sha1p"; break;
1809  case AArch64::BI__builtin_neon_vsha1mq_u32:
1810    Int = Intrinsic::aarch64_neon_sha1m;
1811    s = "sha1m"; break;
1812  // Scalar Add
1813  case AArch64::BI__builtin_neon_vaddd_s64:
1814    Int = Intrinsic::aarch64_neon_vaddds;
1815    s = "vaddds"; break;
1816  case AArch64::BI__builtin_neon_vaddd_u64:
1817    Int = Intrinsic::aarch64_neon_vadddu;
1818    s = "vadddu"; break;
1819  // Scalar Sub
1820  case AArch64::BI__builtin_neon_vsubd_s64:
1821    Int = Intrinsic::aarch64_neon_vsubds;
1822    s = "vsubds"; break;
1823  case AArch64::BI__builtin_neon_vsubd_u64:
1824    Int = Intrinsic::aarch64_neon_vsubdu;
1825    s = "vsubdu"; break;
1826  // Scalar Saturating Add
1827  case AArch64::BI__builtin_neon_vqaddb_s8:
1828  case AArch64::BI__builtin_neon_vqaddh_s16:
1829  case AArch64::BI__builtin_neon_vqadds_s32:
1830  case AArch64::BI__builtin_neon_vqaddd_s64:
1831    Int = Intrinsic::aarch64_neon_vqadds;
1832    s = "vqadds"; OverloadInt = true; break;
1833  case AArch64::BI__builtin_neon_vqaddb_u8:
1834  case AArch64::BI__builtin_neon_vqaddh_u16:
1835  case AArch64::BI__builtin_neon_vqadds_u32:
1836  case AArch64::BI__builtin_neon_vqaddd_u64:
1837    Int = Intrinsic::aarch64_neon_vqaddu;
1838    s = "vqaddu"; OverloadInt = true; break;
1839  // Scalar Saturating Sub
1840  case AArch64::BI__builtin_neon_vqsubb_s8:
1841  case AArch64::BI__builtin_neon_vqsubh_s16:
1842  case AArch64::BI__builtin_neon_vqsubs_s32:
1843  case AArch64::BI__builtin_neon_vqsubd_s64:
1844    Int = Intrinsic::aarch64_neon_vqsubs;
1845    s = "vqsubs"; OverloadInt = true; break;
1846  case AArch64::BI__builtin_neon_vqsubb_u8:
1847  case AArch64::BI__builtin_neon_vqsubh_u16:
1848  case AArch64::BI__builtin_neon_vqsubs_u32:
1849  case AArch64::BI__builtin_neon_vqsubd_u64:
1850    Int = Intrinsic::aarch64_neon_vqsubu;
1851    s = "vqsubu"; OverloadInt = true; break;
1852  // Scalar Shift Left
1853  case AArch64::BI__builtin_neon_vshld_s64:
1854    Int = Intrinsic::aarch64_neon_vshlds;
1855    s = "vshlds"; break;
1856  case AArch64::BI__builtin_neon_vshld_u64:
1857    Int = Intrinsic::aarch64_neon_vshldu;
1858    s = "vshldu"; break;
1859  // Scalar Saturating Shift Left
1860  case AArch64::BI__builtin_neon_vqshlb_s8:
1861  case AArch64::BI__builtin_neon_vqshlh_s16:
1862  case AArch64::BI__builtin_neon_vqshls_s32:
1863  case AArch64::BI__builtin_neon_vqshld_s64:
1864    Int = Intrinsic::aarch64_neon_vqshls;
1865    s = "vqshls"; OverloadInt = true; break;
1866  case AArch64::BI__builtin_neon_vqshlb_u8:
1867  case AArch64::BI__builtin_neon_vqshlh_u16:
1868  case AArch64::BI__builtin_neon_vqshls_u32:
1869  case AArch64::BI__builtin_neon_vqshld_u64:
1870    Int = Intrinsic::aarch64_neon_vqshlu;
1871    s = "vqshlu"; OverloadInt = true; break;
1872  // Scalar Rounding Shift Left
1873  case AArch64::BI__builtin_neon_vrshld_s64:
1874    Int = Intrinsic::aarch64_neon_vrshlds;
1875    s = "vrshlds"; break;
1876  case AArch64::BI__builtin_neon_vrshld_u64:
1877    Int = Intrinsic::aarch64_neon_vrshldu;
1878    s = "vrshldu"; break;
1879  // Scalar Saturating Rounding Shift Left
1880  case AArch64::BI__builtin_neon_vqrshlb_s8:
1881  case AArch64::BI__builtin_neon_vqrshlh_s16:
1882  case AArch64::BI__builtin_neon_vqrshls_s32:
1883  case AArch64::BI__builtin_neon_vqrshld_s64:
1884    Int = Intrinsic::aarch64_neon_vqrshls;
1885    s = "vqrshls"; OverloadInt = true; break;
1886  case AArch64::BI__builtin_neon_vqrshlb_u8:
1887  case AArch64::BI__builtin_neon_vqrshlh_u16:
1888  case AArch64::BI__builtin_neon_vqrshls_u32:
1889  case AArch64::BI__builtin_neon_vqrshld_u64:
1890    Int = Intrinsic::aarch64_neon_vqrshlu;
1891    s = "vqrshlu"; OverloadInt = true; break;
1892  // Scalar Reduce Pairwise Add
1893  case AArch64::BI__builtin_neon_vpaddd_s64:
1894    Int = Intrinsic::aarch64_neon_vpadd; s = "vpadd";
1895    break;
1896  case AArch64::BI__builtin_neon_vpadds_f32:
1897    Int = Intrinsic::aarch64_neon_vpfadd; s = "vpfadd";
1898    break;
1899  case AArch64::BI__builtin_neon_vpaddd_f64:
1900    Int = Intrinsic::aarch64_neon_vpfaddq; s = "vpfaddq";
1901    break;
1902  // Scalar Reduce Pairwise Floating Point Max
1903  case AArch64::BI__builtin_neon_vpmaxs_f32:
1904    Int = Intrinsic::aarch64_neon_vpmax; s = "vpmax";
1905    break;
1906  case AArch64::BI__builtin_neon_vpmaxqd_f64:
1907    Int = Intrinsic::aarch64_neon_vpmaxq; s = "vpmaxq";
1908    break;
1909  // Scalar Reduce Pairwise Floating Point Min
1910  case AArch64::BI__builtin_neon_vpmins_f32:
1911    Int = Intrinsic::aarch64_neon_vpmin; s = "vpmin";
1912    break;
1913  case AArch64::BI__builtin_neon_vpminqd_f64:
1914    Int = Intrinsic::aarch64_neon_vpminq; s = "vpminq";
1915    break;
1916  // Scalar Reduce Pairwise Floating Point Maxnm
1917  case AArch64::BI__builtin_neon_vpmaxnms_f32:
1918    Int = Intrinsic::aarch64_neon_vpfmaxnm; s = "vpfmaxnm";
1919    break;
1920  case AArch64::BI__builtin_neon_vpmaxnmqd_f64:
1921    Int = Intrinsic::aarch64_neon_vpfmaxnmq; s = "vpfmaxnmq";
1922    break;
1923  // Scalar Reduce Pairwise Floating Point Minnm
1924  case AArch64::BI__builtin_neon_vpminnms_f32:
1925    Int = Intrinsic::aarch64_neon_vpfminnm; s = "vpfminnm";
1926    break;
1927  case AArch64::BI__builtin_neon_vpminnmqd_f64:
1928    Int = Intrinsic::aarch64_neon_vpfminnmq; s = "vpfminnmq";
1929    break;
1930  // The following intrinsics produce a scalar result across a whole vector (AcrossVec).
1931  case AArch64::BI__builtin_neon_vaddlv_s8:
1932  case AArch64::BI__builtin_neon_vaddlv_s16:
1933  case AArch64::BI__builtin_neon_vaddlvq_s8:
1934  case AArch64::BI__builtin_neon_vaddlvq_s16:
1935  case AArch64::BI__builtin_neon_vaddlvq_s32:
1936    Int = Intrinsic::aarch64_neon_saddlv;
1937    AcrossVec = true; ExtendEle = true; s = "saddlv"; break;
1938  case AArch64::BI__builtin_neon_vaddlv_u8:
1939  case AArch64::BI__builtin_neon_vaddlv_u16:
1940  case AArch64::BI__builtin_neon_vaddlvq_u8:
1941  case AArch64::BI__builtin_neon_vaddlvq_u16:
1942  case AArch64::BI__builtin_neon_vaddlvq_u32:
1943    Int = Intrinsic::aarch64_neon_uaddlv;
1944    AcrossVec = true; ExtendEle = true; s = "uaddlv"; break;
1945  case AArch64::BI__builtin_neon_vmaxv_s8:
1946  case AArch64::BI__builtin_neon_vmaxv_s16:
1947  case AArch64::BI__builtin_neon_vmaxvq_s8:
1948  case AArch64::BI__builtin_neon_vmaxvq_s16:
1949  case AArch64::BI__builtin_neon_vmaxvq_s32:
1950    Int = Intrinsic::aarch64_neon_smaxv;
1951    AcrossVec = true; ExtendEle = false; s = "smaxv"; break;
1952  case AArch64::BI__builtin_neon_vmaxv_u8:
1953  case AArch64::BI__builtin_neon_vmaxv_u16:
1954  case AArch64::BI__builtin_neon_vmaxvq_u8:
1955  case AArch64::BI__builtin_neon_vmaxvq_u16:
1956  case AArch64::BI__builtin_neon_vmaxvq_u32:
1957    Int = Intrinsic::aarch64_neon_umaxv;
1958    AcrossVec = true; ExtendEle = false; s = "umaxv"; break;
1959  case AArch64::BI__builtin_neon_vminv_s8:
1960  case AArch64::BI__builtin_neon_vminv_s16:
1961  case AArch64::BI__builtin_neon_vminvq_s8:
1962  case AArch64::BI__builtin_neon_vminvq_s16:
1963  case AArch64::BI__builtin_neon_vminvq_s32:
1964    Int = Intrinsic::aarch64_neon_sminv;
1965    AcrossVec = true; ExtendEle = false; s = "sminv"; break;
1966  case AArch64::BI__builtin_neon_vminv_u8:
1967  case AArch64::BI__builtin_neon_vminv_u16:
1968  case AArch64::BI__builtin_neon_vminvq_u8:
1969  case AArch64::BI__builtin_neon_vminvq_u16:
1970  case AArch64::BI__builtin_neon_vminvq_u32:
1971    Int = Intrinsic::aarch64_neon_uminv;
1972    AcrossVec = true; ExtendEle = false; s = "uminv"; break;
1973  case AArch64::BI__builtin_neon_vaddv_s8:
1974  case AArch64::BI__builtin_neon_vaddv_s16:
1975  case AArch64::BI__builtin_neon_vaddvq_s8:
1976  case AArch64::BI__builtin_neon_vaddvq_s16:
1977  case AArch64::BI__builtin_neon_vaddvq_s32:
1978  case AArch64::BI__builtin_neon_vaddv_u8:
1979  case AArch64::BI__builtin_neon_vaddv_u16:
1980  case AArch64::BI__builtin_neon_vaddvq_u8:
1981  case AArch64::BI__builtin_neon_vaddvq_u16:
1982  case AArch64::BI__builtin_neon_vaddvq_u32:
1983    Int = Intrinsic::aarch64_neon_vaddv;
1984    AcrossVec = true; ExtendEle = false; s = "vaddv"; break;
1985  case AArch64::BI__builtin_neon_vmaxvq_f32:
1986    Int = Intrinsic::aarch64_neon_vmaxv;
1987    AcrossVec = true; ExtendEle = false; s = "vmaxv"; break;
1988  case AArch64::BI__builtin_neon_vminvq_f32:
1989    Int = Intrinsic::aarch64_neon_vminv;
1990    AcrossVec = true; ExtendEle = false; s = "vminv"; break;
1991  case AArch64::BI__builtin_neon_vmaxnmvq_f32:
1992    Int = Intrinsic::aarch64_neon_vmaxnmv;
1993    AcrossVec = true; ExtendEle = false; s = "vmaxnmv"; break;
1994  case AArch64::BI__builtin_neon_vminnmvq_f32:
1995    Int = Intrinsic::aarch64_neon_vminnmv;
1996    AcrossVec = true; ExtendEle = false; s = "vminnmv"; break;
1997  // Scalar Integer Saturating Doubling Multiply Half High
1998  case AArch64::BI__builtin_neon_vqdmulhh_s16:
1999  case AArch64::BI__builtin_neon_vqdmulhs_s32:
2000    Int = Intrinsic::arm_neon_vqdmulh;
2001    s = "vqdmulh"; OverloadInt = true; break;
2002  // Scalar Integer Saturating Rounding Doubling Multiply Half High
2003  case AArch64::BI__builtin_neon_vqrdmulhh_s16:
2004  case AArch64::BI__builtin_neon_vqrdmulhs_s32:
2005    Int = Intrinsic::arm_neon_vqrdmulh;
2006    s = "vqrdmulh"; OverloadInt = true; break;
2007  // Scalar Floating-point Multiply Extended
2008  case AArch64::BI__builtin_neon_vmulxs_f32:
2009  case AArch64::BI__builtin_neon_vmulxd_f64:
2010    Int = Intrinsic::aarch64_neon_vmulx;
2011    s = "vmulx"; OverloadInt = true; break;
2012  // Scalar Floating-point Reciprocal Step
2013  case AArch64::BI__builtin_neon_vrecpss_f32:
2014  case AArch64::BI__builtin_neon_vrecpsd_f64:
2015    Int = Intrinsic::arm_neon_vrecps;
2016    s = "vrecps"; OverloadInt = true; break;
2017  // Scalar Floating-point Reciprocal Square Root Step
2018  case AArch64::BI__builtin_neon_vrsqrtss_f32:
2019  case AArch64::BI__builtin_neon_vrsqrtsd_f64:
2020    Int = Intrinsic::arm_neon_vrsqrts;
2021    s = "vrsqrts"; OverloadInt = true; break;
2022  // Scalar Signed Integer Convert To Floating-point
2023  case AArch64::BI__builtin_neon_vcvts_f32_s32:
2024    Int = Intrinsic::aarch64_neon_vcvtf32_s32;
2025    s = "vcvtf"; OverloadInt = false; break;
2026  case AArch64::BI__builtin_neon_vcvtd_f64_s64:
2027    Int = Intrinsic::aarch64_neon_vcvtf64_s64;
2028    s = "vcvtf"; OverloadInt = false; break;
2029  // Scalar Unsigned Integer Convert To Floating-point
2030  case AArch64::BI__builtin_neon_vcvts_f32_u32:
2031    Int = Intrinsic::aarch64_neon_vcvtf32_u32;
2032    s = "vcvtf"; OverloadInt = false; break;
2033  case AArch64::BI__builtin_neon_vcvtd_f64_u64:
2034    Int = Intrinsic::aarch64_neon_vcvtf64_u64;
2035    s = "vcvtf"; OverloadInt = false; break;
2036  // Scalar Floating-point Reciprocal Estimate
2037  case AArch64::BI__builtin_neon_vrecpes_f32:
2038  case AArch64::BI__builtin_neon_vrecped_f64:
2039    Int = Intrinsic::arm_neon_vrecpe;
2040    s = "vrecpe"; OverloadInt = true; break;
2041  // Scalar Floating-point Reciprocal Exponent
2042  case AArch64::BI__builtin_neon_vrecpxs_f32:
2043  case AArch64::BI__builtin_neon_vrecpxd_f64:
2044    Int = Intrinsic::aarch64_neon_vrecpx;
2045    s = "vrecpx"; OverloadInt = true; break;
2046  // Scalar Floating-point Reciprocal Square Root Estimate
2047  case AArch64::BI__builtin_neon_vrsqrtes_f32:
2048  case AArch64::BI__builtin_neon_vrsqrted_f64:
2049    Int = Intrinsic::arm_neon_vrsqrte;
2050    s = "vrsqrte"; OverloadInt = true; break;
2051  // Scalar Compare Equal
2052  case AArch64::BI__builtin_neon_vceqd_s64:
2053  case AArch64::BI__builtin_neon_vceqd_u64:
2054    Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
2055    OverloadCmpInt = true; break;
2056  // Scalar Compare Equal To Zero
2057  case AArch64::BI__builtin_neon_vceqzd_s64:
2058  case AArch64::BI__builtin_neon_vceqzd_u64:
2059    Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
2060    // Add implicit zero operand.
2061    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2062    OverloadCmpInt = true; break;
2063  // Scalar Compare Greater Than or Equal
2064  case AArch64::BI__builtin_neon_vcged_s64:
2065    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
2066    OverloadCmpInt = true; break;
2067  case AArch64::BI__builtin_neon_vcged_u64:
2068    Int = Intrinsic::aarch64_neon_vchs; s = "vcge";
2069    OverloadCmpInt = true; break;
2070  // Scalar Compare Greater Than or Equal To Zero
2071  case AArch64::BI__builtin_neon_vcgezd_s64:
2072    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
2073    // Add implicit zero operand.
2074    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2075    OverloadCmpInt = true; break;
2076  // Scalar Compare Greater Than
2077  case AArch64::BI__builtin_neon_vcgtd_s64:
2078    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
2079    OverloadCmpInt = true; break;
2080  case AArch64::BI__builtin_neon_vcgtd_u64:
2081    Int = Intrinsic::aarch64_neon_vchi; s = "vcgt";
2082    OverloadCmpInt = true; break;
2083  // Scalar Compare Greater Than Zero
2084  case AArch64::BI__builtin_neon_vcgtzd_s64:
2085    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
2086    // Add implicit zero operand.
2087    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2088    OverloadCmpInt = true; break;
2089  // Scalar Compare Less Than or Equal
2090  case AArch64::BI__builtin_neon_vcled_s64:
2091    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
2092    OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
2093  case AArch64::BI__builtin_neon_vcled_u64:
2094    Int = Intrinsic::aarch64_neon_vchs; s = "vchs";
2095    OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
2096  // Scalar Compare Less Than or Equal To Zero
2097  case AArch64::BI__builtin_neon_vclezd_s64:
2098    Int = Intrinsic::aarch64_neon_vclez; s = "vcle";
2099    // Add implicit zero operand.
2100    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2101    OverloadCmpInt = true; break;
2102  // Scalar Compare Less Than
2103  case AArch64::BI__builtin_neon_vcltd_s64:
2104    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
2105    OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
2106  case AArch64::BI__builtin_neon_vcltd_u64:
2107    Int = Intrinsic::aarch64_neon_vchi; s = "vchi";
2108    OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
2109  // Scalar Compare Less Than Zero
2110  case AArch64::BI__builtin_neon_vcltzd_s64:
2111    Int = Intrinsic::aarch64_neon_vcltz; s = "vclt";
2112    // Add implicit zero operand.
2113    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2114    OverloadCmpInt = true; break;
2115  // Scalar Floating-point Compare Equal
2116  case AArch64::BI__builtin_neon_vceqs_f32:
2117  case AArch64::BI__builtin_neon_vceqd_f64:
2118    Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
2119    OverloadCmpInt = true; break;
2120  // Scalar Floating-point Compare Equal To Zero
2121  case AArch64::BI__builtin_neon_vceqzs_f32:
2122  case AArch64::BI__builtin_neon_vceqzd_f64:
2123    Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
2124    // Add implicit zero operand.
2125    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2126    OverloadCmpInt = true; break;
2127  // Scalar Floating-point Compare Greater Than Or Equal
2128  case AArch64::BI__builtin_neon_vcges_f32:
2129  case AArch64::BI__builtin_neon_vcged_f64:
2130    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
2131    OverloadCmpInt = true; break;
2132  // Scalar Floating-point Compare Greater Than Or Equal To Zero
2133  case AArch64::BI__builtin_neon_vcgezs_f32:
2134  case AArch64::BI__builtin_neon_vcgezd_f64:
2135    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
2136    // Add implicit zero operand.
2137    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2138    OverloadCmpInt = true; break;
2139  // Scalar Floating-point Compare Greater Than
2140  case AArch64::BI__builtin_neon_vcgts_f32:
2141  case AArch64::BI__builtin_neon_vcgtd_f64:
2142    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
2143    OverloadCmpInt = true; break;
2144  // Scalar Floating-point Compare Greater Than Zero
2145  case AArch64::BI__builtin_neon_vcgtzs_f32:
2146  case AArch64::BI__builtin_neon_vcgtzd_f64:
2147    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
2148    // Add implicit zero operand.
2149    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2150    OverloadCmpInt = true; break;
2151  // Scalar Floating-point Compare Less Than or Equal
2152  case AArch64::BI__builtin_neon_vcles_f32:
2153  case AArch64::BI__builtin_neon_vcled_f64:
2154    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
2155    OverloadCmpInt = true; break;
2156  // Scalar Floating-point Compare Less Than Or Equal To Zero
2157  case AArch64::BI__builtin_neon_vclezs_f32:
2158  case AArch64::BI__builtin_neon_vclezd_f64:
2159    Int = Intrinsic::aarch64_neon_vclez; s = "vcle";
2160    // Add implicit zero operand.
2161    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2162    OverloadCmpInt = true; break;
2163  // Scalar Floating-point Compare Less Than
2164  case AArch64::BI__builtin_neon_vclts_f32:
2165  case AArch64::BI__builtin_neon_vcltd_f64:
2166    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
2167    OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
2168  // Scalar Floating-point Compare Less Than Zero
2169  case AArch64::BI__builtin_neon_vcltzs_f32:
2170  case AArch64::BI__builtin_neon_vcltzd_f64:
2171    Int = Intrinsic::aarch64_neon_vcltz; s = "vclt";
2172    // Add implicit zero operand.
2173    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2174    OverloadCmpInt = true; break;
2175  // Scalar Floating-point Absolute Compare Greater Than Or Equal
2176  case AArch64::BI__builtin_neon_vcages_f32:
2177  case AArch64::BI__builtin_neon_vcaged_f64:
2178    Int = Intrinsic::aarch64_neon_vcage; s = "vcage";
2179    OverloadCmpInt = true; break;
2180  // Scalar Floating-point Absolute Compare Greater Than
2181  case AArch64::BI__builtin_neon_vcagts_f32:
2182  case AArch64::BI__builtin_neon_vcagtd_f64:
2183    Int = Intrinsic::aarch64_neon_vcagt; s = "vcagt";
2184    OverloadCmpInt = true; break;
2185  // Scalar Floating-point Absolute Compare Less Than Or Equal
2186  case AArch64::BI__builtin_neon_vcales_f32:
2187  case AArch64::BI__builtin_neon_vcaled_f64:
2188    Int = Intrinsic::aarch64_neon_vcage; s = "vcage";
2189    OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
2190  // Scalar Floating-point Absolute Compare Less Than
2191  case AArch64::BI__builtin_neon_vcalts_f32:
2192  case AArch64::BI__builtin_neon_vcaltd_f64:
2193    Int = Intrinsic::aarch64_neon_vcagt; s = "vcalt";
2194    OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
2195  // Scalar Compare Bitwise Test Bits
2196  case AArch64::BI__builtin_neon_vtstd_s64:
2197  case AArch64::BI__builtin_neon_vtstd_u64:
2198    Int = Intrinsic::aarch64_neon_vtstd; s = "vtst";
2199    OverloadCmpInt = true; break;
2200  // Scalar Absolute Value
2201  case AArch64::BI__builtin_neon_vabsd_s64:
2202    Int = Intrinsic::aarch64_neon_vabs;
2203    s = "vabs"; OverloadInt = false; break;
2204  // Scalar Signed Saturating Absolute Value
2205  case AArch64::BI__builtin_neon_vqabsb_s8:
2206  case AArch64::BI__builtin_neon_vqabsh_s16:
2207  case AArch64::BI__builtin_neon_vqabss_s32:
2208  case AArch64::BI__builtin_neon_vqabsd_s64:
2209    Int = Intrinsic::arm_neon_vqabs;
2210    s = "vqabs"; OverloadInt = true; break;
2211  // Scalar Negate
2212  case AArch64::BI__builtin_neon_vnegd_s64:
2213    Int = Intrinsic::aarch64_neon_vneg;
2214    s = "vneg"; OverloadInt = false; break;
2215  // Scalar Signed Saturating Negate
2216  case AArch64::BI__builtin_neon_vqnegb_s8:
2217  case AArch64::BI__builtin_neon_vqnegh_s16:
2218  case AArch64::BI__builtin_neon_vqnegs_s32:
2219  case AArch64::BI__builtin_neon_vqnegd_s64:
2220    Int = Intrinsic::arm_neon_vqneg;
2221    s = "vqneg"; OverloadInt = true; break;
2222  // Scalar Signed Saturating Accumulated of Unsigned Value
2223  case AArch64::BI__builtin_neon_vuqaddb_s8:
2224  case AArch64::BI__builtin_neon_vuqaddh_s16:
2225  case AArch64::BI__builtin_neon_vuqadds_s32:
2226  case AArch64::BI__builtin_neon_vuqaddd_s64:
2227    Int = Intrinsic::aarch64_neon_vuqadd;
2228    s = "vuqadd"; OverloadInt = true; break;
2229  // Scalar Unsigned Saturating Accumulated of Signed Value
2230  case AArch64::BI__builtin_neon_vsqaddb_u8:
2231  case AArch64::BI__builtin_neon_vsqaddh_u16:
2232  case AArch64::BI__builtin_neon_vsqadds_u32:
2233  case AArch64::BI__builtin_neon_vsqaddd_u64:
2234    Int = Intrinsic::aarch64_neon_vsqadd;
2235    s = "vsqadd"; OverloadInt = true; break;
2236  // Signed Saturating Doubling Multiply-Add Long
2237  case AArch64::BI__builtin_neon_vqdmlalh_s16:
2238  case AArch64::BI__builtin_neon_vqdmlals_s32:
2239    Int = Intrinsic::aarch64_neon_vqdmlal;
2240    s = "vqdmlal"; OverloadWideInt = true; break;
2241  // Signed Saturating Doubling Multiply-Subtract Long
2242  case AArch64::BI__builtin_neon_vqdmlslh_s16:
2243  case AArch64::BI__builtin_neon_vqdmlsls_s32:
2244    Int = Intrinsic::aarch64_neon_vqdmlsl;
2245    s = "vqdmlsl"; OverloadWideInt = true; break;
2246  // Signed Saturating Doubling Multiply Long
2247  case AArch64::BI__builtin_neon_vqdmullh_s16:
2248  case AArch64::BI__builtin_neon_vqdmulls_s32:
2249    Int = Intrinsic::aarch64_neon_vqdmull;
2250    s = "vqdmull"; OverloadWideInt = true; break;
2251  // Scalar Signed Saturating Extract Unsigned Narrow
2252  case AArch64::BI__builtin_neon_vqmovunh_s16:
2253  case AArch64::BI__builtin_neon_vqmovuns_s32:
2254  case AArch64::BI__builtin_neon_vqmovund_s64:
2255    Int = Intrinsic::arm_neon_vqmovnsu;
2256    s = "vqmovun"; OverloadNarrowInt = true; break;
2257  // Scalar Signed Saturating Extract Narrow
2258  case AArch64::BI__builtin_neon_vqmovnh_s16:
2259  case AArch64::BI__builtin_neon_vqmovns_s32:
2260  case AArch64::BI__builtin_neon_vqmovnd_s64:
2261    Int = Intrinsic::arm_neon_vqmovns;
2262    s = "vqmovn"; OverloadNarrowInt = true; break;
2263  // Scalar Unsigned Saturating Extract Narrow
2264  case AArch64::BI__builtin_neon_vqmovnh_u16:
2265  case AArch64::BI__builtin_neon_vqmovns_u32:
2266  case AArch64::BI__builtin_neon_vqmovnd_u64:
2267    Int = Intrinsic::arm_neon_vqmovnu;
2268    s = "vqmovn"; OverloadNarrowInt = true; break;
2269  // Scalar Signed Shift Right (Immediate)
2270  case AArch64::BI__builtin_neon_vshrd_n_s64:
2271    Int = Intrinsic::aarch64_neon_vshrds_n;
2272    s = "vsshr"; OverloadInt = false; break;
2273  // Scalar Unsigned Shift Right (Immediate)
2274  case AArch64::BI__builtin_neon_vshrd_n_u64:
2275    Int = Intrinsic::aarch64_neon_vshrdu_n;
2276    s = "vushr"; OverloadInt = false; break;
2277  // Scalar Signed Rounding Shift Right (Immediate)
2278  case AArch64::BI__builtin_neon_vrshrd_n_s64:
2279    Int = Intrinsic::aarch64_neon_vrshrds_n;
2280    s = "vsrshr"; OverloadInt = false; break;
2281  // Scalar Unsigned Rounding Shift Right (Immediate)
2282  case AArch64::BI__builtin_neon_vrshrd_n_u64:
2283    Int = Intrinsic::aarch64_neon_vrshrdu_n;
2284    s = "vurshr"; OverloadInt = false; break;
2285  // Scalar Signed Shift Right and Accumulate (Immediate)
2286  case AArch64::BI__builtin_neon_vsrad_n_s64:
2287    Int = Intrinsic::aarch64_neon_vsrads_n;
2288    s = "vssra"; OverloadInt = false; break;
2289  // Scalar Unsigned Shift Right and Accumulate (Immediate)
2290  case AArch64::BI__builtin_neon_vsrad_n_u64:
2291    Int = Intrinsic::aarch64_neon_vsradu_n;
2292    s = "vusra"; OverloadInt = false; break;
2293  // Scalar Signed Rounding Shift Right and Accumulate (Immediate)
2294  case AArch64::BI__builtin_neon_vrsrad_n_s64:
2295    Int = Intrinsic::aarch64_neon_vrsrads_n;
2296    s = "vsrsra"; OverloadInt = false; break;
2297  // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
2298  case AArch64::BI__builtin_neon_vrsrad_n_u64:
2299    Int = Intrinsic::aarch64_neon_vrsradu_n;
2300    s = "vursra"; OverloadInt = false; break;
2301  // Scalar Signed/Unsigned Shift Left (Immediate)
2302  case AArch64::BI__builtin_neon_vshld_n_s64:
2303  case AArch64::BI__builtin_neon_vshld_n_u64:
2304    Int = Intrinsic::aarch64_neon_vshld_n;
2305    s = "vshl"; OverloadInt = false; break;
2306  // Signed Saturating Shift Left (Immediate)
2307  case AArch64::BI__builtin_neon_vqshlb_n_s8:
2308  case AArch64::BI__builtin_neon_vqshlh_n_s16:
2309  case AArch64::BI__builtin_neon_vqshls_n_s32:
2310  case AArch64::BI__builtin_neon_vqshld_n_s64:
2311    Int = Intrinsic::aarch64_neon_vqshls_n;
2312    s = "vsqshl"; OverloadInt = true; break;
2313  // Unsigned Saturating Shift Left (Immediate)
2314  case AArch64::BI__builtin_neon_vqshlb_n_u8:
2315  case AArch64::BI__builtin_neon_vqshlh_n_u16:
2316  case AArch64::BI__builtin_neon_vqshls_n_u32:
2317  case AArch64::BI__builtin_neon_vqshld_n_u64:
2318    Int = Intrinsic::aarch64_neon_vqshlu_n;
2319    s = "vuqshl"; OverloadInt = true; break;
2320  // Signed Saturating Shift Left Unsigned (Immediate)
2321  case AArch64::BI__builtin_neon_vqshlub_n_s8:
2322  case AArch64::BI__builtin_neon_vqshluh_n_s16:
2323  case AArch64::BI__builtin_neon_vqshlus_n_s32:
2324  case AArch64::BI__builtin_neon_vqshlud_n_s64:
2325    Int = Intrinsic::aarch64_neon_vqshlus_n;
2326    s = "vsqshlu"; OverloadInt = true; break;
2327  // Shift Right And Insert (Immediate)
2328  case AArch64::BI__builtin_neon_vsrid_n_s64:
2329  case AArch64::BI__builtin_neon_vsrid_n_u64:
2330    Int = Intrinsic::aarch64_neon_vsrid_n;
2331    s = "vsri"; OverloadInt = false; break;
2332  // Shift Left And Insert (Immediate)
2333  case AArch64::BI__builtin_neon_vslid_n_s64:
2334  case AArch64::BI__builtin_neon_vslid_n_u64:
2335    Int = Intrinsic::aarch64_neon_vslid_n;
2336    s = "vsli"; OverloadInt = false; break;
2337  // Signed Saturating Shift Right Narrow (Immediate)
2338  case AArch64::BI__builtin_neon_vqshrnh_n_s16:
2339  case AArch64::BI__builtin_neon_vqshrns_n_s32:
2340  case AArch64::BI__builtin_neon_vqshrnd_n_s64:
2341    Int = Intrinsic::aarch64_neon_vsqshrn;
2342    s = "vsqshrn"; OverloadInt = true; break;
2343  // Unsigned Saturating Shift Right Narrow (Immediate)
2344  case AArch64::BI__builtin_neon_vqshrnh_n_u16:
2345  case AArch64::BI__builtin_neon_vqshrns_n_u32:
2346  case AArch64::BI__builtin_neon_vqshrnd_n_u64:
2347    Int = Intrinsic::aarch64_neon_vuqshrn;
2348    s = "vuqshrn"; OverloadInt = true; break;
2349  // Signed Saturating Rounded Shift Right Narrow (Immediate)
2350  case AArch64::BI__builtin_neon_vqrshrnh_n_s16:
2351  case AArch64::BI__builtin_neon_vqrshrns_n_s32:
2352  case AArch64::BI__builtin_neon_vqrshrnd_n_s64:
2353    Int = Intrinsic::aarch64_neon_vsqrshrn;
2354    s = "vsqrshrn"; OverloadInt = true; break;
2355  // Unsigned Saturating Rounded Shift Right Narrow (Immediate)
2356  case AArch64::BI__builtin_neon_vqrshrnh_n_u16:
2357  case AArch64::BI__builtin_neon_vqrshrns_n_u32:
2358  case AArch64::BI__builtin_neon_vqrshrnd_n_u64:
2359    Int = Intrinsic::aarch64_neon_vuqrshrn;
2360    s = "vuqrshrn"; OverloadInt = true; break;
2361  // Signed Saturating Shift Right Unsigned Narrow (Immediate)
2362  case AArch64::BI__builtin_neon_vqshrunh_n_s16:
2363  case AArch64::BI__builtin_neon_vqshruns_n_s32:
2364  case AArch64::BI__builtin_neon_vqshrund_n_s64:
2365    Int = Intrinsic::aarch64_neon_vsqshrun;
2366    s = "vsqshrun"; OverloadInt = true; break;
2367  // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
2368  case AArch64::BI__builtin_neon_vqrshrunh_n_s16:
2369  case AArch64::BI__builtin_neon_vqrshruns_n_s32:
2370  case AArch64::BI__builtin_neon_vqrshrund_n_s64:
2371    Int = Intrinsic::aarch64_neon_vsqrshrun;
2372    s = "vsqrshrun"; OverloadInt = true; break;
2373  // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
2374  case AArch64::BI__builtin_neon_vcvts_n_f32_s32:
2375    Int = Intrinsic::aarch64_neon_vcvtf32_n_s32;
2376    s = "vcvtf"; OverloadInt = false; break;
2377  case AArch64::BI__builtin_neon_vcvtd_n_f64_s64:
2378    Int = Intrinsic::aarch64_neon_vcvtf64_n_s64;
2379    s = "vcvtf"; OverloadInt = false; break;
2380  // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
2381  case AArch64::BI__builtin_neon_vcvts_n_f32_u32:
2382    Int = Intrinsic::aarch64_neon_vcvtf32_n_u32;
2383    s = "vcvtf"; OverloadInt = false; break;
2384  case AArch64::BI__builtin_neon_vcvtd_n_f64_u64:
2385    Int = Intrinsic::aarch64_neon_vcvtf64_n_u64;
2386    s = "vcvtf"; OverloadInt = false; break;
2387  }
2388
2389  if (!Int)
2390    return 0;
2391
2392  // These AArch64 scalar builtins return a scalar type but are mapped to
2393  // AArch64 intrinsics that operate on one-element vector types; build the
2394  // correctly overloaded intrinsic declaration below.
2395  Function *F = 0;
2396  if (AcrossVec) {
2397    // Derive the argument vector type from the last argument.
2398    const Expr *Arg = E->getArg(E->getNumArgs()-1);
2399    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
2400    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2401    llvm::Type *ETy = VTy->getElementType();
2402    llvm::VectorType *RTy = llvm::VectorType::get(ETy, 1);
2403
2404    if (ExtendEle) {
2405      assert(!ETy->isFloatingPointTy());
2406      RTy = llvm::VectorType::getExtendedElementVectorType(RTy);
2407    }
2408
2409    llvm::Type *Tys[2] = {RTy, VTy};
2410    F = CGF.CGM.getIntrinsic(Int, Tys);
2411    assert(E->getNumArgs() == 1);
2412  } else if (OverloadInt) {
2413    // Determine the type of this overloaded AArch64 intrinsic
2414    llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
2415    llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
2416    assert(VTy);
2417
2418    F = CGF.CGM.getIntrinsic(Int, VTy);
2419  } else if (OverloadWideInt || OverloadNarrowInt) {
2420    // Determine the type of this overloaded AArch64 intrinsic
2421    const Expr *Arg = E->getArg(E->getNumArgs()-1);
2422    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
2423    llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
2424    llvm::VectorType *RTy = OverloadWideInt ?
2425      llvm::VectorType::getExtendedElementVectorType(VTy) :
2426      llvm::VectorType::getTruncatedElementVectorType(VTy);
2427    F = CGF.CGM.getIntrinsic(Int, RTy);
2428  } else if (OverloadCmpInt) {
2429    // Determine the types of this overloaded AArch64 intrinsic
2430    SmallVector<llvm::Type *, 3> Tys;
2431    const Expr *Arg = E->getArg(E->getNumArgs()-1);
2432    llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
2433    llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
2434    Tys.push_back(VTy);
2435    Ty = CGF.ConvertType(Arg->getType());
2436    VTy = llvm::VectorType::get(Ty, 1);
2437    Tys.push_back(VTy);
2438    Tys.push_back(VTy);
2439
2440    F = CGF.CGM.getIntrinsic(Int, Tys);
2441  } else
2442    F = CGF.CGM.getIntrinsic(Int);
2443
2444  Value *Result = CGF.EmitNeonCall(F, Ops, s);
2445  llvm::Type *ResultType = CGF.ConvertType(E->getType());
2446  // The AArch64 intrinsic returns a one-element vector; bitcast it back to
2447  // the scalar type the builtin expects.
2448  return CGF.Builder.CreateBitCast(Result, ResultType, s);
2449}
2450
2451Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
2452                                                     const CallExpr *E) {
2453
2454  // Process AArch64 scalar builtins
2455  if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E))
2456    return Result;
2457
2458  if (BuiltinID == AArch64::BI__clear_cache) {
2459    assert(E->getNumArgs() == 2 &&
2460           "Variadic __clear_cache slipped through on AArch64");
2461
2462    const FunctionDecl *FD = E->getDirectCallee();
2463    SmallVector<Value *, 2> Ops;
2464    for (unsigned i = 0; i < E->getNumArgs(); i++)
2465      Ops.push_back(EmitScalarExpr(E->getArg(i)));
2466    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2467    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2468    StringRef Name = FD->getName();
2469    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2470  }
2471
2472  SmallVector<Value *, 4> Ops;
2473  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
2474    Ops.push_back(EmitScalarExpr(E->getArg(i)));
2475  }
2476
2477  // Get the last argument, which specifies the vector type.
2478  llvm::APSInt Result;
2479  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
2480  if (!Arg->isIntegerConstantExpr(Result, getContext()))
2481    return 0;
2482
2483  // Determine the type of this overloaded NEON intrinsic.
2484  NeonTypeFlags Type(Result.getZExtValue());
2485  bool usgn = Type.isUnsigned();
2486
2487  llvm::VectorType *VTy = GetNeonType(this, Type);
2488  llvm::Type *Ty = VTy;
2489  if (!Ty)
2490    return 0;
2491
2492  unsigned Int;
2493  switch (BuiltinID) {
2494  default:
2495    return 0;
2496
2497  // AArch64 builtins mapping to legacy ARM v7 builtins.
2498  // FIXME: the mapped builtins listed correspond to what has been tested
2499  // in aarch64-neon-intrinsics.c so far.
2500  case AArch64::BI__builtin_neon_vuzp_v:
2501    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vuzp_v, E);
2502  case AArch64::BI__builtin_neon_vuzpq_v:
2503    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vuzpq_v, E);
2504  case AArch64::BI__builtin_neon_vzip_v:
2505    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vzip_v, E);
2506  case AArch64::BI__builtin_neon_vzipq_v:
2507    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vzipq_v, E);
2508  case AArch64::BI__builtin_neon_vtrn_v:
2509    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtrn_v, E);
2510  case AArch64::BI__builtin_neon_vtrnq_v:
2511    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtrnq_v, E);
2512  case AArch64::BI__builtin_neon_vext_v:
2513    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vext_v, E);
2514  case AArch64::BI__builtin_neon_vextq_v:
2515    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vextq_v, E);
2516  case AArch64::BI__builtin_neon_vmul_v:
2517    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmul_v, E);
2518  case AArch64::BI__builtin_neon_vmulq_v:
2519    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmulq_v, E);
2520  case AArch64::BI__builtin_neon_vabd_v:
2521    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabd_v, E);
2522  case AArch64::BI__builtin_neon_vabdq_v:
2523    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabdq_v, E);
2524  case AArch64::BI__builtin_neon_vfma_v:
2525    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfma_v, E);
2526  case AArch64::BI__builtin_neon_vfmaq_v:
2527    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfmaq_v, E);
2528  case AArch64::BI__builtin_neon_vbsl_v:
2529    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbsl_v, E);
2530  case AArch64::BI__builtin_neon_vbslq_v:
2531    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbslq_v, E);
2532  case AArch64::BI__builtin_neon_vrsqrts_v:
2533    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrts_v, E);
2534  case AArch64::BI__builtin_neon_vrsqrtsq_v:
2535    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrtsq_v, E);
2536  case AArch64::BI__builtin_neon_vrecps_v:
2537    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecps_v, E);
2538  case AArch64::BI__builtin_neon_vrecpsq_v:
2539    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpsq_v, E);
2540  case AArch64::BI__builtin_neon_vcage_v:
2541    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcage_v, E);
2542  case AArch64::BI__builtin_neon_vcale_v:
2543    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcale_v, E);
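  // vcaleq(a, b) is handled as vcageq(b, a): swap the operands and fall through.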
2544  case AArch64::BI__builtin_neon_vcaleq_v:
2545    std::swap(Ops[0], Ops[1]);
2546  case AArch64::BI__builtin_neon_vcageq_v: {
2547    Function *F;
2548    if (VTy->getElementType()->isIntegerTy(64))
2549      F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgeq);
2550    else
2551      F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
2552    return EmitNeonCall(F, Ops, "vcage");
2553  }
2554  case AArch64::BI__builtin_neon_vcalt_v:
2555    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcalt_v, E);
2556  case AArch64::BI__builtin_neon_vcagt_v:
2557    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcagt_v, E);
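  // vcaltq(a, b) is handled as vcagtq(b, a): swap the operands and fall through.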
2558  case AArch64::BI__builtin_neon_vcaltq_v:
2559    std::swap(Ops[0], Ops[1]);
2560  case AArch64::BI__builtin_neon_vcagtq_v: {
2561    Function *F;
2562    if (VTy->getElementType()->isIntegerTy(64))
2563      F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgtq);
2564    else
2565      F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
2566    return EmitNeonCall(F, Ops, "vcagt");
2567  }
2568  case AArch64::BI__builtin_neon_vtst_v:
2569    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtst_v, E);
2570  case AArch64::BI__builtin_neon_vtstq_v:
2571    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtstq_v, E);
2572  case AArch64::BI__builtin_neon_vhadd_v:
2573    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhadd_v, E);
2574  case AArch64::BI__builtin_neon_vhaddq_v:
2575    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhaddq_v, E);
2576  case AArch64::BI__builtin_neon_vhsub_v:
2577    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsub_v, E);
2578  case AArch64::BI__builtin_neon_vhsubq_v:
2579    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsubq_v, E);
2580  case AArch64::BI__builtin_neon_vrhadd_v:
2581    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhadd_v, E);
2582  case AArch64::BI__builtin_neon_vrhaddq_v:
2583    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhaddq_v, E);
2584  case AArch64::BI__builtin_neon_vqadd_v:
2585    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqadd_v, E);
2586  case AArch64::BI__builtin_neon_vqaddq_v:
2587    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqaddq_v, E);
2588  case AArch64::BI__builtin_neon_vqsub_v:
2589    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsub_v, E);
2590  case AArch64::BI__builtin_neon_vqsubq_v:
2591    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsubq_v, E);
2592  case AArch64::BI__builtin_neon_vshl_v:
2593    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_v, E);
2594  case AArch64::BI__builtin_neon_vshlq_v:
2595    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_v, E);
2596  case AArch64::BI__builtin_neon_vqshl_v:
2597    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_v, E);
2598  case AArch64::BI__builtin_neon_vqshlq_v:
2599    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_v, E);
2600  case AArch64::BI__builtin_neon_vrshl_v:
2601    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshl_v, E);
2602  case AArch64::BI__builtin_neon_vrshlq_v:
2603    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshlq_v, E);
2604  case AArch64::BI__builtin_neon_vqrshl_v:
2605    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshl_v, E);
2606  case AArch64::BI__builtin_neon_vqrshlq_v:
2607    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshlq_v, E);
2608  case AArch64::BI__builtin_neon_vaddhn_v:
2609    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vaddhn_v, E);
2610  case AArch64::BI__builtin_neon_vraddhn_v:
2611    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vraddhn_v, E);
2612  case AArch64::BI__builtin_neon_vsubhn_v:
2613    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsubhn_v, E);
2614  case AArch64::BI__builtin_neon_vrsubhn_v:
2615    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsubhn_v, E);
2616  case AArch64::BI__builtin_neon_vmull_v:
2617    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmull_v, E);
2618  case AArch64::BI__builtin_neon_vqdmull_v:
2619    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmull_v, E);
2620  case AArch64::BI__builtin_neon_vqdmlal_v:
2621    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlal_v, E);
2622  case AArch64::BI__builtin_neon_vqdmlsl_v:
2623    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlsl_v, E);
2624  case AArch64::BI__builtin_neon_vmax_v:
2625    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmax_v, E);
2626  case AArch64::BI__builtin_neon_vmaxq_v:
2627    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmaxq_v, E);
2628  case AArch64::BI__builtin_neon_vmin_v:
2629    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmin_v, E);
2630  case AArch64::BI__builtin_neon_vminq_v:
2631    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vminq_v, E);
2632  case AArch64::BI__builtin_neon_vpmax_v:
2633    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmax_v, E);
2634  case AArch64::BI__builtin_neon_vpmin_v:
2635    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmin_v, E);
2636  case AArch64::BI__builtin_neon_vpadd_v:
2637    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpadd_v, E);
2638  case AArch64::BI__builtin_neon_vqdmulh_v:
2639    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulh_v, E);
2640  case AArch64::BI__builtin_neon_vqdmulhq_v:
2641    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulhq_v, E);
2642  case AArch64::BI__builtin_neon_vqrdmulh_v:
2643    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulh_v, E);
2644  case AArch64::BI__builtin_neon_vqrdmulhq_v:
2645    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulhq_v, E);
2646
2647  // Shift by immediate
2648  case AArch64::BI__builtin_neon_vshr_n_v:
2649    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshr_n_v, E);
2650  case AArch64::BI__builtin_neon_vshrq_n_v:
2651    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshrq_n_v, E);
2652  case AArch64::BI__builtin_neon_vrshr_n_v:
2653  case AArch64::BI__builtin_neon_vrshrq_n_v:
2654    Int = usgn ? Intrinsic::aarch64_neon_vurshr
2655               : Intrinsic::aarch64_neon_vsrshr;
2656    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n");
2657  case AArch64::BI__builtin_neon_vsra_n_v:
2658    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsra_n_v, E);
2659  case AArch64::BI__builtin_neon_vsraq_n_v:
2660    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsraq_n_v, E);
2661  case AArch64::BI__builtin_neon_vrsra_n_v:
2662  case AArch64::BI__builtin_neon_vrsraq_n_v: {
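    // Rounding shift right of the second operand by the immediate, then accumulate into the first.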
2663    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2664    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2665    Int = usgn ? Intrinsic::aarch64_neon_vurshr
2666               : Intrinsic::aarch64_neon_vsrshr;
2667    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
2668    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
2669  }
2670  case AArch64::BI__builtin_neon_vshl_n_v:
2671    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_n_v, E);
2672  case AArch64::BI__builtin_neon_vshlq_n_v:
2673    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_n_v, E);
2674  case AArch64::BI__builtin_neon_vqshl_n_v:
2675    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_n_v, E);
2676  case AArch64::BI__builtin_neon_vqshlq_n_v:
2677    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_n_v, E);
2678  case AArch64::BI__builtin_neon_vqshlu_n_v:
2679  case AArch64::BI__builtin_neon_vqshluq_n_v:
2680    Int = Intrinsic::aarch64_neon_vsqshlu;
2681    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n");
2682  case AArch64::BI__builtin_neon_vsri_n_v:
2683  case AArch64::BI__builtin_neon_vsriq_n_v:
2684    Int = Intrinsic::aarch64_neon_vsri;
2685    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsri_n");
2686  case AArch64::BI__builtin_neon_vsli_n_v:
2687  case AArch64::BI__builtin_neon_vsliq_n_v:
2688    Int = Intrinsic::aarch64_neon_vsli;
2689    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsli_n");
2690  case AArch64::BI__builtin_neon_vshll_n_v: {
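    // Widening shift left: extend the narrow source to the wide result type, then shift by the immediate.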
2691    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
2692    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2693    if (usgn)
2694      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
2695    else
2696      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
2697    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
2698    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
2699  }
2700  case AArch64::BI__builtin_neon_vshrn_n_v: {
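    // Narrowing shift right: shift in the wide source type, then truncate to the narrow result type.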
2701    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
2702    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2703    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
2704    if (usgn)
2705      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
2706    else
2707      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
2708    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
2709  }
2710  case AArch64::BI__builtin_neon_vqshrun_n_v:
2711    Int = Intrinsic::aarch64_neon_vsqshrun;
2712    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
2713  case AArch64::BI__builtin_neon_vrshrn_n_v:
2714    Int = Intrinsic::aarch64_neon_vrshrn;
2715    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
2716  case AArch64::BI__builtin_neon_vqrshrun_n_v:
2717    Int = Intrinsic::aarch64_neon_vsqrshrun;
2718    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
2719  case AArch64::BI__builtin_neon_vqshrn_n_v:
2720    Int = usgn ? Intrinsic::aarch64_neon_vuqshrn
2721               : Intrinsic::aarch64_neon_vsqshrn;
2722    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
2723  case AArch64::BI__builtin_neon_vqrshrn_n_v:
2724    Int = usgn ? Intrinsic::aarch64_neon_vuqrshrn
2725               : Intrinsic::aarch64_neon_vsqrshrn;
2726    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
2727
2728  // Convert
2729  case AArch64::BI__builtin_neon_vmovl_v:
2730    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmovl_v, E);
2731  case AArch64::BI__builtin_neon_vcvt_n_f32_v:
2732    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_f32_v, E);
2733  case AArch64::BI__builtin_neon_vcvtq_n_f32_v:
2734    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_f32_v, E);
2735  case AArch64::BI__builtin_neon_vcvtq_n_f64_v: {
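    // Fixed-point to floating-point conversion; the ARM vcvtfx intrinsics are overloaded with the f64 vector result type here.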
2736    llvm::Type *FloatTy =
2737        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
2738    llvm::Type *Tys[2] = { FloatTy, Ty };
2739    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
2740               : Intrinsic::arm_neon_vcvtfxs2fp;
2741    Function *F = CGM.getIntrinsic(Int, Tys);
2742    return EmitNeonCall(F, Ops, "vcvt_n");
2743  }
2744  case AArch64::BI__builtin_neon_vcvt_n_s32_v:
2745    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_s32_v, E);
2746  case AArch64::BI__builtin_neon_vcvtq_n_s32_v:
2747    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_s32_v, E);
2748  case AArch64::BI__builtin_neon_vcvt_n_u32_v:
2749    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_u32_v, E);
2750  case AArch64::BI__builtin_neon_vcvtq_n_u32_v:
2751    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_u32_v, E);
2752  case AArch64::BI__builtin_neon_vcvtq_n_s64_v:
2753  case AArch64::BI__builtin_neon_vcvtq_n_u64_v: {
2754    llvm::Type *FloatTy =
2755        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
2756    llvm::Type *Tys[2] = { Ty, FloatTy };
2757    Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
2758               : Intrinsic::arm_neon_vcvtfp2fxs;
2759    Function *F = CGM.getIntrinsic(Int, Tys);
2760    return EmitNeonCall(F, Ops, "vcvt_n");
2761  }
2762
2763  // Load/Store
2764  case AArch64::BI__builtin_neon_vld1_v:
2765    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1_v, E);
2766  case AArch64::BI__builtin_neon_vld1q_v:
2767    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1q_v, E);
2768  case AArch64::BI__builtin_neon_vld2_v:
2769    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2_v, E);
2770  case AArch64::BI__builtin_neon_vld2q_v:
2771    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_v, E);
2772  case AArch64::BI__builtin_neon_vld3_v:
2773    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3_v, E);
2774  case AArch64::BI__builtin_neon_vld3q_v:
2775    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3q_v, E);
2776  case AArch64::BI__builtin_neon_vld4_v:
2777    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4_v, E);
2778  case AArch64::BI__builtin_neon_vld4q_v:
2779    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4q_v, E);
2780  case AArch64::BI__builtin_neon_vst1_v:
2781    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst1_v, E);
2782  case AArch64::BI__builtin_neon_vst1q_v:
2783    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst1q_v, E);
2784  case AArch64::BI__builtin_neon_vst2_v:
2785    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2_v, E);
2786  case AArch64::BI__builtin_neon_vst2q_v:
2787    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2q_v, E);
2788  case AArch64::BI__builtin_neon_vst3_v:
2789    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3_v, E);
2790  case AArch64::BI__builtin_neon_vst3q_v:
2791    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3q_v, E);
2792  case AArch64::BI__builtin_neon_vst4_v:
2793    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_v, E);
2794  case AArch64::BI__builtin_neon_vst4q_v:
2795    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_v, E);
2796
2797  // Crypto
2798  case AArch64::BI__builtin_neon_vaeseq_v:
2799    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aese, Ty),
2800                        Ops, "aese");
2801  case AArch64::BI__builtin_neon_vaesdq_v:
2802    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesd, Ty),
2803                        Ops, "aesd");
2804  case AArch64::BI__builtin_neon_vaesmcq_v:
2805    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesmc, Ty),
2806                        Ops, "aesmc");
2807  case AArch64::BI__builtin_neon_vaesimcq_v:
2808    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesimc, Ty),
2809                        Ops, "aesimc");
2810  case AArch64::BI__builtin_neon_vsha1su1q_v:
2811    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su1, Ty),
2812                        Ops, "sha1su1");
2813  case AArch64::BI__builtin_neon_vsha256su0q_v:
2814    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su0, Ty),
2815                        Ops, "sha256su0");
2816  case AArch64::BI__builtin_neon_vsha1su0q_v:
2817    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su0, Ty),
2818                        Ops, "sha1su0");
2819  case AArch64::BI__builtin_neon_vsha256hq_v:
2820    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h, Ty),
2821                        Ops, "sha256h");
2822  case AArch64::BI__builtin_neon_vsha256h2q_v:
2823    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h2, Ty),
2824                        Ops, "sha256h2");
2825  case AArch64::BI__builtin_neon_vsha256su1q_v:
2826    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su1, Ty),
2827                        Ops, "sha256su1");
2828
2829  // AArch64-only builtins
2830  case AArch64::BI__builtin_neon_vfma_lane_v:
2831  case AArch64::BI__builtin_neon_vfmaq_laneq_v: {
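    // Splat the selected lane of the last vector operand, then emit llvm.fma with the accumulator (Ops[0]) as the final argument.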
2832    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
2833    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2834    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2835
2836    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2837    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
2838    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
2839  }
2840  case AArch64::BI__builtin_neon_vfmaq_lane_v: {
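    // The lane operand is half the width of the result vector; bitcast it down and splat the chosen lane up to the full width.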
2841    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
2842    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2843    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2844
2845    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2846    llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
2847                                            VTy->getNumElements() / 2);
2848    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
2849    Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
2850                                               cast<ConstantInt>(Ops[3]));
2851    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
2852
2853    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
2854  }
2855  case AArch64::BI__builtin_neon_vfma_laneq_v: {
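    // The lane operand is twice the width of the result vector; bitcast it up and splat the chosen lane down to the result width.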
2856    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
2857    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2858    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2859
2860    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2861    llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
2862                                            VTy->getNumElements() * 2);
2863    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
2864    Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
2865                                               cast<ConstantInt>(Ops[3]));
2866    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
2867
2868    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
2869  }
2870  case AArch64::BI__builtin_neon_vfms_v:
2871  case AArch64::BI__builtin_neon_vfmsq_v: {
2872    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
2873    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2874    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2875    Ops[1] = Builder.CreateFNeg(Ops[1]);
2876    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2877
2878    // LLVM's fma intrinsic puts the accumulator in the last position, but the
2879    // AArch64 intrinsic has it first.
2880    return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
2881  }
2882  case AArch64::BI__builtin_neon_vmaxnm_v:
2883  case AArch64::BI__builtin_neon_vmaxnmq_v: {
2884    Int = Intrinsic::aarch64_neon_vmaxnm;
2885    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
2886  }
2887  case AArch64::BI__builtin_neon_vminnm_v:
2888  case AArch64::BI__builtin_neon_vminnmq_v: {
2889    Int = Intrinsic::aarch64_neon_vminnm;
2890    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
2891  }
2892  case AArch64::BI__builtin_neon_vpmaxnm_v:
2893  case AArch64::BI__builtin_neon_vpmaxnmq_v: {
2894    Int = Intrinsic::aarch64_neon_vpmaxnm;
2895    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
2896  }
2897  case AArch64::BI__builtin_neon_vpminnm_v:
2898  case AArch64::BI__builtin_neon_vpminnmq_v: {
2899    Int = Intrinsic::aarch64_neon_vpminnm;
2900    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
2901  }
2902  case AArch64::BI__builtin_neon_vpmaxq_v: {
2903    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
2904    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
2905  }
2906  case AArch64::BI__builtin_neon_vpminq_v: {
2907    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
2908    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
2909  }
2910  case AArch64::BI__builtin_neon_vpaddq_v: {
2911    Int = Intrinsic::arm_neon_vpadd;
2912    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpadd");
2913  }
2914  case AArch64::BI__builtin_neon_vmulx_v:
2915  case AArch64::BI__builtin_neon_vmulxq_v: {
2916    Int = Intrinsic::aarch64_neon_vmulx;
2917    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
2918  }
2919  }
2920}
2921
2922Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
2923                                           const CallExpr *E) {
2924  if (BuiltinID == ARM::BI__clear_cache) {
2925    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2926    const FunctionDecl *FD = E->getDirectCallee();
2927    SmallVector<Value*, 2> Ops;
2928    for (unsigned i = 0; i < 2; i++)
2929      Ops.push_back(EmitScalarExpr(E->getArg(i)));
2930    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2931    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2932    StringRef Name = FD->getName();
2933    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2934  }
2935
2936  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
2937      (BuiltinID == ARM::BI__builtin_arm_ldrex &&
2938       getContext().getTypeSize(E->getType()) == 64)) {
2939    Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2940
2941    Value *LdPtr = EmitScalarExpr(E->getArg(0));
2942    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
2943                                    "ldrexd");
2944
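    // ldrexd returns two i32 halves; recombine them into a 64-bit value, with element 1 supplying the high word.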
2945    Value *Val0 = Builder.CreateExtractValue(Val, 1);
2946    Value *Val1 = Builder.CreateExtractValue(Val, 0);
2947    Val0 = Builder.CreateZExt(Val0, Int64Ty);
2948    Val1 = Builder.CreateZExt(Val1, Int64Ty);
2949
2950    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
2951    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
2952    Val = Builder.CreateOr(Val, Val1);
2953    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
2954  }
2955
2956  if (BuiltinID == ARM::BI__builtin_arm_ldrex) {
2957    Value *LoadAddr = EmitScalarExpr(E->getArg(0));
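    // Perform the exclusive load as an integer of the result type's width, then convert back to the declared result type (pointer or integer).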
2958
2959    QualType Ty = E->getType();
2960    llvm::Type *RealResTy = ConvertType(Ty);
2961    llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
2962                                                  getContext().getTypeSize(Ty));
2963    LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
2964
2965    Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrex, LoadAddr->getType());
2966    Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2967
2968    if (RealResTy->isPointerTy())
2969      return Builder.CreateIntToPtr(Val, RealResTy);
2970    else {
2971      Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
2972      return Builder.CreateBitCast(Val, RealResTy);
2973    }
2974  }
2975
2976  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
2977      (BuiltinID == ARM::BI__builtin_arm_strex &&
2978       getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
2979    Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd);
2980    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL);
2981
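    // Split the 64-bit value into two i32 halves by spilling it to a temporary and reloading it as an { i32, i32 } pair.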
2982    Value *Tmp = CreateMemTemp(E->getArg(0)->getType());
2983    Value *Val = EmitScalarExpr(E->getArg(0));
2984    Builder.CreateStore(Val, Tmp);
2985
2986    Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
2987    Val = Builder.CreateLoad(LdPtr);
2988
2989    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
2990    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
2991    Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
2992    return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
2993  }
2994
2995  if (BuiltinID == ARM::BI__builtin_arm_strex) {
2996    Value *StoreVal = EmitScalarExpr(E->getArg(0));
2997    Value *StoreAddr = EmitScalarExpr(E->getArg(1));
2998
2999    QualType Ty = E->getArg(0)->getType();
3000    llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
3001                                                 getContext().getTypeSize(Ty));
3002    StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
3003
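    // Normalize the value being stored to i32: pointers via ptrtoint, narrower integers via zero-extension.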
3004    if (StoreVal->getType()->isPointerTy())
3005      StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
3006    else {
3007      StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
3008      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
3009    }
3010
3011    Function *F = CGM.getIntrinsic(Intrinsic::arm_strex, StoreAddr->getType());
3012    return Builder.CreateCall2(F, StoreVal, StoreAddr, "strex");
3013  }
3014
3015  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
3016    Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
3017    return Builder.CreateCall(F);
3018  }
3019
3020  if (BuiltinID == ARM::BI__builtin_arm_sevl) {
3021    Function *F = CGM.getIntrinsic(Intrinsic::arm_sevl);
3022    return Builder.CreateCall(F);
3023  }
3024
3025  // CRC32
3026  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
3027  switch (BuiltinID) {
3028  case ARM::BI__builtin_arm_crc32b:
3029    CRCIntrinsicID = Intrinsic::arm_crc32b; break;
3030  case ARM::BI__builtin_arm_crc32cb:
3031    CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
3032  case ARM::BI__builtin_arm_crc32h:
3033    CRCIntrinsicID = Intrinsic::arm_crc32h; break;
3034  case ARM::BI__builtin_arm_crc32ch:
3035    CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
3036  case ARM::BI__builtin_arm_crc32w:
3037  case ARM::BI__builtin_arm_crc32d:
3038    CRCIntrinsicID = Intrinsic::arm_crc32w; break;
3039  case ARM::BI__builtin_arm_crc32cw:
3040  case ARM::BI__builtin_arm_crc32cd:
3041    CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
3042  }
3043
3044  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
3045    Value *Arg0 = EmitScalarExpr(E->getArg(0));
3046    Value *Arg1 = EmitScalarExpr(E->getArg(1));
3047
3048    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
3049    // intrinsics, hence we need different codegen for these cases.
3050    if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
3051        BuiltinID == ARM::BI__builtin_arm_crc32cd) {
3052      Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
3053      Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
3054      Value *Arg1b = Builder.CreateLShr(Arg1, C1);
3055      Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
3056
3057      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3058      Value *Res = Builder.CreateCall2(F, Arg0, Arg1a);
3059      return Builder.CreateCall2(F, Res, Arg1b);
3060    } else {
3061      Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
3062
3063      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3064      return Builder.CreateCall2(F, Arg0, Arg1);
3065    }
3066  }
3067
3068  SmallVector<Value*, 4> Ops;
3069  llvm::Value *Align = 0;
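  // Gather the operands; for the load and store builtins below, also capture the pointer argument's alignment for later use.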
3070  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
3071    if (i == 0) {
3072      switch (BuiltinID) {
3073      case ARM::BI__builtin_neon_vld1_v:
3074      case ARM::BI__builtin_neon_vld1q_v:
3075      case ARM::BI__builtin_neon_vld1q_lane_v:
3076      case ARM::BI__builtin_neon_vld1_lane_v:
3077      case ARM::BI__builtin_neon_vld1_dup_v:
3078      case ARM::BI__builtin_neon_vld1q_dup_v:
3079      case ARM::BI__builtin_neon_vst1_v:
3080      case ARM::BI__builtin_neon_vst1q_v:
3081      case ARM::BI__builtin_neon_vst1q_lane_v:
3082      case ARM::BI__builtin_neon_vst1_lane_v:
3083      case ARM::BI__builtin_neon_vst2_v:
3084      case ARM::BI__builtin_neon_vst2q_v:
3085      case ARM::BI__builtin_neon_vst2_lane_v:
3086      case ARM::BI__builtin_neon_vst2q_lane_v:
3087      case ARM::BI__builtin_neon_vst3_v:
3088      case ARM::BI__builtin_neon_vst3q_v:
3089      case ARM::BI__builtin_neon_vst3_lane_v:
3090      case ARM::BI__builtin_neon_vst3q_lane_v:
3091      case ARM::BI__builtin_neon_vst4_v:
3092      case ARM::BI__builtin_neon_vst4q_v:
3093      case ARM::BI__builtin_neon_vst4_lane_v:
3094      case ARM::BI__builtin_neon_vst4q_lane_v:
3095        // Get the alignment for the argument in addition to the value;
3096        // we'll use it later.
3097        std::pair<llvm::Value*, unsigned> Src =
3098            EmitPointerWithAlignment(E->getArg(0));
3099        Ops.push_back(Src.first);
3100        Align = Builder.getInt32(Src.second);
3101        continue;
3102      }
3103    }
3104    if (i == 1) {
3105      switch (BuiltinID) {
3106      case ARM::BI__builtin_neon_vld2_v:
3107      case ARM::BI__builtin_neon_vld2q_v:
3108      case ARM::BI__builtin_neon_vld3_v:
3109      case ARM::BI__builtin_neon_vld3q_v:
3110      case ARM::BI__builtin_neon_vld4_v:
3111      case ARM::BI__builtin_neon_vld4q_v:
3112      case ARM::BI__builtin_neon_vld2_lane_v:
3113      case ARM::BI__builtin_neon_vld2q_lane_v:
3114      case ARM::BI__builtin_neon_vld3_lane_v:
3115      case ARM::BI__builtin_neon_vld3q_lane_v:
3116      case ARM::BI__builtin_neon_vld4_lane_v:
3117      case ARM::BI__builtin_neon_vld4q_lane_v:
3118      case ARM::BI__builtin_neon_vld2_dup_v:
3119      case ARM::BI__builtin_neon_vld3_dup_v:
3120      case ARM::BI__builtin_neon_vld4_dup_v:
3121        // Get the alignment for the argument in addition to the value;
3122        // we'll use it later.
3123        std::pair<llvm::Value*, unsigned> Src =
3124            EmitPointerWithAlignment(E->getArg(1));
3125        Ops.push_back(Src.first);
3126        Align = Builder.getInt32(Src.second);
3127        continue;
3128      }
3129    }
3130    Ops.push_back(EmitScalarExpr(E->getArg(i)));
3131  }
3132
3133  // vget_lane and vset_lane are not overloaded and do not have an extra
3134  // argument that specifies the vector type.
3135  switch (BuiltinID) {
3136  default: break;
3137  case ARM::BI__builtin_neon_vget_lane_i8:
3138  case ARM::BI__builtin_neon_vget_lane_i16:
3139  case ARM::BI__builtin_neon_vget_lane_i32:
3140  case ARM::BI__builtin_neon_vget_lane_i64:
3141  case ARM::BI__builtin_neon_vget_lane_f32:
3142  case ARM::BI__builtin_neon_vgetq_lane_i8:
3143  case ARM::BI__builtin_neon_vgetq_lane_i16:
3144  case ARM::BI__builtin_neon_vgetq_lane_i32:
3145  case ARM::BI__builtin_neon_vgetq_lane_i64:
3146  case ARM::BI__builtin_neon_vgetq_lane_f32:
3147    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
3148                                        "vget_lane");
3149  case ARM::BI__builtin_neon_vset_lane_i8:
3150  case ARM::BI__builtin_neon_vset_lane_i16:
3151  case ARM::BI__builtin_neon_vset_lane_i32:
3152  case ARM::BI__builtin_neon_vset_lane_i64:
3153  case ARM::BI__builtin_neon_vset_lane_f32:
3154  case ARM::BI__builtin_neon_vsetq_lane_i8:
3155  case ARM::BI__builtin_neon_vsetq_lane_i16:
3156  case ARM::BI__builtin_neon_vsetq_lane_i32:
3157  case ARM::BI__builtin_neon_vsetq_lane_i64:
3158  case ARM::BI__builtin_neon_vsetq_lane_f32:
3159    Ops.push_back(EmitScalarExpr(E->getArg(2)));
3160    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
3161  }
3162
3163  // Get the last argument, which specifies the vector type.
3164  llvm::APSInt Result;
3165  const Expr *Arg = E->getArg(E->getNumArgs()-1);
3166  if (!Arg->isIntegerConstantExpr(Result, getContext()))
3167    return 0;
3168
3169  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
3170      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
3171    // Determine the overloaded type of this builtin.
3172    llvm::Type *Ty;
3173    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
3174      Ty = FloatTy;
3175    else
3176      Ty = DoubleTy;
3177
3178    // Determine whether this is an unsigned conversion or not.
3179    bool usgn = Result.getZExtValue() == 1;
3180    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3181
3182    // Call the appropriate intrinsic.
3183    Function *F = CGM.getIntrinsic(Int, Ty);
3184    return Builder.CreateCall(F, Ops, "vcvtr");
3185  }
3186
3187  // Determine the type of this overloaded NEON intrinsic.
3188  NeonTypeFlags Type(Result.getZExtValue());
3189  bool usgn = Type.isUnsigned();
3190  bool quad = Type.isQuad();
3191  bool rightShift = false;
3192
3193  llvm::VectorType *VTy = GetNeonType(this, Type);
3194  llvm::Type *Ty = VTy;
3195  if (!Ty)
3196    return 0;
3197
3198  unsigned Int;
3199  switch (BuiltinID) {
3200  default: return 0;
3201  case ARM::BI__builtin_neon_vbsl_v:
3202  case ARM::BI__builtin_neon_vbslq_v:
3203    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty),
3204                        Ops, "vbsl");
3205  case ARM::BI__builtin_neon_vabd_v:
3206  case ARM::BI__builtin_neon_vabdq_v:
3207    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
3208    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
3209  case ARM::BI__builtin_neon_vabs_v:
3210  case ARM::BI__builtin_neon_vabsq_v:
3211    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty),
3212                        Ops, "vabs");
3213  case ARM::BI__builtin_neon_vaddhn_v: {
3214    llvm::VectorType *SrcTy =
3215        llvm::VectorType::getExtendedElementVectorType(VTy);
3216
3217    // %sum = add <4 x i32> %lhs, %rhs
3218    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3219    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3220    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3221
3222    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3223    Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(),
3224                                       SrcTy->getScalarSizeInBits() / 2);
3225    ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt);
3226    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3227
3228    // %res = trunc <4 x i32> %high to <4 x i16>
3229    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3230  }
3231  case ARM::BI__builtin_neon_vcale_v:
3232    std::swap(Ops[0], Ops[1]);
3233  case ARM::BI__builtin_neon_vcage_v: {
3234    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged);
3235    return EmitNeonCall(F, Ops, "vcage");
3236  }
3237  case ARM::BI__builtin_neon_vcaleq_v:
3238    std::swap(Ops[0], Ops[1]);
3239  case ARM::BI__builtin_neon_vcageq_v: {
3240    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
3241    return EmitNeonCall(F, Ops, "vcage");
3242  }
3243  case ARM::BI__builtin_neon_vcalt_v:
3244    std::swap(Ops[0], Ops[1]);
3245  case ARM::BI__builtin_neon_vcagt_v: {
3246    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd);
3247    return EmitNeonCall(F, Ops, "vcagt");
3248  }
3249  case ARM::BI__builtin_neon_vcaltq_v:
3250    std::swap(Ops[0], Ops[1]);
3251  case ARM::BI__builtin_neon_vcagtq_v: {
3252    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
3253    return EmitNeonCall(F, Ops, "vcagt");
3254  }
3255  case ARM::BI__builtin_neon_vcls_v:
3256  case ARM::BI__builtin_neon_vclsq_v: {
3257    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
3258    return EmitNeonCall(F, Ops, "vcls");
3259  }
3260  case ARM::BI__builtin_neon_vclz_v:
3261  case ARM::BI__builtin_neon_vclzq_v: {
3262    // Generate the target-independent ctlz intrinsic; it takes a second argument
3263    // indicating whether clz of zero is undefined, which it is not on ARM.
3264    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty);
3265    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3266    return EmitNeonCall(F, Ops, "vclz");
3267  }
3268  case ARM::BI__builtin_neon_vcnt_v:
3269  case ARM::BI__builtin_neon_vcntq_v: {
3270    // Generate the target-independent ctpop intrinsic.
3271    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty);
3272    return EmitNeonCall(F, Ops, "vctpop");
3273  }
3274  case ARM::BI__builtin_neon_vcvt_f16_v: {
3275    assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
3276           "unexpected vcvt_f16_v builtin");
3277    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
3278    return EmitNeonCall(F, Ops, "vcvt");
3279  }
3280  case ARM::BI__builtin_neon_vcvt_f32_f16: {
3281    assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
3282           "unexpected vcvt_f32_f16 builtin");
3283    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
3284    return EmitNeonCall(F, Ops, "vcvt");
3285  }
3286  case ARM::BI__builtin_neon_vcvt_f32_v:
3287  case ARM::BI__builtin_neon_vcvtq_f32_v:
3288    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3289    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
3290    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3291                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3292  case ARM::BI__builtin_neon_vcvt_s32_v:
3293  case ARM::BI__builtin_neon_vcvt_u32_v:
3294  case ARM::BI__builtin_neon_vcvtq_s32_v:
3295  case ARM::BI__builtin_neon_vcvtq_u32_v: {
3296    llvm::Type *FloatTy =
3297      GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
3298    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
3299    return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3300                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3301  }
3302  case ARM::BI__builtin_neon_vcvt_n_f32_v:
3303  case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
3304    llvm::Type *FloatTy =
3305      GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
3306    llvm::Type *Tys[2] = { FloatTy, Ty };
3307    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
3308               : Intrinsic::arm_neon_vcvtfxs2fp;
3309    Function *F = CGM.getIntrinsic(Int, Tys);
3310    return EmitNeonCall(F, Ops, "vcvt_n");
3311  }
3312  case ARM::BI__builtin_neon_vcvt_n_s32_v:
3313  case ARM::BI__builtin_neon_vcvt_n_u32_v:
3314  case ARM::BI__builtin_neon_vcvtq_n_s32_v:
3315  case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
3316    llvm::Type *FloatTy =
3317      GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
3318    llvm::Type *Tys[2] = { Ty, FloatTy };
3319    Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
3320               : Intrinsic::arm_neon_vcvtfp2fxs;
3321    Function *F = CGM.getIntrinsic(Int, Tys);
3322    return EmitNeonCall(F, Ops, "vcvt_n");
3323  }
3324  case ARM::BI__builtin_neon_vext_v:
3325  case ARM::BI__builtin_neon_vextq_v: {
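    // vext extracts a window from the concatenation of the two inputs; emit it as a shufflevector with indices CV .. CV+N-1.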
3326    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3327    SmallVector<Constant*, 16> Indices;
3328    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3329      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
3330
3331    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3332    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3333    Value *SV = llvm::ConstantVector::get(Indices);
3334    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
3335  }
3336  case ARM::BI__builtin_neon_vhadd_v:
3337  case ARM::BI__builtin_neon_vhaddq_v:
3338    Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
3339    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd");
3340  case ARM::BI__builtin_neon_vhsub_v:
3341  case ARM::BI__builtin_neon_vhsubq_v:
3342    Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
3343    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub");
3344  case ARM::BI__builtin_neon_vld1_v:
3345  case ARM::BI__builtin_neon_vld1q_v:
3346    Ops.push_back(Align);
3347    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty),
3348                        Ops, "vld1");
3349  case ARM::BI__builtin_neon_vld1q_lane_v:
3350    // Handle 64-bit integer elements as a special case.  Use shuffles of
3351    // one-element vectors to avoid poor code for i64 in the backend.
3352    if (VTy->getElementType()->isIntegerTy(64)) {
3353      // Extract the other lane.
3354      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3355      int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
3356      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
3357      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3358      // Load the value as a one-element vector.
3359      Ty = llvm::VectorType::get(VTy->getElementType(), 1);
3360      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
3361      Value *Ld = Builder.CreateCall2(F, Ops[0], Align);
3362      // Combine them.
3363      SmallVector<Constant*, 2> Indices;
3364      Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane));
3365      Indices.push_back(ConstantInt::get(Int32Ty, Lane));
3366      SV = llvm::ConstantVector::get(Indices);
3367      return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
3368    }
3369    // fall through
3370  case ARM::BI__builtin_neon_vld1_lane_v: {
3371    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3372    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3373    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3374    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
3375    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
3376    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
3377  }
3378  case ARM::BI__builtin_neon_vld1_dup_v:
3379  case ARM::BI__builtin_neon_vld1q_dup_v: {
3380    Value *V = UndefValue::get(Ty);
3381    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3382    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3383    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
3384    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
3385    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
3386    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3387    return EmitNeonSplat(Ops[0], CI);
3388  }
3389  case ARM::BI__builtin_neon_vld2_v:
3390  case ARM::BI__builtin_neon_vld2q_v: {
3391    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty);
3392    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
3393    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3394    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3395    return Builder.CreateStore(Ops[1], Ops[0]);
3396  }
3397  case ARM::BI__builtin_neon_vld3_v:
3398  case ARM::BI__builtin_neon_vld3q_v: {
3399    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty);
3400    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
3401    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3402    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3403    return Builder.CreateStore(Ops[1], Ops[0]);
3404  }
3405  case ARM::BI__builtin_neon_vld4_v:
3406  case ARM::BI__builtin_neon_vld4q_v: {
3407    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty);
3408    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
3409    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3410    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3411    return Builder.CreateStore(Ops[1], Ops[0]);
3412  }
3413  case ARM::BI__builtin_neon_vld2_lane_v:
3414  case ARM::BI__builtin_neon_vld2q_lane_v: {
3415    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty);
3416    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3417    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
3418    Ops.push_back(Align);
3419    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
3420    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3421    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3422    return Builder.CreateStore(Ops[1], Ops[0]);
3423  }
3424  case ARM::BI__builtin_neon_vld3_lane_v:
3425  case ARM::BI__builtin_neon_vld3q_lane_v: {
3426    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty);
3427    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3428    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
3429    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
3430    Ops.push_back(Align);
3431    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
3432    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3433    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3434    return Builder.CreateStore(Ops[1], Ops[0]);
3435  }
3436  case ARM::BI__builtin_neon_vld4_lane_v:
3437  case ARM::BI__builtin_neon_vld4q_lane_v: {
3438    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty);
3439    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3440    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
3441    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
3442    Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
3443    Ops.push_back(Align);
3444    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
3445    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3446    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3447    return Builder.CreateStore(Ops[1], Ops[0]);
3448  }
3449  case ARM::BI__builtin_neon_vld2_dup_v:
3450  case ARM::BI__builtin_neon_vld3_dup_v:
3451  case ARM::BI__builtin_neon_vld4_dup_v: {
3452    // Handle 64-bit elements as a special case.  There is no "dup" needed.
3453    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
3454      switch (BuiltinID) {
3455      case ARM::BI__builtin_neon_vld2_dup_v:
3456        Int = Intrinsic::arm_neon_vld2;
3457        break;
3458      case ARM::BI__builtin_neon_vld3_dup_v:
3459        Int = Intrinsic::arm_neon_vld3;
3460        break;
3461      case ARM::BI__builtin_neon_vld4_dup_v:
3462        Int = Intrinsic::arm_neon_vld4;
3463        break;
3464      default: llvm_unreachable("unknown vld_dup intrinsic?");
3465      }
3466      Function *F = CGM.getIntrinsic(Int, Ty);
3467      Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
3468      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3469      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3470      return Builder.CreateStore(Ops[1], Ops[0]);
3471    }
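    // For narrower elements, load a single lane with the corresponding vldNlane intrinsic, then splat lane 0 across each result vector.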
3472    switch (BuiltinID) {
3473    case ARM::BI__builtin_neon_vld2_dup_v:
3474      Int = Intrinsic::arm_neon_vld2lane;
3475      break;
3476    case ARM::BI__builtin_neon_vld3_dup_v:
3477      Int = Intrinsic::arm_neon_vld3lane;
3478      break;
3479    case ARM::BI__builtin_neon_vld4_dup_v:
3480      Int = Intrinsic::arm_neon_vld4lane;
3481      break;
3482    default: llvm_unreachable("unknown vld_dup intrinsic?");
3483    }
3484    Function *F = CGM.getIntrinsic(Int, Ty);
3485    llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
3486
3487    SmallVector<Value*, 6> Args;
3488    Args.push_back(Ops[1]);
3489    Args.append(STy->getNumElements(), UndefValue::get(Ty));
3490
3491    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
3492    Args.push_back(CI);
3493    Args.push_back(Align);
3494
3495    Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
3496    // Splat lane 0 to all elements in each vector of the result.
3497    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
3498      Value *Val = Builder.CreateExtractValue(Ops[1], i);
3499      Value *Elt = Builder.CreateBitCast(Val, Ty);
3500      Elt = EmitNeonSplat(Elt, CI);
3501      Elt = Builder.CreateBitCast(Elt, Val->getType());
3502      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
3503    }
3504    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3505    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3506    return Builder.CreateStore(Ops[1], Ops[0]);
3507  }
3508  case ARM::BI__builtin_neon_vmax_v:
3509  case ARM::BI__builtin_neon_vmaxq_v:
3510    Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
3511    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
3512  case ARM::BI__builtin_neon_vmin_v:
3513  case ARM::BI__builtin_neon_vminq_v:
3514    Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
3515    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
3516  case ARM::BI__builtin_neon_vmovl_v: {
3517    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
3518    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
3519    if (usgn)
3520      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
3521    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
3522  }
3523  case ARM::BI__builtin_neon_vmovn_v: {
3524    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3525    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
3526    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
3527  }
3528  case ARM::BI__builtin_neon_vmul_v:
3529  case ARM::BI__builtin_neon_vmulq_v:
3530    assert(Type.isPoly() && "vmul builtin only supported for polynomial types");
3531    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
3532                        Ops, "vmul");
3533  case ARM::BI__builtin_neon_vmull_v:
3534    // FIXME: the integer vmull operations could be emitted in terms of pure
3535    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
3536    // hoisting the exts outside loops. Until global ISel comes along that can
3537    // see through such movement, this leads to bad CodeGen. So we need an
3538    // intrinsic for now.
3539    Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
3540    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
3541    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
3542  case ARM::BI__builtin_neon_vfma_v:
3543  case ARM::BI__builtin_neon_vfmaq_v: {
3544    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3545    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3546    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3547    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3548
3549    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
3550    return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
3551  }
3552  case ARM::BI__builtin_neon_vpadal_v:
3553  case ARM::BI__builtin_neon_vpadalq_v: {
3554    Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
3555    // The source operand type has twice as many elements of half the size.
3556    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3557    llvm::Type *EltTy =
3558      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3559    llvm::Type *NarrowTy =
3560      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3561    llvm::Type *Tys[2] = { Ty, NarrowTy };
3562    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
3563  }
3564  case ARM::BI__builtin_neon_vpadd_v:
3565    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty),
3566                        Ops, "vpadd");
3567  case ARM::BI__builtin_neon_vpaddl_v:
3568  case ARM::BI__builtin_neon_vpaddlq_v: {
3569    Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
3570    // The source operand type has twice as many elements of half the size.
3571    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3572    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3573    llvm::Type *NarrowTy =
3574      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3575    llvm::Type *Tys[2] = { Ty, NarrowTy };
3576    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
3577  }
3578  case ARM::BI__builtin_neon_vpmax_v:
3579    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
3580    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
3581  case ARM::BI__builtin_neon_vpmin_v:
3582    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
3583    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
3584  case ARM::BI__builtin_neon_vqabs_v:
3585  case ARM::BI__builtin_neon_vqabsq_v:
3586    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
3587                        Ops, "vqabs");
3588  case ARM::BI__builtin_neon_vqadd_v:
3589  case ARM::BI__builtin_neon_vqaddq_v:
3590    Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
3591    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
3592  case ARM::BI__builtin_neon_vqdmlal_v: {
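    // vqdmlal: saturating doubling multiply-long of the last two operands, then a saturating add into the accumulator.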
3593    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
3594    Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
3595                              MulOps, "vqdmlal");
3596
3597    SmallVector<Value *, 2> AddOps;
3598    AddOps.push_back(Ops[0]);
3599    AddOps.push_back(Mul);
3600    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqadds, Ty),
3601                        AddOps, "vqdmlal");
3602  }
3603  case ARM::BI__builtin_neon_vqdmlsl_v: {
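    // vqdmlsl: saturating doubling multiply-long of the last two operands, then a saturating subtract from the accumulator.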
3604    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
3605    Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
3606                              MulOps, "vqdmlsl");
3607
3608    SmallVector<Value *, 2> SubOps;
3609    SubOps.push_back(Ops[0]);
3610    SubOps.push_back(Mul);
3611    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqsubs, Ty),
3612                        SubOps, "vqdmlsl");
3613  }
3614  case ARM::BI__builtin_neon_vqdmulh_v:
3615  case ARM::BI__builtin_neon_vqdmulhq_v:
3616    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty),
3617                        Ops, "vqdmulh");
3618  case ARM::BI__builtin_neon_vqdmull_v:
3619    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
3620                        Ops, "vqdmull");
3621  case ARM::BI__builtin_neon_vqmovn_v:
3622    Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
3623    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
3624  case ARM::BI__builtin_neon_vqmovun_v:
3625    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
3626                        Ops, "vqmovun");
3627  case ARM::BI__builtin_neon_vqneg_v:
3628  case ARM::BI__builtin_neon_vqnegq_v:
3629    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
3630                        Ops, "vqneg");
3631  case ARM::BI__builtin_neon_vqrdmulh_v:
3632  case ARM::BI__builtin_neon_vqrdmulhq_v:
3633    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty),
3634                        Ops, "vqrdmulh");
3635  case ARM::BI__builtin_neon_vqrshl_v:
3636  case ARM::BI__builtin_neon_vqrshlq_v:
3637    Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
3638    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl");
3639  case ARM::BI__builtin_neon_vqrshrn_n_v:
3640    Int =
3641      usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
3642    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
3643                        1, true);
3644  case ARM::BI__builtin_neon_vqrshrun_n_v:
3645    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
3646                        Ops, "vqrshrun_n", 1, true);
3647  case ARM::BI__builtin_neon_vqshl_v:
3648  case ARM::BI__builtin_neon_vqshlq_v:
3649    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
3650    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl");
3651  case ARM::BI__builtin_neon_vqshl_n_v:
3652  case ARM::BI__builtin_neon_vqshlq_n_v:
3653    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
3654    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
3655                        1, false);
3656  case ARM::BI__builtin_neon_vqshlu_n_v:
3657  case ARM::BI__builtin_neon_vqshluq_n_v:
3658    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty),
3659                        Ops, "vqshlu", 1, false);
3660  case ARM::BI__builtin_neon_vqshrn_n_v:
3661    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
3662    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
3663                        1, true);
3664  case ARM::BI__builtin_neon_vqshrun_n_v:
3665    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
3666                        Ops, "vqshrun_n", 1, true);
3667  case ARM::BI__builtin_neon_vqsub_v:
3668  case ARM::BI__builtin_neon_vqsubq_v:
3669    Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
3670    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub");
3671  case ARM::BI__builtin_neon_vraddhn_v:
3672    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty),
3673                        Ops, "vraddhn");
3674  case ARM::BI__builtin_neon_vrecpe_v:
3675  case ARM::BI__builtin_neon_vrecpeq_v:
3676    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
3677                        Ops, "vrecpe");
3678  case ARM::BI__builtin_neon_vrecps_v:
3679  case ARM::BI__builtin_neon_vrecpsq_v:
3680    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty),
3681                        Ops, "vrecps");
3682  case ARM::BI__builtin_neon_vrhadd_v:
3683  case ARM::BI__builtin_neon_vrhaddq_v:
3684    Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
3685    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd");
3686  case ARM::BI__builtin_neon_vrshl_v:
3687  case ARM::BI__builtin_neon_vrshlq_v:
3688    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3689    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl");
3690  case ARM::BI__builtin_neon_vrshrn_n_v:
3691    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
3692                        Ops, "vrshrn_n", 1, true);
3693  case ARM::BI__builtin_neon_vrshr_n_v:
3694  case ARM::BI__builtin_neon_vrshrq_n_v:
3695    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3696    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
3697  case ARM::BI__builtin_neon_vrsqrte_v:
3698  case ARM::BI__builtin_neon_vrsqrteq_v:
3699    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty),
3700                        Ops, "vrsqrte");
3701  case ARM::BI__builtin_neon_vrsqrts_v:
3702  case ARM::BI__builtin_neon_vrsqrtsq_v:
3703    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty),
3704                        Ops, "vrsqrts");
3705  case ARM::BI__builtin_neon_vrsra_n_v:
3706  case ARM::BI__builtin_neon_vrsraq_n_v:
3707    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3708    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3709    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
3710    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3711    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
3712    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
3713  case ARM::BI__builtin_neon_vrsubhn_v:
3714    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty),
3715                        Ops, "vrsubhn");
3716  case ARM::BI__builtin_neon_vshl_v:
3717  case ARM::BI__builtin_neon_vshlq_v:
3718    Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
3719    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl");
3720  case ARM::BI__builtin_neon_vshll_n_v:
3721    Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
3722    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1);
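  // vshl_n has a constant shift count, so it is emitted below as a plain IR
  // shl rather than through a NEON intrinsic.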
3723  case ARM::BI__builtin_neon_vshl_n_v:
3724  case ARM::BI__builtin_neon_vshlq_n_v:
3725    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
3726    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
3727                             "vshl_n");
3728  case ARM::BI__builtin_neon_vshrn_n_v:
3729    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty),
3730                        Ops, "vshrn_n", 1, true);
3731  case ARM::BI__builtin_neon_vshr_n_v:
3732  case ARM::BI__builtin_neon_vshrq_n_v:
3733    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, usgn, "vshr_n");
3734  case ARM::BI__builtin_neon_vsri_n_v:
3735  case ARM::BI__builtin_neon_vsriq_n_v:
3736    rightShift = true;
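    // FALL THROUGH: vsri and vsli share the vshiftins lowering below;
    // rightShift only changes how the constant shift amounts in Ops[2] are
    // encoded by EmitNeonShiftVector.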
3737  case ARM::BI__builtin_neon_vsli_n_v:
3738  case ARM::BI__builtin_neon_vsliq_n_v:
3739    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
3740    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
3741                        Ops, "vsli_n");
3742  case ARM::BI__builtin_neon_vsra_n_v:
3743  case ARM::BI__builtin_neon_vsraq_n_v:
3744    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3745    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
3746    return Builder.CreateAdd(Ops[0], Ops[1]);
3747  case ARM::BI__builtin_neon_vst1_v:
3748  case ARM::BI__builtin_neon_vst1q_v:
3749    Ops.push_back(Align);
3750    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty),
3751                        Ops, "");
3752  case ARM::BI__builtin_neon_vst1q_lane_v:
3753    // Handle 64-bit integer elements as a special case.  Use a shuffle to get
3754    // a one-element vector and avoid poor code for i64 in the backend.
3755    if (VTy->getElementType()->isIntegerTy(64)) {
3756      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3757      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
3758      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3759      Ops[2] = Align;
3760      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
3761                                                 Ops[1]->getType()), Ops);
3762    }
3763    // fall through
3764  case ARM::BI__builtin_neon_vst1_lane_v: {
3765    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3766    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
3767    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3768    StoreInst *St = Builder.CreateStore(Ops[1],
3769                                        Builder.CreateBitCast(Ops[0], Ty));
3770    St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
3771    return St;
3772  }
3773  case ARM::BI__builtin_neon_vst2_v:
3774  case ARM::BI__builtin_neon_vst2q_v:
3775    Ops.push_back(Align);
3776    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty),
3777                        Ops, "");
3778  case ARM::BI__builtin_neon_vst2_lane_v:
3779  case ARM::BI__builtin_neon_vst2q_lane_v:
3780    Ops.push_back(Align);
3781    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty),
3782                        Ops, "");
3783  case ARM::BI__builtin_neon_vst3_v:
3784  case ARM::BI__builtin_neon_vst3q_v:
3785    Ops.push_back(Align);
3786    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty),
3787                        Ops, "");
3788  case ARM::BI__builtin_neon_vst3_lane_v:
3789  case ARM::BI__builtin_neon_vst3q_lane_v:
3790    Ops.push_back(Align);
3791    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty),
3792                        Ops, "");
3793  case ARM::BI__builtin_neon_vst4_v:
3794  case ARM::BI__builtin_neon_vst4q_v:
3795    Ops.push_back(Align);
3796    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty),
3797                        Ops, "");
3798  case ARM::BI__builtin_neon_vst4_lane_v:
3799  case ARM::BI__builtin_neon_vst4q_lane_v:
3800    Ops.push_back(Align);
3801    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty),
3802                        Ops, "");
3803  case ARM::BI__builtin_neon_vsubhn_v: {
3804    llvm::VectorType *SrcTy =
3805        llvm::VectorType::getExtendedElementVectorType(VTy);
3806
3807    // %diff = sub <4 x i32> %lhs, %rhs
3808    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3809    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3810    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
3811
3812    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
3813    Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(),
3814                                       SrcTy->getScalarSizeInBits() / 2);
3815    ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt);
3816    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
3817
3818    // %res = trunc <4 x i32> %high to <4 x i16>
3819    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
3820  }
3821  case ARM::BI__builtin_neon_vtbl1_v:
3822    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
3823                        Ops, "vtbl1");
3824  case ARM::BI__builtin_neon_vtbl2_v:
3825    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
3826                        Ops, "vtbl2");
3827  case ARM::BI__builtin_neon_vtbl3_v:
3828    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
3829                        Ops, "vtbl3");
3830  case ARM::BI__builtin_neon_vtbl4_v:
3831    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
3832                        Ops, "vtbl4");
3833  case ARM::BI__builtin_neon_vtbx1_v:
3834    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
3835                        Ops, "vtbx1");
3836  case ARM::BI__builtin_neon_vtbx2_v:
3837    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
3838                        Ops, "vtbx2");
3839  case ARM::BI__builtin_neon_vtbx3_v:
3840    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
3841                        Ops, "vtbx3");
3842  case ARM::BI__builtin_neon_vtbx4_v:
3843    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
3844                        Ops, "vtbx4");
3845  case ARM::BI__builtin_neon_vtst_v:
3846  case ARM::BI__builtin_neon_vtstq_v: {
3847    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3848    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3849    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
3850    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
3851                                ConstantAggregateZero::get(Ty));
3852    return Builder.CreateSExt(Ops[0], Ty, "vtst");
3853  }
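  // vtrn, vuzp, and vzip return two vectors through the pointer in Ops[0];
  // each loop iteration below builds one shuffle mask and stores the result
  // into the vi'th slot.  For a 4-element vtrn, for example, the masks are
  // <0,4,2,6> and <1,5,3,7>.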
3854  case ARM::BI__builtin_neon_vtrn_v:
3855  case ARM::BI__builtin_neon_vtrnq_v: {
3856    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3857    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3858    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3859    Value *SV = 0;
3860
3861    for (unsigned vi = 0; vi != 2; ++vi) {
3862      SmallVector<Constant*, 16> Indices;
3863      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3864        Indices.push_back(Builder.getInt32(i+vi));
3865        Indices.push_back(Builder.getInt32(i+e+vi));
3866      }
3867      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
3868      SV = llvm::ConstantVector::get(Indices);
3869      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
3870      SV = Builder.CreateStore(SV, Addr);
3871    }
3872    return SV;
3873  }
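  // vuzp de-interleaves: mask element j is 2*j+vi, so the first stored result
  // holds the even lanes of the concatenated input and the second holds the
  // odd lanes.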
3874  case ARM::BI__builtin_neon_vuzp_v:
3875  case ARM::BI__builtin_neon_vuzpq_v: {
3876    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3877    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3878    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3879    Value *SV = 0;
3880
3881    for (unsigned vi = 0; vi != 2; ++vi) {
3882      SmallVector<Constant*, 16> Indices;
3883      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3884        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
3885
3886      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
3887      SV = llvm::ConstantVector::get(Indices);
3888      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
3889      SV = Builder.CreateStore(SV, Addr);
3890    }
3891    return SV;
3892  }
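  // vzip interleaves the low halves (first stored result) and high halves
  // (second stored result) of the two inputs; e.g. for 4 elements the masks
  // are <0,4,1,5> and <2,6,3,7>.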
3893  case ARM::BI__builtin_neon_vzip_v:
3894  case ARM::BI__builtin_neon_vzipq_v: {
3895    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3896    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3897    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3898    Value *SV = 0;
3899
3900    for (unsigned vi = 0; vi != 2; ++vi) {
3901      SmallVector<Constant*, 16> Indices;
3902      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3903        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
3904        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
3905      }
3906      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
3907      SV = llvm::ConstantVector::get(Indices);
3908      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
3909      SV = Builder.CreateStore(SV, Addr);
3910    }
3911    return SV;
3912  }
3913  }
3914}
3915
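/// BuildVector - Build an IR vector from the given scalar operands: a
/// ConstantVector when every operand is a Constant, and a chain of
/// insertelement instructions otherwise.  Used below for the MMX
/// __builtin_ia32_vec_init_* builtins.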
3916llvm::Value *CodeGenFunction::
3917BuildVector(ArrayRef<llvm::Value*> Ops) {
3918  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
3919         "Not a power-of-two sized vector!");
3920  bool AllConstants = true;
3921  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
3922    AllConstants &= isa<Constant>(Ops[i]);
3923
3924  // If this is a constant vector, create a ConstantVector.
3925  if (AllConstants) {
3926    SmallVector<llvm::Constant*, 16> CstOps;
3927    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
3928      CstOps.push_back(cast<Constant>(Ops[i]));
3929    return llvm::ConstantVector::get(CstOps);
3930  }
3931
3932  // Otherwise, insertelement the values to build the vector.
3933  Value *Result =
3934    llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
3935
3936  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
3937    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
3938
3939  return Result;
3940}
3941
3942Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
3943                                           const CallExpr *E) {
3944  SmallVector<Value*, 4> Ops;
3945
3946  // Find out if any arguments are required to be integer constant expressions.
3947  unsigned ICEArguments = 0;
3948  ASTContext::GetBuiltinTypeError Error;
3949  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3950  assert(Error == ASTContext::GE_None && "Should not codegen an error");
3951
3952  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
3953    // If this is a normal argument, just emit it as a scalar.
3954    if ((ICEArguments & (1 << i)) == 0) {
3955      Ops.push_back(EmitScalarExpr(E->getArg(i)));
3956      continue;
3957    }
3958
3959    // If this is required to be a constant, constant fold it so that we know
3960    // that the generated intrinsic gets a ConstantInt.
3961    llvm::APSInt Result;
3962    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
3963    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
3964    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
3965  }
3966
3967  switch (BuiltinID) {
3968  default: return 0;
3969  case X86::BI__builtin_ia32_vec_init_v8qi:
3970  case X86::BI__builtin_ia32_vec_init_v4hi:
3971  case X86::BI__builtin_ia32_vec_init_v2si:
3972    return Builder.CreateBitCast(BuildVector(Ops),
3973                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
3974  case X86::BI__builtin_ia32_vec_ext_v2si:
3975    return Builder.CreateExtractElement(Ops[0],
3976                                  llvm::ConstantInt::get(Ops[1]->getType(), 0));
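  // The ldmxcsr/stmxcsr intrinsics take an i8* to a 32-bit slot in memory, so
  // spill the value to (or load the result from) a stack temporary.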
3977  case X86::BI__builtin_ia32_ldmxcsr: {
3978    Value *Tmp = CreateMemTemp(E->getArg(0)->getType());
3979    Builder.CreateStore(Ops[0], Tmp);
3980    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
3981                              Builder.CreateBitCast(Tmp, Int8PtrTy));
3982  }
3983  case X86::BI__builtin_ia32_stmxcsr: {
3984    Value *Tmp = CreateMemTemp(E->getType());
3985    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
3986                       Builder.CreateBitCast(Tmp, Int8PtrTy));
3987    return Builder.CreateLoad(Tmp, "stmxcsr");
3988  }
3989  case X86::BI__builtin_ia32_storehps:
3990  case X86::BI__builtin_ia32_storelps: {
3991    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
3992    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
3993
3994    // cast the value to <2 x i64>
3995    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
3996
3997    // extract element 0 (storelps) or element 1 (storehps)
3998    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
3999    llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
4000    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
4001
4002    // cast pointer to i64 & store
4003    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
4004    return Builder.CreateStore(Ops[1], Ops[0]);
4005  }
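  // The palignr family concatenates its two operands and extracts a window at
  // the given byte offset.  For example, with a shift of 3 the 64-bit form
  // below becomes shufflevector(Ops[1], Ops[0], <3,4,5,6,7,8,9,10>).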
4006  case X86::BI__builtin_ia32_palignr: {
4007    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
4008
4009    // If palignr is shifting the pair of input vectors less than 9 bytes,
4010    // emit a shuffle instruction.
4011    if (shiftVal <= 8) {
4012      SmallVector<llvm::Constant*, 8> Indices;
4013      for (unsigned i = 0; i != 8; ++i)
4014        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
4015
4016      Value* SV = llvm::ConstantVector::get(Indices);
4017      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
4018    }
4019
4020    // If palignr is shifting the pair of input vectors more than 8 but less
4021    // than 16 bytes, emit a logical right shift of the destination.
4022    if (shiftVal < 16) {
4023      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
4024      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);
4025
4026      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
4027      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);
4028
4029      // Emit the shift using the MMX psrl.q intrinsic.
4030      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
4031      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
4032    }
4033
4034    // If palignr is shifting the pair of vectors 16 bytes or more, emit zero.
4035    return llvm::Constant::getNullValue(ConvertType(E->getType()));
4036  }
4037  case X86::BI__builtin_ia32_palignr128: {
4038    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
4039
4040    // If palignr is shifting the pair of input vectors less than 17 bytes,
4041    // emit a shuffle instruction.
4042    if (shiftVal <= 16) {
4043      SmallVector<llvm::Constant*, 16> Indices;
4044      for (unsigned i = 0; i != 16; ++i)
4045        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
4046
4047      Value* SV = llvm::ConstantVector::get(Indices);
4048      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
4049    }
4050
4051    // If palignr is shifting the pair of input vectors more than 16 but less
4052    // than 32 bytes, emit a logical right shift of the destination.
4053    if (shiftVal < 32) {
4054      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
4055
4056      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
4057      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
4058
4059      // Emit the shift using the SSE2 psrl.dq intrinsic.
4060      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
4061      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
4062    }
4063
4064    // If palignr is shifting the pair of vectors 32 bytes or more, emit zero.
4065    return llvm::Constant::getNullValue(ConvertType(E->getType()));
4066  }
4067  case X86::BI__builtin_ia32_palignr256: {
4068    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
4069
4070    // If palignr is shifting the pair of input vectors less than 17 bytes,
4071    // emit a shuffle instruction.
4072    if (shiftVal <= 16) {
4073      SmallVector<llvm::Constant*, 32> Indices;
4074      // 256-bit palignr operates on 128-bit lanes, so build the shuffle mask
4074      // one lane at a time.
4075      for (unsigned l = 0; l != 2; ++l) {
4076        unsigned LaneStart = l * 16;
4077        unsigned LaneEnd = (l+1) * 16;
4078        for (unsigned i = 0; i != 16; ++i) {
4079          unsigned Idx = shiftVal + i + LaneStart;
4080          if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand
4081          Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx));
4082        }
4083      }
4084
4085      Value* SV = llvm::ConstantVector::get(Indices);
4086      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
4087    }
4088
4089    // If palignr is shifting the pair of input vectors more than 16 but less
4090    // than 32 bytes, emit a logical right shift of the destination.
4091    if (shiftVal < 32) {
4092      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4);
4093
4094      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
4095      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
4096
4097      // Emit the shift using the AVX2 psrl.dq intrinsic.
4098      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq);
4099      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
4100    }
4101
4102    // If palignr is shifting the pair of vectors 32 bytes or more, emit zero.
4103    return llvm::Constant::getNullValue(ConvertType(E->getType()));
4104  }
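  // Non-temporal stores: emit an ordinary store and tag it with !nontemporal
  // metadata so the backend can select a movnt* instruction.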
4105  case X86::BI__builtin_ia32_movntps:
4106  case X86::BI__builtin_ia32_movntps256:
4107  case X86::BI__builtin_ia32_movntpd:
4108  case X86::BI__builtin_ia32_movntpd256:
4109  case X86::BI__builtin_ia32_movntdq:
4110  case X86::BI__builtin_ia32_movntdq256:
4111  case X86::BI__builtin_ia32_movnti:
4112  case X86::BI__builtin_ia32_movnti64: {
4113    llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
4114                                           Builder.getInt32(1));
4115
4116    // Convert the type of the pointer to a pointer to the stored type.
4117    Value *BC = Builder.CreateBitCast(Ops[0],
4118                                llvm::PointerType::getUnqual(Ops[1]->getType()),
4119                                      "cast");
4120    StoreInst *SI = Builder.CreateStore(Ops[1], BC);
4121    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
4122
4123    // If the operand is an integer, we can't assume alignment. Otherwise,
4124    // assume natural alignment.
4125    QualType ArgTy = E->getArg(1)->getType();
4126    unsigned Align;
4127    if (ArgTy->isIntegerType())
4128      Align = 1;
4129    else
4130      Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
4131    SI->setAlignment(Align);
4132    return SI;
4133  }
4134  // 3DNow!
4135  case X86::BI__builtin_ia32_pswapdsf:
4136  case X86::BI__builtin_ia32_pswapdsi: {
4137    const char *name = 0;
4138    Intrinsic::ID ID = Intrinsic::not_intrinsic;
4139    switch(BuiltinID) {
4140    default: llvm_unreachable("Unsupported intrinsic!");
4141    case X86::BI__builtin_ia32_pswapdsf:
4142    case X86::BI__builtin_ia32_pswapdsi:
4143      name = "pswapd";
4144      ID = Intrinsic::x86_3dnowa_pswapd;
4145      break;
4146    }
4147    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
4148    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
4149    llvm::Function *F = CGM.getIntrinsic(ID);
4150    return Builder.CreateCall(F, Ops, name);
4151  }
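  // The rdrand/rdseed *_step builtins write the random value through their
  // pointer argument and return the success flag; the corresponding intrinsics
  // return a {value, flag} pair that is split up below with extractvalue.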
4152  case X86::BI__builtin_ia32_rdrand16_step:
4153  case X86::BI__builtin_ia32_rdrand32_step:
4154  case X86::BI__builtin_ia32_rdrand64_step:
4155  case X86::BI__builtin_ia32_rdseed16_step:
4156  case X86::BI__builtin_ia32_rdseed32_step:
4157  case X86::BI__builtin_ia32_rdseed64_step: {
4158    Intrinsic::ID ID;
4159    switch (BuiltinID) {
4160    default: llvm_unreachable("Unsupported intrinsic!");
4161    case X86::BI__builtin_ia32_rdrand16_step:
4162      ID = Intrinsic::x86_rdrand_16;
4163      break;
4164    case X86::BI__builtin_ia32_rdrand32_step:
4165      ID = Intrinsic::x86_rdrand_32;
4166      break;
4167    case X86::BI__builtin_ia32_rdrand64_step:
4168      ID = Intrinsic::x86_rdrand_64;
4169      break;
4170    case X86::BI__builtin_ia32_rdseed16_step:
4171      ID = Intrinsic::x86_rdseed_16;
4172      break;
4173    case X86::BI__builtin_ia32_rdseed32_step:
4174      ID = Intrinsic::x86_rdseed_32;
4175      break;
4176    case X86::BI__builtin_ia32_rdseed64_step:
4177      ID = Intrinsic::x86_rdseed_64;
4178      break;
4179    }
4180
4181    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
4182    Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]);
4183    return Builder.CreateExtractValue(Call, 1);
4184  }
4185  // AVX2 broadcast
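  // The vbroadcasti128 intrinsic takes a pointer, so spill the 128-bit operand
  // to a stack temporary and broadcast from there.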
4186  case X86::BI__builtin_ia32_vbroadcastsi256: {
4187    Value *VecTmp = CreateMemTemp(E->getArg(0)->getType());
4188    Builder.CreateStore(Ops[0], VecTmp);
4189    Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128);
4190    return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy));
4191  }
4192  }
4193}
4194
4195
4196Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
4197                                           const CallExpr *E) {
4198  SmallVector<Value*, 4> Ops;
4199
4200  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
4201    Ops.push_back(EmitScalarExpr(E->getArg(i)));
4202
4203  Intrinsic::ID ID = Intrinsic::not_intrinsic;
4204
4205  switch (BuiltinID) {
4206  default: return 0;
4207
4208  // vec_ld, vec_lvsl, vec_lvsr
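  // These builtins take (offset, pointer); form the byte address with a GEP
  // and drop the trailing pointer operand, since the AltiVec intrinsics expect
  // a single address.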
4209  case PPC::BI__builtin_altivec_lvx:
4210  case PPC::BI__builtin_altivec_lvxl:
4211  case PPC::BI__builtin_altivec_lvebx:
4212  case PPC::BI__builtin_altivec_lvehx:
4213  case PPC::BI__builtin_altivec_lvewx:
4214  case PPC::BI__builtin_altivec_lvsl:
4215  case PPC::BI__builtin_altivec_lvsr:
4216  {
4217    Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
4218
4219    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
4220    Ops.pop_back();
4221
4222    switch (BuiltinID) {
4223    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
4224    case PPC::BI__builtin_altivec_lvx:
4225      ID = Intrinsic::ppc_altivec_lvx;
4226      break;
4227    case PPC::BI__builtin_altivec_lvxl:
4228      ID = Intrinsic::ppc_altivec_lvxl;
4229      break;
4230    case PPC::BI__builtin_altivec_lvebx:
4231      ID = Intrinsic::ppc_altivec_lvebx;
4232      break;
4233    case PPC::BI__builtin_altivec_lvehx:
4234      ID = Intrinsic::ppc_altivec_lvehx;
4235      break;
4236    case PPC::BI__builtin_altivec_lvewx:
4237      ID = Intrinsic::ppc_altivec_lvewx;
4238      break;
4239    case PPC::BI__builtin_altivec_lvsl:
4240      ID = Intrinsic::ppc_altivec_lvsl;
4241      break;
4242    case PPC::BI__builtin_altivec_lvsr:
4243      ID = Intrinsic::ppc_altivec_lvsr;
4244      break;
4245    }
4246    llvm::Function *F = CGM.getIntrinsic(ID);
4247    return Builder.CreateCall(F, Ops, "");
4248  }
4249
4250  // vec_st
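  // These builtins take (value, offset, pointer); the store address is again
  // pointer + offset, and the trailing pointer operand is dropped so the
  // intrinsic sees (value, address).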
4251  case PPC::BI__builtin_altivec_stvx:
4252  case PPC::BI__builtin_altivec_stvxl:
4253  case PPC::BI__builtin_altivec_stvebx:
4254  case PPC::BI__builtin_altivec_stvehx:
4255  case PPC::BI__builtin_altivec_stvewx:
4256  {
4257    Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
4258    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
4259    Ops.pop_back();
4260
4261    switch (BuiltinID) {
4262    default: llvm_unreachable("Unsupported st intrinsic!");
4263    case PPC::BI__builtin_altivec_stvx:
4264      ID = Intrinsic::ppc_altivec_stvx;
4265      break;
4266    case PPC::BI__builtin_altivec_stvxl:
4267      ID = Intrinsic::ppc_altivec_stvxl;
4268      break;
4269    case PPC::BI__builtin_altivec_stvebx:
4270      ID = Intrinsic::ppc_altivec_stvebx;
4271      break;
4272    case PPC::BI__builtin_altivec_stvehx:
4273      ID = Intrinsic::ppc_altivec_stvehx;
4274      break;
4275    case PPC::BI__builtin_altivec_stvewx:
4276      ID = Intrinsic::ppc_altivec_stvewx;
4277      break;
4278    }
4279    llvm::Function *F = CGM.getIntrinsic(ID);
4280    return Builder.CreateCall(F, Ops, "");
4281  }
4282  }
4283}
4284