//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CodeGenFunction.h"
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include <sstream>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                  unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
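  //
  // Note: BuiltinInfo.getName() returns the full builtin name, e.g.
  // "__builtin_fabsf"; the "+ 10" below skips the 10-character "__builtin_"
  // prefix, leaving the library name "fabsf".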
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else
    Name = Context.BuiltinInfo.getName(BuiltinID) + 10;

  llvm::FunctionType *Ty =
    cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}
/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
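///
/// For example, a call such as __sync_fetch_and_add(ptr, val) on an int is
/// lowered (roughly) to:
///   %old = atomicrmw add i32* %ptr, i32 %val seq_cst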
static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
                                    llvm::AtomicRMWInst::BinOp Kind,
                                    const CallExpr *E) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  return EmitFromInt(CGF, Result, T, ValueType);
}

static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
  Value *Address = CGF.EmitScalarExpr(E->getArg(1));

  // Convert the type of the pointer to a pointer to the stored type.
  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
  Value *BC = CGF.Builder.CreateBitCast(
      Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
  LV.setNontemporal(true);
  CGF.EmitStoreOfScalar(Val, LV, false);
  return nullptr;
}

static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Address = CGF.EmitScalarExpr(E->getArg(0));

  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
  LV.setNontemporal(true);
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}

static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}
/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
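///
/// For example, __sync_add_and_fetch(ptr, val) is lowered (roughly) to:
///   %old = atomicrmw add i32* %ptr, i32 %val seq_cst
///   %new = add i32 %old, %val   ; the value returned to the caller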
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  if (Invert)
    Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                     llvm::ConstantInt::get(IntType, -1));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// @brief Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E   Builtin call expression to convert to cmpxchg.
///            arg0 - address to operate on
///            arg1 - value to compare with
///            arg2 - new value
/// @param ReturnBool Specifies whether to return success flag of
///                   cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
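///
/// For example, __sync_val_compare_and_swap(ptr, cmp, new) on an int is
/// lowered (roughly) to:
///   %pair = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
///   %old  = extractvalue { i32, i1 } %pair, 0
/// while the bool variant instead extracts and zero-extends field 1.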
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                     bool ReturnBool) {
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  Value *Args[3];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);

  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
      Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
      llvm::AtomicOrdering::SequentiallyConsistent);
  if (ReturnBool)
    // Extract boolean success flag and zext it to int.
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
                                  CGF.ConvertType(E->getType()));
  else
    // Extract old value and emit it using the same type as compare value.
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
                       ValueType);
}

// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
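//
// For example, __builtin_fabsf(x) becomes (roughly):
//   %r = call float @llvm.fabs.f32(float %x)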
static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, Src0);
}

// Emit an intrinsic that has 2 operands of the same type as its result.
static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E,
                                unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1 });
}

// Emit an intrinsic that has 3 operands of the same type as its result.
static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E,
                                 unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
  Call->setDoesNotAccessMemory();
  return Call;
}

/// Emit the computation of the sign bit for a floating point value. Returns
/// the i1 sign bit value.
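///
/// For an ordinary double this is (roughly) a bitcast to the same-width
/// integer type followed by a signed comparison against zero:
///   %i    = bitcast double %v to i64
///   %sign = icmp slt i64 %i, 0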
static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
  LLVMContext &C = CGF.CGM.getLLVMContext();

  llvm::Type *Ty = V->getType();
  int Width = Ty->getPrimitiveSizeInBits();
  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
  V = CGF.Builder.CreateBitCast(V, IntTy);
  if (Ty->isPPC_FP128Ty()) {
    // We want the sign bit of the higher-order double. The bitcast we just
    // did works as if the double-double was stored to memory and then
    // read as an i128. The "store" will put the higher-order double in the
    // lower address in both little- and big-endian modes, but the "load"
    // will treat those bits as a different part of the i128: the low bits in
    // little-endian mode, the high bits in big-endian mode. Therefore, on
    // big-endian targets we need to shift the high bits down to the low bits
    // before truncating.
    Width >>= 1;
    if (CGF.getTarget().isBigEndian()) {
      Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
      V = CGF.Builder.CreateLShr(V, ShiftCst);
    }
    // We are truncating the value in order to extract the higher-order
    // double, from which we will extract the sign.
    IntTy = llvm::IntegerType::get(C, Width);
    V = CGF.Builder.CreateTrunc(V, IntTy);
  }
  Value *Zero = llvm::Constant::getNullValue(IntTy);
  return CGF.Builder.CreateICmpSLT(V, Zero);
}

static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
                              const CallExpr *E, llvm::Value *calleeValue) {
  return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
                      ReturnValueSlot(), Fn);
}

/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
/// \arg X The first argument to the llvm.*.with.overflow.*.
/// \arg Y The second argument to the llvm.*.with.overflow.*.
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
/// \returns The result (i.e. sum/product) returned by the intrinsic.
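///
/// For example, with Intrinsic::sadd_with_overflow on i32 operands this
/// emits (roughly):
///   %pair  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
///   %carry = extractvalue { i32, i1 } %pair, 1
///   %sum   = extractvalue { i32, i1 } %pair, 0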
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
                                          const llvm::Intrinsic::ID IntrinsicID,
                                          llvm::Value *X, llvm::Value *Y,
                                          llvm::Value *&Carry) {
  // Make sure we have integers of the same width.
  assert(X->getType() == Y->getType() &&
         "Arguments must be the same type. (Did you forget to make sure both "
         "arguments have the same integer width?)");

  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  return CGF.Builder.CreateExtractValue(Tmp, 0);
}

static Value *emitRangedBuiltin(CodeGenFunction &CGF,
                                unsigned IntrinsicID,
                                int low, int high) {
    llvm::MDBuilder MDHelper(CGF.getLLVMContext());
    llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
    Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
    llvm::Instruction *Call = CGF.Builder.CreateCall(F);
    Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
    return Call;
}

namespace {
  struct WidthAndSignedness {
    unsigned Width;
    bool Signed;
  };
}

static WidthAndSignedness
getIntegerWidthAndSignedness(const clang::ASTContext &context,
                             const clang::QualType Type) {
  assert(Type->isIntegerType() && "Given type is not an integer.");
  unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
  bool Signed = Type->isSignedIntegerType();
  return {Width, Signed};
}

// Given one or more integer types, this function produces an integer type that
// encompasses them: any value in one of the given types could be expressed in
// the encompassing type.
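//
// For example, encompassing {unsigned 32-bit, signed 16-bit} yields
// {Width = 33, Signed = true}: the result must be signed, and a signed type
// needs 33 bits to represent every 32-bit unsigned value.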
static struct WidthAndSignedness
EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
  assert(Types.size() > 0 && "Empty list of types.");

  // If any of the given types is signed, we must return a signed type.
  bool Signed = false;
  for (const auto &Type : Types) {
    Signed |= Type.Signed;
  }
  // The encompassing type must have a width greater than or equal to the width
  // of the specified types.  Additionally, if the encompassing type is signed,
  // its width must be strictly greater than the width of any unsigned types
  // given.
  unsigned Width = 0;
  for (const auto &Type : Types) {
    unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
    if (Width < MinWidth) {
      Width = MinWidth;
    }
  }

  return {Width, Signed};
}

Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
  llvm::Type *DestType = Int8PtrTy;
  if (ArgValue->getType() != DestType)
    ArgValue =
        Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());

  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
}
/// Checks if using the result of __builtin_object_size(p, @p From) in place
/// of __builtin_object_size(p, @p To) is correct.
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
}

static llvm::Value *
getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
}

llvm::Value *
CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
                                                 llvm::IntegerType *ResType) {
  uint64_t ObjectSize;
  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
    return emitBuiltinObjectSize(E, Type, ResType);
  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
}

/// Returns a Value corresponding to the size of the given expression.
/// This Value may be either of the following:
///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
///     it)
///   - A call to the @llvm.objectsize intrinsic
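///
/// In the intrinsic case, for __builtin_object_size(p, 0) this is (roughly):
///   %size = call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 false)
/// where the i1 argument selects the minimum (true) or maximum (false)
/// remaining object size.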
llvm::Value *
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
                                       llvm::IntegerType *ResType) {
  // We need to reference an argument if the pointer is a parameter with the
  // pass_object_size attribute.
  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
    auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
    auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
    if (Param != nullptr && PS != nullptr &&
        areBOSTypesCompatible(PS->getType(), Type)) {
      auto Iter = SizeArguments.find(Param);
      assert(Iter != SizeArguments.end());

      const ImplicitParamDecl *D = Iter->second;
      auto DIter = LocalDeclMap.find(D);
      assert(DIter != LocalDeclMap.end());

      return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
                              getContext().getSizeType(), E->getLocStart());
    }
  }

  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
  // evaluate E for side-effects. In either case, we shouldn't lower to
  // @llvm.objectsize.
  if (Type == 3 || E->HasSideEffects(getContext()))
    return getDefaultBuiltinObjectSizeResult(Type, ResType);

  // LLVM only supports 0 and 2, make sure that we pass along that
  // as a boolean.
  auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
  // FIXME: Get right address space.
  llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
  return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
}
RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                        unsigned BuiltinID, const CallExpr *E,
                                        ReturnValueSlot ReturnValue) {
  // See if we can constant fold this builtin.  If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
      !Result.hasSideEffects()) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
                                               Result.Val.getFloat()));
  }

  switch (BuiltinID) {
  default: break;  // Handle intrinsics and libm functions below.
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__va_start:
  case Builtin::BI__builtin_va_end:
    return RValue::get(
        EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
                           ? EmitScalarExpr(E->getArg(0))
                           : EmitVAListRef(E->getArg(0)).getPointer(),
                       BuiltinID != Builtin::BI__builtin_va_end));
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
    Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();

    llvm::Type *Type = Int8PtrTy;

    DstPtr = Builder.CreateBitCast(DstPtr, Type);
    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
                                          {DstPtr, SrcPtr}));
  }
  case Builtin::BI__builtin_abs:
  case Builtin::BI__builtin_labs:
  case Builtin::BI__builtin_llabs: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
        Builder.CreateICmpSGE(ArgValue,
                              llvm::Constant::getNullValue(ArgValue->getType()),
                              "abscond");
    Value *Result =
      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");

    return RValue::get(Result);
  }
  case Builtin::BI__builtin_fabs:
  case Builtin::BI__builtin_fabsf:
  case Builtin::BI__builtin_fabsl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
  }
  case Builtin::BI__builtin_fmod:
  case Builtin::BI__builtin_fmodf:
  case Builtin::BI__builtin_fmodl: {
    Value *Arg1 = EmitScalarExpr(E->getArg(0));
    Value *Arg2 = EmitScalarExpr(E->getArg(1));
    Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_copysign:
  case Builtin::BI__builtin_copysignf:
  case Builtin::BI__builtin_copysignl: {
    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
  }
  case Builtin::BI__builtin_ceil:
  case Builtin::BI__builtin_ceilf:
  case Builtin::BI__builtin_ceill: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
  }
  case Builtin::BI__builtin_floor:
  case Builtin::BI__builtin_floorf:
  case Builtin::BI__builtin_floorl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
  }
  case Builtin::BI__builtin_trunc:
  case Builtin::BI__builtin_truncf:
  case Builtin::BI__builtin_truncl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
  }
  case Builtin::BI__builtin_rint:
  case Builtin::BI__builtin_rintf:
  case Builtin::BI__builtin_rintl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
  }
  case Builtin::BI__builtin_nearbyint:
  case Builtin::BI__builtin_nearbyintf:
  case Builtin::BI__builtin_nearbyintl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
  }
  case Builtin::BI__builtin_round:
  case Builtin::BI__builtin_roundf:
  case Builtin::BI__builtin_roundl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
  }
  case Builtin::BI__builtin_fmin:
  case Builtin::BI__builtin_fminf:
  case Builtin::BI__builtin_fminl: {
    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
  }
  case Builtin::BI__builtin_fmax:
  case Builtin::BI__builtin_fmaxf:
  case Builtin::BI__builtin_fmaxl: {
    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
  }
  case Builtin::BI__builtin_conj:
  case Builtin::BI__builtin_conjf:
  case Builtin::BI__builtin_conjl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    Value *Real = ComplexVal.first;
    Value *Imag = ComplexVal.second;
    Value *Zero =
      Imag->getType()->isFPOrFPVectorTy()
        ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
        : llvm::Constant::getNullValue(Imag->getType());

    Imag = Builder.CreateFSub(Zero, Imag, "sub");
    return RValue::getComplex(std::make_pair(Real, Imag));
  }
  case Builtin::BI__builtin_creal:
  case Builtin::BI__builtin_crealf:
  case Builtin::BI__builtin_creall:
  case Builtin::BIcreal:
  case Builtin::BIcrealf:
  case Builtin::BIcreall: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.first);
  }

  case Builtin::BI__builtin_cimag:
  case Builtin::BI__builtin_cimagf:
  case Builtin::BI__builtin_cimagl:
  case Builtin::BIcimag:
  case Builtin::BIcimagf:
  case Builtin::BIcimagl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.second);
  }

  case Builtin::BI__builtin_ctzs:
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_clzs:
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp =
        Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
                          llvm::ConstantInt::get(ArgType, 1));
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue);
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_unpredictable: {
    // Always return the argument of __builtin_unpredictable. LLVM does not
    // handle this builtin. Metadata for this builtin should be added directly
    // to instructions such as branches or switches that use it.
    return RValue::get(EmitScalarExpr(E->getArg(0)));
  }
  case Builtin::BI__builtin_expect: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();

    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
    // Don't generate llvm.expect on -O0 as the backend won't use it for
    // anything.
    // Note, we still IRGen ExpectedValue because it could have side-effects.
    if (CGM.getCodeGenOpts().OptimizationLevel == 0)
      return RValue::get(ArgValue);

    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
    Value *Result =
        Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_assume_aligned: {
    Value *PtrValue = EmitScalarExpr(E->getArg(0));
    Value *OffsetValue =
      (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;

    Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
    ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
    unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();

    EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
    return RValue::get(PtrValue);
  }
  case Builtin::BI__assume:
  case Builtin::BI__builtin_assume: {
    if (E->getArg(0)->HasSideEffects(getContext()))
      return RValue::get(nullptr);

    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
    return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
  }
  case Builtin::BI__builtin_bswap16:
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
  }
  case Builtin::BI__builtin_bitreverse8:
  case Builtin::BI__builtin_bitreverse16:
  case Builtin::BI__builtin_bitreverse32:
  case Builtin::BI__builtin_bitreverse64: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
  }
  case Builtin::BI__builtin_object_size: {
    unsigned Type =
        E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
    auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));

    // We pass this builtin onto the optimizer so that it can figure out the
    // object size in more complex cases.
    return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
  }
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
  }
  case Builtin::BI__builtin_readcyclecounter: {
    Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin___clear_cache: {
    Value *Begin = EmitScalarExpr(E->getArg(0));
    Value *End = EmitScalarExpr(E->getArg(1));
    Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
    return RValue::get(Builder.CreateCall(F, {Begin, End}));
  }
  case Builtin::BI__builtin_trap:
    return RValue::get(EmitTrapCall(Intrinsic::trap));
  case Builtin::BI__debugbreak:
    return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
  case Builtin::BI__builtin_unreachable: {
    if (SanOpts.has(SanitizerKind::Unreachable)) {
      SanitizerScope SanScope(this);
      EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
                               SanitizerKind::Unreachable),
                "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()),
                None);
    } else
      Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("unreachable.cont"));

    return RValue::get(nullptr);
  }

  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
    return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
  }

  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: llvm_unreachable("Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_isnan: {
    Value *V = EmitScalarExpr(E->getArg(0));
    V = Builder.CreateFCmpUNO(V, V, "cmp");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isinf:
  case Builtin::BI__builtin_isfinite: {
    // isinf(x)    --> fabs(x) == infinity
    // isfinite(x) --> fabs(x) != infinity
    // x != NaN via the ordered compare in either case.
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Fabs = EmitFAbs(*this, V);
    Constant *Infinity = ConstantFP::getInfinity(V->getType());
    CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
                                  ? CmpInst::FCMP_OEQ
                                  : CmpInst::FCMP_ONE;
    Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
    return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isinf_sign: {
    // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
    Value *Arg = EmitScalarExpr(E->getArg(0));
    Value *AbsArg = EmitFAbs(*this, Arg);
    Value *IsInf = Builder.CreateFCmpOEQ(
        AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
    Value *IsNeg = EmitSignBit(*this, Arg);

    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Zero = Constant::getNullValue(IntTy);
    Value *One = ConstantInt::get(IntTy, 1);
    Value *NegativeOne = ConstantInt::get(IntTy, -1);
    Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
    Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V);
    Value *IsLessThanInf =
      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_fpclassify: {
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V);
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }

  case Builtin::BIalloca:
  case Builtin::BI_alloca:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
  }
  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BImemcpy:
  case Builtin::BI__builtin_memcpy: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
                        E->getArg(1)->getExprLoc(), FD, 1);
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }

  case Builtin::BI__builtin___memcpy_chk: {
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }

  case Builtin::BI__builtin_objc_memmove_collectable: {
    Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
    Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
                                                  DestAddr, SrcAddr, SizeVal);
    return RValue::get(DestAddr.getPointer());
  }

  case Builtin::BI__builtin___memmove_chk: {
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }

  case Builtin::BImemmove:
  case Builtin::BI__builtin_memmove: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
                        E->getArg(1)->getExprLoc(), FD, 1);
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BImemset:
  case Builtin::BI__builtin_memset: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BI__builtin___memset_chk: {
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BI__builtin_dwarf_cfa: {
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend?  Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
  case Builtin::BI__builtin_return_address: {
    Value *Depth =
        CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_frame_address: {
    Value *Depth =
        CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_extract_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_frob_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dwarf_sp_column: {
    llvm::IntegerType *Ty
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
    if (Column == -1) {
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
      return RValue::get(llvm::UndefValue::get(Ty));
    }
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  }
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_eh_return: {
    Value *Int = EmitScalarExpr(E->getArg(0));
    Value *Ptr = EmitScalarExpr(E->getArg(1));

    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                  ? Intrinsic::eh_return_i32
                                  : Intrinsic::eh_return_i64);
    Builder.CreateCall(F, {Int, Ptr});
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));

    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_unwind_init: {
    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_extend_pointer: {
    // Extends a pointer to the size of an _Unwind_Word, which is
    // uint64_t on all platforms.  Generally this gets poked into a
    // register and eventually used as an address, so if the
    // addressing registers are wider than pointers and the platform
    // doesn't implicitly ignore high-order bits when doing
    // addressing, we need to make sure we zext / sext based on
    // the platform's expectations.
    //
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
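    //
    // For example, on a 32-bit target the i32 pointer value below is either
    // sign- or zero-extended to i64, as decided by extendPointerWithSExt().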

    // Cast the pointer to intptr_t.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");

    // If that's 64 bits, we're done.
    if (IntPtrTy->getBitWidth() == 64)
      return RValue::get(Result);

    // Otherwise, ask the codegen data what to do.
    if (getTargetHooks().extendPointerWithSExt())
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
    else
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  }
  case Builtin::BI__builtin_setjmp: {
    // Buffer is a void**.
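    //
    // Jump-buffer layout used below: Buf[0] holds the caller's frame
    // address and Buf[2] holds the saved stack pointer, matching the slots
    // the SjLj EH intrinsics expect.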
1169    Address Buf = EmitPointerWithAlignment(E->getArg(0));
1170
1171    // Store the frame pointer to the setjmp buffer.
1172    Value *FrameAddr =
1173      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1174                         ConstantInt::get(Int32Ty, 0));
1175    Builder.CreateStore(FrameAddr, Buf);
1176
1177    // Store the stack pointer to the setjmp buffer.
1178    Value *StackAddr =
1179        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1180    Address StackSaveSlot =
1181      Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1182    Builder.CreateStore(StackAddr, StackSaveSlot);
1183
1184    // Call LLVM's EH setjmp, which is lightweight.
1185    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1186    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1187    return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1188  }
1189  case Builtin::BI__builtin_longjmp: {
1190    Value *Buf = EmitScalarExpr(E->getArg(0));
1191    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1192
1193    // Call LLVM's EH longjmp, which is lightweight.
1194    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1195
1196    // longjmp doesn't return; mark this as unreachable.
1197    Builder.CreateUnreachable();
1198
1199    // We do need to preserve an insertion point.
1200    EmitBlock(createBasicBlock("longjmp.cont"));
1201
1202    return RValue::get(nullptr);
1203  }
1204  case Builtin::BI__sync_fetch_and_add:
1205  case Builtin::BI__sync_fetch_and_sub:
1206  case Builtin::BI__sync_fetch_and_or:
1207  case Builtin::BI__sync_fetch_and_and:
1208  case Builtin::BI__sync_fetch_and_xor:
1209  case Builtin::BI__sync_fetch_and_nand:
1210  case Builtin::BI__sync_add_and_fetch:
1211  case Builtin::BI__sync_sub_and_fetch:
1212  case Builtin::BI__sync_and_and_fetch:
1213  case Builtin::BI__sync_or_and_fetch:
1214  case Builtin::BI__sync_xor_and_fetch:
1215  case Builtin::BI__sync_nand_and_fetch:
1216  case Builtin::BI__sync_val_compare_and_swap:
1217  case Builtin::BI__sync_bool_compare_and_swap:
1218  case Builtin::BI__sync_lock_test_and_set:
1219  case Builtin::BI__sync_lock_release:
1220  case Builtin::BI__sync_swap:
1221    llvm_unreachable("Shouldn't make it through sema");
1222  case Builtin::BI__sync_fetch_and_add_1:
1223  case Builtin::BI__sync_fetch_and_add_2:
1224  case Builtin::BI__sync_fetch_and_add_4:
1225  case Builtin::BI__sync_fetch_and_add_8:
1226  case Builtin::BI__sync_fetch_and_add_16:
1227    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1228  case Builtin::BI__sync_fetch_and_sub_1:
1229  case Builtin::BI__sync_fetch_and_sub_2:
1230  case Builtin::BI__sync_fetch_and_sub_4:
1231  case Builtin::BI__sync_fetch_and_sub_8:
1232  case Builtin::BI__sync_fetch_and_sub_16:
1233    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1234  case Builtin::BI__sync_fetch_and_or_1:
1235  case Builtin::BI__sync_fetch_and_or_2:
1236  case Builtin::BI__sync_fetch_and_or_4:
1237  case Builtin::BI__sync_fetch_and_or_8:
1238  case Builtin::BI__sync_fetch_and_or_16:
1239    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1240  case Builtin::BI__sync_fetch_and_and_1:
1241  case Builtin::BI__sync_fetch_and_and_2:
1242  case Builtin::BI__sync_fetch_and_and_4:
1243  case Builtin::BI__sync_fetch_and_and_8:
1244  case Builtin::BI__sync_fetch_and_and_16:
1245    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1246  case Builtin::BI__sync_fetch_and_xor_1:
1247  case Builtin::BI__sync_fetch_and_xor_2:
1248  case Builtin::BI__sync_fetch_and_xor_4:
1249  case Builtin::BI__sync_fetch_and_xor_8:
1250  case Builtin::BI__sync_fetch_and_xor_16:
1251    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1252  case Builtin::BI__sync_fetch_and_nand_1:
1253  case Builtin::BI__sync_fetch_and_nand_2:
1254  case Builtin::BI__sync_fetch_and_nand_4:
1255  case Builtin::BI__sync_fetch_and_nand_8:
1256  case Builtin::BI__sync_fetch_and_nand_16:
1257    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1258
1259  // Clang extensions: not overloaded yet.
1260  case Builtin::BI__sync_fetch_and_min:
1261    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1262  case Builtin::BI__sync_fetch_and_max:
1263    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1264  case Builtin::BI__sync_fetch_and_umin:
1265    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1266  case Builtin::BI__sync_fetch_and_umax:
1267    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1268
1269  case Builtin::BI__sync_add_and_fetch_1:
1270  case Builtin::BI__sync_add_and_fetch_2:
1271  case Builtin::BI__sync_add_and_fetch_4:
1272  case Builtin::BI__sync_add_and_fetch_8:
1273  case Builtin::BI__sync_add_and_fetch_16:
1274    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1275                                llvm::Instruction::Add);
1276  case Builtin::BI__sync_sub_and_fetch_1:
1277  case Builtin::BI__sync_sub_and_fetch_2:
1278  case Builtin::BI__sync_sub_and_fetch_4:
1279  case Builtin::BI__sync_sub_and_fetch_8:
1280  case Builtin::BI__sync_sub_and_fetch_16:
1281    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1282                                llvm::Instruction::Sub);
1283  case Builtin::BI__sync_and_and_fetch_1:
1284  case Builtin::BI__sync_and_and_fetch_2:
1285  case Builtin::BI__sync_and_and_fetch_4:
1286  case Builtin::BI__sync_and_and_fetch_8:
1287  case Builtin::BI__sync_and_and_fetch_16:
1288    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1289                                llvm::Instruction::And);
1290  case Builtin::BI__sync_or_and_fetch_1:
1291  case Builtin::BI__sync_or_and_fetch_2:
1292  case Builtin::BI__sync_or_and_fetch_4:
1293  case Builtin::BI__sync_or_and_fetch_8:
1294  case Builtin::BI__sync_or_and_fetch_16:
1295    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1296                                llvm::Instruction::Or);
1297  case Builtin::BI__sync_xor_and_fetch_1:
1298  case Builtin::BI__sync_xor_and_fetch_2:
1299  case Builtin::BI__sync_xor_and_fetch_4:
1300  case Builtin::BI__sync_xor_and_fetch_8:
1301  case Builtin::BI__sync_xor_and_fetch_16:
1302    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1303                                llvm::Instruction::Xor);
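  // Note: atomicrmw nand stores ~(old & val) but still yields the old value.
  // __sync_nand_and_fetch must return the *new* value, so it is recomputed
  // below from the returned value with an And followed by an inversion (the
  // trailing 'true' is the Invert flag).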
1304  case Builtin::BI__sync_nand_and_fetch_1:
1305  case Builtin::BI__sync_nand_and_fetch_2:
1306  case Builtin::BI__sync_nand_and_fetch_4:
1307  case Builtin::BI__sync_nand_and_fetch_8:
1308  case Builtin::BI__sync_nand_and_fetch_16:
1309    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1310                                llvm::Instruction::And, true);
1311
1312  case Builtin::BI__sync_val_compare_and_swap_1:
1313  case Builtin::BI__sync_val_compare_and_swap_2:
1314  case Builtin::BI__sync_val_compare_and_swap_4:
1315  case Builtin::BI__sync_val_compare_and_swap_8:
1316  case Builtin::BI__sync_val_compare_and_swap_16:
1317    return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1318
1319  case Builtin::BI__sync_bool_compare_and_swap_1:
1320  case Builtin::BI__sync_bool_compare_and_swap_2:
1321  case Builtin::BI__sync_bool_compare_and_swap_4:
1322  case Builtin::BI__sync_bool_compare_and_swap_8:
1323  case Builtin::BI__sync_bool_compare_and_swap_16:
1324    return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1325
1326  case Builtin::BI__sync_swap_1:
1327  case Builtin::BI__sync_swap_2:
1328  case Builtin::BI__sync_swap_4:
1329  case Builtin::BI__sync_swap_8:
1330  case Builtin::BI__sync_swap_16:
1331    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1332
1333  case Builtin::BI__sync_lock_test_and_set_1:
1334  case Builtin::BI__sync_lock_test_and_set_2:
1335  case Builtin::BI__sync_lock_test_and_set_4:
1336  case Builtin::BI__sync_lock_test_and_set_8:
1337  case Builtin::BI__sync_lock_test_and_set_16:
1338    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1339
1340  case Builtin::BI__sync_lock_release_1:
1341  case Builtin::BI__sync_lock_release_2:
1342  case Builtin::BI__sync_lock_release_4:
1343  case Builtin::BI__sync_lock_release_8:
1344  case Builtin::BI__sync_lock_release_16: {
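    // __sync_lock_release writes 0 with release semantics. Sketch of the
    // store emitted for a 4-byte object:
    //
    //   store atomic i32 0, i32* %ptr release, align 4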
1345    Value *Ptr = EmitScalarExpr(E->getArg(0));
1346    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1347    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1348    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1349                                             StoreSize.getQuantity() * 8);
1350    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1351    llvm::StoreInst *Store =
1352      Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1353                                 StoreSize);
1354    Store->setAtomic(llvm::AtomicOrdering::Release);
1355    return RValue::get(nullptr);
1356  }
1357
1358  case Builtin::BI__sync_synchronize: {
1359    // We assume this is supposed to correspond to a C++0x-style
1360    // sequentially-consistent fence (i.e. this is only usable for
1361    // synchronization, not device I/O or anything like that). This intrinsic
1362    // is really badly designed in the sense that in theory, there isn't
1363    // any way to safely use it... but in practice, it mostly works
1364    // to use it with non-atomic loads and stores to get acquire/release
1365    // semantics.
1366    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1367    return RValue::get(nullptr);
1368  }
1369
1370  case Builtin::BI__builtin_nontemporal_load:
1371    return RValue::get(EmitNontemporalLoad(*this, E));
1372  case Builtin::BI__builtin_nontemporal_store:
1373    return RValue::get(EmitNontemporalStore(*this, E));
1374  case Builtin::BI__c11_atomic_is_lock_free:
1375  case Builtin::BI__atomic_is_lock_free: {
1376    // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1377    // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1378    // _Atomic(T) is always properly-aligned.
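    // For example, __c11_atomic_is_lock_free(4) becomes (sketch; the size_t
    // width is target-dependent):
    //
    //   call zeroext i1 @__atomic_is_lock_free(i64 4, i8* null)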
1379    const char *LibCallName = "__atomic_is_lock_free";
1380    CallArgList Args;
1381    Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1382             getContext().getSizeType());
1383    if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1384      Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1385               getContext().VoidPtrTy);
1386    else
1387      Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1388               getContext().VoidPtrTy);
1389    const CGFunctionInfo &FuncInfo =
1390        CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1391    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1392    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1393    return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
1394  }
1395
1396  case Builtin::BI__atomic_test_and_set: {
1397    // Look at the argument type to determine whether this is a volatile
1398    // operation. The parameter type is always volatile.
1399    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1400    bool Volatile =
1401        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1402
1403    Value *Ptr = EmitScalarExpr(E->getArg(0));
1404    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1405    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1406    Value *NewVal = Builder.getInt8(1);
1407    Value *Order = EmitScalarExpr(E->getArg(1));
1408    if (isa<llvm::ConstantInt>(Order)) {
1409      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1410      AtomicRMWInst *Result = nullptr;
1411      switch (ord) {
1412      case 0:  // memory_order_relaxed
1413      default: // invalid order
1414        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1415                                         llvm::AtomicOrdering::Monotonic);
1416        break;
1417      case 1: // memory_order_consume
1418      case 2: // memory_order_acquire
1419        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1420                                         llvm::AtomicOrdering::Acquire);
1421        break;
1422      case 3: // memory_order_release
1423        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1424                                         llvm::AtomicOrdering::Release);
1425        break;
1426      case 4: // memory_order_acq_rel
1428        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1429                                         llvm::AtomicOrdering::AcquireRelease);
1430        break;
1431      case 5: // memory_order_seq_cst
1432        Result = Builder.CreateAtomicRMW(
1433            llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1434            llvm::AtomicOrdering::SequentiallyConsistent);
1435        break;
1436      }
1437      Result->setVolatile(Volatile);
1438      return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1439    }
1440
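    // The ordering is not a compile-time constant: switch on it, perform the
    // exchange with each legal ordering in its own block, and merge the
    // results with a phi. Sketch of the emitted control flow:
    //
    //   switch i32 %order, label %monotonic [ i32 1, label %acquire ... ]
    //   ...
    // atomic.continue:
    //   %was_set = phi i8 [ %r0, %monotonic ], [ %r1, %acquire ], ...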
1441    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1442
1443    llvm::BasicBlock *BBs[5] = {
1444      createBasicBlock("monotonic", CurFn),
1445      createBasicBlock("acquire", CurFn),
1446      createBasicBlock("release", CurFn),
1447      createBasicBlock("acqrel", CurFn),
1448      createBasicBlock("seqcst", CurFn)
1449    };
1450    llvm::AtomicOrdering Orders[5] = {
1451        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1452        llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1453        llvm::AtomicOrdering::SequentiallyConsistent};
1454
1455    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1456    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1457
1458    Builder.SetInsertPoint(ContBB);
1459    PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1460
1461    for (unsigned i = 0; i < 5; ++i) {
1462      Builder.SetInsertPoint(BBs[i]);
1463      AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1464                                                   Ptr, NewVal, Orders[i]);
1465      RMW->setVolatile(Volatile);
1466      Result->addIncoming(RMW, BBs[i]);
1467      Builder.CreateBr(ContBB);
1468    }
1469
1470    SI->addCase(Builder.getInt32(0), BBs[0]);
1471    SI->addCase(Builder.getInt32(1), BBs[1]);
1472    SI->addCase(Builder.getInt32(2), BBs[1]);
1473    SI->addCase(Builder.getInt32(3), BBs[2]);
1474    SI->addCase(Builder.getInt32(4), BBs[3]);
1475    SI->addCase(Builder.getInt32(5), BBs[4]);
1476
1477    Builder.SetInsertPoint(ContBB);
1478    return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1479  }
1480
1481  case Builtin::BI__atomic_clear: {
1482    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1483    bool Volatile =
1484        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1485
1486    Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1487    unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1488    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1489    Value *NewVal = Builder.getInt8(0);
1490    Value *Order = EmitScalarExpr(E->getArg(1));
1491    if (isa<llvm::ConstantInt>(Order)) {
1492      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1493      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1494      switch (ord) {
1495      case 0:  // memory_order_relaxed
1496      default: // invalid order
1497        Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1498        break;
1499      case 3:  // memory_order_release
1500        Store->setOrdering(llvm::AtomicOrdering::Release);
1501        break;
1502      case 5:  // memory_order_seq_cst
1503        Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1504        break;
1505      }
1506      return RValue::get(nullptr);
1507    }
1508
1509    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1510
1511    llvm::BasicBlock *BBs[3] = {
1512      createBasicBlock("monotonic", CurFn),
1513      createBasicBlock("release", CurFn),
1514      createBasicBlock("seqcst", CurFn)
1515    };
1516    llvm::AtomicOrdering Orders[3] = {
1517        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1518        llvm::AtomicOrdering::SequentiallyConsistent};
1519
1520    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1521    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1522
1523    for (unsigned i = 0; i < 3; ++i) {
1524      Builder.SetInsertPoint(BBs[i]);
1525      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1526      Store->setOrdering(Orders[i]);
1527      Builder.CreateBr(ContBB);
1528    }
1529
1530    SI->addCase(Builder.getInt32(0), BBs[0]);
1531    SI->addCase(Builder.getInt32(3), BBs[1]);
1532    SI->addCase(Builder.getInt32(5), BBs[2]);
1533
1534    Builder.SetInsertPoint(ContBB);
1535    return RValue::get(nullptr);
1536  }
1537
1538  case Builtin::BI__atomic_thread_fence:
1539  case Builtin::BI__atomic_signal_fence:
1540  case Builtin::BI__c11_atomic_thread_fence:
1541  case Builtin::BI__c11_atomic_signal_fence: {
1542    llvm::SynchronizationScope Scope;
1543    if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1544        BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1545      Scope = llvm::SingleThread;
1546    else
1547      Scope = llvm::CrossThread;
1548    Value *Order = EmitScalarExpr(E->getArg(0));
1549    if (isa<llvm::ConstantInt>(Order)) {
1550      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1551      switch (ord) {
1552      case 0:  // memory_order_relaxed
1553      default: // invalid order
1554        break;
1555      case 1:  // memory_order_consume
1556      case 2:  // memory_order_acquire
1557        Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1558        break;
1559      case 3:  // memory_order_release
1560        Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1561        break;
1562      case 4:  // memory_order_acq_rel
1563        Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1564        break;
1565      case 5:  // memory_order_seq_cst
1566        Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
1567                            Scope);
1568        break;
1569      }
1570      return RValue::get(nullptr);
1571    }
1572
1573    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1574    AcquireBB = createBasicBlock("acquire", CurFn);
1575    ReleaseBB = createBasicBlock("release", CurFn);
1576    AcqRelBB = createBasicBlock("acqrel", CurFn);
1577    SeqCstBB = createBasicBlock("seqcst", CurFn);
1578    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1579
1580    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1581    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1582
1583    Builder.SetInsertPoint(AcquireBB);
1584    Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1585    Builder.CreateBr(ContBB);
1586    SI->addCase(Builder.getInt32(1), AcquireBB);
1587    SI->addCase(Builder.getInt32(2), AcquireBB);
1588
1589    Builder.SetInsertPoint(ReleaseBB);
1590    Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1591    Builder.CreateBr(ContBB);
1592    SI->addCase(Builder.getInt32(3), ReleaseBB);
1593
1594    Builder.SetInsertPoint(AcqRelBB);
1595    Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1596    Builder.CreateBr(ContBB);
1597    SI->addCase(Builder.getInt32(4), AcqRelBB);
1598
1599    Builder.SetInsertPoint(SeqCstBB);
1600    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1601    Builder.CreateBr(ContBB);
1602    SI->addCase(Builder.getInt32(5), SeqCstBB);
1603
1604    Builder.SetInsertPoint(ContBB);
1605    return RValue::get(nullptr);
1606  }
1607
1608    // Library functions with special handling.
1609  case Builtin::BIsqrt:
1610  case Builtin::BIsqrtf:
1611  case Builtin::BIsqrtl: {
1612    // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1613    // in finite- or unsafe-math mode (the intrinsic has different semantics
1614    // for handling negative numbers compared to the library function, so
1615    // -fmath-errno=0 is not enough).
1616    if (!FD->hasAttr<ConstAttr>())
1617      break;
1618    if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1619          CGM.getCodeGenOpts().NoNaNsFPMath))
1620      break;
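    // At this point fast-math is known to be enabled, so e.g. sqrt(x) lowers
    // to (sketch):
    //
    //   %0 = call double @llvm.sqrt.f64(double %x)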
1621    Value *Arg0 = EmitScalarExpr(E->getArg(0));
1622    llvm::Type *ArgType = Arg0->getType();
1623    Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1624    return RValue::get(Builder.CreateCall(F, Arg0));
1625  }
1626
1627  case Builtin::BI__builtin_pow:
1628  case Builtin::BI__builtin_powf:
1629  case Builtin::BI__builtin_powl:
1630  case Builtin::BIpow:
1631  case Builtin::BIpowf:
1632  case Builtin::BIpowl: {
1633    // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1634    if (!FD->hasAttr<ConstAttr>())
1635      break;
1636    Value *Base = EmitScalarExpr(E->getArg(0));
1637    Value *Exponent = EmitScalarExpr(E->getArg(1));
1638    llvm::Type *ArgType = Base->getType();
1639    Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1640    return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1641  }
1642
1643  case Builtin::BIfma:
1644  case Builtin::BIfmaf:
1645  case Builtin::BIfmal:
1646  case Builtin::BI__builtin_fma:
1647  case Builtin::BI__builtin_fmaf:
1648  case Builtin::BI__builtin_fmal: {
1649    // Rewrite fma to intrinsic.
1650    Value *FirstArg = EmitScalarExpr(E->getArg(0));
1651    llvm::Type *ArgType = FirstArg->getType();
1652    Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1653    return RValue::get(
1654        Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1655                               EmitScalarExpr(E->getArg(2))}));
1656  }
1657
1658  case Builtin::BI__builtin_signbit:
1659  case Builtin::BI__builtin_signbitf:
1660  case Builtin::BI__builtin_signbitl: {
1661    return RValue::get(
1662        Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1663                           ConvertType(E->getType())));
1664  }
1665  case Builtin::BI__builtin_annotation: {
1666    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1667    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1668                                      AnnVal->getType());
1669
1670    // Get the annotation string, looking through casts. Sema requires this
1671    // to be a non-wide string literal, potentially cast, so the cast<> is safe.
1672    const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1673    StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1674    return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1675  }
1676  case Builtin::BI__builtin_addcb:
1677  case Builtin::BI__builtin_addcs:
1678  case Builtin::BI__builtin_addc:
1679  case Builtin::BI__builtin_addcl:
1680  case Builtin::BI__builtin_addcll:
1681  case Builtin::BI__builtin_subcb:
1682  case Builtin::BI__builtin_subcs:
1683  case Builtin::BI__builtin_subc:
1684  case Builtin::BI__builtin_subcl:
1685  case Builtin::BI__builtin_subcll: {
1686
1687    // We translate all of these builtins from expressions of the form:
1688    //   int x = ..., y = ..., carryin = ..., carryout, result;
1689    //   result = __builtin_addc(x, y, carryin, &carryout);
1690    //
1691    // to LLVM IR of the form:
1692    //
1693    //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1694    //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1695    //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1696    //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1697    //                                                       i32 %carryin)
1698    //   %result = extractvalue {i32, i1} %tmp2, 0
1699    //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1700    //   %tmp3 = or i1 %carry1, %carry2
1701    //   %tmp4 = zext i1 %tmp3 to i32
1702    //   store i32 %tmp4, i32* %carryout
1703
1704    // Scalarize our inputs.
1705    llvm::Value *X = EmitScalarExpr(E->getArg(0));
1706    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1707    llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1708    Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1709
1710    // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1711    llvm::Intrinsic::ID IntrinsicId;
1712    switch (BuiltinID) {
1713    default: llvm_unreachable("Unknown multiprecision builtin id.");
1714    case Builtin::BI__builtin_addcb:
1715    case Builtin::BI__builtin_addcs:
1716    case Builtin::BI__builtin_addc:
1717    case Builtin::BI__builtin_addcl:
1718    case Builtin::BI__builtin_addcll:
1719      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1720      break;
1721    case Builtin::BI__builtin_subcb:
1722    case Builtin::BI__builtin_subcs:
1723    case Builtin::BI__builtin_subc:
1724    case Builtin::BI__builtin_subcl:
1725    case Builtin::BI__builtin_subcll:
1726      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1727      break;
1728    }
1729
1730    // Construct our resulting LLVM IR expression.
1731    llvm::Value *Carry1;
1732    llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1733                                              X, Y, Carry1);
1734    llvm::Value *Carry2;
1735    llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1736                                              Sum1, Carryin, Carry2);
1737    llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1738                                               X->getType());
1739    Builder.CreateStore(CarryOut, CarryOutPtr);
1740    return RValue::get(Sum2);
1741  }
1742
1743  case Builtin::BI__builtin_add_overflow:
1744  case Builtin::BI__builtin_sub_overflow:
1745  case Builtin::BI__builtin_mul_overflow: {
1746    const clang::Expr *LeftArg = E->getArg(0);
1747    const clang::Expr *RightArg = E->getArg(1);
1748    const clang::Expr *ResultArg = E->getArg(2);
1749
1750    clang::QualType ResultQTy =
1751        ResultArg->getType()->castAs<PointerType>()->getPointeeType();
1752
1753    WidthAndSignedness LeftInfo =
1754        getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
1755    WidthAndSignedness RightInfo =
1756        getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
1757    WidthAndSignedness ResultInfo =
1758        getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
1759    WidthAndSignedness EncompassingInfo =
1760        EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
1761
1762    llvm::Type *EncompassingLLVMTy =
1763        llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
1764
1765    llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
1766
1767    llvm::Intrinsic::ID IntrinsicId;
1768    switch (BuiltinID) {
1769    default:
1770      llvm_unreachable("Unknown overflow builtin id.");
1771    case Builtin::BI__builtin_add_overflow:
1772      IntrinsicId = EncompassingInfo.Signed
1773                        ? llvm::Intrinsic::sadd_with_overflow
1774                        : llvm::Intrinsic::uadd_with_overflow;
1775      break;
1776    case Builtin::BI__builtin_sub_overflow:
1777      IntrinsicId = EncompassingInfo.Signed
1778                        ? llvm::Intrinsic::ssub_with_overflow
1779                        : llvm::Intrinsic::usub_with_overflow;
1780      break;
1781    case Builtin::BI__builtin_mul_overflow:
1782      IntrinsicId = EncompassingInfo.Signed
1783                        ? llvm::Intrinsic::smul_with_overflow
1784                        : llvm::Intrinsic::umul_with_overflow;
1785      break;
1786    }
1787
1788    llvm::Value *Left = EmitScalarExpr(LeftArg);
1789    llvm::Value *Right = EmitScalarExpr(RightArg);
1790    Address ResultPtr = EmitPointerWithAlignment(ResultArg);
1791
1792    // Extend each operand to the encompassing type.
1793    Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
1794    Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
1795
1796    // Perform the operation on the extended values.
1797    llvm::Value *Overflow, *Result;
1798    Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
1799
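    // Worked example (assuming 32-bit int and 16-bit short): for
    // __builtin_add_overflow(int, int, short *), the addition is performed in
    // i32 via @llvm.sadd.with.overflow.i32; the i32 sum is then truncated to
    // i16 and sign-extended back, and any mismatch with the original sum is
    // folded into the overflow flag below.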
1800    if (EncompassingInfo.Width > ResultInfo.Width) {
1801      // The encompassing type is wider than the result type, so we need to
1802      // truncate it.
1803      llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
1804
1805      // To see if the truncation caused an overflow, we will extend
1806      // the result and then compare it to the original result.
1807      llvm::Value *ResultTruncExt = Builder.CreateIntCast(
1808          ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
1809      llvm::Value *TruncationOverflow =
1810          Builder.CreateICmpNE(Result, ResultTruncExt);
1811
1812      Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
1813      Result = ResultTrunc;
1814    }
1815
1816    // Finally, store the result using the pointer.
1817    bool isVolatile =
1818      ResultArg->getType()->getPointeeType().isVolatileQualified();
1819    Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
1820
1821    return RValue::get(Overflow);
1822  }
1823
1824  case Builtin::BI__builtin_uadd_overflow:
1825  case Builtin::BI__builtin_uaddl_overflow:
1826  case Builtin::BI__builtin_uaddll_overflow:
1827  case Builtin::BI__builtin_usub_overflow:
1828  case Builtin::BI__builtin_usubl_overflow:
1829  case Builtin::BI__builtin_usubll_overflow:
1830  case Builtin::BI__builtin_umul_overflow:
1831  case Builtin::BI__builtin_umull_overflow:
1832  case Builtin::BI__builtin_umulll_overflow:
1833  case Builtin::BI__builtin_sadd_overflow:
1834  case Builtin::BI__builtin_saddl_overflow:
1835  case Builtin::BI__builtin_saddll_overflow:
1836  case Builtin::BI__builtin_ssub_overflow:
1837  case Builtin::BI__builtin_ssubl_overflow:
1838  case Builtin::BI__builtin_ssubll_overflow:
1839  case Builtin::BI__builtin_smul_overflow:
1840  case Builtin::BI__builtin_smull_overflow:
1841  case Builtin::BI__builtin_smulll_overflow: {
1842
1843    // We translate all of these builtins directly to the relevant LLVM IR node.
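    // For example, __builtin_uadd_overflow(x, y, &sum) becomes (sketch):
    //
    //   %pair  = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
    //   %sum   = extractvalue {i32, i1} %pair, 0   ; stored through &sum
    //   %carry = extractvalue {i32, i1} %pair, 1   ; returned as the result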
1844
1845    // Scalarize our inputs.
1846    llvm::Value *X = EmitScalarExpr(E->getArg(0));
1847    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1848    Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
1849
1850    // Decide which of the overflow intrinsics we are lowering to:
1851    llvm::Intrinsic::ID IntrinsicId;
1852    switch (BuiltinID) {
1853    default: llvm_unreachable("Unknown overflow builtin id.");
1854    case Builtin::BI__builtin_uadd_overflow:
1855    case Builtin::BI__builtin_uaddl_overflow:
1856    case Builtin::BI__builtin_uaddll_overflow:
1857      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1858      break;
1859    case Builtin::BI__builtin_usub_overflow:
1860    case Builtin::BI__builtin_usubl_overflow:
1861    case Builtin::BI__builtin_usubll_overflow:
1862      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1863      break;
1864    case Builtin::BI__builtin_umul_overflow:
1865    case Builtin::BI__builtin_umull_overflow:
1866    case Builtin::BI__builtin_umulll_overflow:
1867      IntrinsicId = llvm::Intrinsic::umul_with_overflow;
1868      break;
1869    case Builtin::BI__builtin_sadd_overflow:
1870    case Builtin::BI__builtin_saddl_overflow:
1871    case Builtin::BI__builtin_saddll_overflow:
1872      IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
1873      break;
1874    case Builtin::BI__builtin_ssub_overflow:
1875    case Builtin::BI__builtin_ssubl_overflow:
1876    case Builtin::BI__builtin_ssubll_overflow:
1877      IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
1878      break;
1879    case Builtin::BI__builtin_smul_overflow:
1880    case Builtin::BI__builtin_smull_overflow:
1881    case Builtin::BI__builtin_smulll_overflow:
1882      IntrinsicId = llvm::Intrinsic::smul_with_overflow;
1883      break;
1884    }
1885
1887    llvm::Value *Carry;
1888    llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
1889    Builder.CreateStore(Sum, SumOutPtr);
1890
1891    return RValue::get(Carry);
1892  }
1893  case Builtin::BI__builtin_addressof:
1894    return RValue::get(EmitLValue(E->getArg(0)).getPointer());
1895  case Builtin::BI__builtin_operator_new:
1896    return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1897                                    E->getArg(0), false);
1898  case Builtin::BI__builtin_operator_delete:
1899    return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1900                                    E->getArg(0), true);
1901  case Builtin::BI__noop:
1902    // __noop always evaluates to an integer literal zero.
1903    return RValue::get(ConstantInt::get(IntTy, 0));
1904  case Builtin::BI__builtin_call_with_static_chain: {
1905    const CallExpr *Call = cast<CallExpr>(E->getArg(0));
1906    const Expr *Chain = E->getArg(1);
1907    return EmitCall(Call->getCallee()->getType(),
1908                    EmitScalarExpr(Call->getCallee()), Call, ReturnValue,
1909                    Call->getCalleeDecl(), EmitScalarExpr(Chain));
1910  }
1911  case Builtin::BI_InterlockedExchange:
1912  case Builtin::BI_InterlockedExchangePointer:
1913    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1914  case Builtin::BI_InterlockedCompareExchangePointer: {
1915    llvm::Type *RTy;
1916    llvm::IntegerType *IntType =
1917      IntegerType::get(getLLVMContext(),
1918                       getContext().getTypeSize(E->getType()));
1919    llvm::Type *IntPtrType = IntType->getPointerTo();
1920
1921    llvm::Value *Destination =
1922      Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
1923
1924    llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
1925    RTy = Exchange->getType();
1926    Exchange = Builder.CreatePtrToInt(Exchange, IntType);
1927
1928    llvm::Value *Comparand =
1929      Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
1930
1931    auto Result =
1932        Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
1933                                    AtomicOrdering::SequentiallyConsistent,
1934                                    AtomicOrdering::SequentiallyConsistent);
1935    Result->setVolatile(true);
1936
1937    return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
1938                                                                         0),
1939                                              RTy));
1940  }
1941  case Builtin::BI_InterlockedCompareExchange: {
1942    AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
1943        EmitScalarExpr(E->getArg(0)),
1944        EmitScalarExpr(E->getArg(2)),
1945        EmitScalarExpr(E->getArg(1)),
1946        AtomicOrdering::SequentiallyConsistent,
1947        AtomicOrdering::SequentiallyConsistent);
1948    CXI->setVolatile(true);
1949    return RValue::get(Builder.CreateExtractValue(CXI, 0));
1950  }
1951  case Builtin::BI_InterlockedIncrement: {
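    // MSVC's _InterlockedIncrement returns the *new* value, but atomicrmw
    // yields the old one, hence the extra add of 1 below.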
1952    llvm::Type *IntTy = ConvertType(E->getType());
1953    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1954      AtomicRMWInst::Add,
1955      EmitScalarExpr(E->getArg(0)),
1956      ConstantInt::get(IntTy, 1),
1957      llvm::AtomicOrdering::SequentiallyConsistent);
1958    RMWI->setVolatile(true);
1959    return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)));
1960  }
1961  case Builtin::BI_InterlockedDecrement: {
1962    llvm::Type *IntTy = ConvertType(E->getType());
1963    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1964      AtomicRMWInst::Sub,
1965      EmitScalarExpr(E->getArg(0)),
1966      ConstantInt::get(IntTy, 1),
1967      llvm::AtomicOrdering::SequentiallyConsistent);
1968    RMWI->setVolatile(true);
1969    return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)));
1970  }
1971  case Builtin::BI_InterlockedExchangeAdd: {
1972    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1973      AtomicRMWInst::Add,
1974      EmitScalarExpr(E->getArg(0)),
1975      EmitScalarExpr(E->getArg(1)),
1976      llvm::AtomicOrdering::SequentiallyConsistent);
1977    RMWI->setVolatile(true);
1978    return RValue::get(RMWI);
1979  }
1980  case Builtin::BI__readfsdword: {
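    // On x86, LLVM address space 257 designates the FS segment, so this
    // emits a volatile load of the dword at fs:[offset].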
1981    llvm::Type *IntTy = ConvertType(E->getType());
1982    Value *IntToPtr =
1983      Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
1984                             llvm::PointerType::get(IntTy, 257));
1985    LoadInst *Load =
1986        Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true);
1987    return RValue::get(Load);
1988  }
1989
1990  case Builtin::BI__exception_code:
1991  case Builtin::BI_exception_code:
1992    return RValue::get(EmitSEHExceptionCode());
1993  case Builtin::BI__exception_info:
1994  case Builtin::BI_exception_info:
1995    return RValue::get(EmitSEHExceptionInfo());
1996  case Builtin::BI__abnormal_termination:
1997  case Builtin::BI_abnormal_termination:
1998    return RValue::get(EmitSEHAbnormalTermination());
1999  case Builtin::BI_setjmpex: {
2000    if (getTarget().getTriple().isOSMSVCRT()) {
2001      llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2002      llvm::AttributeSet ReturnsTwiceAttr =
2003          AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2004                            llvm::Attribute::ReturnsTwice);
2005      llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2006          llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2007          "_setjmpex", ReturnsTwiceAttr);
2008      llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2009          EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2010      llvm::Value *FrameAddr =
2011          Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2012                             ConstantInt::get(Int32Ty, 0));
2013      llvm::Value *Args[] = {Buf, FrameAddr};
2014      llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2015      CS.setAttributes(ReturnsTwiceAttr);
2016      return RValue::get(CS.getInstruction());
2017    }
2018    break;
2019  }
2020  case Builtin::BI_setjmp: {
2021    if (getTarget().getTriple().isOSMSVCRT()) {
2022      llvm::AttributeSet ReturnsTwiceAttr =
2023          AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2024                            llvm::Attribute::ReturnsTwice);
2025      llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2026          EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2027      llvm::CallSite CS;
2028      if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2029        llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2030        llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2031            llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2032            "_setjmp3", ReturnsTwiceAttr);
2033        llvm::Value *Count = ConstantInt::get(IntTy, 0);
2034        llvm::Value *Args[] = {Buf, Count};
2035        CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2036      } else {
2037        llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2038        llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2039            llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2040            "_setjmp", ReturnsTwiceAttr);
2041        llvm::Value *FrameAddr =
2042            Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2043                               ConstantInt::get(Int32Ty, 0));
2044        llvm::Value *Args[] = {Buf, FrameAddr};
2045        CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2046      }
2047      CS.setAttributes(ReturnsTwiceAttr);
2048      return RValue::get(CS.getInstruction());
2049    }
2050    break;
2051  }
2052
2053  case Builtin::BI__GetExceptionInfo: {
2054    if (llvm::GlobalVariable *GV =
2055            CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2056      return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2057    break;
2058  }
2059
2060  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2061  case Builtin::BIread_pipe:
2062  case Builtin::BIwrite_pipe: {
2063    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2064          *Arg1 = EmitScalarExpr(E->getArg(1));
2065
2066    // Type of the generic packet parameter.
2067    unsigned GenericAS =
2068        getContext().getTargetAddressSpace(LangAS::opencl_generic);
2069    llvm::Type *I8PTy = llvm::PointerType::get(
2070        llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2071
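    // For example, a two-argument read_pipe(p, &val) becomes (sketch; the
    // pipe's LLVM type and the address spaces are target-defined):
    //
    //   %cast = cast of &val to i8 addrspace(4)*
    //   %ret  = call i32 @__read_pipe_2(<pipe> %p, i8 addrspace(4)* %cast)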
2072    // Determine which overload to call from the number of arguments.
2073    if (2U == E->getNumArgs()) {
2074      const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2075                                                             : "__write_pipe_2";
2076      // Create a generic function type so the call works with any builtin or
2077      // user-defined type.
2078      llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy};
2079      llvm::FunctionType *FTy = llvm::FunctionType::get(
2080          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2081      Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2082      return RValue::get(Builder.CreateCall(
2083          CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast}));
2084    } else {
2085      assert(4 == E->getNumArgs() &&
2086             "Illegal number of parameters to pipe function");
2087      const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2088                                                             : "__write_pipe_4";
2089
2090      llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy};
2091      Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2092            *Arg3 = EmitScalarExpr(E->getArg(3));
2093      llvm::FunctionType *FTy = llvm::FunctionType::get(
2094          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2095      Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2096      // We know the third argument is an integer type, but we may need to cast
2097      // it to i32.
2098      if (Arg2->getType() != Int32Ty)
2099        Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2100      return RValue::get(Builder.CreateCall(
2101          CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast}));
2102    }
2103  }
2104  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
2105  // functions
2106  case Builtin::BIreserve_read_pipe:
2107  case Builtin::BIreserve_write_pipe:
2108  case Builtin::BIwork_group_reserve_read_pipe:
2109  case Builtin::BIwork_group_reserve_write_pipe:
2110  case Builtin::BIsub_group_reserve_read_pipe:
2111  case Builtin::BIsub_group_reserve_write_pipe: {
2112    // Composing the mangled name for the function.
2113    const char *Name;
2114    if (BuiltinID == Builtin::BIreserve_read_pipe)
2115      Name = "__reserve_read_pipe";
2116    else if (BuiltinID == Builtin::BIreserve_write_pipe)
2117      Name = "__reserve_write_pipe";
2118    else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2119      Name = "__work_group_reserve_read_pipe";
2120    else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2121      Name = "__work_group_reserve_write_pipe";
2122    else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2123      Name = "__sub_group_reserve_read_pipe";
2124    else
2125      Name = "__sub_group_reserve_write_pipe";
2126
2127    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2128          *Arg1 = EmitScalarExpr(E->getArg(1));
2129    llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2130
2131    // Building the generic function prototype.
2132    llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty};
2133    llvm::FunctionType *FTy = llvm::FunctionType::get(
2134        ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2135    // We know the second argument is an integer type, but we may need to cast
2136    // it to i32.
2137    if (Arg1->getType() != Int32Ty)
2138      Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2139    return RValue::get(
2140        Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
2141  }
2142  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2143  // functions
2144  case Builtin::BIcommit_read_pipe:
2145  case Builtin::BIcommit_write_pipe:
2146  case Builtin::BIwork_group_commit_read_pipe:
2147  case Builtin::BIwork_group_commit_write_pipe:
2148  case Builtin::BIsub_group_commit_read_pipe:
2149  case Builtin::BIsub_group_commit_write_pipe: {
2150    const char *Name;
2151    if (BuiltinID == Builtin::BIcommit_read_pipe)
2152      Name = "__commit_read_pipe";
2153    else if (BuiltinID == Builtin::BIcommit_write_pipe)
2154      Name = "__commit_write_pipe";
2155    else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2156      Name = "__work_group_commit_read_pipe";
2157    else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2158      Name = "__work_group_commit_write_pipe";
2159    else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2160      Name = "__sub_group_commit_read_pipe";
2161    else
2162      Name = "__sub_group_commit_write_pipe";
2163
2164    Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2165          *Arg1 = EmitScalarExpr(E->getArg(1));
2166
2167    // Building the generic function prototype.
2168    llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType()};
2169    llvm::FunctionType *FTy =
2170        llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2171                                llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2172
2173    return RValue::get(
2174        Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
2175  }
2176  // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2177  case Builtin::BIget_pipe_num_packets:
2178  case Builtin::BIget_pipe_max_packets: {
2179    const char *Name;
2180    if (BuiltinID == Builtin::BIget_pipe_num_packets)
2181      Name = "__get_pipe_num_packets";
2182    else
2183      Name = "__get_pipe_max_packets";
2184
2185    // Building the generic function prototype.
2186    Value *Arg0 = EmitScalarExpr(E->getArg(0));
2187    llvm::Type *ArgTys[] = {Arg0->getType()};
2188    llvm::FunctionType *FTy = llvm::FunctionType::get(
2189        Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2190
2191    return RValue::get(
2192        Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0}));
2193  }
2194
2195  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2196  case Builtin::BIto_global:
2197  case Builtin::BIto_local:
2198  case Builtin::BIto_private: {
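    // These lower to runtime calls named after the builtin itself (e.g.
    // to_global), taking a generic i8* and returning an i8* in the target
    // address space; the argument and result are cast around the call.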
2199    auto Arg0 = EmitScalarExpr(E->getArg(0));
2200    auto NewArgT = llvm::PointerType::get(Int8Ty,
2201      CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2202    auto NewRetT = llvm::PointerType::get(Int8Ty,
2203      CGM.getContext().getTargetAddressSpace(
2204        E->getType()->getPointeeType().getAddressSpace()));
2205    auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2206    llvm::Value *NewArg;
2207    if (Arg0->getType()->getPointerAddressSpace() !=
2208        NewArgT->getPointerAddressSpace())
2209      NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2210    else
2211      NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2212    auto NewCall = Builder.CreateCall(CGM.CreateRuntimeFunction(FTy,
2213      E->getDirectCallee()->getName()), {NewArg});
2214    return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2215      ConvertType(E->getType())));
2216  }
2217
2218  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2219  // It contains four different overload formats specified in Table 6.13.17.1.
2220  case Builtin::BIenqueue_kernel: {
2221    StringRef Name; // Generated function call name
2222    unsigned NumArgs = E->getNumArgs();
2223
2224    llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2225    llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy);
2226
2227    llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2228    llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2229    llvm::Value *Range = EmitScalarExpr(E->getArg(2));
2230
2231    if (NumArgs == 4) {
2232      // The most basic form of the call with parameters:
2233      // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
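      // Sketch of the emitted call (type names are illustrative):
      //
      //   call i32 @__enqueue_kernel_basic(queue_t %q, i32 %flags,
      //                                    ndrange_t %range, i8* %block)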
2234      Name = "__enqueue_kernel_basic";
2235      llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy};
2236      llvm::FunctionType *FTy = llvm::FunctionType::get(
2237          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2238
2239      llvm::Value *Block =
2240          Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2241
2242      return RValue::get(Builder.CreateCall(
2243          CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block}));
2244    }
2245    assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2246
2247    // Could have events and/or vaargs.
2248    if (E->getArg(3)->getType()->isBlockPointerType()) {
2249      // No events passed, but has variadic arguments.
2250      Name = "__enqueue_kernel_vaargs";
2251      llvm::Value *Block =
2252          Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2253      // Create a vector of the arguments, as well as a constant value to
2254      // express to the runtime the number of variadic arguments.
2255      std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2256                                         ConstantInt::get(IntTy, NumArgs - 4)};
2257      std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
2258                                          IntTy};
2259
2260      // Add the variadics.
2261      for (unsigned I = 4; I < NumArgs; ++I) {
2262        llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
2263        unsigned TypeSizeInBytes =
2264            getContext()
2265                .getTypeSizeInChars(E->getArg(I)->getType())
2266                .getQuantity();
2267        Args.push_back(TypeSizeInBytes < 4
2268                           ? Builder.CreateZExt(ArgSize, Int32Ty)
2269                           : ArgSize);
2270      }
2271
2272      llvm::FunctionType *FTy = llvm::FunctionType::get(
2273          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2274      return RValue::get(
2275          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2276                             llvm::ArrayRef<llvm::Value *>(Args)));
2277    }
2278    // All remaining overloads take event arguments.
2279    if (NumArgs >= 7) {
2280      llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2281      unsigned AS4 =
2282          E->getArg(4)->getType()->isArrayType()
2283              ? E->getArg(4)->getType().getAddressSpace()
2284              : E->getArg(4)->getType()->getPointeeType().getAddressSpace();
2285      llvm::Type *EventPtrAS4Ty =
2286          EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4));
2287      unsigned AS5 =
2288          E->getArg(5)->getType()->getPointeeType().getAddressSpace();
2289      llvm::Type *EventPtrAS5Ty =
2290          EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5));
2291
2292      llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3));
2293      llvm::Value *EventList =
2294          E->getArg(4)->getType()->isArrayType()
2295              ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2296              : EmitScalarExpr(E->getArg(4));
2297      llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2298      llvm::Value *Block =
2299          Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy);
2300
2301      std::vector<llvm::Type *> ArgTys = {
2302          QueueTy,       Int32Ty,       RangeTy,  Int32Ty,
2303          EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy};
2304      std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
2305                                         EventList, ClkEvent, Block};
2306
2307      if (NumArgs == 7) {
2308        // Has events but no variadics.
2309        Name = "__enqueue_kernel_basic_events";
2310        llvm::FunctionType *FTy = llvm::FunctionType::get(
2311            Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2312        return RValue::get(
2313            Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2314                               llvm::ArrayRef<llvm::Value *>(Args)));
2315      }
2316      // Has event info and variadics.
2317      // Pass the number of variadics to the runtime function too.
2318      Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2319      ArgTys.push_back(Int32Ty);
2320      Name = "__enqueue_kernel_events_vaargs";
2321
2322      // Add the variadics.
2323      for (unsigned I = 7; I < NumArgs; ++I) {
2324        llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
2325        unsigned TypeSizeInBytes =
2326            getContext()
2327                .getTypeSizeInChars(E->getArg(I)->getType())
2328                .getQuantity();
2329        Args.push_back(TypeSizeInBytes < 4
2330                           ? Builder.CreateZExt(ArgSize, Int32Ty)
2331                           : ArgSize);
2332      }
2333      llvm::FunctionType *FTy = llvm::FunctionType::get(
2334          Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2335      return RValue::get(
2336          Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2337                             llvm::ArrayRef<llvm::Value *>(Args)));
2338    }
2339  }
2340  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2341  // parameter.
2342  case Builtin::BIget_kernel_work_group_size: {
2343    Value *Arg = EmitScalarExpr(E->getArg(0));
2344    Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2345    return RValue::get(
2346        Builder.CreateCall(CGM.CreateRuntimeFunction(
2347                               llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2348                               "__get_kernel_work_group_size_impl"),
2349                           Arg));
2350  }
2351  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2352    Value *Arg = EmitScalarExpr(E->getArg(0));
2353    Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2354    return RValue::get(Builder.CreateCall(
2355        CGM.CreateRuntimeFunction(
2356            llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2357            "__get_kernel_preferred_work_group_multiple_impl"),
2358        Arg));
2359  }
2360  case Builtin::BIprintf:
2361    if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
2362      return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
2363    break;
2364  case Builtin::BI__builtin_canonicalize:
2365  case Builtin::BI__builtin_canonicalizef:
2366  case Builtin::BI__builtin_canonicalizel:
2367    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2368
2369  case Builtin::BI__builtin_thread_pointer: {
2370    if (!getContext().getTargetInfo().isTLSSupported())
2371      CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2372    // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2373    break;
2374  }
2375  }
2376
2377  // If this is an alias for a lib function (e.g. __builtin_sin), emit
2378  // the call using the normal call path, but using the unmangled
2379  // version of the function name.
2380  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2381    return emitLibraryCall(*this, FD, E,
2382                           CGM.getBuiltinLibFunction(FD, BuiltinID));
2383
2384  // If this is a predefined lib function (e.g. malloc), emit the call
2385  // using exactly the normal call path.
2386  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2387    return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
2388
2389  // Check that a call to a target specific builtin has the correct target
2390  // features.
2391  // This is down here so the check is skipped for non-target-specific
2392  // builtins; however, if generic builtins start to require generic target
2393  // features then we can move this up to the beginning of the function.
2394  checkTargetFeatures(E, FD);
2395
2396  // See if we have a target specific intrinsic.
2397  const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2398  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2399  if (const char *Prefix =
2400          llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) {
2401    IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
2402    // NOTE: We don't need to perform a compatibility flag check here since
2403    // the intrinsics are declared in Builtins*.def via LANGBUILTIN, which
2404    // filters the MS builtins via ALL_MS_LANGUAGES, so they are filtered earlier.
2405    if (IntrinsicID == Intrinsic::not_intrinsic)
2406      IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name);
2407  }
2408
2409  if (IntrinsicID != Intrinsic::not_intrinsic) {
2410    SmallVector<Value*, 16> Args;
2411
2412    // Find out if any arguments are required to be integer constant
2413    // expressions.
2414    unsigned ICEArguments = 0;
2415    ASTContext::GetBuiltinTypeError Error;
2416    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2417    assert(Error == ASTContext::GE_None && "Should not codegen an error");
2418
2419    Function *F = CGM.getIntrinsic(IntrinsicID);
2420    llvm::FunctionType *FTy = F->getFunctionType();
2421
2422    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2423      Value *ArgValue;
2424      // If this is a normal argument, just emit it as a scalar.
2425      if ((ICEArguments & (1 << i)) == 0) {
2426        ArgValue = EmitScalarExpr(E->getArg(i));
2427      } else {
2428        // If this is required to be a constant, constant fold it so that we
2429        // know that the generated intrinsic gets a ConstantInt.
2430        llvm::APSInt Result;
2431        bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
2432        assert(IsConst && "Constant arg isn't actually constant?");
2433        (void)IsConst;
2434        ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2435      }
2436
2437      // If the intrinsic arg type is different from the builtin arg type
2438      // we need to do a bit cast.
2439      llvm::Type *PTy = FTy->getParamType(i);
2440      if (PTy != ArgValue->getType()) {
2441        assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
2442               "Must be able to losslessly bit cast to param");
2443        ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2444      }
2445
2446      Args.push_back(ArgValue);
2447    }
2448
2449    Value *V = Builder.CreateCall(F, Args);
2450    QualType BuiltinRetType = E->getType();
2451
2452    llvm::Type *RetTy = VoidTy;
2453    if (!BuiltinRetType->isVoidType())
2454      RetTy = ConvertType(BuiltinRetType);
2455
2456    if (RetTy != V->getType()) {
2457      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2458             "Must be able to losslessly bit cast result type");
2459      V = Builder.CreateBitCast(V, RetTy);
2460    }
2461
2462    return RValue::get(V);
2463  }
2464
2465  // See if we have a target specific builtin that needs to be lowered.
2466  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2467    return RValue::get(V);
2468
2469  ErrorUnsupported(E, "builtin function");
2470
2471  // Unknown builtin, for now just dump it out and return undef.
2472  return GetUndefRValue(E->getType());
2473}
2474
2475static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2476                                        unsigned BuiltinID, const CallExpr *E,
2477                                        llvm::Triple::ArchType Arch) {
2478  switch (Arch) {
2479  case llvm::Triple::arm:
2480  case llvm::Triple::armeb:
2481  case llvm::Triple::thumb:
2482  case llvm::Triple::thumbeb:
2483    return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2484  case llvm::Triple::aarch64:
2485  case llvm::Triple::aarch64_be:
2486    return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2487  case llvm::Triple::x86:
2488  case llvm::Triple::x86_64:
2489    return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2490  case llvm::Triple::ppc:
2491  case llvm::Triple::ppc64:
2492  case llvm::Triple::ppc64le:
2493    return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2494  case llvm::Triple::r600:
2495  case llvm::Triple::amdgcn:
2496    return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2497  case llvm::Triple::systemz:
2498    return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2499  case llvm::Triple::nvptx:
2500  case llvm::Triple::nvptx64:
2501    return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2502  case llvm::Triple::wasm32:
2503  case llvm::Triple::wasm64:
2504    return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2505  default:
2506    return nullptr;
2507  }
2508}
2509
2510Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2511                                              const CallExpr *E) {
2512  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2513    assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2514    return EmitTargetArchBuiltinExpr(
2515        this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2516        getContext().getAuxTargetInfo()->getTriple().getArch());
2517  }
2518
2519  return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2520                                   getTarget().getTriple().getArch());
2521}
2522
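/// Map a NeonTypeFlags value onto the corresponding LLVM vector type. For
/// example, Int8 yields <8 x i8> for a 64-bit (D-register) type and
/// <16 x i8> for a 128-bit (Q-register) type; if V1Ty is set, a one-element
/// vector such as <1 x i8> is returned instead.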
2523static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2524                                     NeonTypeFlags TypeFlags,
                                     bool V1Ty = false) {
2526  int IsQuad = TypeFlags.isQuad();
2527  switch (TypeFlags.getEltType()) {
2528  case NeonTypeFlags::Int8:
2529  case NeonTypeFlags::Poly8:
2530    return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2531  case NeonTypeFlags::Int16:
2532  case NeonTypeFlags::Poly16:
2533  case NeonTypeFlags::Float16:
2534    return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2535  case NeonTypeFlags::Int32:
2536    return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2537  case NeonTypeFlags::Int64:
2538  case NeonTypeFlags::Poly64:
2539    return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2540  case NeonTypeFlags::Poly128:
    // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
    // much of the i128/f128 API is missing. So we represent poly128 as
    // v16i8 and rely on pattern matching in the backend.
2544    return llvm::VectorType::get(CGF->Int8Ty, 16);
2545  case NeonTypeFlags::Float32:
2546    return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2547  case NeonTypeFlags::Float64:
2548    return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2549  }
2550  llvm_unreachable("Unknown vector element type!");
2551}
2552
2553static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2554                                          NeonTypeFlags IntTypeFlags) {
2555  int IsQuad = IntTypeFlags.isQuad();
2556  switch (IntTypeFlags.getEltType()) {
2557  case NeonTypeFlags::Int32:
2558    return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2559  case NeonTypeFlags::Int64:
2560    return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2561  default:
2562    llvm_unreachable("Type can't be converted to floating-point!");
2563  }
2564}
2565
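/// Splat lane C of vector V across all lanes. For a <4 x i32> input with
/// C == 1, this emits:
///   %lane = shufflevector <4 x i32> %v, <4 x i32> %v,
///                         <4 x i32> <i32 1, i32 1, i32 1, i32 1>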
2566Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2567  unsigned nElts = V->getType()->getVectorNumElements();
2568  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2569  return Builder.CreateShuffleVector(V, V, SV, "lane");
2570}
2571
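/// Emit a call to F, bitcasting each operand in Ops to the corresponding
/// parameter type first. If 'shift' is non-zero, the operand at that index
/// is instead an immediate shift amount, materialized as a splat vector
/// constant (negated when 'rightshift' is set, since the ARM NEON shift
/// intrinsics encode right shifts as negative shift amounts).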
2572Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
2573                                     const char *name,
2574                                     unsigned shift, bool rightshift) {
2575  unsigned j = 0;
2576  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2577       ai != ae; ++ai, ++j)
2578    if (shift > 0 && shift == j)
2579      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
2580    else
2581      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
2582
2583  return Builder.CreateCall(F, Ops, name);
2584}
2585
2586Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
2587                                            bool neg) {
2588  int SV = cast<ConstantInt>(V)->getSExtValue();
2589  return ConstantInt::get(Ty, neg ? -SV : SV);
2590}
2591
/// Right-shift a vector by a constant.
2593Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
2594                                          llvm::Type *Ty, bool usgn,
2595                                          const char *name) {
2596  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2597
2598  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
2599  int EltSize = VTy->getScalarSizeInBits();
2600
2601  Vec = Builder.CreateBitCast(Vec, Ty);
2602
2603  // lshr/ashr are undefined when the shift amount is equal to the vector
2604  // element size.
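  // For example, a vshr_n of <4 x i32> by 32 becomes an ashr by 31 in the
  // signed case, and folds directly to zero in the unsigned case.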
2605  if (ShiftAmt == EltSize) {
2606    if (usgn) {
2607      // Right-shifting an unsigned value by its size yields 0.
2608      return llvm::ConstantAggregateZero::get(VTy);
2609    } else {
2610      // Right-shifting a signed value by its size is equivalent
2611      // to a shift of size-1.
2612      --ShiftAmt;
2613      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
2614    }
2615  }
2616
2617  Shift = EmitNeonShiftVector(Shift, Ty, false);
2618  if (usgn)
2619    return Builder.CreateLShr(Vec, Shift, name);
2620  else
2621    return Builder.CreateAShr(Vec, Shift, name);
2622}
2623
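// Flags describing how LookupNeonLLVMIntrinsic (below) should build the
// overloaded type list for an intrinsic. For example,
// AddRetType | Add1ArgType requests an intrinsic overloaded on both its
// return type and its first argument type, i.e.
// CGM.getIntrinsic(ID, {RetTy, ArgTy}).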
2624enum {
2625  AddRetType = (1 << 0),
2626  Add1ArgType = (1 << 1),
2627  Add2ArgTypes = (1 << 2),
2628
2629  VectorizeRetType = (1 << 3),
2630  VectorizeArgTypes = (1 << 4),
2631
2632  InventFloatType = (1 << 5),
2633  UnsignedAlts = (1 << 6),
2634
2635  Use64BitVectors = (1 << 7),
2636  Use128BitVectors = (1 << 8),
2637
2638  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
2639  VectorRet = AddRetType | VectorizeRetType,
2640  VectorRetGetArgs01 =
2641      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
2642  FpCmpzModifiers =
2643      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
2644};
2645
2646namespace {
2647struct NeonIntrinsicInfo {
2648  const char *NameHint;
2649  unsigned BuiltinID;
2650  unsigned LLVMIntrinsic;
2651  unsigned AltLLVMIntrinsic;
2652  unsigned TypeModifier;
2653
2654  bool operator<(unsigned RHSBuiltinID) const {
2655    return BuiltinID < RHSBuiltinID;
2656  }
2657  bool operator<(const NeonIntrinsicInfo &TE) const {
2658    return BuiltinID < TE.BuiltinID;
2659  }
2660};
2661} // end anonymous namespace
2662
2663#define NEONMAP0(NameBase) \
2664  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
2665
2666#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
2668      Intrinsic::LLVMIntrinsic, 0, TypeModifier }
2669
2670#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, \
2672      Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
2673      TypeModifier }
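
// As an illustration, NEONMAP1(vabs_v, arm_neon_vabs, 0) expands to:
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs,
//     0, 0 }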
2674
static const NeonIntrinsicInfo ARMSIMDIntrinsicMap[] = {
2676  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2677  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2678  NEONMAP1(vabs_v, arm_neon_vabs, 0),
2679  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
2680  NEONMAP0(vaddhn_v),
2681  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
2682  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
2683  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
2684  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
2685  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
2686  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
2687  NEONMAP1(vcage_v, arm_neon_vacge, 0),
2688  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
2689  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
2690  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
2691  NEONMAP1(vcale_v, arm_neon_vacge, 0),
2692  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
2693  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
2694  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
2695  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
2696  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
2697  NEONMAP1(vclz_v, ctlz, Add1ArgType),
2698  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2699  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2700  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2701  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
2702  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
2703  NEONMAP0(vcvt_f32_v),
2704  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2705  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2706  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2707  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2708  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2709  NEONMAP0(vcvt_s32_v),
2710  NEONMAP0(vcvt_s64_v),
2711  NEONMAP0(vcvt_u32_v),
2712  NEONMAP0(vcvt_u64_v),
2713  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
2714  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
2715  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
2716  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
2717  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
2718  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
2719  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
2720  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
2721  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
2722  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
2723  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
2724  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
2725  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
2726  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
2727  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
2728  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
2729  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
2730  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
2731  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
2732  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
2733  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
2734  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
2735  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
2736  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
2737  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
2738  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
2739  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
2740  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
2741  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
2742  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
2743  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
2744  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
2745  NEONMAP0(vcvtq_f32_v),
2746  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2747  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2748  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2749  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2750  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2751  NEONMAP0(vcvtq_s32_v),
2752  NEONMAP0(vcvtq_s64_v),
2753  NEONMAP0(vcvtq_u32_v),
2754  NEONMAP0(vcvtq_u64_v),
2755  NEONMAP0(vext_v),
2756  NEONMAP0(vextq_v),
2757  NEONMAP0(vfma_v),
2758  NEONMAP0(vfmaq_v),
2759  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2760  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2761  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2762  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2763  NEONMAP0(vld1_dup_v),
2764  NEONMAP1(vld1_v, arm_neon_vld1, 0),
2765  NEONMAP0(vld1q_dup_v),
2766  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
2767  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
2768  NEONMAP1(vld2_v, arm_neon_vld2, 0),
2769  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
2770  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
2771  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
2772  NEONMAP1(vld3_v, arm_neon_vld3, 0),
2773  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
2774  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
2775  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
2776  NEONMAP1(vld4_v, arm_neon_vld4, 0),
2777  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
2778  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
2779  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2780  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
2781  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
2782  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2783  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2784  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
2785  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
2786  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2787  NEONMAP0(vmovl_v),
2788  NEONMAP0(vmovn_v),
2789  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
2790  NEONMAP0(vmull_v),
2791  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
2792  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2793  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2794  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
2795  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2796  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2797  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
2798  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
2799  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
2800  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
2801  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
2802  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2803  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2804  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
2805  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
2806  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
2807  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
2808  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
2809  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
2810  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
2811  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
2812  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
2813  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
2814  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
2815  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2816  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2817  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2818  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2819  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2820  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2821  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
2822  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
2823  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2824  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2825  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
2826  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2827  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2828  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
2829  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
2830  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2831  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2832  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
2833  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
2834  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
2835  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
2836  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
2837  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
2838  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
2839  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
2840  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
2841  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
2842  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
2843  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
2844  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2845  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2846  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2847  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2848  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2849  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2850  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
2851  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
2852  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
2853  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
2854  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
2855  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
2856  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
2857  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
2858  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
2859  NEONMAP0(vshl_n_v),
2860  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2861  NEONMAP0(vshll_n_v),
2862  NEONMAP0(vshlq_n_v),
2863  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2864  NEONMAP0(vshr_n_v),
2865  NEONMAP0(vshrn_n_v),
2866  NEONMAP0(vshrq_n_v),
2867  NEONMAP1(vst1_v, arm_neon_vst1, 0),
2868  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
2869  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
2870  NEONMAP1(vst2_v, arm_neon_vst2, 0),
2871  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
2872  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
2873  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
2874  NEONMAP1(vst3_v, arm_neon_vst3, 0),
2875  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
2876  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
2877  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
2878  NEONMAP1(vst4_v, arm_neon_vst4, 0),
2879  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
2880  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
2881  NEONMAP0(vsubhn_v),
2882  NEONMAP0(vtrn_v),
2883  NEONMAP0(vtrnq_v),
2884  NEONMAP0(vtst_v),
2885  NEONMAP0(vtstq_v),
2886  NEONMAP0(vuzp_v),
2887  NEONMAP0(vuzpq_v),
2888  NEONMAP0(vzip_v),
2889  NEONMAP0(vzipq_v)
2890};
2891
2892static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
2893  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
2894  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
2895  NEONMAP0(vaddhn_v),
2896  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
2897  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
2898  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
2899  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
2900  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
2901  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
2902  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
2903  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
2904  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
2905  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
2906  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
2907  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
2908  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
2909  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
2910  NEONMAP1(vclz_v, ctlz, Add1ArgType),
2911  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2912  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2913  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2914  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
2915  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
2916  NEONMAP0(vcvt_f32_v),
2917  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2918  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2919  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
2920  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
2921  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
2922  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
2923  NEONMAP0(vcvtq_f32_v),
2924  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2925  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2926  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
2927  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
2928  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
2929  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
2930  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
2931  NEONMAP0(vext_v),
2932  NEONMAP0(vextq_v),
2933  NEONMAP0(vfma_v),
2934  NEONMAP0(vfmaq_v),
2935  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
2936  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
2937  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
2938  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
2939  NEONMAP0(vmovl_v),
2940  NEONMAP0(vmovn_v),
2941  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
2942  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
2943  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
2944  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
2945  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
2946  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
2947  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
2948  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
2949  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
2950  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
2951  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
2952  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
2953  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
2954  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
2955  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
2956  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
2957  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
2958  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
2959  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
2960  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
2961  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
2962  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
2963  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
2964  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
2965  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
2967  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
2968  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
2969  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
2970  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
2971  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
2972  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
2973  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
2974  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
2975  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
2976  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
2977  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
2978  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
2979  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
2980  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
2981  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
2982  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
2983  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
2984  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
2985  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
2986  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
2987  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
2988  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
2989  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
2990  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
2991  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
2992  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
2993  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
2994  NEONMAP0(vshl_n_v),
2995  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
2996  NEONMAP0(vshll_n_v),
2997  NEONMAP0(vshlq_n_v),
2998  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
2999  NEONMAP0(vshr_n_v),
3000  NEONMAP0(vshrn_n_v),
3001  NEONMAP0(vshrq_n_v),
3002  NEONMAP0(vsubhn_v),
3003  NEONMAP0(vtst_v),
3004  NEONMAP0(vtstq_v),
3005};
3006
3007static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3008  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3009  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3010  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3011  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3012  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3013  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3014  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3015  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3016  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3017  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3018  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3019  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3020  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3021  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3022  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3023  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3024  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3025  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3026  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3027  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3028  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3029  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3030  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3031  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3032  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3033  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3034  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3035  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3036  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3037  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3038  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3039  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3040  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3041  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3042  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3043  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3044  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3045  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3046  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3047  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3048  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3049  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3050  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3051  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3052  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3053  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3054  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3055  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3056  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3057  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3058  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3059  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3060  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3061  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3062  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3063  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3064  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3065  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3066  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3067  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3068  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3069  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3070  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3071  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3072  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3073  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3074  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3075  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3076  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3077  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3078  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3079  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3080  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3081  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3082  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3083  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3084  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3085  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3086  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3087  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3088  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3089  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3090  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3091  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3092  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3093  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3094  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3095  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3096  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3097  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3098  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3099  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3100  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3101  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3102  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3103  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3104  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3105  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3106  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3107  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3108  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3109  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3110  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3111  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3112  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3113  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3114  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3115  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3116  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3117  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3118  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3119  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3120  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3121  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3122  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3123  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3124  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3125  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3126  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3127  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3128  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3129  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3130  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3131  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3132  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3133  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3134  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3135  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3136  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3137  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3138  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3139  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3140  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3141  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3142  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3143  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3144  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3145  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3146  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3147  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3148  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3149  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3150  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3151  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3152  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3153  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3154  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3155  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3156  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3157  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3158  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3159  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3160  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3161  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3162  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3163  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3164  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3165  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3166  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3167  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3168  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3169  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3170  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3171  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3172  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3173  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3174  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3175  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3176  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3177  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3178  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3179  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3180  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3181  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3182  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3183  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3184  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3185  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3186  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3187  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3188  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3189  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3190  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3191  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3192  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3193  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3194  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3195  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3196  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3197  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3198  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3199  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3200};
3201
3202#undef NEONMAP0
3203#undef NEONMAP1
3204#undef NEONMAP2
3205
static bool NEONSIMDIntrinsicsProvenSorted = false;
static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;

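/// Binary-search IntrinsicMap, which must be sorted by BuiltinID, for the
/// given builtin; returns the matching entry or nullptr. In +Asserts builds
/// the first lookup also verifies that the table really is sorted, so the
/// check runs only once per table.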
3212static const NeonIntrinsicInfo *
3213findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3214                       unsigned BuiltinID, bool &MapProvenSorted) {
3215
3216#ifndef NDEBUG
3217  if (!MapProvenSorted) {
3218    assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3219    MapProvenSorted = true;
3220  }
3221#endif
3222
3223  const NeonIntrinsicInfo *Builtin =
3224      std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3225
3226  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3227    return Builtin;
3228
3229  return nullptr;
3230}
3231
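/// Build the list of overloaded types an LLVM NEON intrinsic expects and
/// look the intrinsic up. For example, Modifier = AddRetType | Add1ArgType
/// yields CGM.getIntrinsic(IntrinsicID, {RetTy, ArgType}). The Vectorize*
/// flags widen a scalar type to a vector whose total width is selected by
/// Use64BitVectors or Use128BitVectors (defaulting to a single element).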
3232Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3233                                                   unsigned Modifier,
3234                                                   llvm::Type *ArgType,
3235                                                   const CallExpr *E) {
3236  int VectorSize = 0;
3237  if (Modifier & Use64BitVectors)
3238    VectorSize = 64;
3239  else if (Modifier & Use128BitVectors)
3240    VectorSize = 128;
3241
3242  // Return type.
3243  SmallVector<llvm::Type *, 3> Tys;
3244  if (Modifier & AddRetType) {
3245    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3246    if (Modifier & VectorizeRetType)
3247      Ty = llvm::VectorType::get(
3248          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3249
3250    Tys.push_back(Ty);
3251  }
3252
3253  // Arguments.
3254  if (Modifier & VectorizeArgTypes) {
3255    int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3256    ArgType = llvm::VectorType::get(ArgType, Elts);
3257  }
3258
3259  if (Modifier & (Add1ArgType | Add2ArgTypes))
3260    Tys.push_back(ArgType);
3261
3262  if (Modifier & Add2ArgTypes)
3263    Tys.push_back(ArgType);
3264
3265  if (Modifier & InventFloatType)
3266    Tys.push_back(FloatTy);
3267
3268  return CGM.getIntrinsic(IntrinsicID, Tys);
3269}
3270
3271static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3272                                            const NeonIntrinsicInfo &SISDInfo,
3273                                            SmallVectorImpl<Value *> &Ops,
3274                                            const CallExpr *E) {
3275  unsigned BuiltinID = SISDInfo.BuiltinID;
  unsigned Int = SISDInfo.LLVMIntrinsic;
3277  unsigned Modifier = SISDInfo.TypeModifier;
3278  const char *s = SISDInfo.NameHint;
3279
3280  switch (BuiltinID) {
3281  case NEON::BI__builtin_neon_vcled_s64:
3282  case NEON::BI__builtin_neon_vcled_u64:
3283  case NEON::BI__builtin_neon_vcles_f32:
3284  case NEON::BI__builtin_neon_vcled_f64:
3285  case NEON::BI__builtin_neon_vcltd_s64:
3286  case NEON::BI__builtin_neon_vcltd_u64:
3287  case NEON::BI__builtin_neon_vclts_f32:
3288  case NEON::BI__builtin_neon_vcltd_f64:
3289  case NEON::BI__builtin_neon_vcales_f32:
3290  case NEON::BI__builtin_neon_vcaled_f64:
3291  case NEON::BI__builtin_neon_vcalts_f32:
3292  case NEON::BI__builtin_neon_vcaltd_f64:
    // Only one direction of comparison actually exists: cmle is a cmge with
    // swapped operands. The table gives us the right intrinsic, but we
    // still need to do the swap.
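    // For example, vcaled_f64(a, b) computes |a| <= |b|, which is
    // facge(b, a).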
3296    std::swap(Ops[0], Ops[1]);
3297    break;
3298  }
3299
3300  assert(Int && "Generic code assumes a valid intrinsic");
3301
3302  // Determine the type(s) of this overloaded AArch64 intrinsic.
3303  const Expr *Arg = E->getArg(0);
3304  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3305  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3306
3307  int j = 0;
3308  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3309  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3310       ai != ae; ++ai, ++j) {
3311    llvm::Type *ArgTy = ai->getType();
3312    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3313             ArgTy->getPrimitiveSizeInBits())
3314      continue;
3315
3316    assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3317    // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3318    // it before inserting.
3319    Ops[j] =
3320        CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3321    Ops[j] =
3322        CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3323  }
3324
3325  Value *Result = CGF.EmitNeonCall(F, Ops, s);
3326  llvm::Type *ResultType = CGF.ConvertType(E->getType());
3327  if (ResultType->getPrimitiveSizeInBits() <
3328      Result->getType()->getPrimitiveSizeInBits())
3329    return CGF.Builder.CreateExtractElement(Result, C0);
3330
3331  return CGF.Builder.CreateBitCast(Result, ResultType, s);
3332}
3333
3334Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3335    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3336    const char *NameHint, unsigned Modifier, const CallExpr *E,
3337    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3338  // Get the last argument, which specifies the vector type.
3339  llvm::APSInt NeonTypeConst;
3340  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3341  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3342    return nullptr;
3343
3344  // Determine the type of this overloaded NEON intrinsic.
3345  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3346  bool Usgn = Type.isUnsigned();
3347  bool Quad = Type.isQuad();
3348
3349  llvm::VectorType *VTy = GetNeonType(this, Type);
3350  llvm::Type *Ty = VTy;
3351  if (!Ty)
3352    return nullptr;
3353
3354  auto getAlignmentValue32 = [&](Address addr) -> Value* {
3355    return Builder.getInt32(addr.getAlignment().getQuantity());
3356  };
3357
3358  unsigned Int = LLVMIntrinsic;
3359  if ((Modifier & UnsignedAlts) && !Usgn)
3360    Int = AltLLVMIntrinsic;
3361
3362  switch (BuiltinID) {
3363  default: break;
3364  case NEON::BI__builtin_neon_vabs_v:
3365  case NEON::BI__builtin_neon_vabsq_v:
3366    if (VTy->getElementType()->isFloatingPointTy())
3367      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3368    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3369  case NEON::BI__builtin_neon_vaddhn_v: {
3370    llvm::VectorType *SrcTy =
3371        llvm::VectorType::getExtendedElementVectorType(VTy);
3372
3373    // %sum = add <4 x i32> %lhs, %rhs
3374    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3375    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3376    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3377
3378    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3379    Constant *ShiftAmt =
3380        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3381    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3382
3383    // %res = trunc <4 x i32> %high to <4 x i16>
3384    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3385  }
3386  case NEON::BI__builtin_neon_vcale_v:
3387  case NEON::BI__builtin_neon_vcaleq_v:
3388  case NEON::BI__builtin_neon_vcalt_v:
3389  case NEON::BI__builtin_neon_vcaltq_v:
3390    std::swap(Ops[0], Ops[1]);
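    // Fall through: vcale(a, b) is vcage(b, a) with the operands swapped.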
3391  case NEON::BI__builtin_neon_vcage_v:
3392  case NEON::BI__builtin_neon_vcageq_v:
3393  case NEON::BI__builtin_neon_vcagt_v:
3394  case NEON::BI__builtin_neon_vcagtq_v: {
3395    llvm::Type *VecFlt = llvm::VectorType::get(
3396        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3397        VTy->getNumElements());
3398    llvm::Type *Tys[] = { VTy, VecFlt };
3399    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3400    return EmitNeonCall(F, Ops, NameHint);
3401  }
3402  case NEON::BI__builtin_neon_vclz_v:
3403  case NEON::BI__builtin_neon_vclzq_v:
    // We generate a target-independent intrinsic, which needs a second
    // argument saying whether or not clz of zero is undefined; on ARM it
    // isn't.
3406    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3407    break;
3408  case NEON::BI__builtin_neon_vcvt_f32_v:
3409  case NEON::BI__builtin_neon_vcvtq_f32_v:
3410    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3411    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3412    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3413                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3414  case NEON::BI__builtin_neon_vcvt_n_f32_v:
3415  case NEON::BI__builtin_neon_vcvt_n_f64_v:
3416  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3417  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3418    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3419    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3420    Function *F = CGM.getIntrinsic(Int, Tys);
3421    return EmitNeonCall(F, Ops, "vcvt_n");
3422  }
3423  case NEON::BI__builtin_neon_vcvt_n_s32_v:
3424  case NEON::BI__builtin_neon_vcvt_n_u32_v:
3425  case NEON::BI__builtin_neon_vcvt_n_s64_v:
3426  case NEON::BI__builtin_neon_vcvt_n_u64_v:
3427  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3428  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3429  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3430  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3431    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3432    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3433    return EmitNeonCall(F, Ops, "vcvt_n");
3434  }
3435  case NEON::BI__builtin_neon_vcvt_s32_v:
3436  case NEON::BI__builtin_neon_vcvt_u32_v:
3437  case NEON::BI__builtin_neon_vcvt_s64_v:
3438  case NEON::BI__builtin_neon_vcvt_u64_v:
3439  case NEON::BI__builtin_neon_vcvtq_s32_v:
3440  case NEON::BI__builtin_neon_vcvtq_u32_v:
3441  case NEON::BI__builtin_neon_vcvtq_s64_v:
3442  case NEON::BI__builtin_neon_vcvtq_u64_v: {
3443    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3444    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3445                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3446  }
3447  case NEON::BI__builtin_neon_vcvta_s32_v:
3448  case NEON::BI__builtin_neon_vcvta_s64_v:
3449  case NEON::BI__builtin_neon_vcvta_u32_v:
3450  case NEON::BI__builtin_neon_vcvta_u64_v:
3451  case NEON::BI__builtin_neon_vcvtaq_s32_v:
3452  case NEON::BI__builtin_neon_vcvtaq_s64_v:
3453  case NEON::BI__builtin_neon_vcvtaq_u32_v:
3454  case NEON::BI__builtin_neon_vcvtaq_u64_v:
3455  case NEON::BI__builtin_neon_vcvtn_s32_v:
3456  case NEON::BI__builtin_neon_vcvtn_s64_v:
3457  case NEON::BI__builtin_neon_vcvtn_u32_v:
3458  case NEON::BI__builtin_neon_vcvtn_u64_v:
3459  case NEON::BI__builtin_neon_vcvtnq_s32_v:
3460  case NEON::BI__builtin_neon_vcvtnq_s64_v:
3461  case NEON::BI__builtin_neon_vcvtnq_u32_v:
3462  case NEON::BI__builtin_neon_vcvtnq_u64_v:
3463  case NEON::BI__builtin_neon_vcvtp_s32_v:
3464  case NEON::BI__builtin_neon_vcvtp_s64_v:
3465  case NEON::BI__builtin_neon_vcvtp_u32_v:
3466  case NEON::BI__builtin_neon_vcvtp_u64_v:
3467  case NEON::BI__builtin_neon_vcvtpq_s32_v:
3468  case NEON::BI__builtin_neon_vcvtpq_s64_v:
3469  case NEON::BI__builtin_neon_vcvtpq_u32_v:
3470  case NEON::BI__builtin_neon_vcvtpq_u64_v:
3471  case NEON::BI__builtin_neon_vcvtm_s32_v:
3472  case NEON::BI__builtin_neon_vcvtm_s64_v:
3473  case NEON::BI__builtin_neon_vcvtm_u32_v:
3474  case NEON::BI__builtin_neon_vcvtm_u64_v:
3475  case NEON::BI__builtin_neon_vcvtmq_s32_v:
3476  case NEON::BI__builtin_neon_vcvtmq_s64_v:
3477  case NEON::BI__builtin_neon_vcvtmq_u32_v:
3478  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3479    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3480    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3481  }
3482  case NEON::BI__builtin_neon_vext_v:
3483  case NEON::BI__builtin_neon_vextq_v: {
3484    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3485    SmallVector<uint32_t, 16> Indices;
3486    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3487      Indices.push_back(i+CV);
3488
3489    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3490    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3491    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
3492  }
3493  case NEON::BI__builtin_neon_vfma_v:
3494  case NEON::BI__builtin_neon_vfmaq_v: {
3495    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3496    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3497    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3498    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3499
3500    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
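    // That is, vfma(a, b, c) computes a + b*c, which is llvm.fma(b, c, a).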
3501    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3502  }
3503  case NEON::BI__builtin_neon_vld1_v:
3504  case NEON::BI__builtin_neon_vld1q_v: {
3505    llvm::Type *Tys[] = {Ty, Int8PtrTy};
3506    Ops.push_back(getAlignmentValue32(PtrOp0));
3507    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
3508  }
3509  case NEON::BI__builtin_neon_vld2_v:
3510  case NEON::BI__builtin_neon_vld2q_v:
3511  case NEON::BI__builtin_neon_vld3_v:
3512  case NEON::BI__builtin_neon_vld3q_v:
3513  case NEON::BI__builtin_neon_vld4_v:
3514  case NEON::BI__builtin_neon_vld4q_v: {
3515    llvm::Type *Tys[] = {Ty, Int8PtrTy};
3516    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3517    Value *Align = getAlignmentValue32(PtrOp1);
3518    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
3519    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3520    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3521    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3522  }
3523  case NEON::BI__builtin_neon_vld1_dup_v:
3524  case NEON::BI__builtin_neon_vld1q_dup_v: {
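    // Load a single element, insert it into lane 0 of an undef vector, then
    // splat that lane across the whole vector.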
3525    Value *V = UndefValue::get(Ty);
3526    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3527    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
3528    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
3529    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3530    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3531    return EmitNeonSplat(Ops[0], CI);
3532  }
3533  case NEON::BI__builtin_neon_vld2_lane_v:
3534  case NEON::BI__builtin_neon_vld2q_lane_v:
3535  case NEON::BI__builtin_neon_vld3_lane_v:
3536  case NEON::BI__builtin_neon_vld3q_lane_v:
3537  case NEON::BI__builtin_neon_vld4_lane_v:
3538  case NEON::BI__builtin_neon_vld4q_lane_v: {
3539    llvm::Type *Tys[] = {Ty, Int8PtrTy};
3540    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3541    for (unsigned I = 2; I < Ops.size() - 1; ++I)
3542      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
3543    Ops.push_back(getAlignmentValue32(PtrOp1));
3544    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
3545    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3546    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3547    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3548  }
3549  case NEON::BI__builtin_neon_vmovl_v: {
    llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3551    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
3552    if (Usgn)
3553      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
3554    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
3555  }
3556  case NEON::BI__builtin_neon_vmovn_v: {
3557    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3558    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
3559    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
3560  }
3561  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (two exts followed by a mul). Unfortunately LLVM has a habit
    // of hoisting the exts out of loops, and until GlobalISel can see
    // through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
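    // The pure-IR form would be, for a signed vmull of <4 x i16>:
    //   %l = sext <4 x i16> %a to <4 x i32>
    //   %r = sext <4 x i16> %b to <4 x i32>
    //   %p = mul <4 x i32> %l, %r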
3567    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
3568    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
3569    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
3570  case NEON::BI__builtin_neon_vpadal_v:
3571  case NEON::BI__builtin_neon_vpadalq_v: {
3572    // The source operand type has twice as many elements of half the size.
3573    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3574    llvm::Type *EltTy =
3575      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3576    llvm::Type *NarrowTy =
3577      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3578    llvm::Type *Tys[2] = { Ty, NarrowTy };
3579    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
3580  }
3581  case NEON::BI__builtin_neon_vpaddl_v:
3582  case NEON::BI__builtin_neon_vpaddlq_v: {
3583    // The source operand type has twice as many elements of half the size.
3584    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3585    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3586    llvm::Type *NarrowTy =
3587      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3588    llvm::Type *Tys[2] = { Ty, NarrowTy };
3589    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
3590  }
3591  case NEON::BI__builtin_neon_vqdmlal_v:
3592  case NEON::BI__builtin_neon_vqdmlsl_v: {
3593    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
3594    Ops[1] =
3595        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
3596    Ops.resize(2);
3597    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
3598  }
3599  case NEON::BI__builtin_neon_vqshl_n_v:
3600  case NEON::BI__builtin_neon_vqshlq_n_v:
3601    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
3602                        1, false);
3603  case NEON::BI__builtin_neon_vqshlu_n_v:
3604  case NEON::BI__builtin_neon_vqshluq_n_v:
3605    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
3606                        1, false);
3607  case NEON::BI__builtin_neon_vrecpe_v:
3608  case NEON::BI__builtin_neon_vrecpeq_v:
3609  case NEON::BI__builtin_neon_vrsqrte_v:
3610  case NEON::BI__builtin_neon_vrsqrteq_v:
3611    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
3612    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
3613
3614  case NEON::BI__builtin_neon_vrshr_n_v:
3615  case NEON::BI__builtin_neon_vrshrq_n_v:
3616    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
3617                        1, true);
3618  case NEON::BI__builtin_neon_vshl_n_v:
3619  case NEON::BI__builtin_neon_vshlq_n_v:
3620    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
3621    return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
3622                             "vshl_n");
3623  case NEON::BI__builtin_neon_vshll_n_v: {
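    // vshll_n widens each lane and then shifts left by an immediate; this can
    // be expressed directly as a (z|s)ext followed by a shl on the wide type.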
3624    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3625    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3626    if (Usgn)
3627      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
3628    else
3629      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
3630    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
3631    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
3632  }
3633  case NEON::BI__builtin_neon_vshrn_n_v: {
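    // vshrn_n does the right shift in the wide source type and then truncates
    // each lane to half its width.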
3634    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3635    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3636    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
3637    if (Usgn)
3638      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
3639    else
3640      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
3641    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
3642  }
3643  case NEON::BI__builtin_neon_vshr_n_v:
3644  case NEON::BI__builtin_neon_vshrq_n_v:
3645    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
3646  case NEON::BI__builtin_neon_vst1_v:
3647  case NEON::BI__builtin_neon_vst1q_v:
3648  case NEON::BI__builtin_neon_vst2_v:
3649  case NEON::BI__builtin_neon_vst2q_v:
3650  case NEON::BI__builtin_neon_vst3_v:
3651  case NEON::BI__builtin_neon_vst3q_v:
3652  case NEON::BI__builtin_neon_vst4_v:
3653  case NEON::BI__builtin_neon_vst4q_v:
3654  case NEON::BI__builtin_neon_vst2_lane_v:
3655  case NEON::BI__builtin_neon_vst2q_lane_v:
3656  case NEON::BI__builtin_neon_vst3_lane_v:
3657  case NEON::BI__builtin_neon_vst3q_lane_v:
3658  case NEON::BI__builtin_neon_vst4_lane_v:
3659  case NEON::BI__builtin_neon_vst4q_lane_v: {
3660    llvm::Type *Tys[] = {Int8PtrTy, Ty};
3661    Ops.push_back(getAlignmentValue32(PtrOp0));
3662    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
3663  }
3664  case NEON::BI__builtin_neon_vsubhn_v: {
3665    llvm::VectorType *SrcTy =
3666        llvm::VectorType::getExtendedElementVectorType(VTy);
3667
3668    // %diff = sub <4 x i32> %lhs, %rhs
3669    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3670    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3671    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
3672
3673    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
3674    Constant *ShiftAmt =
3675        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3676    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
3677
3678    // %res = trunc <4 x i32> %high to <4 x i16>
3679    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
3680  }
3681  case NEON::BI__builtin_neon_vtrn_v:
3682  case NEON::BI__builtin_neon_vtrnq_v: {
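    // vtrn transposes pairs of lanes from the two sources and writes both
    // results through the result pointer in Ops[0]; e.g. for <4 x i32> the
    // two shuffle masks are <0, 4, 2, 6> and <1, 5, 3, 7>.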
3683    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3684    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3685    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3686    Value *SV = nullptr;
3687
3688    for (unsigned vi = 0; vi != 2; ++vi) {
3689      SmallVector<uint32_t, 16> Indices;
3690      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3691        Indices.push_back(i+vi);
3692        Indices.push_back(i+e+vi);
3693      }
3694      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3695      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
3696      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3697    }
3698    return SV;
3699  }
3700  case NEON::BI__builtin_neon_vtst_v:
3701  case NEON::BI__builtin_neon_vtstq_v: {
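    // vtst: a lane becomes all-ones when (a & b) is non-zero, emitted as an
    // icmp ne against zero followed by a sext back to the lane type.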
3702    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3703    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3704    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
3705    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
3706                                ConstantAggregateZero::get(Ty));
3707    return Builder.CreateSExt(Ops[0], Ty, "vtst");
3708  }
3709  case NEON::BI__builtin_neon_vuzp_v:
3710  case NEON::BI__builtin_neon_vuzpq_v: {
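    // vuzp de-interleaves: even-numbered elements of the concatenated sources
    // form the first result and odd-numbered ones the second; for <4 x i32>
    // the masks are <0, 2, 4, 6> and <1, 3, 5, 7>.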
3711    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3712    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3713    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3714    Value *SV = nullptr;
3715
3716    for (unsigned vi = 0; vi != 2; ++vi) {
3717      SmallVector<uint32_t, 16> Indices;
3718      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3719        Indices.push_back(2*i+vi);
3720
3721      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3722      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
3723      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3724    }
3725    return SV;
3726  }
3727  case NEON::BI__builtin_neon_vzip_v:
3728  case NEON::BI__builtin_neon_vzipq_v: {
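    // vzip interleaves the low halves of the sources into the first result
    // and the high halves into the second; for <4 x i32> the masks are
    // <0, 4, 1, 5> and <2, 6, 3, 7>.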
3729    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3730    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3731    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3732    Value *SV = nullptr;
3733
3734    for (unsigned vi = 0; vi != 2; ++vi) {
3735      SmallVector<uint32_t, 16> Indices;
3736      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3737        Indices.push_back((i + vi*e) >> 1);
3738        Indices.push_back(((i + vi*e) >> 1)+e);
3739      }
3740      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3741      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
3742      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3743    }
3744    return SV;
3745  }
3746  }
3747
3748  assert(Int && "Expected valid intrinsic number");
3749
3750  // Determine the type(s) of this overloaded AArch64 intrinsic.
3751  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
3752
3753  Value *Result = EmitNeonCall(F, Ops, NameHint);
3754  llvm::Type *ResultType = ConvertType(E->getType());
3755  // Cast the one-element vector result of an AArch64 intrinsic back to the
3756  // scalar type expected by the builtin.
3757  return Builder.CreateBitCast(Result, ResultType, NameHint);
3758}
3759
3760Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
3761    Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
3762    const CmpInst::Predicate Ip, const Twine &Name) {
3763  llvm::Type *OTy = Op->getType();
3764
3765  // FIXME: this is utterly horrific. We should not be looking at previous
3766  // codegen context to find out what needs doing. Unfortunately TableGen
3767  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
3768  // (etc).
3769  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
3770    OTy = BI->getOperand(0)->getType();
3771
3772  Op = Builder.CreateBitCast(Op, OTy);
3773  if (OTy->getScalarType()->isFloatingPointTy()) {
3774    Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
3775  } else {
3776    Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
3777  }
3778  return Builder.CreateSExt(Op, Ty, Name);
3779}
3780
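// Helper for the AArch64 vtbl/vtbx builtins: the 64-bit table operands are
// concatenated pairwise into 128-bit tables (padding with a zero vector when
// the count is odd) before calling the aarch64.neon.tbl/tbx intrinsics, which
// only take 128-bit tables.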
3781static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
3782                                 Value *ExtOp, Value *IndexOp,
3783                                 llvm::Type *ResTy, unsigned IntID,
3784                                 const char *Name) {
3785  SmallVector<Value *, 2> TblOps;
3786  if (ExtOp)
3787    TblOps.push_back(ExtOp);
3788
3789  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
3790  SmallVector<uint32_t, 16> Indices;
3791  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
3792  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
3793    Indices.push_back(2*i);
3794    Indices.push_back(2*i+1);
3795  }
3796
3797  int PairPos = 0, End = Ops.size() - 1;
3798  while (PairPos < End) {
3799    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3800                                                     Ops[PairPos+1], Indices,
3801                                                     Name));
3802    PairPos += 2;
3803  }
3804
3805  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
3806  // of the last 128-bit lookup table with zeros.
3807  if (PairPos == End) {
3808    Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
3809    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3810                                                     ZeroTbl, Indices, Name));
3811  }
3812
3813  Function *TblF;
3814  TblOps.push_back(IndexOp);
3815  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
3816
3817  return CGF.EmitNeonCall(TblF, TblOps, Name);
3818}
3819
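// Map the ARM hint builtins onto llvm.arm.hint; the constant is the immediate
// operand of the HINT instruction, e.g. __wfi becomes @llvm.arm.hint(i32 3).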
3820Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
3821  unsigned Value;
3822  switch (BuiltinID) {
3823  default:
3824    return nullptr;
3825  case ARM::BI__builtin_arm_nop:
3826    Value = 0;
3827    break;
3828  case ARM::BI__builtin_arm_yield:
3829  case ARM::BI__yield:
3830    Value = 1;
3831    break;
3832  case ARM::BI__builtin_arm_wfe:
3833  case ARM::BI__wfe:
3834    Value = 2;
3835    break;
3836  case ARM::BI__builtin_arm_wfi:
3837  case ARM::BI__wfi:
3838    Value = 3;
3839    break;
3840  case ARM::BI__builtin_arm_sev:
3841  case ARM::BI__sev:
3842    Value = 4;
3843    break;
3844  case ARM::BI__builtin_arm_sevl:
3845  case ARM::BI__sevl:
3846    Value = 5;
3847    break;
3848  }
3849
3850  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
3851                            llvm::ConstantInt::get(Int32Ty, Value));
3852}
3853
3854// Generates the IR for a read/write special register builtin. ValueType is
3855// the type of the value that is to be written or read, and RegisterType is
3856// the type of the register being written to or read from.
3857static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
3858                                         const CallExpr *E,
3859                                         llvm::Type *RegisterType,
3860                                         llvm::Type *ValueType,
3861                                         bool IsRead,
3862                                         StringRef SysReg = "") {
3863  // The read/write register intrinsics only support 32- and 64-bit operations.
3864  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
3865          && "Unsupported size for register.");
3866
3867  CodeGen::CGBuilderTy &Builder = CGF.Builder;
3868  CodeGen::CodeGenModule &CGM = CGF.CGM;
3869  LLVMContext &Context = CGM.getLLVMContext();
3870
3871  if (SysReg.empty()) {
3872    const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
3873    SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
3874  }
3875
3876  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
3877  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
3878  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
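  // The register is identified by name via metadata; e.g. reading "cpsr"
  // becomes roughly: call i32 @llvm.read_register.i32(metadata !{!"cpsr"}).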
3879
3880  llvm::Type *Types[] = { RegisterType };
3881
3882  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
3883  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
3884            && "Can't fit 64-bit value in 32-bit register");
3885
3886  if (IsRead) {
3887    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
3888    llvm::Value *Call = Builder.CreateCall(F, Metadata);
3889
3890    if (MixedTypes)
3891      // Read into 64 bit register and then truncate result to 32 bit.
3892      return Builder.CreateTrunc(Call, ValueType);
3893
3894    if (ValueType->isPointerTy())
3895      // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
3896      return Builder.CreateIntToPtr(Call, ValueType);
3897
3898    return Call;
3899  }
3900
3901  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
3902  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
3903  if (MixedTypes) {
3904    // Extend 32 bit write value to 64 bit to pass to write.
3905    ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
3906    return Builder.CreateCall(F, { Metadata, ArgValue });
3907  }
3908
3909  if (ValueType->isPointerTy()) {
3910    // Have VoidPtrTy ArgValue but the write intrinsic takes an i32/i64.
3911    ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
3912    return Builder.CreateCall(F, { Metadata, ArgValue });
3913  }
3914
3915  return Builder.CreateCall(F, { Metadata, ArgValue });
3916}
3917
3918/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
3919/// argument that specifies the vector type.
3920static bool HasExtraNeonArgument(unsigned BuiltinID) {
3921  switch (BuiltinID) {
3922  default: break;
3923  case NEON::BI__builtin_neon_vget_lane_i8:
3924  case NEON::BI__builtin_neon_vget_lane_i16:
3925  case NEON::BI__builtin_neon_vget_lane_i32:
3926  case NEON::BI__builtin_neon_vget_lane_i64:
3927  case NEON::BI__builtin_neon_vget_lane_f32:
3928  case NEON::BI__builtin_neon_vgetq_lane_i8:
3929  case NEON::BI__builtin_neon_vgetq_lane_i16:
3930  case NEON::BI__builtin_neon_vgetq_lane_i32:
3931  case NEON::BI__builtin_neon_vgetq_lane_i64:
3932  case NEON::BI__builtin_neon_vgetq_lane_f32:
3933  case NEON::BI__builtin_neon_vset_lane_i8:
3934  case NEON::BI__builtin_neon_vset_lane_i16:
3935  case NEON::BI__builtin_neon_vset_lane_i32:
3936  case NEON::BI__builtin_neon_vset_lane_i64:
3937  case NEON::BI__builtin_neon_vset_lane_f32:
3938  case NEON::BI__builtin_neon_vsetq_lane_i8:
3939  case NEON::BI__builtin_neon_vsetq_lane_i16:
3940  case NEON::BI__builtin_neon_vsetq_lane_i32:
3941  case NEON::BI__builtin_neon_vsetq_lane_i64:
3942  case NEON::BI__builtin_neon_vsetq_lane_f32:
3943  case NEON::BI__builtin_neon_vsha1h_u32:
3944  case NEON::BI__builtin_neon_vsha1cq_u32:
3945  case NEON::BI__builtin_neon_vsha1pq_u32:
3946  case NEON::BI__builtin_neon_vsha1mq_u32:
3947  case ARM::BI_MoveToCoprocessor:
3948  case ARM::BI_MoveToCoprocessor2:
3949    return false;
3950  }
3951  return true;
3952}
3953
3954Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
3955                                           const CallExpr *E) {
3956  if (auto Hint = GetValueForARMHint(BuiltinID))
3957    return Hint;
3958
3959  if (BuiltinID == ARM::BI__emit) {
3960    bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
3961    llvm::FunctionType *FTy =
3962        llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
3963
3964    APSInt Value;
3965    if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
3966      llvm_unreachable("Sema will ensure that the parameter is constant");
3967
3968    uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
3969
3970    llvm::InlineAsm *Emit =
3971        IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
3972                                 /*SideEffects=*/true)
3973                : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
3974                                 /*SideEffects=*/true);
3975
3976    return Builder.CreateCall(Emit);
3977  }
3978
3979  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
3980    Value *Option = EmitScalarExpr(E->getArg(0));
3981    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
3982  }
3983
3984  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
3985    Value *Address = EmitScalarExpr(E->getArg(0));
3986    Value *RW      = EmitScalarExpr(E->getArg(1));
3987    Value *IsData  = EmitScalarExpr(E->getArg(2));
3988
3989    // Locality is not supported on the ARM target.
3990    Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
3991
3992    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
3993    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
3994  }
3995
3996  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
3997    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
3998                              EmitScalarExpr(E->getArg(0)),
3999                              "rbit");
4000  }
4001
4002  if (BuiltinID == ARM::BI__clear_cache) {
4003    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4004    const FunctionDecl *FD = E->getDirectCallee();
4005    Value *Ops[2];
4006    for (unsigned i = 0; i < 2; i++)
4007      Ops[i] = EmitScalarExpr(E->getArg(i));
4008    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4009    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4010    StringRef Name = FD->getName();
4011    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4012  }
4013
4014  if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4015      BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4016    Function *F;
4017
4018    switch (BuiltinID) {
4019    default: llvm_unreachable("unexpected builtin");
4020    case ARM::BI__builtin_arm_mcrr:
4021      F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4022      break;
4023    case ARM::BI__builtin_arm_mcrr2:
4024      F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4025      break;
4026    }
4027
4028    // The MCRR{2} instruction has 5 operands, but
4029    // the intrinsic has only 4: Rt and Rt2 are
4030    // represented as a single unsigned 64-bit
4031    // integer in the intrinsic definition, while
4032    // the instruction itself takes two separate
4033    // 32-bit registers.
4034
4035    Value *Coproc = EmitScalarExpr(E->getArg(0));
4036    Value *Opc1 = EmitScalarExpr(E->getArg(1));
4037    Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4038    Value *CRm = EmitScalarExpr(E->getArg(3));
4039
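    // Split the i64 into the two 32-bit halves the instruction expects:
    // Rt = lo32(RtAndRt2), Rt2 = hi32(RtAndRt2).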
4040    Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4041    Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4042    Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4043    Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4044
4045    return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4046  }
4047
4048  if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4049      BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4050    Function *F;
4051
4052    switch (BuiltinID) {
4053    default: llvm_unreachable("unexpected builtin");
4054    case ARM::BI__builtin_arm_mrrc:
4055      F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4056      break;
4057    case ARM::BI__builtin_arm_mrrc2:
4058      F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4059      break;
4060    }
4061
4062    Value *Coproc = EmitScalarExpr(E->getArg(0));
4063    Value *Opc1 = EmitScalarExpr(E->getArg(1));
4064    Value *CRm  = EmitScalarExpr(E->getArg(2));
4065    Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4066
4067    // The intrinsic returns an unsigned 64-bit value represented
4068    // as two 32-bit integers.
4069
4070    Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4071    Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4072    Rt = Builder.CreateZExt(Rt, Int64Ty);
4073    Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4074
4075    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4076    RtAndRt2 = Builder.CreateShl(Rt, ShiftCst, "shl", true /* nuw */);
4077    RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4078
4079    return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4080  }
4081
4082  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4083      ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4084        BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4085       getContext().getTypeSize(E->getType()) == 64) ||
4086      BuiltinID == ARM::BI__ldrexd) {
4087    Function *F;
4088
4089    switch (BuiltinID) {
4090    default: llvm_unreachable("unexpected builtin");
4091    case ARM::BI__builtin_arm_ldaex:
4092      F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4093      break;
4094    case ARM::BI__builtin_arm_ldrexd:
4095    case ARM::BI__builtin_arm_ldrex:
4096    case ARM::BI__ldrexd:
4097      F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4098      break;
4099    }
4100
4101    Value *LdPtr = EmitScalarExpr(E->getArg(0));
4102    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4103                                    "ldrexd");
4104
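    // ldrexd returns { i32, i32 } with the high word in element 1, so the
    // 64-bit result is reassembled as (hi << 32) | lo.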
4105    Value *Val0 = Builder.CreateExtractValue(Val, 1);
4106    Value *Val1 = Builder.CreateExtractValue(Val, 0);
4107    Val0 = Builder.CreateZExt(Val0, Int64Ty);
4108    Val1 = Builder.CreateZExt(Val1, Int64Ty);
4109
4110    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4111    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4112    Val = Builder.CreateOr(Val, Val1);
4113    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4114  }
4115
4116  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4117      BuiltinID == ARM::BI__builtin_arm_ldaex) {
4118    Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4119
4120    QualType Ty = E->getType();
4121    llvm::Type *RealResTy = ConvertType(Ty);
4122    llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4123                                                  getContext().getTypeSize(Ty));
4124    LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4125
4126    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4127                                       ? Intrinsic::arm_ldaex
4128                                       : Intrinsic::arm_ldrex,
4129                                   LoadAddr->getType());
4130    Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4131
4132    if (RealResTy->isPointerTy())
4133      return Builder.CreateIntToPtr(Val, RealResTy);
4134    else {
4135      Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4136      return Builder.CreateBitCast(Val, RealResTy);
4137    }
4138  }
4139
4140  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4141      ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4142        BuiltinID == ARM::BI__builtin_arm_strex) &&
4143       getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4144    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4145                                       ? Intrinsic::arm_stlexd
4146                                       : Intrinsic::arm_strexd);
4147    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
4148
4149    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4150    Value *Val = EmitScalarExpr(E->getArg(0));
4151    Builder.CreateStore(Val, Tmp);
4152
4153    Address LdPtr = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4154    Val = Builder.CreateLoad(LdPtr);
4155
4156    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4157    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4158    Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4159    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4160  }
4161
4162  if (BuiltinID == ARM::BI__builtin_arm_strex ||
4163      BuiltinID == ARM::BI__builtin_arm_stlex) {
4164    Value *StoreVal = EmitScalarExpr(E->getArg(0));
4165    Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4166
4167    QualType Ty = E->getArg(0)->getType();
4168    llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4169                                                 getContext().getTypeSize(Ty));
4170    StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4171
4172    if (StoreVal->getType()->isPointerTy())
4173      StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4174    else {
4175      StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4176      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4177    }
4178
4179    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4180                                       ? Intrinsic::arm_stlex
4181                                       : Intrinsic::arm_strex,
4182                                   StoreAddr->getType());
4183    return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4184  }
4185
4186  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4187    Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4188    return Builder.CreateCall(F);
4189  }
4190
4191  // CRC32
4192  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4193  switch (BuiltinID) {
4194  case ARM::BI__builtin_arm_crc32b:
4195    CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4196  case ARM::BI__builtin_arm_crc32cb:
4197    CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4198  case ARM::BI__builtin_arm_crc32h:
4199    CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4200  case ARM::BI__builtin_arm_crc32ch:
4201    CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4202  case ARM::BI__builtin_arm_crc32w:
4203  case ARM::BI__builtin_arm_crc32d:
4204    CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4205  case ARM::BI__builtin_arm_crc32cw:
4206  case ARM::BI__builtin_arm_crc32cd:
4207    CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4208  }
4209
4210  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4211    Value *Arg0 = EmitScalarExpr(E->getArg(0));
4212    Value *Arg1 = EmitScalarExpr(E->getArg(1));
4213
4214    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4215    // intrinsics, hence we need different codegen for these cases.
4216    if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4217        BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4218      Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4219      Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4220      Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4221      Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4222
4223      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4224      Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4225      return Builder.CreateCall(F, {Res, Arg1b});
4226    } else {
4227      Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4228
4229      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4230      return Builder.CreateCall(F, {Arg0, Arg1});
4231    }
4232  }
4233
4234  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4235      BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4236      BuiltinID == ARM::BI__builtin_arm_rsrp ||
4237      BuiltinID == ARM::BI__builtin_arm_wsr ||
4238      BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4239      BuiltinID == ARM::BI__builtin_arm_wsrp) {
4240
4241    bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4242                  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4243                  BuiltinID == ARM::BI__builtin_arm_rsrp;
4244
4245    bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4246                            BuiltinID == ARM::BI__builtin_arm_wsrp;
4247
4248    bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4249                   BuiltinID == ARM::BI__builtin_arm_wsr64;
4250
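    // e.g. __builtin_arm_rsr64 reads through a 64-bit register directly,
    // while the pointer variants (rsrp/wsrp) move the value through a 32-bit
    // register and convert between i8* and i32.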
4251    llvm::Type *ValueType;
4252    llvm::Type *RegisterType;
4253    if (IsPointerBuiltin) {
4254      ValueType = VoidPtrTy;
4255      RegisterType = Int32Ty;
4256    } else if (Is64Bit) {
4257      ValueType = RegisterType = Int64Ty;
4258    } else {
4259      ValueType = RegisterType = Int32Ty;
4260    }
4261
4262    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4263  }
4264
4265  // Find out if any arguments are required to be integer constant
4266  // expressions.
4267  unsigned ICEArguments = 0;
4268  ASTContext::GetBuiltinTypeError Error;
4269  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4270  assert(Error == ASTContext::GE_None && "Should not codegen an error");
4271
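  // The arm.neon vld/vst intrinsics take an explicit i32 alignment operand;
  // this helper materializes it from an Address.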
4272  auto getAlignmentValue32 = [&](Address addr) -> Value* {
4273    return Builder.getInt32(addr.getAlignment().getQuantity());
4274  };
4275
4276  Address PtrOp0 = Address::invalid();
4277  Address PtrOp1 = Address::invalid();
4278  SmallVector<Value*, 4> Ops;
4279  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4280  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4281  for (unsigned i = 0, e = NumArgs; i != e; i++) {
4282    if (i == 0) {
4283      switch (BuiltinID) {
4284      case NEON::BI__builtin_neon_vld1_v:
4285      case NEON::BI__builtin_neon_vld1q_v:
4286      case NEON::BI__builtin_neon_vld1q_lane_v:
4287      case NEON::BI__builtin_neon_vld1_lane_v:
4288      case NEON::BI__builtin_neon_vld1_dup_v:
4289      case NEON::BI__builtin_neon_vld1q_dup_v:
4290      case NEON::BI__builtin_neon_vst1_v:
4291      case NEON::BI__builtin_neon_vst1q_v:
4292      case NEON::BI__builtin_neon_vst1q_lane_v:
4293      case NEON::BI__builtin_neon_vst1_lane_v:
4294      case NEON::BI__builtin_neon_vst2_v:
4295      case NEON::BI__builtin_neon_vst2q_v:
4296      case NEON::BI__builtin_neon_vst2_lane_v:
4297      case NEON::BI__builtin_neon_vst2q_lane_v:
4298      case NEON::BI__builtin_neon_vst3_v:
4299      case NEON::BI__builtin_neon_vst3q_v:
4300      case NEON::BI__builtin_neon_vst3_lane_v:
4301      case NEON::BI__builtin_neon_vst3q_lane_v:
4302      case NEON::BI__builtin_neon_vst4_v:
4303      case NEON::BI__builtin_neon_vst4q_v:
4304      case NEON::BI__builtin_neon_vst4_lane_v:
4305      case NEON::BI__builtin_neon_vst4q_lane_v:
4306        // Get the alignment for the argument in addition to the value;
4307        // we'll use it later.
4308        PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4309        Ops.push_back(PtrOp0.getPointer());
4310        continue;
4311      }
4312    }
4313    if (i == 1) {
4314      switch (BuiltinID) {
4315      case NEON::BI__builtin_neon_vld2_v:
4316      case NEON::BI__builtin_neon_vld2q_v:
4317      case NEON::BI__builtin_neon_vld3_v:
4318      case NEON::BI__builtin_neon_vld3q_v:
4319      case NEON::BI__builtin_neon_vld4_v:
4320      case NEON::BI__builtin_neon_vld4q_v:
4321      case NEON::BI__builtin_neon_vld2_lane_v:
4322      case NEON::BI__builtin_neon_vld2q_lane_v:
4323      case NEON::BI__builtin_neon_vld3_lane_v:
4324      case NEON::BI__builtin_neon_vld3q_lane_v:
4325      case NEON::BI__builtin_neon_vld4_lane_v:
4326      case NEON::BI__builtin_neon_vld4q_lane_v:
4327      case NEON::BI__builtin_neon_vld2_dup_v:
4328      case NEON::BI__builtin_neon_vld3_dup_v:
4329      case NEON::BI__builtin_neon_vld4_dup_v:
4330        // Get the alignment for the argument in addition to the value;
4331        // we'll use it later.
4332        PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4333        Ops.push_back(PtrOp1.getPointer());
4334        continue;
4335      }
4336    }
4337
4338    if ((ICEArguments & (1 << i)) == 0) {
4339      Ops.push_back(EmitScalarExpr(E->getArg(i)));
4340    } else {
4341      // If this is required to be a constant, constant fold it so that we know
4342      // that the generated intrinsic gets a ConstantInt.
4343      llvm::APSInt Result;
4344      bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4345      assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4346      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4347    }
4348  }
4349
4350  switch (BuiltinID) {
4351  default: break;
4352
4353  case NEON::BI__builtin_neon_vget_lane_i8:
4354  case NEON::BI__builtin_neon_vget_lane_i16:
4355  case NEON::BI__builtin_neon_vget_lane_i32:
4356  case NEON::BI__builtin_neon_vget_lane_i64:
4357  case NEON::BI__builtin_neon_vget_lane_f32:
4358  case NEON::BI__builtin_neon_vgetq_lane_i8:
4359  case NEON::BI__builtin_neon_vgetq_lane_i16:
4360  case NEON::BI__builtin_neon_vgetq_lane_i32:
4361  case NEON::BI__builtin_neon_vgetq_lane_i64:
4362  case NEON::BI__builtin_neon_vgetq_lane_f32:
4363    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4364
4365  case NEON::BI__builtin_neon_vset_lane_i8:
4366  case NEON::BI__builtin_neon_vset_lane_i16:
4367  case NEON::BI__builtin_neon_vset_lane_i32:
4368  case NEON::BI__builtin_neon_vset_lane_i64:
4369  case NEON::BI__builtin_neon_vset_lane_f32:
4370  case NEON::BI__builtin_neon_vsetq_lane_i8:
4371  case NEON::BI__builtin_neon_vsetq_lane_i16:
4372  case NEON::BI__builtin_neon_vsetq_lane_i32:
4373  case NEON::BI__builtin_neon_vsetq_lane_i64:
4374  case NEON::BI__builtin_neon_vsetq_lane_f32:
4375    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4376
4377  case NEON::BI__builtin_neon_vsha1h_u32:
4378    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4379                        "vsha1h");
4380  case NEON::BI__builtin_neon_vsha1cq_u32:
4381    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4382                        "vsha1h");
4383  case NEON::BI__builtin_neon_vsha1pq_u32:
4384    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4385                        "vsha1h");
4386  case NEON::BI__builtin_neon_vsha1mq_u32:
4387    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4388                        "vsha1h");
4389
4390  // The ARM _MoveToCoprocessor builtins put the input register value as
4391  // the first argument, but the LLVM intrinsic expects it as the third one.
4392  case ARM::BI_MoveToCoprocessor:
4393  case ARM::BI_MoveToCoprocessor2: {
4394    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4395                                   Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4396    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4397                                  Ops[3], Ops[4], Ops[5]});
4398  }
4399  }
4400
4401  // Get the last argument, which specifies the vector type.
4402  assert(HasExtraArg);
4403  llvm::APSInt Result;
4404  const Expr *Arg = E->getArg(E->getNumArgs()-1);
4405  if (!Arg->isIntegerConstantExpr(Result, getContext()))
4406    return nullptr;
4407
4408  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4409      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4410    // Determine the overloaded type of this builtin.
4411    llvm::Type *Ty;
4412    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4413      Ty = FloatTy;
4414    else
4415      Ty = DoubleTy;
4416
4417    // Determine whether this is an unsigned conversion or not.
4418    bool usgn = Result.getZExtValue() == 1;
4419    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4420
4421    // Call the appropriate intrinsic.
4422    Function *F = CGM.getIntrinsic(Int, Ty);
4423    return Builder.CreateCall(F, Ops, "vcvtr");
4424  }
4425
4426  // Determine the type of this overloaded NEON intrinsic.
4427  NeonTypeFlags Type(Result.getZExtValue());
4428  bool usgn = Type.isUnsigned();
4429  bool rightShift = false;
4430
4431  llvm::VectorType *VTy = GetNeonType(this, Type);
4432  llvm::Type *Ty = VTy;
4433  if (!Ty)
4434    return nullptr;
4435
4436  // Many NEON builtins have identical semantics and uses in ARM and
4437  // AArch64. Emit these in a single function.
4438  auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4439  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4440      IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4441  if (Builtin)
4442    return EmitCommonNeonBuiltinExpr(
4443        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4444        Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4445
4446  unsigned Int;
4447  switch (BuiltinID) {
4448  default: return nullptr;
4449  case NEON::BI__builtin_neon_vld1q_lane_v:
4450    // Handle 64-bit integer elements as a special case.  Use shuffles of
4451    // one-element vectors to avoid poor code for i64 in the backend.
4452    if (VTy->getElementType()->isIntegerTy(64)) {
4453      // Extract the other lane.
4454      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4455      uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4456      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4457      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4458      // Load the value as a one-element vector.
4459      Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4460      llvm::Type *Tys[] = {Ty, Int8PtrTy};
4461      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4462      Value *Align = getAlignmentValue32(PtrOp0);
4463      Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4464      // Combine them.
4465      uint32_t Indices[] = {1 - Lane, Lane};
4466      SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4467      return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4468    }
4469    // fall through
4470  case NEON::BI__builtin_neon_vld1_lane_v: {
4471    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4472    PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4473    Value *Ld = Builder.CreateLoad(PtrOp0);
4474    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4475  }
4476  case NEON::BI__builtin_neon_vld2_dup_v:
4477  case NEON::BI__builtin_neon_vld3_dup_v:
4478  case NEON::BI__builtin_neon_vld4_dup_v: {
4479    // Handle 64-bit elements as a special case.  There is no "dup" needed.
4480    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4481      switch (BuiltinID) {
4482      case NEON::BI__builtin_neon_vld2_dup_v:
4483        Int = Intrinsic::arm_neon_vld2;
4484        break;
4485      case NEON::BI__builtin_neon_vld3_dup_v:
4486        Int = Intrinsic::arm_neon_vld3;
4487        break;
4488      case NEON::BI__builtin_neon_vld4_dup_v:
4489        Int = Intrinsic::arm_neon_vld4;
4490        break;
4491      default: llvm_unreachable("unknown vld_dup intrinsic?");
4492      }
4493      llvm::Type *Tys[] = {Ty, Int8PtrTy};
4494      Function *F = CGM.getIntrinsic(Int, Tys);
4495      llvm::Value *Align = getAlignmentValue32(PtrOp1);
4496      Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
4497      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4498      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4499      return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4500    }
4501    switch (BuiltinID) {
4502    case NEON::BI__builtin_neon_vld2_dup_v:
4503      Int = Intrinsic::arm_neon_vld2lane;
4504      break;
4505    case NEON::BI__builtin_neon_vld3_dup_v:
4506      Int = Intrinsic::arm_neon_vld3lane;
4507      break;
4508    case NEON::BI__builtin_neon_vld4_dup_v:
4509      Int = Intrinsic::arm_neon_vld4lane;
4510      break;
4511    default: llvm_unreachable("unknown vld_dup intrinsic?");
4512    }
4513    llvm::Type *Tys[] = {Ty, Int8PtrTy};
4514    Function *F = CGM.getIntrinsic(Int, Tys);
4515    llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
4516
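    // Emit a vldN-lane load into lane 0 of otherwise-undef vectors; the
    // loaded lane is then splatted to every element in the loop below.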
4517    SmallVector<Value*, 6> Args;
4518    Args.push_back(Ops[1]);
4519    Args.append(STy->getNumElements(), UndefValue::get(Ty));
4520
4521    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
4522    Args.push_back(CI);
4523    Args.push_back(getAlignmentValue32(PtrOp1));
4524
4525    Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
4526    // Splat lane 0 to all elements in each vector of the result.
4527    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
4528      Value *Val = Builder.CreateExtractValue(Ops[1], i);
4529      Value *Elt = Builder.CreateBitCast(Val, Ty);
4530      Elt = EmitNeonSplat(Elt, CI);
4531      Elt = Builder.CreateBitCast(Elt, Val->getType());
4532      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
4533    }
4534    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4535    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4536    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4537  }
4538  case NEON::BI__builtin_neon_vqrshrn_n_v:
4539    Int =
4540      usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
4541    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
4542                        1, true);
4543  case NEON::BI__builtin_neon_vqrshrun_n_v:
4544    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
4545                        Ops, "vqrshrun_n", 1, true);
4546  case NEON::BI__builtin_neon_vqshrn_n_v:
4547    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
4548    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
4549                        1, true);
4550  case NEON::BI__builtin_neon_vqshrun_n_v:
4551    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
4552                        Ops, "vqshrun_n", 1, true);
4553  case NEON::BI__builtin_neon_vrecpe_v:
4554  case NEON::BI__builtin_neon_vrecpeq_v:
4555    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
4556                        Ops, "vrecpe");
4557  case NEON::BI__builtin_neon_vrshrn_n_v:
4558    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
4559                        Ops, "vrshrn_n", 1, true);
4560  case NEON::BI__builtin_neon_vrsra_n_v:
4561  case NEON::BI__builtin_neon_vrsraq_n_v:
4562    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4563    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4564    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
4565    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
4566    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
4567    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
4568  case NEON::BI__builtin_neon_vsri_n_v:
4569  case NEON::BI__builtin_neon_vsriq_n_v:
4570    rightShift = true;
4571  case NEON::BI__builtin_neon_vsli_n_v:
4572  case NEON::BI__builtin_neon_vsliq_n_v:
4573    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
4574    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
4575                        Ops, "vsli_n");
4576  case NEON::BI__builtin_neon_vsra_n_v:
4577  case NEON::BI__builtin_neon_vsraq_n_v:
4578    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4579    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
4580    return Builder.CreateAdd(Ops[0], Ops[1]);
4581  case NEON::BI__builtin_neon_vst1q_lane_v:
4582    // Handle 64-bit integer elements as a special case.  Use a shuffle to get
4583    // a one-element vector and avoid poor code for i64 in the backend.
4584    if (VTy->getElementType()->isIntegerTy(64)) {
4585      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4586      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
4587      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4588      Ops[2] = getAlignmentValue32(PtrOp0);
4589      llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
4590      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
4591                                                 Tys), Ops);
4592    }
4593    // fall through
4594  case NEON::BI__builtin_neon_vst1_lane_v: {
4595    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4596    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
4597    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4598    auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
4599    return St;
4600  }
4601  case NEON::BI__builtin_neon_vtbl1_v:
4602    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
4603                        Ops, "vtbl1");
4604  case NEON::BI__builtin_neon_vtbl2_v:
4605    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
4606                        Ops, "vtbl2");
4607  case NEON::BI__builtin_neon_vtbl3_v:
4608    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
4609                        Ops, "vtbl3");
4610  case NEON::BI__builtin_neon_vtbl4_v:
4611    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
4612                        Ops, "vtbl4");
4613  case NEON::BI__builtin_neon_vtbx1_v:
4614    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
4615                        Ops, "vtbx1");
4616  case NEON::BI__builtin_neon_vtbx2_v:
4617    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
4618                        Ops, "vtbx2");
4619  case NEON::BI__builtin_neon_vtbx3_v:
4620    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
4621                        Ops, "vtbx3");
4622  case NEON::BI__builtin_neon_vtbx4_v:
4623    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
4624                        Ops, "vtbx4");
4625  }
4626}
4627
4628static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF,
4629                                        unsigned BuiltinID, const CallExpr *E,
4630                                        SmallVectorImpl<Value *> &Ops) {
4631  unsigned int Int = 0;
4632  const char *s = nullptr;
4633
4634  switch (BuiltinID) {
4635  default:
4636    return nullptr;
4637  case NEON::BI__builtin_neon_vtbl1_v:
4638  case NEON::BI__builtin_neon_vqtbl1_v:
4639  case NEON::BI__builtin_neon_vqtbl1q_v:
4640  case NEON::BI__builtin_neon_vtbl2_v:
4641  case NEON::BI__builtin_neon_vqtbl2_v:
4642  case NEON::BI__builtin_neon_vqtbl2q_v:
4643  case NEON::BI__builtin_neon_vtbl3_v:
4644  case NEON::BI__builtin_neon_vqtbl3_v:
4645  case NEON::BI__builtin_neon_vqtbl3q_v:
4646  case NEON::BI__builtin_neon_vtbl4_v:
4647  case NEON::BI__builtin_neon_vqtbl4_v:
4648  case NEON::BI__builtin_neon_vqtbl4q_v:
4649    break;
4650  case NEON::BI__builtin_neon_vtbx1_v:
4651  case NEON::BI__builtin_neon_vqtbx1_v:
4652  case NEON::BI__builtin_neon_vqtbx1q_v:
4653  case NEON::BI__builtin_neon_vtbx2_v:
4654  case NEON::BI__builtin_neon_vqtbx2_v:
4655  case NEON::BI__builtin_neon_vqtbx2q_v:
4656  case NEON::BI__builtin_neon_vtbx3_v:
4657  case NEON::BI__builtin_neon_vqtbx3_v:
4658  case NEON::BI__builtin_neon_vqtbx3q_v:
4659  case NEON::BI__builtin_neon_vtbx4_v:
4660  case NEON::BI__builtin_neon_vqtbx4_v:
4661  case NEON::BI__builtin_neon_vqtbx4q_v:
4662    break;
4663  }
4664
4665  assert(E->getNumArgs() >= 3);
4666
4667  // Get the last argument, which specifies the vector type.
4668  llvm::APSInt Result;
4669  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4670  if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
4671    return nullptr;
4672
4673  // Determine the type of this overloaded NEON intrinsic.
4674  NeonTypeFlags Type(Result.getZExtValue());
4675  llvm::VectorType *Ty = GetNeonType(&CGF, Type);
4676  if (!Ty)
4677    return nullptr;
4678
4679  CodeGen::CGBuilderTy &Builder = CGF.Builder;
4680
4681  // AArch64 scalar builtins are not overloaded; they do not have an extra
4682  // argument that specifies the vector type, so each case is handled here.
4683  switch (BuiltinID) {
4684  case NEON::BI__builtin_neon_vtbl1_v: {
4685    return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
4686                              Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
4687                              "vtbl1");
4688  }
4689  case NEON::BI__builtin_neon_vtbl2_v: {
4690    return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
4691                              Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
4692                              "vtbl1");
4693  }
4694  case NEON::BI__builtin_neon_vtbl3_v: {
4695    return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
4696                              Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
4697                              "vtbl2");
4698  }
4699  case NEON::BI__builtin_neon_vtbl4_v: {
4700    return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
4701                              Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
4702                              "vtbl2");
4703  }
4704  case NEON::BI__builtin_neon_vtbx1_v: {
4705    Value *TblRes =
4706        packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
4707                           Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
4708
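    // A single 64-bit table only covers byte indices 0..7, and vtbx must
    // preserve the destination lane for out-of-range indices, so emulate the
    // select with a sign-extended compare mask.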
4709    llvm::Constant *EightV = ConstantInt::get(Ty, 8);
4710    Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
4711    CmpRes = Builder.CreateSExt(CmpRes, Ty);
4712
4713    Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4714    Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4715    return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4716  }
4717  case NEON::BI__builtin_neon_vtbx2_v: {
4718    return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
4719                              Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
4720                              "vtbx1");
4721  }
4722  case NEON::BI__builtin_neon_vtbx3_v: {
4723    Value *TblRes =
4724        packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
4725                           Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
4726
4727    llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
4728    Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
4729                                           TwentyFourV);
4730    CmpRes = Builder.CreateSExt(CmpRes, Ty);
4731
4732    Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4733    Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4734    return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4735  }
4736  case NEON::BI__builtin_neon_vtbx4_v: {
4737    return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
4738                              Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
4739                              "vtbx2");
4740  }
4741  case NEON::BI__builtin_neon_vqtbl1_v:
4742  case NEON::BI__builtin_neon_vqtbl1q_v:
4743    Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
4744  case NEON::BI__builtin_neon_vqtbl2_v:
4745  case NEON::BI__builtin_neon_vqtbl2q_v:
4746    Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
4747  case NEON::BI__builtin_neon_vqtbl3_v:
4748  case NEON::BI__builtin_neon_vqtbl3q_v:
4749    Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
4750  case NEON::BI__builtin_neon_vqtbl4_v:
4751  case NEON::BI__builtin_neon_vqtbl4q_v:
4752    Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
4753  case NEON::BI__builtin_neon_vqtbx1_v:
4754  case NEON::BI__builtin_neon_vqtbx1q_v:
4755    Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
4756  case NEON::BI__builtin_neon_vqtbx2_v:
4757  case NEON::BI__builtin_neon_vqtbx2q_v:
4758    Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
4759  case NEON::BI__builtin_neon_vqtbx3_v:
4760  case NEON::BI__builtin_neon_vqtbx3q_v:
4761    Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
4762  case NEON::BI__builtin_neon_vqtbx4_v:
4763  case NEON::BI__builtin_neon_vqtbx4q_v:
4764    Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
4765  }
4767
4768  if (!Int)
4769    return nullptr;
4770
4771  Function *F = CGF.CGM.getIntrinsic(Int, Ty);
4772  return CGF.EmitNeonCall(F, Ops, s);
4773}
4774
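// Widen a scalar i16 into lane 0 of a <4 x i16> vector, roughly
//   %v = insertelement <4 x i16> undef, i16 %x, i64 0
// so that scalar builtins can reuse the vector intrinsics.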
4775Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
4776  llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
4777  Op = Builder.CreateBitCast(Op, Int16Ty);
4778  Value *V = UndefValue::get(VTy);
4779  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4780  Op = Builder.CreateInsertElement(V, Op, CI);
4781  return Op;
4782}
4783
4784Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4785                                               const CallExpr *E) {
4786  unsigned HintID = static_cast<unsigned>(-1);
4787  switch (BuiltinID) {
4788  default: break;
4789  case AArch64::BI__builtin_arm_nop:
4790    HintID = 0;
4791    break;
4792  case AArch64::BI__builtin_arm_yield:
4793    HintID = 1;
4794    break;
4795  case AArch64::BI__builtin_arm_wfe:
4796    HintID = 2;
4797    break;
4798  case AArch64::BI__builtin_arm_wfi:
4799    HintID = 3;
4800    break;
4801  case AArch64::BI__builtin_arm_sev:
4802    HintID = 4;
4803    break;
4804  case AArch64::BI__builtin_arm_sevl:
4805    HintID = 5;
4806    break;
4807  }
4808
4809  if (HintID != static_cast<unsigned>(-1)) {
4810    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4811    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4812  }
4813
4814  if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
4815    Value *Address         = EmitScalarExpr(E->getArg(0));
4816    Value *RW              = EmitScalarExpr(E->getArg(1));
4817    Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
4818    Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
4819    Value *IsData          = EmitScalarExpr(E->getArg(4));
4820
4821    Value *Locality = nullptr;
4822    if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
4823      // Temporal fetch: convert the cache level to an LLVM locality value.
4824      Locality = llvm::ConstantInt::get(Int32Ty,
4825        -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
4826    } else {
4827      // Streaming fetch.
4828      Locality = llvm::ConstantInt::get(Int32Ty, 0);
4829    }
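    // Net effect: locality = 3 - CacheLevel for temporal fetches, so the
    // innermost level gets LLVM's most-temporal locality (3); streaming
    // fetches use locality 0.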
4830
4831    // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
4832    // PLDL3STRM or PLDL2STRM.
4833    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4834    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4835  }
4836
4837  if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
4838    assert((getContext().getTypeSize(E->getType()) == 32) &&
4839           "rbit of unusual size!");
4840    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4841    return Builder.CreateCall(
4842        CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4843  }
4844  if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
4845    assert((getContext().getTypeSize(E->getType()) == 64) &&
4846           "rbit of unusual size!");
4847    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4848    return Builder.CreateCall(
4849        CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4850  }
4851
4852  if (BuiltinID == AArch64::BI__clear_cache) {
4853    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4854    const FunctionDecl *FD = E->getDirectCallee();
4855    Value *Ops[2];
4856    for (unsigned i = 0; i < 2; i++)
4857      Ops[i] = EmitScalarExpr(E->getArg(i));
4858    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4859    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4860    StringRef Name = FD->getName();
4861    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4862  }
4863
4864  if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4865      BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
4866      getContext().getTypeSize(E->getType()) == 128) {
4867    Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4868                                       ? Intrinsic::aarch64_ldaxp
4869                                       : Intrinsic::aarch64_ldxp);
4870
4871    Value *LdPtr = EmitScalarExpr(E->getArg(0));
4872    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4873                                    "ldxp");
4874
4875    Value *Val0 = Builder.CreateExtractValue(Val, 1);
4876    Value *Val1 = Builder.CreateExtractValue(Val, 0);
4877    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4878    Val0 = Builder.CreateZExt(Val0, Int128Ty);
4879    Val1 = Builder.CreateZExt(Val1, Int128Ty);
4880
4881    Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4882    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4883    Val = Builder.CreateOr(Val, Val1);
4884    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4885  } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4886             BuiltinID == AArch64::BI__builtin_arm_ldaex) {
4887    Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4888
4889    QualType Ty = E->getType();
4890    llvm::Type *RealResTy = ConvertType(Ty);
4891    llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4892                                                  getContext().getTypeSize(Ty));
4893    LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4894
4895    Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4896                                       ? Intrinsic::aarch64_ldaxr
4897                                       : Intrinsic::aarch64_ldxr,
4898                                   LoadAddr->getType());
4899    Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4900
4901    if (RealResTy->isPointerTy())
4902      return Builder.CreateIntToPtr(Val, RealResTy);
4903
4904    Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4905    return Builder.CreateBitCast(Val, RealResTy);
4906  }
4907
  if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
       BuiltinID == AArch64::BI__builtin_arm_stlex) &&
      getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
    Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
                                       ? Intrinsic::aarch64_stlxp
                                       : Intrinsic::aarch64_stxp);
    llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);

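    // Spill the 128-bit value to a temporary and reload it as a pair of i64s,
    // which stxp/stlxp take as separate operands.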
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);

    Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
    llvm::Value *Val = Builder.CreateLoad(Tmp);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
                                         Int8PtrTy);
    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
  }

  if (BuiltinID == AArch64::BI__builtin_arm_strex ||
      BuiltinID == AArch64::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
                                                 getContext().getTypeSize(Ty));
    StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());

    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
    else {
      StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
    }

    Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
                                       ? Intrinsic::aarch64_stlxr
                                       : Intrinsic::aarch64_stxr,
                                   StoreAddr->getType());
    return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
  }

  if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
    return Builder.CreateCall(F);
  }

  // CRC32
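  // Note that the 64-bit data builtins (crc32d/crc32cd) map to the
  // 'x'-suffixed aarch64 intrinsics, which is how the A64 instructions spell
  // their 64-bit forms.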
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case AArch64::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
  case AArch64::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
  case AArch64::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
  case AArch64::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
  case AArch64::BI__builtin_arm_crc32w:
    CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
  case AArch64::BI__builtin_arm_crc32cw:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
  case AArch64::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
  case AArch64::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));
    Function *F = CGM.getIntrinsic(CRCIntrinsicID);

    llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
    Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);

    return Builder.CreateCall(F, {Arg0, Arg1});
  }

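  // System-register builtins. AArch64 system registers are always 64 bits
  // wide, so RegisterType is fixed at Int64Ty and only the value type varies,
  // e.g. (illustrative):
  //   uint64_t el = __builtin_arm_rsr64("CurrentEL");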
  if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
      BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
      BuiltinID == AArch64::BI__builtin_arm_rsrp ||
      BuiltinID == AArch64::BI__builtin_arm_wsr ||
      BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
      BuiltinID == AArch64::BI__builtin_arm_wsrp) {

    bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
                  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
                  BuiltinID == AArch64::BI__builtin_arm_rsrp;

    bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
                            BuiltinID == AArch64::BI__builtin_arm_wsrp;

    bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
                   BuiltinID != AArch64::BI__builtin_arm_wsr;

    llvm::Type *ValueType;
    llvm::Type *RegisterType = Int64Ty;
    if (IsPointerBuiltin) {
      ValueType = VoidPtrTy;
    } else if (Is64Bit) {
      ValueType = Int64Ty;
    } else {
      ValueType = Int32Ty;
    }

    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
  }

  // Find out if any arguments are required to be integer constant
  // expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  llvm::SmallVector<Value*, 4> Ops;
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    } else {
      // If this is required to be a constant, constant fold it so that we know
      // that the generated intrinsic gets a ConstantInt.
      llvm::APSInt Result;
      bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
      assert(IsConst && "Constant arg isn't actually constant?");
      (void)IsConst;
      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
    }
  }

  auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
      SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);

  if (Builtin) {
    Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
    Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
    assert(Result && "SISD intrinsic should have been handled");
    return Result;
  }

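  // For overloaded NEON builtins the last argument is a constant encoding a
  // NeonTypeFlags value (element type, signedness, 64- vs 128-bit width),
  // which selects the concrete overload below.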
  llvm::APSInt Result;
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
  NeonTypeFlags Type(0);
  if (Arg->isIntegerConstantExpr(Result, getContext()))
    // Determine the type of this overloaded NEON intrinsic.
    Type = NeonTypeFlags(Result.getZExtValue());

  bool usgn = Type.isUnsigned();
  bool quad = Type.isQuad();

  // Handle non-overloaded intrinsics first.
  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vldrq_p128: {
    llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
    Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
    return Builder.CreateDefaultAlignedLoad(Ptr);
  }
  case NEON::BI__builtin_neon_vstrq_p128: {
    llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
    Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
    return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
  }
  case NEON::BI__builtin_neon_vcvts_u32_f32:
  case NEON::BI__builtin_neon_vcvtd_u64_f64:
    usgn = true;
    // FALL THROUGH
  case NEON::BI__builtin_neon_vcvts_s32_f32:
  case NEON::BI__builtin_neon_vcvtd_s64_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
    llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
    llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
    Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
    if (usgn)
      return Builder.CreateFPToUI(Ops[0], InTy);
    return Builder.CreateFPToSI(Ops[0], InTy);
  }
  case NEON::BI__builtin_neon_vcvts_f32_u32:
  case NEON::BI__builtin_neon_vcvtd_f64_u64:
    usgn = true;
    // FALL THROUGH
  case NEON::BI__builtin_neon_vcvts_f32_s32:
  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
    llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
    llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
    Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
    if (usgn)
      return Builder.CreateUIToFP(Ops[0], FTy);
    return Builder.CreateSIToFP(Ops[0], FTy);
  }
  case NEON::BI__builtin_neon_vpaddd_s64: {
    llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2i64, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2i64 into a scalar i64.
    return Builder.CreateAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpaddd_f64: {
    llvm::Type *Ty =
      llvm::VectorType::get(DoubleTy, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2f64, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2f64 into a scalar f64.
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpadds_f32: {
    llvm::Type *Ty =
      llvm::VectorType::get(FloatTy, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2f32, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2f32 into a scalar f32.
    return Builder.CreateFAdd(Op0, Op1, "vpadds");
  }
  case NEON::BI__builtin_neon_vceqzd_s64:
  case NEON::BI__builtin_neon_vceqzd_f64:
  case NEON::BI__builtin_neon_vceqzs_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vcgezd_s64:
  case NEON::BI__builtin_neon_vcgezd_f64:
  case NEON::BI__builtin_neon_vcgezs_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
  case NEON::BI__builtin_neon_vclezd_s64:
  case NEON::BI__builtin_neon_vclezd_f64:
  case NEON::BI__builtin_neon_vclezs_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
  case NEON::BI__builtin_neon_vcgtzd_s64:
  case NEON::BI__builtin_neon_vcgtzd_f64:
  case NEON::BI__builtin_neon_vcgtzs_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
  case NEON::BI__builtin_neon_vcltzd_s64:
  case NEON::BI__builtin_neon_vcltzd_f64:
  case NEON::BI__builtin_neon_vcltzs_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitAArch64CompareBuiltinExpr(
        Ops[0], ConvertType(E->getCallReturnType(getContext())),
        ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");

  case NEON::BI__builtin_neon_vceqzd_u64: {
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[0] =
        Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
  }
  case NEON::BI__builtin_neon_vceqd_f64:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcged_f64:
  case NEON::BI__builtin_neon_vcgtd_f64: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
    Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
  }
  case NEON::BI__builtin_neon_vceqs_f32:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcges_f32:
  case NEON::BI__builtin_neon_vcgts_f32: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
    case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
    case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
    case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
    case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
    Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int32Ty, "vcmps");
  }
  case NEON::BI__builtin_neon_vceqd_s64:
  case NEON::BI__builtin_neon_vceqd_u64:
  case NEON::BI__builtin_neon_vcgtd_s64:
  case NEON::BI__builtin_neon_vcgtd_u64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vcged_u64:
  case NEON::BI__builtin_neon_vcged_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcled_s64: {
    llvm::CmpInst::Predicate P;
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vceqd_s64:
    case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
    case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
    case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
    case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
    case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
    case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
    case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
    case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
    case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
    }
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
    return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
  }
  case NEON::BI__builtin_neon_vtstd_s64:
  case NEON::BI__builtin_neon_vtstd_u64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                llvm::Constant::getNullValue(Int64Ty));
    return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
  }
  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vset_lane_f64:
    // The vector type needs a cast for the v1f64 variant.
    Ops[1] = Builder.CreateBitCast(Ops[1],
                                   llvm::VectorType::get(DoubleTy, 1));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case NEON::BI__builtin_neon_vsetq_lane_f64:
    // The vector type needs a cast for the v2f64 variant.
    Ops[1] = Builder.CreateBitCast(Ops[1],
        llvm::VectorType::get(DoubleTy, 2));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");

  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vdupb_lane_i8:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vdupb_laneq_i8:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vduph_lane_i16:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vduph_laneq_i16:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vdups_lane_i32:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vdups_lane_f32:
    Ops[0] = Builder.CreateBitCast(Ops[0],
        llvm::VectorType::get(FloatTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vdups_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vdups_laneq_i32:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vdupd_lane_i64:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vdupd_lane_f64:
    Ops[0] = Builder.CreateBitCast(Ops[0],
        llvm::VectorType::get(DoubleTy, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vdupd_lane");
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vdupd_laneq_i64:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vget_lane_f32:
    Ops[0] = Builder.CreateBitCast(Ops[0],
        llvm::VectorType::get(FloatTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vget_lane_f64:
    Ops[0] = Builder.CreateBitCast(Ops[0],
        llvm::VectorType::get(DoubleTy, 1));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vgetq_lane_f32:
  case NEON::BI__builtin_neon_vdups_laneq_f32:
    Ops[0] = Builder.CreateBitCast(Ops[0],
        llvm::VectorType::get(FloatTy, 4));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vgetq_lane_f64:
  case NEON::BI__builtin_neon_vdupd_laneq_f64:
    Ops[0] = Builder.CreateBitCast(Ops[0],
        llvm::VectorType::get(DoubleTy, 2));
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vgetq_lane");
  case NEON::BI__builtin_neon_vaddd_s64:
  case NEON::BI__builtin_neon_vaddd_u64:
    return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
  case NEON::BI__builtin_neon_vsubd_s64:
  case NEON::BI__builtin_neon_vsubd_u64:
    return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
  case NEON::BI__builtin_neon_vqdmlalh_s16:
  case NEON::BI__builtin_neon_vqdmlslh_s16: {
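    // Widen the i16 operands to <4 x i16> vectors, do a saturating doubling
    // multiply-long, then extract lane 0 and saturating-add/subtract it into
    // the i32 accumulator.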
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
    ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
    llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
                          ProductOps, "vqdmlXl");
    Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");

    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
                                        ? Intrinsic::aarch64_neon_sqadd
                                        : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqshlud_n_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
                        Ops, "vqshlu_n");
  }
  case NEON::BI__builtin_neon_vqshld_n_u64:
  case NEON::BI__builtin_neon_vqshld_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
                                   ? Intrinsic::aarch64_neon_uqshl
                                   : Intrinsic::aarch64_neon_sqshl;
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
  }
  case NEON::BI__builtin_neon_vrshrd_n_u64:
  case NEON::BI__builtin_neon_vrshrd_n_s64: {
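    // There is no scalar rounding right-shift intrinsic, so a right shift by
    // N is emitted as a rounding left shift (urshl/srshl) by -N.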
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
                                   ? Intrinsic::aarch64_neon_urshl
                                   : Intrinsic::aarch64_neon_srshl;
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
    Ops[1] = ConstantInt::get(Int64Ty, -SV);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
  }
  case NEON::BI__builtin_neon_vrsrad_n_u64:
  case NEON::BI__builtin_neon_vrsrad_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
                                   ? Intrinsic::aarch64_neon_urshl
                                   : Intrinsic::aarch64_neon_srshl;
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
                                {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
    return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
  }
  case NEON::BI__builtin_neon_vshld_n_s64:
  case NEON::BI__builtin_neon_vshld_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateShl(
        Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_s64: {
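    // Shifting an i64 by 64 is undefined in LLVM IR, so clamp the immediate
    // to 63; for an arithmetic shift, 63 and 64 produce the same sign-fill
    // result.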
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateAShr(
        Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0.
    if (ShiftAmt == 64)
      return ConstantInt::get(Int64Ty, 0);
    return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
                              "shrd_n");
  }
  case NEON::BI__builtin_neon_vsrad_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    Ops[1] = Builder.CreateAShr(
        Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vsrad_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0.
    // As Op + 0 = Op, return Ops[0] directly.
    if (ShiftAmt == 64)
      return Ops[0];
    Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
                                "shrd_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
                                          "lane");
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(vectorWrapScalar16(Ops[1]));
    ProductOps.push_back(vectorWrapScalar16(Ops[2]));
    llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
                          ProductOps, "vqdmlXl");
    Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
    Ops.pop_back();

    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
                       BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
                          ? Intrinsic::aarch64_neon_sqadd
                          : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqdmlals_s32:
  case NEON::BI__builtin_neon_vqdmlsls_s32: {
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(Ops[1]);
    ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                     ProductOps, "vqdmlXl");

    unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
                                        ? Intrinsic::aarch64_neon_sqadd
                                        : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
    Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
                                          "lane");
    SmallVector<Value *, 2> ProductOps;
    ProductOps.push_back(Ops[1]);
    ProductOps.push_back(Ops[2]);
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                     ProductOps, "vqdmlXl");
    Ops.pop_back();

    unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
                       BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
                          ? Intrinsic::aarch64_neon_sqadd
                          : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
  }
  }

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Not all intrinsics handled by the common case work for AArch64 yet, so only
  // defer to common code if it's been added to our special map.
  Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
                                   AArch64SIMDIntrinsicsProvenSorted);

  if (Builtin)
    return EmitCommonNeonBuiltinExpr(
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
        Builtin->NameHint, Builtin->TypeModifier, E, Ops,
        /*never use addresses*/ Address::invalid(), Address::invalid());

  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
    return V;

  unsigned Int;
  switch (BuiltinID) {
  default: return nullptr;
  case NEON::BI__builtin_neon_vbsl_v:
  case NEON::BI__builtin_neon_vbslq_v: {
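    // There is no bsl intrinsic; emit the select directly as
    // (a & b) | (~a & c) on the integer view of the vectors.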
    llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
    Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
    Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");

    Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
    Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
    Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
    return Builder.CreateBitCast(Ops[0], Ty);
  }
  case NEON::BI__builtin_neon_vfma_lane_v:
  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
    // The ARM builtins (and instructions) have the addend as the first
    // operand, but the 'fma' intrinsics have it last. Swap it around here.
    Value *Addend = Ops[0];
    Value *Multiplicand = Ops[1];
    Value *LaneSource = Ops[2];
    Ops[0] = Multiplicand;
    Ops[1] = LaneSource;
    Ops[2] = Addend;

    // Now adjust things to handle the lane access.
    llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
      llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
      VTy;
    llvm::Constant *cst = cast<Constant>(Ops[3]);
    Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
    Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
    Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");

    Ops.pop_back();
    Int = Intrinsic::fma;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
  }
  case NEON::BI__builtin_neon_vfma_laneq_v: {
    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
    // v1f64 fma should be mapped to Neon scalar f64 fma
    if (VTy && VTy->getElementType() == DoubleTy) {
      Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
      Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
      llvm::Type *VTy = GetNeonType(this,
        NeonTypeFlags(NeonTypeFlags::Float64, false, true));
      Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
      Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
      Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
      Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
      return Builder.CreateBitCast(Result, Ty);
    }
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
                                            VTy->getNumElements() * 2);
    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
    Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
                                               cast<ConstantInt>(Ops[3]));
    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");

    return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
    return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
  }
  case NEON::BI__builtin_neon_vfmas_lane_f32:
  case NEON::BI__builtin_neon_vfmas_laneq_f32:
  case NEON::BI__builtin_neon_vfmad_lane_f64:
  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(3)));
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
    if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vmax_v:
  case NEON::BI__builtin_neon_vmaxq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
  case NEON::BI__builtin_neon_vmin_v:
  case NEON::BI__builtin_neon_vminq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
  case NEON::BI__builtin_neon_vabd_v:
  case NEON::BI__builtin_neon_vabdq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    unsigned ArgElts = VTy->getNumElements();
    llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
    unsigned BitWidth = EltTy->getBitWidth();
    llvm::Type *ArgTy = llvm::VectorType::get(
        llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
    llvm::Type* Tys[2] = { VTy, ArgTy };
    Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
    SmallVector<llvm::Value*, 1> TmpOps;
    TmpOps.push_back(Ops[1]);
    Function *F = CGM.getIntrinsic(Int, Tys);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
    llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
    return Builder.CreateAdd(tmp, addend);
  }
  case NEON::BI__builtin_neon_vpmin_v:
  case NEON::BI__builtin_neon_vpminq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
  case NEON::BI__builtin_neon_vpmax_v:
  case NEON::BI__builtin_neon_vpmaxq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
  case NEON::BI__builtin_neon_vminnm_v:
  case NEON::BI__builtin_neon_vminnmq_v:
    Int = Intrinsic::aarch64_neon_fminnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
  case NEON::BI__builtin_neon_vmaxnm_v:
  case NEON::BI__builtin_neon_vmaxnmq_v:
    Int = Intrinsic::aarch64_neon_fmaxnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
  case NEON::BI__builtin_neon_vrecpss_f32: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
                        Ops, "vrecps");
  }
  case NEON::BI__builtin_neon_vrecpsd_f64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
                        Ops, "vrecps");
  }
  case NEON::BI__builtin_neon_vqshrun_n_v:
    Int = Intrinsic::aarch64_neon_sqshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    Int = Intrinsic::aarch64_neon_sqrshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
  case NEON::BI__builtin_neon_vrshrn_n_v:
    Int = Intrinsic::aarch64_neon_rshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
  case NEON::BI__builtin_neon_vrnda_v:
  case NEON::BI__builtin_neon_vrndaq_v: {
    Int = Intrinsic::round;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
  }
  case NEON::BI__builtin_neon_vrndi_v:
  case NEON::BI__builtin_neon_vrndiq_v: {
    Int = Intrinsic::nearbyint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
  }
  case NEON::BI__builtin_neon_vrndm_v:
  case NEON::BI__builtin_neon_vrndmq_v: {
    Int = Intrinsic::floor;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
  }
  case NEON::BI__builtin_neon_vrndn_v:
  case NEON::BI__builtin_neon_vrndnq_v: {
    Int = Intrinsic::aarch64_neon_frintn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
  }
  case NEON::BI__builtin_neon_vrndp_v:
  case NEON::BI__builtin_neon_vrndpq_v: {
    Int = Intrinsic::ceil;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
  }
  case NEON::BI__builtin_neon_vrndx_v:
  case NEON::BI__builtin_neon_vrndxq_v: {
    Int = Intrinsic::rint;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
  }
  case NEON::BI__builtin_neon_vrnd_v:
  case NEON::BI__builtin_neon_vrndq_v: {
    Int = Intrinsic::trunc;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
  }
  case NEON::BI__builtin_neon_vceqz_v:
  case NEON::BI__builtin_neon_vceqzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
                                         ICmpInst::ICMP_EQ, "vceqz");
  case NEON::BI__builtin_neon_vcgez_v:
  case NEON::BI__builtin_neon_vcgezq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
                                         ICmpInst::ICMP_SGE, "vcgez");
  case NEON::BI__builtin_neon_vclez_v:
  case NEON::BI__builtin_neon_vclezq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
                                         ICmpInst::ICMP_SLE, "vclez");
  case NEON::BI__builtin_neon_vcgtz_v:
  case NEON::BI__builtin_neon_vcgtzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
                                         ICmpInst::ICMP_SGT, "vcgtz");
  case NEON::BI__builtin_neon_vcltz_v:
  case NEON::BI__builtin_neon_vcltzq_v:
    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
                                         ICmpInst::ICMP_SLT, "vcltz");
  case NEON::BI__builtin_neon_vcvt_f64_v:
  case NEON::BI__builtin_neon_vcvtq_f64_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_f64_f32: {
    assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
           "unexpected vcvt_f64_f32 builtin");
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));

    return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvt_f32_f64: {
    assert(Type.getEltType() == NeonTypeFlags::Float32 &&
           "unexpected vcvt_f32_f64 builtin");
    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));

    return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    if (usgn)
      return Builder.CreateFPToUI(Ops[0], Ty);
    return Builder.CreateFPToSI(Ops[0], Ty);
  }
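  // The explicitly rounded conversions map onto the AArch64 fcvt{a,m,n,p}
  // instructions: 'a' rounds to nearest (ties away), 'm' toward -infinity,
  // 'n' to nearest (ties to even), 'p' toward +infinity.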
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
  }
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
  }
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
  }
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
    Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
  }
  case NEON::BI__builtin_neon_vmulx_v:
  case NEON::BI__builtin_neon_vmulxq_v: {
    Int = Intrinsic::aarch64_neon_fmulx;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
  }
  case NEON::BI__builtin_neon_vmul_lane_v:
  case NEON::BI__builtin_neon_vmul_laneq_v: {
    // v1f64 vmul_lane should be mapped to Neon scalar mul lane
    bool Quad = false;
    if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
      Quad = true;
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    llvm::Type *VTy = GetNeonType(this,
      NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
    Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
    return Builder.CreateBitCast(Result, Ty);
  }
  case NEON::BI__builtin_neon_vnegd_s64:
    return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
  case NEON::BI__builtin_neon_vpmaxnm_v:
  case NEON::BI__builtin_neon_vpmaxnmq_v: {
    Int = Intrinsic::aarch64_neon_fmaxnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
  }
  case NEON::BI__builtin_neon_vpminnm_v:
  case NEON::BI__builtin_neon_vpminnmq_v: {
    Int = Intrinsic::aarch64_neon_fminnmp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
  }
  case NEON::BI__builtin_neon_vsqrt_v:
  case NEON::BI__builtin_neon_vsqrtq_v: {
    Int = Intrinsic::sqrt;
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
  }
  case NEON::BI__builtin_neon_vrbit_v:
  case NEON::BI__builtin_neon_vrbitq_v: {
    Int = Intrinsic::aarch64_neon_rbit;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
  }
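  // The across-vector reductions below all follow one pattern: call the
  // aarch64 reduction intrinsic, which is defined to return i32, then
  // truncate to the ACLE result width, e.g. (illustrative):
  //   uint8_t m = vmaxv_u8(v);   // aarch64.neon.umaxv.i32.v8i8, trunc to i8
  //   uint16_t s = vaddlv_u8(v); // aarch64.neon.uaddlv.i32.v8i8, trunc to i16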
5889  case NEON::BI__builtin_neon_vaddv_u8:
5890    // FIXME: These are handled by the AArch64 scalar code.
5891    usgn = true;
5892    // FALLTHROUGH
5893  case NEON::BI__builtin_neon_vaddv_s8: {
5894    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5895    Ty = Int32Ty;
5896    VTy = llvm::VectorType::get(Int8Ty, 8);
5897    llvm::Type *Tys[2] = { Ty, VTy };
5898    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5899    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5900    return Builder.CreateTrunc(Ops[0], Int8Ty);
5901  }
5902  case NEON::BI__builtin_neon_vaddv_u16:
5903    usgn = true;
5904    // FALLTHROUGH
5905  case NEON::BI__builtin_neon_vaddv_s16: {
5906    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5907    Ty = Int32Ty;
5908    VTy = llvm::VectorType::get(Int16Ty, 4);
5909    llvm::Type *Tys[2] = { Ty, VTy };
5910    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5911    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5912    return Builder.CreateTrunc(Ops[0], Int16Ty);
5913  }
5914  case NEON::BI__builtin_neon_vaddvq_u8:
5915    usgn = true;
5916    // FALLTHROUGH
5917  case NEON::BI__builtin_neon_vaddvq_s8: {
5918    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5919    Ty = Int32Ty;
5920    VTy = llvm::VectorType::get(Int8Ty, 16);
5921    llvm::Type *Tys[2] = { Ty, VTy };
5922    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5923    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5924    return Builder.CreateTrunc(Ops[0], Int8Ty);
5925  }
5926  case NEON::BI__builtin_neon_vaddvq_u16:
5927    usgn = true;
5928    // FALLTHROUGH
5929  case NEON::BI__builtin_neon_vaddvq_s16: {
5930    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5931    Ty = Int32Ty;
5932    VTy = llvm::VectorType::get(Int16Ty, 8);
5933    llvm::Type *Tys[2] = { Ty, VTy };
5934    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5935    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5936    return Builder.CreateTrunc(Ops[0], Int16Ty);
5937  }
5938  case NEON::BI__builtin_neon_vmaxv_u8: {
5939    Int = Intrinsic::aarch64_neon_umaxv;
5940    Ty = Int32Ty;
5941    VTy = llvm::VectorType::get(Int8Ty, 8);
5942    llvm::Type *Tys[2] = { Ty, VTy };
5943    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5944    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5945    return Builder.CreateTrunc(Ops[0], Int8Ty);
5946  }
5947  case NEON::BI__builtin_neon_vmaxv_u16: {
5948    Int = Intrinsic::aarch64_neon_umaxv;
5949    Ty = Int32Ty;
5950    VTy = llvm::VectorType::get(Int16Ty, 4);
5951    llvm::Type *Tys[2] = { Ty, VTy };
5952    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5953    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5954    return Builder.CreateTrunc(Ops[0], Int16Ty);
5955  }
5956  case NEON::BI__builtin_neon_vmaxvq_u8: {
5957    Int = Intrinsic::aarch64_neon_umaxv;
5958    Ty = Int32Ty;
5959    VTy = llvm::VectorType::get(Int8Ty, 16);
5960    llvm::Type *Tys[2] = { Ty, VTy };
5961    Ops.push_back(EmitScalarExpr(E->getArg(0)));
5962    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5963    return Builder.CreateTrunc(Ops[0], Int8Ty);
5964  }
  case NEON::BI__builtin_neon_vmaxvq_u16: {
    Int = Intrinsic::aarch64_neon_umaxv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_s8: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxv_s16: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_s8: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vmaxvq_s16: {
    Int = Intrinsic::aarch64_neon_smaxv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminv_u8: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminv_u16: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminvq_u8: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminvq_u16: {
    Int = Intrinsic::aarch64_neon_uminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminv_s8: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminv_s16: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vminvq_s8: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int8Ty);
  }
  case NEON::BI__builtin_neon_vminvq_s16: {
    Int = Intrinsic::aarch64_neon_sminv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
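  // For reference, a sketch of the across-vector reduction lowering above
  // (assuming the usual <arm_neon.h> mapping): a call such as
  //   int16_t r = vminvq_s16(v);   // v : int16x8_t
  // becomes roughly
  //   %1 = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %v)
  //   %2 = trunc i32 %1 to i16
  // i.e. the intrinsic always yields an i32 that is truncated back down to
  // the element width.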
  case NEON::BI__builtin_neon_vmul_n_f64: {
    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
    Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
    return Builder.CreateFMul(Ops[0], RHS);
  }
  case NEON::BI__builtin_neon_vaddlv_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlv_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlv_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int8Ty, 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vaddlvq_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = Int32Ty;
    VTy = llvm::VectorType::get(Int16Ty, 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v: {
    Int = Intrinsic::aarch64_neon_vsri;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsri_n");
  }
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v: {
    Int = Intrinsic::aarch64_neon_vsli;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsli_n");
  }
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v: {
    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
    SmallVector<llvm::Value*,2> TmpOps;
    TmpOps.push_back(Ops[1]);
    TmpOps.push_back(Ops[2]);
    Function* F = CGM.getIntrinsic(Int, Ty);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return Builder.CreateAdd(Ops[0], tmp);
  }
    // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
    // of an Align parameter here.
  case NEON::BI__builtin_neon_vld1_x2_v:
  case NEON::BI__builtin_neon_vld1q_x2_v:
  case NEON::BI__builtin_neon_vld1_x3_v:
  case NEON::BI__builtin_neon_vld1q_x3_v:
  case NEON::BI__builtin_neon_vld1_x4_v:
  case NEON::BI__builtin_neon_vld1q_x4_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    unsigned Int;
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vld1_x2_v:
    case NEON::BI__builtin_neon_vld1q_x2_v:
      Int = Intrinsic::aarch64_neon_ld1x2;
      break;
    case NEON::BI__builtin_neon_vld1_x3_v:
    case NEON::BI__builtin_neon_vld1q_x3_v:
      Int = Intrinsic::aarch64_neon_ld1x3;
      break;
    case NEON::BI__builtin_neon_vld1_x4_v:
    case NEON::BI__builtin_neon_vld1q_x4_v:
      Int = Intrinsic::aarch64_neon_ld1x4;
      break;
    }
    Function *F = CGM.getIntrinsic(Int, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vst1_x2_v:
  case NEON::BI__builtin_neon_vst1q_x2_v:
  case NEON::BI__builtin_neon_vst1_x3_v:
  case NEON::BI__builtin_neon_vst1q_x3_v:
  case NEON::BI__builtin_neon_vst1_x4_v:
  case NEON::BI__builtin_neon_vst1q_x4_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
    llvm::Type *Tys[2] = { VTy, PTy };
    unsigned Int;
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vst1_x2_v:
    case NEON::BI__builtin_neon_vst1q_x2_v:
      Int = Intrinsic::aarch64_neon_st1x2;
      break;
    case NEON::BI__builtin_neon_vst1_x3_v:
    case NEON::BI__builtin_neon_vst1q_x3_v:
      Int = Intrinsic::aarch64_neon_st1x3;
      break;
    case NEON::BI__builtin_neon_vst1_x4_v:
    case NEON::BI__builtin_neon_vst1q_x4_v:
      Int = Intrinsic::aarch64_neon_st1x4;
      break;
    }
    std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
    return Builder.CreateDefaultAlignedLoad(Ops[0]);
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  case NEON::BI__builtin_neon_vld1_lane_v:
  case NEON::BI__builtin_neon_vld1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
    return EmitNeonSplat(Ops[0], CI);
  }
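  // A rough sketch of the vld1_dup lowering above (illustrative IR only):
  //   uint32x4_t v = vld1q_dup_u32(p);
  // emits approximately
  //   %e  = load i32, i32* %p
  //   %v0 = insertelement <4 x i32> undef, i32 %e, i32 0
  //   %v  = shufflevector <4 x i32> %v0, <4 x i32> undef,
  //                       <4 x i32> zeroinitializer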
  case NEON::BI__builtin_neon_vst1_lane_v:
  case NEON::BI__builtin_neon_vst1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    return Builder.CreateDefaultAlignedStore(Ops[1],
                                             Builder.CreateBitCast(Ops[0], Ty));
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v: {
    llvm::Type *PTy =
      llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v: {
    llvm::Type *PTy =
      llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    llvm::Type *PTy =
      llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
    Ops.push_back(Ops[1]);
    Ops.erase(Ops.begin()+1);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
    Ops.push_back(Ops[1]);
    Ops.erase(Ops.begin()+1);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
    Ops.push_back(Ops[1]);
    Ops.erase(Ops.begin()+1);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
    llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
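  // To illustrate the masks built for vtrn/vuzp/vzip above with a 4-element
  // vector (indices 0-3 select from Ops[1], 4-7 from Ops[2]):
  //   vtrn: <0,4,2,6> then <1,5,3,7>
  //   vuzp: <0,2,4,6> then <1,3,5,7>
  //   vzip: <0,4,1,5> then <2,6,3,7>
  // Each of the two shuffles is stored to consecutive slots of the result
  // pointer in Ops[0].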
  case NEON::BI__builtin_neon_vqtbl1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
                        Ops, "vtbl1");
  }
  case NEON::BI__builtin_neon_vqtbl2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
                        Ops, "vtbl2");
  }
  case NEON::BI__builtin_neon_vqtbl3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
                        Ops, "vtbl3");
  }
  case NEON::BI__builtin_neon_vqtbl4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
                        Ops, "vtbl4");
  }
  case NEON::BI__builtin_neon_vqtbx1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
                        Ops, "vtbx1");
  }
  case NEON::BI__builtin_neon_vqtbx2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
                        Ops, "vtbx2");
  }
  case NEON::BI__builtin_neon_vqtbx3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
                        Ops, "vtbx3");
  }
  case NEON::BI__builtin_neon_vqtbx4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
                        Ops, "vtbx4");
  }
  case NEON::BI__builtin_neon_vsqadd_v:
  case NEON::BI__builtin_neon_vsqaddq_v: {
    Int = Intrinsic::aarch64_neon_usqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
  }
  case NEON::BI__builtin_neon_vuqadd_v:
  case NEON::BI__builtin_neon_vuqaddq_v: {
    Int = Intrinsic::aarch64_neon_suqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
  }
  }
}

llvm::Value *CodeGenFunction::
BuildVector(ArrayRef<llvm::Value*> Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    SmallVector<llvm::Constant*, 16> CstOps;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      CstOps.push_back(cast<Constant>(Ops[i]));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result =
    llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));

  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));

  return Result;
}
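
// For example (illustrative): BuildVector({c0, c1, c2, c3}) with all-constant
// operands folds to a single ConstantVector, while any non-constant operand
// instead produces an insertelement chain:
//   %v0 = insertelement <4 x i32> undef, i32 %c0, i32 0
//   %v1 = insertelement <4 x i32> %v0, i32 %c1, i32 1
//   ... and so on for the remaining elements.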

// Convert the mask from an integer type to a vector of i1.
static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
                              unsigned NumElts) {

  llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
                         cast<IntegerType>(Mask->getType())->getBitWidth());
  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);

  // If we have fewer than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
                                             makeArrayRef(Indices, NumElts),
                                             "extract");
  }
  return MaskVec;
}
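
// For example (illustrative only): an i8 mask paired with a 4-element vector
// is bitcast to <8 x i1> and then narrowed with a shufflevector:
//   %m  = bitcast i8 %mask to <8 x i1>
//   %m4 = shufflevector <8 x i1> %m, <8 x i1> %m, <4 x i32> <i32 0, i32 1,
//                                                            i32 2, i32 3>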

static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
                                 SmallVectorImpl<Value *> &Ops,
                                 unsigned Align) {
  // Cast the pointer to the right type.
  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
                               llvm::PointerType::getUnqual(Ops[1]->getType()));

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Ops[2]))
    if (C->isAllOnesValue())
      return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);

  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
                                   Ops[1]->getType()->getVectorNumElements());

  return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
}

static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
                                SmallVectorImpl<Value *> &Ops, unsigned Align) {
  // Cast the pointer to the right type.
  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
                               llvm::PointerType::getUnqual(Ops[1]->getType()));

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Ops[2]))
    if (C->isAllOnesValue())
      return CGF.Builder.CreateAlignedLoad(Ops[0], Align);

  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
                                   Ops[1]->getType()->getVectorNumElements());

  return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
}

static Value *EmitX86Select(CodeGenFunction &CGF,
                            Value *Mask, Value *Op0, Value *Op1) {

  // If the mask is all ones just return the first argument.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());

  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
}
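
// Roughly, for a non-constant 8-bit mask EmitX86Select produces:
//   %m = bitcast i8 %mask to <8 x i1>
//   %r = select <8 x i1> %m, <8 x i32> %op0, <8 x i32> %op1
// and collapses to just %op0 when the mask is a known all-ones constant.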

static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
                                   bool Signed, SmallVectorImpl<Value *> &Ops) {
  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
  Value *Cmp;

  if (CC == 3) {
    Cmp = Constant::getNullValue(
                       llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
                       llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE;  break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
  }

  const auto *C = dyn_cast<Constant>(Ops.back());
  if (!C || !C->isAllOnesValue())
    Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));

  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = i % NumElts + NumElts;
    Cmp = CGF.Builder.CreateShuffleVector(
        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
  }
  return CGF.Builder.CreateBitCast(Cmp,
                                   IntegerType::get(CGF.getLLVMContext(),
                                                    std::max(NumElts, 8U)));
}
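
// For example, a masked signed less-than compare (CC == 1) of two <4 x i32>
// vectors is emitted approximately as:
//   %c = icmp slt <4 x i32> %a, %b
//   %m = and <4 x i1> %c, %maskvec
//   ; then widen <4 x i1> to <8 x i1> with zeroes and bitcast to i8
// so the result matches the i8/i16 mask-register ABI of these builtins.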

Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  if (BuiltinID == X86::BI__builtin_ms_va_start ||
      BuiltinID == X86::BI__builtin_ms_va_end)
    return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
                          BuiltinID == X86::BI__builtin_ms_va_start);
  if (BuiltinID == X86::BI__builtin_ms_va_copy) {
    // Lower this manually. We can't reliably determine whether or not any
    // given va_copy() is for a Win64 va_list from the calling convention
    // alone, because it's legal to do this from a System V ABI function.
    // With opaque pointer types, we won't have enough information in LLVM
    // IR to determine this from the argument types, either. Best to do it
    // now, while we have enough information.
    Address DestAddr = EmitMSVAListRef(E->getArg(0));
    Address SrcAddr = EmitMSVAListRef(E->getArg(1));

    llvm::Type *BPP = Int8PtrPtrTy;

    DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
                       DestAddr.getAlignment());
    SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
                      SrcAddr.getAlignment());

    Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
    return Builder.CreateStore(ArgPtr, DestAddr);
  }

  SmallVector<Value*, 4> Ops;

  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // If this is a normal argument, just emit it as a scalar.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
      continue;
    }

    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    llvm::APSInt Result;
    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
  }

  // These exist so that the builtin that takes an immediate can be bounds
  // checked by clang to avoid passing bad immediates to the backend. Since
  // AVX has a larger immediate than SSE we would need separate builtins to
  // do the different bounds checking. Rather than create a clang-specific
  // SSE-only builtin, this implements eight separate builtins to match the
  // gcc implementation.
  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
    Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops);
  };

  // For the vector forms of FP comparisons, translate the builtins directly to
  // IR.
  // TODO: The builtins could be removed if the SSE header files used vector
  // extension comparisons directly (vector ordered/unordered may need
  // additional support via __builtin_isnan()).
  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
    Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
    llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
    llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
    Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
    return Builder.CreateBitCast(Sext, FPVecTy);
  };
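  // E.g. getVectorFCmpIR(CmpInst::FCMP_OLT) on two <4 x float> operands
  // yields approximately:
  //   %c = fcmp olt <4 x float> %a, %b
  //   %s = sext <4 x i1> %c to <4 x i32>
  //   %r = bitcast <4 x i32> %s to <4 x float>
  // matching the all-ones/all-zeroes lane convention of the SSE compares.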

  switch (BuiltinID) {
  default: return nullptr;
  case X86::BI__builtin_cpu_supports: {
    const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
    StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();

    // TODO: When/if this becomes more than x86 specific then use a TargetInfo
    // based mapping.
    // Processor features and mapping to processor feature value.
    enum X86Features {
      CMOV = 0,
      MMX,
      POPCNT,
      SSE,
      SSE2,
      SSE3,
      SSSE3,
      SSE4_1,
      SSE4_2,
      AVX,
      AVX2,
      SSE4_A,
      FMA4,
      XOP,
      FMA,
      AVX512F,
      BMI,
      BMI2,
      AES,
      PCLMUL,
      AVX512VL,
      AVX512BW,
      AVX512DQ,
      AVX512CD,
      AVX512ER,
      AVX512PF,
      AVX512VBMI,
      AVX512IFMA,
      MAX
    };

    X86Features Feature = StringSwitch<X86Features>(FeatureStr)
                              .Case("cmov", X86Features::CMOV)
                              .Case("mmx", X86Features::MMX)
                              .Case("popcnt", X86Features::POPCNT)
                              .Case("sse", X86Features::SSE)
                              .Case("sse2", X86Features::SSE2)
                              .Case("sse3", X86Features::SSE3)
                              .Case("ssse3", X86Features::SSSE3)
                              .Case("sse4.1", X86Features::SSE4_1)
                              .Case("sse4.2", X86Features::SSE4_2)
                              .Case("avx", X86Features::AVX)
                              .Case("avx2", X86Features::AVX2)
                              .Case("sse4a", X86Features::SSE4_A)
                              .Case("fma4", X86Features::FMA4)
                              .Case("xop", X86Features::XOP)
                              .Case("fma", X86Features::FMA)
                              .Case("avx512f", X86Features::AVX512F)
                              .Case("bmi", X86Features::BMI)
                              .Case("bmi2", X86Features::BMI2)
                              .Case("aes", X86Features::AES)
                              .Case("pclmul", X86Features::PCLMUL)
                              .Case("avx512vl", X86Features::AVX512VL)
                              .Case("avx512bw", X86Features::AVX512BW)
                              .Case("avx512dq", X86Features::AVX512DQ)
                              .Case("avx512cd", X86Features::AVX512CD)
                              .Case("avx512er", X86Features::AVX512ER)
                              .Case("avx512pf", X86Features::AVX512PF)
                              .Case("avx512vbmi", X86Features::AVX512VBMI)
                              .Case("avx512ifma", X86Features::AVX512IFMA)
                              .Default(X86Features::MAX);
    assert(Feature != X86Features::MAX && "Invalid feature!");

    // Matching the struct layout from the compiler-rt/libgcc structure that is
    // filled in:
    // unsigned int __cpu_vendor;
    // unsigned int __cpu_type;
    // unsigned int __cpu_subtype;
    // unsigned int __cpu_features[1];
    llvm::Type *STy = llvm::StructType::get(
        Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);

    // Grab the global __cpu_model.
    llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");

    // Grab the first (0th) element of the __cpu_features field from the
    // global, indexing through the struct type STy.
    Value *Idxs[] = {
      ConstantInt::get(Int32Ty, 0),
      ConstantInt::get(Int32Ty, 3),
      ConstantInt::get(Int32Ty, 0)
    };
    Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
    Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
                                                CharUnits::fromQuantity(4));

    // Check the value of the bit corresponding to the feature requested.
    Value *Bitset = Builder.CreateAnd(
        Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
    return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
  }
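  // Net effect (sketch): __builtin_cpu_supports("sse4.2") loads
  // __cpu_model.__cpu_features[0] and tests the SSE4_2 bit, roughly:
  //   %f = load i32 from the GEP of @__cpu_model computed above
  //   %b = and i32 %f, (1 << SSE4_2)
  //   %r = icmp ne i32 %b, 0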
  case X86::BI_mm_prefetch: {
    Value *Address = Ops[0];
    Value *RW = ConstantInt::get(Int32Ty, 0);
    Value *Locality = Ops[1];
    Value *Data = ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
  }
  case X86::BI__builtin_ia32_undef128:
  case X86::BI__builtin_ia32_undef256:
  case X86::BI__builtin_ia32_undef512:
    return UndefValue::get(ConvertType(E->getType()));
  case X86::BI__builtin_ia32_vec_init_v8qi:
  case X86::BI__builtin_ia32_vec_init_v4hi:
  case X86::BI__builtin_ia32_vec_init_v2si:
    return Builder.CreateBitCast(BuildVector(Ops),
                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
  case X86::BI__builtin_ia32_vec_ext_v2si:
    return Builder.CreateExtractElement(Ops[0],
                                  llvm::ConstantInt::get(Ops[1]->getType(), 0));
  case X86::BI__builtin_ia32_ldmxcsr: {
    Address Tmp = CreateMemTemp(E->getArg(0)->getType());
    Builder.CreateStore(Ops[0], Tmp);
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
                          Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
  }
  case X86::BI__builtin_ia32_stmxcsr: {
    Address Tmp = CreateMemTemp(E->getType());
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
                       Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
    return Builder.CreateLoad(Tmp, "stmxcsr");
  }
  case X86::BI__builtin_ia32_xsave:
  case X86::BI__builtin_ia32_xsave64:
  case X86::BI__builtin_ia32_xrstor:
  case X86::BI__builtin_ia32_xrstor64:
  case X86::BI__builtin_ia32_xsaveopt:
  case X86::BI__builtin_ia32_xsaveopt64:
  case X86::BI__builtin_ia32_xrstors:
  case X86::BI__builtin_ia32_xrstors64:
  case X86::BI__builtin_ia32_xsavec:
  case X86::BI__builtin_ia32_xsavec64:
  case X86::BI__builtin_ia32_xsaves:
  case X86::BI__builtin_ia32_xsaves64: {
    Intrinsic::ID ID;
#define INTRINSIC_X86_XSAVE_ID(NAME) \
    case X86::BI__builtin_ia32_##NAME: \
      ID = Intrinsic::x86_##NAME; \
      break
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    INTRINSIC_X86_XSAVE_ID(xsave);
    INTRINSIC_X86_XSAVE_ID(xsave64);
    INTRINSIC_X86_XSAVE_ID(xrstor);
    INTRINSIC_X86_XSAVE_ID(xrstor64);
    INTRINSIC_X86_XSAVE_ID(xsaveopt);
    INTRINSIC_X86_XSAVE_ID(xsaveopt64);
    INTRINSIC_X86_XSAVE_ID(xrstors);
    INTRINSIC_X86_XSAVE_ID(xrstors64);
    INTRINSIC_X86_XSAVE_ID(xsavec);
    INTRINSIC_X86_XSAVE_ID(xsavec64);
    INTRINSIC_X86_XSAVE_ID(xsaves);
    INTRINSIC_X86_XSAVE_ID(xsaves64);
    }
#undef INTRINSIC_X86_XSAVE_ID
    Value *Mhi = Builder.CreateTrunc(
      Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
    Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
    Ops[1] = Mhi;
    Ops.push_back(Mlo);
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }
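  // I.e. the 64-bit mask argument is split into the EDX:EAX pair that the
  // XSAVE family expects; __builtin_ia32_xsave(p, m) becomes roughly:
  //   %hi = trunc i64 (lshr i64 %m, 32) to i32
  //   %lo = trunc i64 %m to i32
  //   call void @llvm.x86.xsave(i8* %p, i32 %hi, i32 %lo)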
  case X86::BI__builtin_ia32_storedqudi128_mask:
  case X86::BI__builtin_ia32_storedqusi128_mask:
  case X86::BI__builtin_ia32_storedquhi128_mask:
  case X86::BI__builtin_ia32_storedquqi128_mask:
  case X86::BI__builtin_ia32_storeupd128_mask:
  case X86::BI__builtin_ia32_storeups128_mask:
  case X86::BI__builtin_ia32_storedqudi256_mask:
  case X86::BI__builtin_ia32_storedqusi256_mask:
  case X86::BI__builtin_ia32_storedquhi256_mask:
  case X86::BI__builtin_ia32_storedquqi256_mask:
  case X86::BI__builtin_ia32_storeupd256_mask:
  case X86::BI__builtin_ia32_storeups256_mask:
  case X86::BI__builtin_ia32_storedqudi512_mask:
  case X86::BI__builtin_ia32_storedqusi512_mask:
  case X86::BI__builtin_ia32_storedquhi512_mask:
  case X86::BI__builtin_ia32_storedquqi512_mask:
  case X86::BI__builtin_ia32_storeupd512_mask:
  case X86::BI__builtin_ia32_storeups512_mask:
    return EmitX86MaskedStore(*this, Ops, 1);

  case X86::BI__builtin_ia32_movdqa32store128_mask:
  case X86::BI__builtin_ia32_movdqa64store128_mask:
  case X86::BI__builtin_ia32_storeaps128_mask:
  case X86::BI__builtin_ia32_storeapd128_mask:
  case X86::BI__builtin_ia32_movdqa32store256_mask:
  case X86::BI__builtin_ia32_movdqa64store256_mask:
  case X86::BI__builtin_ia32_storeaps256_mask:
  case X86::BI__builtin_ia32_storeapd256_mask:
  case X86::BI__builtin_ia32_movdqa32store512_mask:
  case X86::BI__builtin_ia32_movdqa64store512_mask:
  case X86::BI__builtin_ia32_storeaps512_mask:
  case X86::BI__builtin_ia32_storeapd512_mask: {
    unsigned Align =
      getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
    return EmitX86MaskedStore(*this, Ops, Align);
  }
  case X86::BI__builtin_ia32_loadups128_mask:
  case X86::BI__builtin_ia32_loadups256_mask:
  case X86::BI__builtin_ia32_loadups512_mask:
  case X86::BI__builtin_ia32_loadupd128_mask:
  case X86::BI__builtin_ia32_loadupd256_mask:
  case X86::BI__builtin_ia32_loadupd512_mask:
  case X86::BI__builtin_ia32_loaddquqi128_mask:
  case X86::BI__builtin_ia32_loaddquqi256_mask:
  case X86::BI__builtin_ia32_loaddquqi512_mask:
  case X86::BI__builtin_ia32_loaddquhi128_mask:
  case X86::BI__builtin_ia32_loaddquhi256_mask:
  case X86::BI__builtin_ia32_loaddquhi512_mask:
  case X86::BI__builtin_ia32_loaddqusi128_mask:
  case X86::BI__builtin_ia32_loaddqusi256_mask:
  case X86::BI__builtin_ia32_loaddqusi512_mask:
  case X86::BI__builtin_ia32_loaddqudi128_mask:
  case X86::BI__builtin_ia32_loaddqudi256_mask:
  case X86::BI__builtin_ia32_loaddqudi512_mask:
    return EmitX86MaskedLoad(*this, Ops, 1);

  case X86::BI__builtin_ia32_loadaps128_mask:
  case X86::BI__builtin_ia32_loadaps256_mask:
  case X86::BI__builtin_ia32_loadaps512_mask:
  case X86::BI__builtin_ia32_loadapd128_mask:
  case X86::BI__builtin_ia32_loadapd256_mask:
  case X86::BI__builtin_ia32_loadapd512_mask:
  case X86::BI__builtin_ia32_movdqa32load128_mask:
  case X86::BI__builtin_ia32_movdqa32load256_mask:
  case X86::BI__builtin_ia32_movdqa32load512_mask:
  case X86::BI__builtin_ia32_movdqa64load128_mask:
  case X86::BI__builtin_ia32_movdqa64load256_mask:
  case X86::BI__builtin_ia32_movdqa64load512_mask: {
    unsigned Align =
      getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
    return EmitX86MaskedLoad(*this, Ops, Align);
  }
  case X86::BI__builtin_ia32_storehps:
  case X86::BI__builtin_ia32_storelps: {
    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

    // Cast the value to v2i64.
    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");

    // Extract element 0 (storelps) or 1 (storehps).
    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
    llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");

    // Cast the pointer to i64* and store.
    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case X86::BI__builtin_ia32_palignr128:
  case X86::BI__builtin_ia32_palignr256:
  case X86::BI__builtin_ia32_palignr128_mask:
  case X86::BI__builtin_ia32_palignr256_mask:
  case X86::BI__builtin_ia32_palignr512_mask: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
    assert(NumElts % 16 == 0);

    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if (ShiftVal >= 32)
      return llvm::Constant::getNullValue(ConvertType(E->getType()));

    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    if (ShiftVal > 16) {
      ShiftVal -= 16;
      Ops[1] = Ops[0];
      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
    }

    uint32_t Indices[64];
    // 256-bit palignr operates on 128-bit lanes, so we need to handle that.
    for (unsigned l = 0; l != NumElts; l += 16) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = ShiftVal + i;
        if (Idx >= 16)
          Idx += NumElts - 16; // End of lane, switch operand.
        Indices[l + i] = Idx + l;
      }
    }

    Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
                                               makeArrayRef(Indices, NumElts),
                                               "palignr");

    // If this isn't a masked builtin, just return the align operation.
    if (Ops.size() == 3)
      return Align;

    return EmitX86Select(*this, Ops[4], Align, Ops[3]);
  }
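  // To illustrate the palignr index computation above: a 128-bit palignr
  // with ShiftVal == 4 takes bytes 4..15 of the second source followed by
  // bytes 0..3 of the first, i.e. shuffle indices
  //   <4, 5, ..., 15, 16, 17, 18, 19>
  // over the concatenation (Ops[1], Ops[0]).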

  case X86::BI__builtin_ia32_movnti:
  case X86::BI__builtin_ia32_movnti64: {
    llvm::MDNode *Node = llvm::MDNode::get(
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));

    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(Ops[0],
                                llvm::PointerType::getUnqual(Ops[1]->getType()),
                                      "cast");
    StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);

    // No alignment for scalar intrinsic store.
    SI->setAlignment(1);
    return SI;
  }
  case X86::BI__builtin_ia32_movntsd:
  case X86::BI__builtin_ia32_movntss: {
    llvm::MDNode *Node = llvm::MDNode::get(
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));

    // Extract the 0th element of the source vector.
    Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract");

    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(Ops[0],
                                llvm::PointerType::getUnqual(Scl->getType()),
                                      "cast");

    // Unaligned nontemporal store of the scalar value.
    StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC);
    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
    SI->setAlignment(1);
    return SI;
  }

  case X86::BI__builtin_ia32_selectb_128:
  case X86::BI__builtin_ia32_selectb_256:
  case X86::BI__builtin_ia32_selectb_512:
  case X86::BI__builtin_ia32_selectw_128:
  case X86::BI__builtin_ia32_selectw_256:
  case X86::BI__builtin_ia32_selectw_512:
  case X86::BI__builtin_ia32_selectd_128:
  case X86::BI__builtin_ia32_selectd_256:
  case X86::BI__builtin_ia32_selectd_512:
  case X86::BI__builtin_ia32_selectq_128:
  case X86::BI__builtin_ia32_selectq_256:
  case X86::BI__builtin_ia32_selectq_512:
  case X86::BI__builtin_ia32_selectps_128:
  case X86::BI__builtin_ia32_selectps_256:
  case X86::BI__builtin_ia32_selectps_512:
  case X86::BI__builtin_ia32_selectpd_128:
  case X86::BI__builtin_ia32_selectpd_256:
  case X86::BI__builtin_ia32_selectpd_512:
    return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
  case X86::BI__builtin_ia32_pcmpeqb128_mask:
  case X86::BI__builtin_ia32_pcmpeqb256_mask:
  case X86::BI__builtin_ia32_pcmpeqb512_mask:
  case X86::BI__builtin_ia32_pcmpeqw128_mask:
  case X86::BI__builtin_ia32_pcmpeqw256_mask:
  case X86::BI__builtin_ia32_pcmpeqw512_mask:
  case X86::BI__builtin_ia32_pcmpeqd128_mask:
  case X86::BI__builtin_ia32_pcmpeqd256_mask:
  case X86::BI__builtin_ia32_pcmpeqd512_mask:
  case X86::BI__builtin_ia32_pcmpeqq128_mask:
  case X86::BI__builtin_ia32_pcmpeqq256_mask:
  case X86::BI__builtin_ia32_pcmpeqq512_mask:
    return EmitX86MaskedCompare(*this, 0, false, Ops);
  case X86::BI__builtin_ia32_pcmpgtb128_mask:
  case X86::BI__builtin_ia32_pcmpgtb256_mask:
  case X86::BI__builtin_ia32_pcmpgtb512_mask:
  case X86::BI__builtin_ia32_pcmpgtw128_mask:
  case X86::BI__builtin_ia32_pcmpgtw256_mask:
  case X86::BI__builtin_ia32_pcmpgtw512_mask:
  case X86::BI__builtin_ia32_pcmpgtd128_mask:
  case X86::BI__builtin_ia32_pcmpgtd256_mask:
  case X86::BI__builtin_ia32_pcmpgtd512_mask:
  case X86::BI__builtin_ia32_pcmpgtq128_mask:
  case X86::BI__builtin_ia32_pcmpgtq256_mask:
  case X86::BI__builtin_ia32_pcmpgtq512_mask:
    return EmitX86MaskedCompare(*this, 6, true, Ops);
  case X86::BI__builtin_ia32_cmpb128_mask:
  case X86::BI__builtin_ia32_cmpb256_mask:
  case X86::BI__builtin_ia32_cmpb512_mask:
  case X86::BI__builtin_ia32_cmpw128_mask:
  case X86::BI__builtin_ia32_cmpw256_mask:
  case X86::BI__builtin_ia32_cmpw512_mask:
  case X86::BI__builtin_ia32_cmpd128_mask:
  case X86::BI__builtin_ia32_cmpd256_mask:
  case X86::BI__builtin_ia32_cmpd512_mask:
  case X86::BI__builtin_ia32_cmpq128_mask:
  case X86::BI__builtin_ia32_cmpq256_mask:
  case X86::BI__builtin_ia32_cmpq512_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
    return EmitX86MaskedCompare(*this, CC, true, Ops);
  }
  case X86::BI__builtin_ia32_ucmpb128_mask:
  case X86::BI__builtin_ia32_ucmpb256_mask:
  case X86::BI__builtin_ia32_ucmpb512_mask:
  case X86::BI__builtin_ia32_ucmpw128_mask:
  case X86::BI__builtin_ia32_ucmpw256_mask:
  case X86::BI__builtin_ia32_ucmpw512_mask:
  case X86::BI__builtin_ia32_ucmpd128_mask:
  case X86::BI__builtin_ia32_ucmpd256_mask:
  case X86::BI__builtin_ia32_ucmpd512_mask:
  case X86::BI__builtin_ia32_ucmpq128_mask:
  case X86::BI__builtin_ia32_ucmpq256_mask:
  case X86::BI__builtin_ia32_ucmpq512_mask: {
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
    return EmitX86MaskedCompare(*this, CC, false, Ops);
  }

  case X86::BI__builtin_ia32_vplzcntd_128_mask:
  case X86::BI__builtin_ia32_vplzcntd_256_mask:
  case X86::BI__builtin_ia32_vplzcntd_512_mask:
  case X86::BI__builtin_ia32_vplzcntq_128_mask:
  case X86::BI__builtin_ia32_vplzcntq_256_mask:
  case X86::BI__builtin_ia32_vplzcntq_512_mask: {
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
    return EmitX86Select(*this, Ops[2],
                         Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
                         Ops[1]);
  }

  // TODO: Handle 64/512-bit vector widths of min/max.
  case X86::BI__builtin_ia32_pmaxsb128:
  case X86::BI__builtin_ia32_pmaxsw128:
  case X86::BI__builtin_ia32_pmaxsd128:
  case X86::BI__builtin_ia32_pmaxsb256:
  case X86::BI__builtin_ia32_pmaxsw256:
  case X86::BI__builtin_ia32_pmaxsd256: {
    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]);
    return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
  }
  case X86::BI__builtin_ia32_pmaxub128:
  case X86::BI__builtin_ia32_pmaxuw128:
  case X86::BI__builtin_ia32_pmaxud128:
  case X86::BI__builtin_ia32_pmaxub256:
  case X86::BI__builtin_ia32_pmaxuw256:
  case X86::BI__builtin_ia32_pmaxud256: {
    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]);
    return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
  }
  case X86::BI__builtin_ia32_pminsb128:
  case X86::BI__builtin_ia32_pminsw128:
  case X86::BI__builtin_ia32_pminsd128:
  case X86::BI__builtin_ia32_pminsb256:
  case X86::BI__builtin_ia32_pminsw256:
  case X86::BI__builtin_ia32_pminsd256: {
    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]);
    return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
  }
  case X86::BI__builtin_ia32_pminub128:
  case X86::BI__builtin_ia32_pminuw128:
  case X86::BI__builtin_ia32_pminud128:
  case X86::BI__builtin_ia32_pminub256:
  case X86::BI__builtin_ia32_pminuw256:
  case X86::BI__builtin_ia32_pminud256: {
    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]);
    return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
  }
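  // Each of these min/max builtins lowers to a plain compare-and-select,
  // e.g. for pmaxsd:
  //   %c = icmp sgt <4 x i32> %a, %b
  //   %r = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
  // which the backend re-recognizes as the corresponding min/max instruction.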
7221
7222  // 3DNow!
7223  case X86::BI__builtin_ia32_pswapdsf:
7224  case X86::BI__builtin_ia32_pswapdsi: {
7225    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7226    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7227    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7228    return Builder.CreateCall(F, Ops, "pswapd");
7229  }
7230  case X86::BI__builtin_ia32_rdrand16_step:
7231  case X86::BI__builtin_ia32_rdrand32_step:
7232  case X86::BI__builtin_ia32_rdrand64_step:
7233  case X86::BI__builtin_ia32_rdseed16_step:
7234  case X86::BI__builtin_ia32_rdseed32_step:
7235  case X86::BI__builtin_ia32_rdseed64_step: {
7236    Intrinsic::ID ID;
7237    switch (BuiltinID) {
7238    default: llvm_unreachable("Unsupported intrinsic!");
7239    case X86::BI__builtin_ia32_rdrand16_step:
7240      ID = Intrinsic::x86_rdrand_16;
7241      break;
7242    case X86::BI__builtin_ia32_rdrand32_step:
7243      ID = Intrinsic::x86_rdrand_32;
7244      break;
7245    case X86::BI__builtin_ia32_rdrand64_step:
7246      ID = Intrinsic::x86_rdrand_64;
7247      break;
7248    case X86::BI__builtin_ia32_rdseed16_step:
7249      ID = Intrinsic::x86_rdseed_16;
7250      break;
7251    case X86::BI__builtin_ia32_rdseed32_step:
7252      ID = Intrinsic::x86_rdseed_32;
7253      break;
7254    case X86::BI__builtin_ia32_rdseed64_step:
7255      ID = Intrinsic::x86_rdseed_64;
7256      break;
7257    }
7258
7259    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7260    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7261                                      Ops[0]);
7262    return Builder.CreateExtractValue(Call, 1);
7263  }
7264
7265  // SSE packed comparison intrinsics
7266  case X86::BI__builtin_ia32_cmpeqps:
7267  case X86::BI__builtin_ia32_cmpeqpd:
7268    return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7269  case X86::BI__builtin_ia32_cmpltps:
7270  case X86::BI__builtin_ia32_cmpltpd:
7271    return getVectorFCmpIR(CmpInst::FCMP_OLT);
7272  case X86::BI__builtin_ia32_cmpleps:
7273  case X86::BI__builtin_ia32_cmplepd:
7274    return getVectorFCmpIR(CmpInst::FCMP_OLE);
7275  case X86::BI__builtin_ia32_cmpunordps:
7276  case X86::BI__builtin_ia32_cmpunordpd:
7277    return getVectorFCmpIR(CmpInst::FCMP_UNO);
7278  case X86::BI__builtin_ia32_cmpneqps:
7279  case X86::BI__builtin_ia32_cmpneqpd:
7280    return getVectorFCmpIR(CmpInst::FCMP_UNE);
7281  case X86::BI__builtin_ia32_cmpnltps:
7282  case X86::BI__builtin_ia32_cmpnltpd:
7283    return getVectorFCmpIR(CmpInst::FCMP_UGE);
7284  case X86::BI__builtin_ia32_cmpnleps:
7285  case X86::BI__builtin_ia32_cmpnlepd:
7286    return getVectorFCmpIR(CmpInst::FCMP_UGT);
7287  case X86::BI__builtin_ia32_cmpordps:
7288  case X86::BI__builtin_ia32_cmpordpd:
7289    return getVectorFCmpIR(CmpInst::FCMP_ORD);
7290  case X86::BI__builtin_ia32_cmpps:
7291  case X86::BI__builtin_ia32_cmpps256:
7292  case X86::BI__builtin_ia32_cmppd:
7293  case X86::BI__builtin_ia32_cmppd256: {
7294    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7295    // If this one of the SSE immediates, we can use native IR.
    if (CC < 8) {
      FCmpInst::Predicate Pred;
      switch (CC) {
      case 0: Pred = FCmpInst::FCMP_OEQ; break;
      case 1: Pred = FCmpInst::FCMP_OLT; break;
      case 2: Pred = FCmpInst::FCMP_OLE; break;
      case 3: Pred = FCmpInst::FCMP_UNO; break;
      case 4: Pred = FCmpInst::FCMP_UNE; break;
      case 5: Pred = FCmpInst::FCMP_UGE; break;
      case 6: Pred = FCmpInst::FCMP_UGT; break;
      case 7: Pred = FCmpInst::FCMP_ORD; break;
      }
      return getVectorFCmpIR(Pred);
    }

    // We can't handle immediates 8-31 with native IR; use the intrinsic.
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_cmpps:
      ID = Intrinsic::x86_sse_cmp_ps;
      break;
    case X86::BI__builtin_ia32_cmpps256:
      ID = Intrinsic::x86_avx_cmp_ps_256;
      break;
    case X86::BI__builtin_ia32_cmppd:
      ID = Intrinsic::x86_sse2_cmp_pd;
      break;
    case X86::BI__builtin_ia32_cmppd256:
      ID = Intrinsic::x86_avx_cmp_pd_256;
      break;
    }

    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }

  // SSE scalar comparison intrinsics
  case X86::BI__builtin_ia32_cmpeqss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
  case X86::BI__builtin_ia32_cmpltss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
  case X86::BI__builtin_ia32_cmpless:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
  case X86::BI__builtin_ia32_cmpunordss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
  case X86::BI__builtin_ia32_cmpneqss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
  case X86::BI__builtin_ia32_cmpnltss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
  case X86::BI__builtin_ia32_cmpnless:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
  case X86::BI__builtin_ia32_cmpordss:
    return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
  case X86::BI__builtin_ia32_cmpeqsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
  case X86::BI__builtin_ia32_cmpltsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
  case X86::BI__builtin_ia32_cmplesd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
  case X86::BI__builtin_ia32_cmpunordsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
  case X86::BI__builtin_ia32_cmpneqsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
  case X86::BI__builtin_ia32_cmpnltsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
  case X86::BI__builtin_ia32_cmpnlesd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
  case X86::BI__builtin_ia32_cmpordsd:
    return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
  }
}

Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

  switch (BuiltinID) {
  default: return nullptr;

  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
  // call __builtin_readcyclecounter.
  case PPC::BI__builtin_ppc_get_timebase:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));

  // vec_ld, vec_lvsl, vec_lvsr
  case PPC::BI__builtin_altivec_lvx:
  case PPC::BI__builtin_altivec_lvxl:
  case PPC::BI__builtin_altivec_lvebx:
  case PPC::BI__builtin_altivec_lvehx:
  case PPC::BI__builtin_altivec_lvewx:
  case PPC::BI__builtin_altivec_lvsl:
  case PPC::BI__builtin_altivec_lvsr:
  case PPC::BI__builtin_vsx_lxvd2x:
  case PPC::BI__builtin_vsx_lxvw4x:
  {
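    // These builtins take (offset, pointer) arguments; fold them into the
    // single i8* address operand that the load intrinsics expect.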
    Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);

    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
    Ops.pop_back();

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
    case PPC::BI__builtin_altivec_lvx:
      ID = Intrinsic::ppc_altivec_lvx;
      break;
    case PPC::BI__builtin_altivec_lvxl:
      ID = Intrinsic::ppc_altivec_lvxl;
      break;
    case PPC::BI__builtin_altivec_lvebx:
      ID = Intrinsic::ppc_altivec_lvebx;
      break;
    case PPC::BI__builtin_altivec_lvehx:
      ID = Intrinsic::ppc_altivec_lvehx;
      break;
    case PPC::BI__builtin_altivec_lvewx:
      ID = Intrinsic::ppc_altivec_lvewx;
      break;
    case PPC::BI__builtin_altivec_lvsl:
      ID = Intrinsic::ppc_altivec_lvsl;
      break;
    case PPC::BI__builtin_altivec_lvsr:
      ID = Intrinsic::ppc_altivec_lvsr;
      break;
    case PPC::BI__builtin_vsx_lxvd2x:
      ID = Intrinsic::ppc_vsx_lxvd2x;
      break;
    case PPC::BI__builtin_vsx_lxvw4x:
      ID = Intrinsic::ppc_vsx_lxvw4x;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }

  // vec_st
  case PPC::BI__builtin_altivec_stvx:
  case PPC::BI__builtin_altivec_stvxl:
  case PPC::BI__builtin_altivec_stvebx:
  case PPC::BI__builtin_altivec_stvehx:
  case PPC::BI__builtin_altivec_stvewx:
  case PPC::BI__builtin_vsx_stxvd2x:
  case PPC::BI__builtin_vsx_stxvw4x:
  {
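    // As with the loads above, fold the trailing (offset, pointer) pair
    // into a single i8* address operand.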
    Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
    Ops.pop_back();

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported st intrinsic!");
    case PPC::BI__builtin_altivec_stvx:
      ID = Intrinsic::ppc_altivec_stvx;
      break;
    case PPC::BI__builtin_altivec_stvxl:
      ID = Intrinsic::ppc_altivec_stvxl;
      break;
    case PPC::BI__builtin_altivec_stvebx:
      ID = Intrinsic::ppc_altivec_stvebx;
      break;
    case PPC::BI__builtin_altivec_stvehx:
      ID = Intrinsic::ppc_altivec_stvehx;
      break;
    case PPC::BI__builtin_altivec_stvewx:
      ID = Intrinsic::ppc_altivec_stvewx;
      break;
    case PPC::BI__builtin_vsx_stxvd2x:
      ID = Intrinsic::ppc_vsx_stxvd2x;
      break;
    case PPC::BI__builtin_vsx_stxvw4x:
      ID = Intrinsic::ppc_vsx_stxvw4x;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }
  // Square root
  case PPC::BI__builtin_vsx_xvsqrtsp:
  case PPC::BI__builtin_vsx_xvsqrtdp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    ID = Intrinsic::sqrt;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.CreateCall(F, X);
  }
  // Count leading zeros
  case PPC::BI__builtin_altivec_vclzb:
  case PPC::BI__builtin_altivec_vclzh:
  case PPC::BI__builtin_altivec_vclzw:
  case PPC::BI__builtin_altivec_vclzd: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
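    // The second ctlz operand is an i1 'is_zero_undef' flag; pass false so
    // that a zero input yields the element width, matching vclz semantics.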
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }
  // Copy sign
  case PPC::BI__builtin_vsx_xvcpsgnsp:
  case PPC::BI__builtin_vsx_xvcpsgndp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    ID = Intrinsic::copysign;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.CreateCall(F, {X, Y});
  }
  // Rounding/truncation
  case PPC::BI__builtin_vsx_xvrspip:
  case PPC::BI__builtin_vsx_xvrdpip:
  case PPC::BI__builtin_vsx_xvrdpim:
  case PPC::BI__builtin_vsx_xvrspim:
  case PPC::BI__builtin_vsx_xvrdpi:
  case PPC::BI__builtin_vsx_xvrspi:
  case PPC::BI__builtin_vsx_xvrdpic:
  case PPC::BI__builtin_vsx_xvrspic:
  case PPC::BI__builtin_vsx_xvrdpiz:
  case PPC::BI__builtin_vsx_xvrspiz: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
        BuiltinID == PPC::BI__builtin_vsx_xvrspim)
      ID = Intrinsic::floor;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspi)
      ID = Intrinsic::round;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspic)
      ID = Intrinsic::nearbyint;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspip)
      ID = Intrinsic::ceil;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
      ID = Intrinsic::trunc;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.CreateCall(F, X);
  }

  // Absolute value
  case PPC::BI__builtin_vsx_xvabsdp:
  case PPC::BI__builtin_vsx_xvabssp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateCall(F, X);
  }

  // FMA variations
  case PPC::BI__builtin_vsx_xvmaddadp:
  case PPC::BI__builtin_vsx_xvmaddasp:
  case PPC::BI__builtin_vsx_xvnmaddadp:
  case PPC::BI__builtin_vsx_xvnmaddasp:
  case PPC::BI__builtin_vsx_xvmsubadp:
  case PPC::BI__builtin_vsx_xvmsubasp:
  case PPC::BI__builtin_vsx_xvnmsubadp:
  case PPC::BI__builtin_vsx_xvnmsubasp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
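    // The negated forms are built from llvm.fma by negating the addend
    // and/or the result: msub = fma(x, y, -z), nmadd = -fma(x, y, z),
    // nmsub = -fma(x, y, -z).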
    switch (BuiltinID) {
      case PPC::BI__builtin_vsx_xvmaddadp:
      case PPC::BI__builtin_vsx_xvmaddasp:
        return Builder.CreateCall(F, {X, Y, Z});
      case PPC::BI__builtin_vsx_xvnmaddadp:
      case PPC::BI__builtin_vsx_xvnmaddasp:
        return Builder.CreateFSub(Zero,
                                  Builder.CreateCall(F, {X, Y, Z}), "sub");
      case PPC::BI__builtin_vsx_xvmsubadp:
      case PPC::BI__builtin_vsx_xvmsubasp:
        return Builder.CreateCall(F,
                                  {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
      case PPC::BI__builtin_vsx_xvnmsubadp:
      case PPC::BI__builtin_vsx_xvnmsubasp:
        Value *FsubRes =
          Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
        return Builder.CreateFSub(Zero, FsubRes, "sub");
    }
    llvm_unreachable("Unknown FMA operation");
    return nullptr; // Suppress no-return warning
  }
  }
}

Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  switch (BuiltinID) {
  case AMDGPU::BI__builtin_amdgcn_div_scale:
  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
    // Translate from the intrinsic's struct return to the builtin's out
    // argument.

    Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));

    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    llvm::Value *Z = EmitScalarExpr(E->getArg(2));

    llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
                                           X->getType());

    llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});

    llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
    llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);

    llvm::Type *RealFlagType
      = FlagOutPtr.getPointer()->getType()->getPointerElementType();

    llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
    Builder.CreateStore(FlagExt, FlagOutPtr);
    return Result;
  }
  case AMDGPU::BI__builtin_amdgcn_div_fmas:
  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));

    llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
                                      Src0->getType());
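    // The intrinsic's final operand is an i1; convert the builtin's integer
    // argument with a != 0 test.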
    llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
    return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
  }
  case AMDGPU::BI__builtin_amdgcn_div_fixup:
  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
  case AMDGPU::BI__builtin_amdgcn_trig_preop:
  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
  case AMDGPU::BI__builtin_amdgcn_rcp:
  case AMDGPU::BI__builtin_amdgcn_rcpf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
  case AMDGPU::BI__builtin_amdgcn_rsq:
  case AMDGPU::BI__builtin_amdgcn_rsqf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
  case AMDGPU::BI__builtin_amdgcn_sinf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
  case AMDGPU::BI__builtin_amdgcn_cosf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
  case AMDGPU::BI__builtin_amdgcn_log_clampf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
  case AMDGPU::BI__builtin_amdgcn_ldexp:
  case AMDGPU::BI__builtin_amdgcn_ldexpf:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
  case AMDGPU::BI__builtin_amdgcn_frexp_expf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp);
  case AMDGPU::BI__builtin_amdgcn_fract:
  case AMDGPU::BI__builtin_amdgcn_fractf:
    return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
  case AMDGPU::BI__builtin_amdgcn_class:
  case AMDGPU::BI__builtin_amdgcn_classf:
    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);

  case AMDGPU::BI__builtin_amdgcn_read_exec: {
    CallInst *CI = cast<CallInst>(
      EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
    CI->setConvergent();
    return CI;
  }
  // Legacy amdgpu prefix
  case AMDGPU::BI__builtin_amdgpu_rsq:
  case AMDGPU::BI__builtin_amdgpu_rsqf: {
    if (getTarget().getTriple().getArch() == Triple::amdgcn)
      return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
    return emitUnaryBuiltin(*this, E, Intrinsic::r600_rsq);
  }
  case AMDGPU::BI__builtin_amdgpu_ldexp:
  case AMDGPU::BI__builtin_amdgpu_ldexpf: {
    if (getTarget().getTriple().getArch() == Triple::amdgcn)
      return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
    return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
  }

  // amdgcn workitem
  case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
  case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
  case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);

  // r600 workitem
  case AMDGPU::BI__builtin_r600_read_tidig_x:
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
  case AMDGPU::BI__builtin_r600_read_tidig_y:
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
  case AMDGPU::BI__builtin_r600_read_tidig_z:
    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
  default:
    return nullptr;
  }
}

/// Handle a SystemZ function in which the final argument is a pointer
/// to an int that receives the post-instruction CC value.  At the LLVM level
/// this is represented as a function that returns a {result, cc} pair.
static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
                                         unsigned IntrinsicID,
                                         const CallExpr *E) {
  unsigned NumArgs = E->getNumArgs() - 1;
  SmallVector<Value *, 8> Args(NumArgs);
  for (unsigned I = 0; I < NumArgs; ++I)
    Args[I] = CGF.EmitScalarExpr(E->getArg(I));
  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
  Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
  Value *Call = CGF.Builder.CreateCall(F, Args);
  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
  CGF.Builder.CreateStore(CC, CCPtr);
  return CGF.Builder.CreateExtractValue(Call, 0);
}

Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
  switch (BuiltinID) {
  case SystemZ::BI__builtin_tbegin: {
    Value *TDB = EmitScalarExpr(E->getArg(0));
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tbegin_nofloat: {
    Value *TDB = EmitScalarExpr(E->getArg(0));
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tbeginc: {
    Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tabort: {
    Value *Data = EmitScalarExpr(E->getArg(0));
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
    return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
  }
  case SystemZ::BI__builtin_non_tx_store: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Data = EmitScalarExpr(E->getArg(1));
    Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
    return Builder.CreateCall(F, {Data, Address});
  }

  // Vector builtins.  Note that most vector builtins are mapped automatically
  // to target-specific LLVM intrinsics.  The ones handled specially here can
  // be represented via standard LLVM IR, which is preferable because it
  // enables common LLVM optimizations.

  case SystemZ::BI__builtin_s390_vpopctb:
  case SystemZ::BI__builtin_s390_vpopcth:
  case SystemZ::BI__builtin_s390_vpopctf:
  case SystemZ::BI__builtin_s390_vpopctg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
    return Builder.CreateCall(F, X);
  }

  case SystemZ::BI__builtin_s390_vclzb:
  case SystemZ::BI__builtin_s390_vclzh:
  case SystemZ::BI__builtin_s390_vclzf:
  case SystemZ::BI__builtin_s390_vclzg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
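    // As on PPC above, pass false for ctlz's i1 'is_zero_undef' operand so
    // the result is defined (the element width) for a zero input.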
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }

  case SystemZ::BI__builtin_s390_vctzb:
  case SystemZ::BI__builtin_s390_vctzh:
  case SystemZ::BI__builtin_s390_vctzf:
  case SystemZ::BI__builtin_s390_vctzg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }

  case SystemZ::BI__builtin_s390_vfsqdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
    return Builder.CreateCall(F, X);
  }
  case SystemZ::BI__builtin_s390_vfmadb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    return Builder.CreateCall(F, {X, Y, Z});
  }
  case SystemZ::BI__builtin_s390_vfmsdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
  }
  case SystemZ::BI__builtin_s390_vflpdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateCall(F, X);
  }
  case SystemZ::BI__builtin_s390_vflndb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
  }
  case SystemZ::BI__builtin_s390_vfidb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    // Constant-fold the M4 and M5 mask arguments.
    llvm::APSInt M4, M5;
    bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
    bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
    assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
    (void)IsConstM4; (void)IsConstM5;
    // Check whether this instance of vfidb can be represented via an LLVM
    // standard intrinsic.  We only support some combinations of M4 and M5.
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    switch (M4.getZExtValue()) {
    default: break;
    case 0:  // IEEE-inexact exception allowed
      switch (M5.getZExtValue()) {
      default: break;
      case 0: ID = Intrinsic::rint; break;
      }
      break;
    case 4:  // IEEE-inexact exception suppressed
      switch (M5.getZExtValue()) {
      default: break;
      case 0: ID = Intrinsic::nearbyint; break;
      case 1: ID = Intrinsic::round; break;
      case 5: ID = Intrinsic::trunc; break;
      case 6: ID = Intrinsic::ceil; break;
      case 7: ID = Intrinsic::floor; break;
      }
      break;
    }
    if (ID != Intrinsic::not_intrinsic) {
      Function *F = CGM.getIntrinsic(ID, ResultType);
      return Builder.CreateCall(F, X);
    }
    Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
    Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
    return Builder.CreateCall(F, {X, M4Value, M5Value});
  }

  // Vector intrinsics that output the post-instruction CC value.

#define INTRINSIC_WITH_CC(NAME) \
    case SystemZ::BI__builtin_##NAME: \
      return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
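  // For example, INTRINSIC_WITH_CC(s390_vpkshs) expands to:
  //   case SystemZ::BI__builtin_s390_vpkshs:
  //     return EmitSystemZIntrinsicWithCC(*this, Intrinsic::s390_vpkshs, E);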

  INTRINSIC_WITH_CC(s390_vpkshs);
  INTRINSIC_WITH_CC(s390_vpksfs);
  INTRINSIC_WITH_CC(s390_vpksgs);

  INTRINSIC_WITH_CC(s390_vpklshs);
  INTRINSIC_WITH_CC(s390_vpklsfs);
  INTRINSIC_WITH_CC(s390_vpklsgs);

  INTRINSIC_WITH_CC(s390_vceqbs);
  INTRINSIC_WITH_CC(s390_vceqhs);
  INTRINSIC_WITH_CC(s390_vceqfs);
  INTRINSIC_WITH_CC(s390_vceqgs);

  INTRINSIC_WITH_CC(s390_vchbs);
  INTRINSIC_WITH_CC(s390_vchhs);
  INTRINSIC_WITH_CC(s390_vchfs);
  INTRINSIC_WITH_CC(s390_vchgs);

  INTRINSIC_WITH_CC(s390_vchlbs);
  INTRINSIC_WITH_CC(s390_vchlhs);
  INTRINSIC_WITH_CC(s390_vchlfs);
  INTRINSIC_WITH_CC(s390_vchlgs);

  INTRINSIC_WITH_CC(s390_vfaebs);
  INTRINSIC_WITH_CC(s390_vfaehs);
  INTRINSIC_WITH_CC(s390_vfaefs);

  INTRINSIC_WITH_CC(s390_vfaezbs);
  INTRINSIC_WITH_CC(s390_vfaezhs);
  INTRINSIC_WITH_CC(s390_vfaezfs);

  INTRINSIC_WITH_CC(s390_vfeebs);
  INTRINSIC_WITH_CC(s390_vfeehs);
  INTRINSIC_WITH_CC(s390_vfeefs);

  INTRINSIC_WITH_CC(s390_vfeezbs);
  INTRINSIC_WITH_CC(s390_vfeezhs);
  INTRINSIC_WITH_CC(s390_vfeezfs);

  INTRINSIC_WITH_CC(s390_vfenebs);
  INTRINSIC_WITH_CC(s390_vfenehs);
  INTRINSIC_WITH_CC(s390_vfenefs);

  INTRINSIC_WITH_CC(s390_vfenezbs);
  INTRINSIC_WITH_CC(s390_vfenezhs);
  INTRINSIC_WITH_CC(s390_vfenezfs);

  INTRINSIC_WITH_CC(s390_vistrbs);
  INTRINSIC_WITH_CC(s390_vistrhs);
  INTRINSIC_WITH_CC(s390_vistrfs);

  INTRINSIC_WITH_CC(s390_vstrcbs);
  INTRINSIC_WITH_CC(s390_vstrchs);
  INTRINSIC_WITH_CC(s390_vstrcfs);

  INTRINSIC_WITH_CC(s390_vstrczbs);
  INTRINSIC_WITH_CC(s390_vstrczhs);
  INTRINSIC_WITH_CC(s390_vstrczfs);

  INTRINSIC_WITH_CC(s390_vfcedbs);
  INTRINSIC_WITH_CC(s390_vfchdbs);
  INTRINSIC_WITH_CC(s390_vfchedbs);

  INTRINSIC_WITH_CC(s390_vftcidb);

#undef INTRINSIC_WITH_CC

  default:
    return nullptr;
  }
}

Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
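  // Helper for the __nvvm_ldg_* builtins: emits a call to the matching
  // llvm.nvvm.ldg.global.* intrinsic, which takes the pointer and an i32
  // alignment operand computed from the natural alignment of the pointee
  // type.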
  auto MakeLdg = [&](unsigned IntrinsicID) {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    AlignmentSource AlignSource;
    clang::CharUnits Align =
        getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
    return Builder.CreateCall(
        CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
                                       Ptr->getType()}),
        {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
  };

  switch (BuiltinID) {
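  // The integer __nvvm_atom_*_gen_* builtins map directly onto LLVM
  // atomicrmw instructions; reuse the generic MakeBinaryAtomicValue helper
  // to build them.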
  case NVPTX::BI__nvvm_atom_add_gen_i:
  case NVPTX::BI__nvvm_atom_add_gen_l:
  case NVPTX::BI__nvvm_atom_add_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);

  case NVPTX::BI__nvvm_atom_sub_gen_i:
  case NVPTX::BI__nvvm_atom_sub_gen_l:
  case NVPTX::BI__nvvm_atom_sub_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);

  case NVPTX::BI__nvvm_atom_and_gen_i:
  case NVPTX::BI__nvvm_atom_and_gen_l:
  case NVPTX::BI__nvvm_atom_and_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);

  case NVPTX::BI__nvvm_atom_or_gen_i:
  case NVPTX::BI__nvvm_atom_or_gen_l:
  case NVPTX::BI__nvvm_atom_or_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);

  case NVPTX::BI__nvvm_atom_xor_gen_i:
  case NVPTX::BI__nvvm_atom_xor_gen_l:
  case NVPTX::BI__nvvm_atom_xor_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);

  case NVPTX::BI__nvvm_atom_xchg_gen_i:
  case NVPTX::BI__nvvm_atom_xchg_gen_l:
  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);

  case NVPTX::BI__nvvm_atom_max_gen_i:
  case NVPTX::BI__nvvm_atom_max_gen_l:
  case NVPTX::BI__nvvm_atom_max_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);

  case NVPTX::BI__nvvm_atom_max_gen_ui:
  case NVPTX::BI__nvvm_atom_max_gen_ul:
  case NVPTX::BI__nvvm_atom_max_gen_ull:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);

  case NVPTX::BI__nvvm_atom_min_gen_i:
  case NVPTX::BI__nvvm_atom_min_gen_l:
  case NVPTX::BI__nvvm_atom_min_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);

  case NVPTX::BI__nvvm_atom_min_gen_ui:
  case NVPTX::BI__nvvm_atom_min_gen_ul:
  case NVPTX::BI__nvvm_atom_min_gen_ull:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);

  case NVPTX::BI__nvvm_atom_cas_gen_i:
  case NVPTX::BI__nvvm_atom_cas_gen_l:
  case NVPTX::BI__nvvm_atom_cas_gen_ll:
    // __nvvm_atom_cas_gen_* should return the old value rather than the
    // success flag.
    return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);

  case NVPTX::BI__nvvm_atom_add_gen_f: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    // atomicrmw only deals with integer arguments, so we use LLVM's
    // nvvm_atomic_load_add_f32 intrinsic instead.
    Value *FnALAF32 =
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
    return Builder.CreateCall(FnALAF32, {Ptr, Val});
  }

  case NVPTX::BI__nvvm_atom_inc_gen_ui: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *FnALI32 =
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
    return Builder.CreateCall(FnALI32, {Ptr, Val});
  }

  case NVPTX::BI__nvvm_atom_dec_gen_ui: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    Value *FnALD32 =
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
    return Builder.CreateCall(FnALD32, {Ptr, Val});
  }

  case NVPTX::BI__nvvm_ldg_c:
  case NVPTX::BI__nvvm_ldg_c2:
  case NVPTX::BI__nvvm_ldg_c4:
  case NVPTX::BI__nvvm_ldg_s:
  case NVPTX::BI__nvvm_ldg_s2:
  case NVPTX::BI__nvvm_ldg_s4:
  case NVPTX::BI__nvvm_ldg_i:
  case NVPTX::BI__nvvm_ldg_i2:
  case NVPTX::BI__nvvm_ldg_i4:
  case NVPTX::BI__nvvm_ldg_l:
  case NVPTX::BI__nvvm_ldg_ll:
  case NVPTX::BI__nvvm_ldg_ll2:
  case NVPTX::BI__nvvm_ldg_uc:
  case NVPTX::BI__nvvm_ldg_uc2:
  case NVPTX::BI__nvvm_ldg_uc4:
  case NVPTX::BI__nvvm_ldg_us:
  case NVPTX::BI__nvvm_ldg_us2:
  case NVPTX::BI__nvvm_ldg_us4:
  case NVPTX::BI__nvvm_ldg_ui:
  case NVPTX::BI__nvvm_ldg_ui2:
  case NVPTX::BI__nvvm_ldg_ui4:
  case NVPTX::BI__nvvm_ldg_ul:
  case NVPTX::BI__nvvm_ldg_ull:
  case NVPTX::BI__nvvm_ldg_ull2:
    // PTX Interoperability section 2.2: "For a vector with an even number of
    // elements, its alignment is set to number of elements times the alignment
    // of its member: n*alignof(t)."
    return MakeLdg(Intrinsic::nvvm_ldg_global_i);
  case NVPTX::BI__nvvm_ldg_f:
  case NVPTX::BI__nvvm_ldg_f2:
  case NVPTX::BI__nvvm_ldg_f4:
  case NVPTX::BI__nvvm_ldg_d:
  case NVPTX::BI__nvvm_ldg_d2:
    return MakeLdg(Intrinsic::nvvm_ldg_global_f);
  default:
    return nullptr;
  }
}

Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                                   const CallExpr *E) {
  switch (BuiltinID) {
  case WebAssembly::BI__builtin_wasm_current_memory: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_grow_memory: {
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
    return Builder.CreateCall(Callee, X);
  }

  default:
    return nullptr;
  }
}