// CGBuiltin.cpp revision 22229d6822324a42913d25f256045dbf348a53e9
//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CodeGenFunction.h"
#include "CGObjCRuntime.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"

using namespace clang;
using namespace CodeGen;
using namespace llvm;

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                  unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
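  // (Darwin, for example, prefixes plain C symbols with an underscore.)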
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else
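    // "__builtin_" is 10 characters long; skip past it to get the library name.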
    Name = Context.BuiltinInfo.GetName(BuiltinID) + 10;

  llvm::FunctionType *Ty =
    cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

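/// Emit the conversions required to turn the given integer back into a value
/// of the original type, inverting EmitToInt above.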
static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}

/// Utility to insert an atomic instruction based on the AtomicRMWInst::BinOp
/// kind and the expression node.
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                  llvm::SequentiallyConsistent);
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// Utility to insert an atomic instruction based on the AtomicRMWInst::BinOp
/// kind and the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                  llvm::SequentiallyConsistent);
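  // atomicrmw returns the value that was in memory before the operation, so
  // reapply Op to produce the post-operation ("op_and_fetch") result.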
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
/// which must be a scalar floating point type.
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
  const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
  assert(ValTyP && "isn't scalar fp type!");

  StringRef FnName;
  switch (ValTyP->getKind()) {
  default: llvm_unreachable("Isn't a scalar fp type!");
  case BuiltinType::Float:      FnName = "fabsf"; break;
  case BuiltinType::Double:     FnName = "fabs"; break;
  case BuiltinType::LongDouble: FnName = "fabsl"; break;
  }

  // The prototype is something that takes and returns whatever V's type is.
  llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), V->getType(),
                                                   false);
  llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);

  return CGF.EmitNounwindRuntimeCall(Fn, V, "abs");
}

static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
                              const CallExpr *E, llvm::Value *calleeValue) {
  return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E->getLocStart(),
                      ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn);
}

/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
/// \arg X The first argument to the llvm.*.with.overflow.*.
/// \arg Y The second argument to the llvm.*.with.overflow.*.
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
/// \returns The result (i.e. sum/product) returned by the intrinsic.
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
                                          const llvm::Intrinsic::ID IntrinsicID,
                                          llvm::Value *X, llvm::Value *Y,
                                          llvm::Value *&Carry) {
  // Make sure we have integers of the same width.
  assert(X->getType() == Y->getType() &&
         "Arguments must be the same type. (Did you forget to make sure both "
         "arguments have the same integer width?)");

  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y);
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  return CGF.Builder.CreateExtractValue(Tmp, 0);
}

RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                        unsigned BuiltinID, const CallExpr *E) {
  // See if we can constant fold this builtin.  If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
      !Result.hasSideEffects()) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
                                               Result.Val.getFloat()));
  }

  switch (BuiltinID) {
  default: break;  // Handle intrinsics and libm functions below.
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__builtin_va_end: {
    Value *ArgValue = EmitVAListRef(E->getArg(0));
    llvm::Type *DestType = Int8PtrTy;
    if (ArgValue->getType() != DestType)
      ArgValue = Builder.CreateBitCast(ArgValue, DestType,
                                       ArgValue->getName().data());

    Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
      Intrinsic::vaend : Intrinsic::vastart;
    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
  }
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0));
    Value *SrcPtr = EmitVAListRef(E->getArg(1));

    llvm::Type *Type = Int8PtrTy;

    DstPtr = Builder.CreateBitCast(DstPtr, Type);
    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
                                           DstPtr, SrcPtr));
  }
  case Builtin::BI__builtin_abs:
  case Builtin::BI__builtin_labs:
  case Builtin::BI__builtin_llabs: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
    Builder.CreateICmpSGE(ArgValue,
                          llvm::Constant::getNullValue(ArgValue->getType()),
                                                            "abscond");
    Value *Result =
      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");

    return RValue::get(Result);
  }

  case Builtin::BI__builtin_conj:
  case Builtin::BI__builtin_conjf:
  case Builtin::BI__builtin_conjl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    Value *Real = ComplexVal.first;
    Value *Imag = ComplexVal.second;
    Value *Zero =
      Imag->getType()->isFPOrFPVectorTy()
        ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
        : llvm::Constant::getNullValue(Imag->getType());

    Imag = Builder.CreateFSub(Zero, Imag, "sub");
    return RValue::getComplex(std::make_pair(Real, Imag));
  }
  case Builtin::BI__builtin_creal:
  case Builtin::BI__builtin_crealf:
  case Builtin::BI__builtin_creall:
  case Builtin::BIcreal:
  case Builtin::BIcrealf:
  case Builtin::BIcreall: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.first);
  }

  case Builtin::BI__builtin_cimag:
  case Builtin::BI__builtin_cimagf:
  case Builtin::BI__builtin_cimagl:
  case Builtin::BIcimag:
  case Builtin::BIcimagf:
  case Builtin::BIcimagl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.second);
  }

  case Builtin::BI__builtin_ctzs:
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_clzs:
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue,
                                                       Builder.getTrue()),
                                   llvm::ConstantInt::get(ArgType, 1));
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue);
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_expect: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();

    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));

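    // llvm.expect simply returns its first operand; the expected value only
    // feeds the optimizer's branch-probability heuristics.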
    Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue,
                                        "expval");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_bswap16:
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
    return RValue::get(Builder.CreateCall(F, ArgValue));
  }
  case Builtin::BI__builtin_object_size: {
    // We rely on constant folding to deal with expressions with side effects.
    assert(!E->getArg(0)->HasSideEffects(getContext()) &&
           "should have been constant folded");

    // We pass this builtin onto the optimizer so that it can
    // figure out the object size in more complex cases.
    llvm::Type *ResType = ConvertType(E->getType());

    // LLVM only distinguishes between types 0 and 2, so make sure we pass
    // that along as a boolean.
    Value *Ty = EmitScalarExpr(E->getArg(1));
    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
    assert(CI);
    uint64_t val = CI->getZExtValue();
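    // Types 0/1 request an upper bound on the object size and types 2/3 a
    // lower bound, so only the second bit matters: it becomes the 'min' flag
    // of llvm.objectsize.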
    CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);
    // FIXME: Get right address space.
    llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) };
    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
    return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)), CI));
  }
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
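    // The final operand of llvm.prefetch distinguishes a data prefetch (1)
    // from an instruction prefetch (0).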
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data));
  }
  case Builtin::BI__builtin_readcyclecounter: {
    Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_trap: {
    Value *F = CGM.getIntrinsic(Intrinsic::trap);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__debugbreak: {
    Value *F = CGM.getIntrinsic(Intrinsic::debugtrap);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_unreachable: {
    if (SanOpts->Unreachable)
      EmitCheck(Builder.getFalse(), "builtin_unreachable",
                EmitCheckSourceLocation(E->getExprLoc()),
                ArrayRef<llvm::Value *>(), CRK_Unrecoverable);
    else
      Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("unreachable.cont"));

    return RValue::get(0);
  }

  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
  }

  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: llvm_unreachable("Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_isnan: {
    Value *V = EmitScalarExpr(E->getArg(0));
    V = Builder.CreateFCmpUNO(V, V, "cmp");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isinf: {
    // isinf(x) --> fabs(x) == infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    V = EmitFAbs(*this, V, E->getArg(0)->getType());

    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  // TODO: BI__builtin_isinf_sign
  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0

  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsLessThanInf =
      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isfinite: {
    // isfinite(x) --> x == x && fabs(x) != infinity;
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsNotInf =
      Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");

    V = Builder.CreateAnd(Eq, IsNotInf, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_fpclassify: {
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }

  case Builtin::BIalloca:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
  }
  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal,
                         Dest.second, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BImemcpy:
  case Builtin::BI__builtin_memcpy: {
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
        EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }

  case Builtin::BI__builtin___memcpy_chk: {
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
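    // If either size is not a compile-time constant, or the copy might exceed
    // the destination size, fall through and let the normal library-call path
    // handle it.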
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
        EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }

  case Builtin::BI__builtin_objc_memmove_collectable: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
                                                  Address, SrcAddr, SizeVal);
    return RValue::get(Address);
  }

  case Builtin::BI__builtin___memmove_chk: {
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
        EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }

  case Builtin::BImemmove:
  case Builtin::BI__builtin_memmove: {
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
        EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BImemset:
  case Builtin::BI__builtin_memset: {
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BI__builtin___memset_chk: {
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BI__builtin_dwarf_cfa: {
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend?  Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
  case Builtin::BI__builtin_return_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_frame_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_extract_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_frob_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dwarf_sp_column: {
    llvm::IntegerType *Ty
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
    if (Column == -1) {
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
      return RValue::get(llvm::UndefValue::get(Ty));
    }
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  }
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_eh_return: {
    Value *Int = EmitScalarExpr(E->getArg(0));
    Value *Ptr = EmitScalarExpr(E->getArg(1));

    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                  ? Intrinsic::eh_return_i32
                                  : Intrinsic::eh_return_i64);
    Builder.CreateCall2(F, Int, Ptr);
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));

    return RValue::get(0);
  }
  case Builtin::BI__builtin_unwind_init: {
    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_extend_pointer: {
    // Extends a pointer to the size of an _Unwind_Word, which is
    // uint64_t on all platforms.  Generally this gets poked into a
    // register and eventually used as an address, so if the
    // addressing registers are wider than pointers and the platform
    // doesn't implicitly ignore high-order bits when doing
    // addressing, we need to make sure we zext / sext based on
    // the platform's expectations.
    //
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html

    // Cast the pointer to intptr_t.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");

    // If that's 64 bits, we're done.
    if (IntPtrTy->getBitWidth() == 64)
      return RValue::get(Result);

    // Otherwise, ask the codegen data what to do.
    if (getTargetHooks().extendPointerWithSExt())
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
    else
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  }
  case Builtin::BI__builtin_setjmp: {
    // Buffer is a void**.
    Value *Buf = EmitScalarExpr(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

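    // Slot 1 of the buffer is left for the lowering of the EH setjmp
    // intrinsic (which stashes the resume address there); the stack pointer
    // goes into slot 2 below.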
    // Store the stack pointer to the setjmp buffer.
    Value *StackAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Value *StackSaveSlot =
      Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
    return RValue::get(Builder.CreateCall(F, Buf));
  }
  case Builtin::BI__builtin_longjmp: {
    Value *Buf = EmitScalarExpr(E->getArg(0));
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);

    // Call LLVM's EH longjmp, which is lightweight.
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);

    // longjmp doesn't return; mark this as unreachable.
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("longjmp.cont"));

    return RValue::get(0);
  }
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    llvm_unreachable("Shouldn't make it through sema");
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);

  // Clang extensions: not overloaded yet.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);

  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);

  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16: {
    QualType T = E->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

    llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitScalarExpr(E->getArg(1));
    llvm::Type *ValueType = Args[1]->getType();
    Args[1] = EmitToInt(*this, Args[1], T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                                llvm::SequentiallyConsistent);
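    // cmpxchg yields the value that was loaded from memory, which is exactly
    // what the 'val' flavour of compare-and-swap has to return.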
    Result = EmitFromInt(*this, Result, T, ValueType);
    return RValue::get(Result);
  }

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16: {
    QualType T = E->getArg(1)->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

    llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *OldVal = Args[1];
    Value *PrevVal = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                                 llvm::SequentiallyConsistent);
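    // The 'bool' flavour reports whether the exchange took place, i.e. whether
    // the value loaded by cmpxchg matched the expected operand.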
    Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
    // zext bool to int.
    Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
    return RValue::get(Result);
  }

  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_release_1:
  case Builtin::BI__sync_lock_release_2:
  case Builtin::BI__sync_lock_release_4:
  case Builtin::BI__sync_lock_release_8:
  case Builtin::BI__sync_lock_release_16: {
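    // __sync_lock_release is defined to write 0 into the object with release
    // ordering, so emit an atomic store of an appropriately sized zero.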
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
                                             StoreSize.getQuantity() * 8);
    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
    llvm::StoreInst *Store =
      Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
    Store->setAlignment(StoreSize.getQuantity());
    Store->setAtomic(llvm::Release);
    return RValue::get(0);
  }

  case Builtin::BI__sync_synchronize: {
    // We assume this is supposed to correspond to a C++0x-style
    // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
    // is really badly designed in the sense that in theory, there isn't
    // any way to safely use it... but in practice, it mostly works
    // to use it with non-atomic loads and stores to get acquire/release
    // semantics.
    Builder.CreateFence(llvm::SequentiallyConsistent);
    return RValue::get(0);
  }

  case Builtin::BI__c11_atomic_is_lock_free:
  case Builtin::BI__atomic_is_lock_free: {
    // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
    // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
    // _Atomic(T) is always properly-aligned.
    const char *LibCallName = "__atomic_is_lock_free";
    CallArgList Args;
    Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
             getContext().getSizeType());
    if (BuiltinID == Builtin::BI__atomic_is_lock_free)
      Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
               getContext().VoidPtrTy);
    else
      Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
               getContext().VoidPtrTy);
    const CGFunctionInfo &FuncInfo =
        CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
                                               FunctionType::ExtInfo(),
                                               RequiredArgs::All);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
    return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
  }

  case Builtin::BI__atomic_test_and_set: {
    // Look at the argument type to determine whether this is a volatile
    // operation. The parameter type is always volatile.
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

    Value *Ptr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
    Value *NewVal = Builder.getInt8(1);
    Value *Order = EmitScalarExpr(E->getArg(1));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      AtomicRMWInst *Result = 0;
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Monotonic);
        break;
      case 1:  // memory_order_consume
      case 2:  // memory_order_acquire
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Acquire);
        break;
      case 3:  // memory_order_release
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Release);
        break;
      case 4:  // memory_order_acq_rel
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::AcquireRelease);
        break;
      case 5:  // memory_order_seq_cst
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::SequentiallyConsistent);
        break;
      }
      Result->setVolatile(Volatile);
      return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
    }

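    // The memory order is not a compile-time constant, so branch on its value
    // at run time and issue an exchange with the corresponding ordering in
    // each arm of the switch.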
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    llvm::BasicBlock *BBs[5] = {
      createBasicBlock("monotonic", CurFn),
      createBasicBlock("acquire", CurFn),
      createBasicBlock("release", CurFn),
      createBasicBlock("acqrel", CurFn),
      createBasicBlock("seqcst", CurFn)
    };
    llvm::AtomicOrdering Orders[5] = {
      llvm::Monotonic, llvm::Acquire, llvm::Release,
      llvm::AcquireRelease, llvm::SequentiallyConsistent
    };

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);

    Builder.SetInsertPoint(ContBB);
    PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");

    for (unsigned i = 0; i < 5; ++i) {
      Builder.SetInsertPoint(BBs[i]);
      AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                                   Ptr, NewVal, Orders[i]);
      RMW->setVolatile(Volatile);
      Result->addIncoming(RMW, BBs[i]);
      Builder.CreateBr(ContBB);
    }

    SI->addCase(Builder.getInt32(0), BBs[0]);
    SI->addCase(Builder.getInt32(1), BBs[1]);
    SI->addCase(Builder.getInt32(2), BBs[1]);
    SI->addCase(Builder.getInt32(3), BBs[2]);
    SI->addCase(Builder.getInt32(4), BBs[3]);
    SI->addCase(Builder.getInt32(5), BBs[4]);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
  }

  case Builtin::BI__atomic_clear: {
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

    Value *Ptr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
    Value *NewVal = Builder.getInt8(0);
    Value *Order = EmitScalarExpr(E->getArg(1));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
      Store->setAlignment(1);
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        Store->setOrdering(llvm::Monotonic);
        break;
      case 3:  // memory_order_release
        Store->setOrdering(llvm::Release);
        break;
      case 5:  // memory_order_seq_cst
        Store->setOrdering(llvm::SequentiallyConsistent);
        break;
      }
      return RValue::get(0);
    }

    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    llvm::BasicBlock *BBs[3] = {
      createBasicBlock("monotonic", CurFn),
      createBasicBlock("release", CurFn),
      createBasicBlock("seqcst", CurFn)
    };
    llvm::AtomicOrdering Orders[3] = {
      llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
    };

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);

    for (unsigned i = 0; i < 3; ++i) {
      Builder.SetInsertPoint(BBs[i]);
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
      Store->setAlignment(1);
      Store->setOrdering(Orders[i]);
      Builder.CreateBr(ContBB);
    }

    SI->addCase(Builder.getInt32(0), BBs[0]);
    SI->addCase(Builder.getInt32(3), BBs[1]);
    SI->addCase(Builder.getInt32(5), BBs[2]);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(0);
  }

1214  case Builtin::BI__atomic_thread_fence:
1215  case Builtin::BI__atomic_signal_fence:
1216  case Builtin::BI__c11_atomic_thread_fence:
1217  case Builtin::BI__c11_atomic_signal_fence: {
1218    llvm::SynchronizationScope Scope;
1219    if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1220        BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1221      Scope = llvm::SingleThread;
1222    else
1223      Scope = llvm::CrossThread;
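        // A signal fence only needs to order operations with respect to the
        // current thread, hence the single-thread scope above; a thread fence
        // uses cross-thread scope.  The strength of the fence itself is chosen
        // from the memory-order argument handled below.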
1224    Value *Order = EmitScalarExpr(E->getArg(0));
1225    if (isa<llvm::ConstantInt>(Order)) {
1226      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1227      switch (ord) {
1228      case 0:  // memory_order_relaxed
1229      default: // invalid order
1230        break;
1231      case 1:  // memory_order_consume
1232      case 2:  // memory_order_acquire
1233        Builder.CreateFence(llvm::Acquire, Scope);
1234        break;
1235      case 3:  // memory_order_release
1236        Builder.CreateFence(llvm::Release, Scope);
1237        break;
1238      case 4:  // memory_order_acq_rel
1239        Builder.CreateFence(llvm::AcquireRelease, Scope);
1240        break;
1241      case 5:  // memory_order_seq_cst
1242        Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1243        break;
1244      }
1245      return RValue::get(0);
1246    }
1247
1248    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1249    AcquireBB = createBasicBlock("acquire", CurFn);
1250    ReleaseBB = createBasicBlock("release", CurFn);
1251    AcqRelBB = createBasicBlock("acqrel", CurFn);
1252    SeqCstBB = createBasicBlock("seqcst", CurFn);
1253    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1254
1255    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1256    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1257
1258    Builder.SetInsertPoint(AcquireBB);
1259    Builder.CreateFence(llvm::Acquire, Scope);
1260    Builder.CreateBr(ContBB);
1261    SI->addCase(Builder.getInt32(1), AcquireBB);
1262    SI->addCase(Builder.getInt32(2), AcquireBB);
1263
1264    Builder.SetInsertPoint(ReleaseBB);
1265    Builder.CreateFence(llvm::Release, Scope);
1266    Builder.CreateBr(ContBB);
1267    SI->addCase(Builder.getInt32(3), ReleaseBB);
1268
1269    Builder.SetInsertPoint(AcqRelBB);
1270    Builder.CreateFence(llvm::AcquireRelease, Scope);
1271    Builder.CreateBr(ContBB);
1272    SI->addCase(Builder.getInt32(4), AcqRelBB);
1273
1274    Builder.SetInsertPoint(SeqCstBB);
1275    Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1276    Builder.CreateBr(ContBB);
1277    SI->addCase(Builder.getInt32(5), SeqCstBB);
1278
1279    Builder.SetInsertPoint(ContBB);
1280    return RValue::get(0);
1281  }
1282
1283    // Library functions with special handling.
1284  case Builtin::BIsqrt:
1285  case Builtin::BIsqrtf:
1286  case Builtin::BIsqrtl: {
1287    // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1288    // in finite- or unsafe-math mode (the intrinsic has different semantics
1289    // for handling negative numbers compared to the library function, so
1290    // -fmath-errno=0 is not enough).
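        // As an illustration (a sketch, not a verbatim description of the code
        // below): with -ffast-math a call such as
        //   double r = sqrt(x);
        // is expected to lower to something like
        //   %r = call double @llvm.sqrt.f64(double %x)
        // while in the default mode the libm call is kept so that errno and the
        // negative-input behaviour of sqrt() are preserved.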
1291    if (!FD->hasAttr<ConstAttr>())
1292      break;
1293    if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1294          CGM.getCodeGenOpts().NoNaNsFPMath))
1295      break;
1296    Value *Arg0 = EmitScalarExpr(E->getArg(0));
1297    llvm::Type *ArgType = Arg0->getType();
1298    Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1299    return RValue::get(Builder.CreateCall(F, Arg0));
1300  }
1301
1302  case Builtin::BIpow:
1303  case Builtin::BIpowf:
1304  case Builtin::BIpowl: {
1305    // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1306    if (!FD->hasAttr<ConstAttr>())
1307      break;
1308    Value *Base = EmitScalarExpr(E->getArg(0));
1309    Value *Exponent = EmitScalarExpr(E->getArg(1));
1310    llvm::Type *ArgType = Base->getType();
1311    Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1312    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
1313    break;
1314  }
1315
1316  case Builtin::BIfma:
1317  case Builtin::BIfmaf:
1318  case Builtin::BIfmal:
1319  case Builtin::BI__builtin_fma:
1320  case Builtin::BI__builtin_fmaf:
1321  case Builtin::BI__builtin_fmal: {
1322    // Rewrite fma to intrinsic.
1323    Value *FirstArg = EmitScalarExpr(E->getArg(0));
1324    llvm::Type *ArgType = FirstArg->getType();
1325    Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1326    return RValue::get(Builder.CreateCall3(F, FirstArg,
1327                                              EmitScalarExpr(E->getArg(1)),
1328                                              EmitScalarExpr(E->getArg(2))));
1329  }
1330
1331  case Builtin::BI__builtin_signbit:
1332  case Builtin::BI__builtin_signbitf:
1333  case Builtin::BI__builtin_signbitl: {
1334    LLVMContext &C = CGM.getLLVMContext();
1335
1336    Value *Arg = EmitScalarExpr(E->getArg(0));
1337    llvm::Type *ArgTy = Arg->getType();
1338    if (ArgTy->isPPC_FP128Ty())
1339      break; // FIXME: I'm not sure what the right implementation is here.
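        // For the remaining floating-point types, signbit is computed by
        // bitcasting the value to an integer of the same width and testing
        // whether that integer is negative, i.e. whether its top (sign) bit is
        // set.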
1340    int ArgWidth = ArgTy->getPrimitiveSizeInBits();
1341    llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
1342    Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
1343    Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
1344    Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
1345    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
1346  }
1347  case Builtin::BI__builtin_annotation: {
1348    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1349    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1350                                      AnnVal->getType());
1351
1352    // Get the annotation string, go through casts. Sema requires this to be a
1353    // non-wide string literal, potentially cast, so the cast<> is safe.
1354    const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1355    StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1356    return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1357  }
1358  case Builtin::BI__builtin_addcb:
1359  case Builtin::BI__builtin_addcs:
1360  case Builtin::BI__builtin_addc:
1361  case Builtin::BI__builtin_addcl:
1362  case Builtin::BI__builtin_addcll:
1363  case Builtin::BI__builtin_subcb:
1364  case Builtin::BI__builtin_subcs:
1365  case Builtin::BI__builtin_subc:
1366  case Builtin::BI__builtin_subcl:
1367  case Builtin::BI__builtin_subcll: {
1368
1369    // We translate all of these builtins from expressions of the form:
1370    //   int x = ..., y = ..., carryin = ..., carryout, result;
1371    //   result = __builtin_addc(x, y, carryin, &carryout);
1372    //
1373    // to LLVM IR of the form:
1374    //
1375    //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1376    //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1377    //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1378    //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1379    //                                                       i32 %carryin)
1380    //   %result = extractvalue {i32, i1} %tmp2, 0
1381    //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1382    //   %tmp3 = or i1 %carry1, %carry2
1383    //   %tmp4 = zext i1 %tmp3 to i32
1384    //   store i32 %tmp4, i32* %carryout
1385
1386    // Scalarize our inputs.
1387    llvm::Value *X = EmitScalarExpr(E->getArg(0));
1388    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1389    llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1390    std::pair<llvm::Value*, unsigned> CarryOutPtr =
1391      EmitPointerWithAlignment(E->getArg(3));
1392
1393    // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1394    llvm::Intrinsic::ID IntrinsicId;
1395    switch (BuiltinID) {
1396    default: llvm_unreachable("Unknown multiprecision builtin id.");
1397    case Builtin::BI__builtin_addcb:
1398    case Builtin::BI__builtin_addcs:
1399    case Builtin::BI__builtin_addc:
1400    case Builtin::BI__builtin_addcl:
1401    case Builtin::BI__builtin_addcll:
1402      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1403      break;
1404    case Builtin::BI__builtin_subcb:
1405    case Builtin::BI__builtin_subcs:
1406    case Builtin::BI__builtin_subc:
1407    case Builtin::BI__builtin_subcl:
1408    case Builtin::BI__builtin_subcll:
1409      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1410      break;
1411    }
1412
1413    // Construct our resulting LLVM IR expression.
1414    llvm::Value *Carry1;
1415    llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1416                                              X, Y, Carry1);
1417    llvm::Value *Carry2;
1418    llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1419                                              Sum1, Carryin, Carry2);
1420    llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1421                                               X->getType());
1422    llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut,
1423                                                         CarryOutPtr.first);
1424    CarryOutStore->setAlignment(CarryOutPtr.second);
1425    return RValue::get(Sum2);
1426  }
1427  case Builtin::BI__builtin_uadd_overflow:
1428  case Builtin::BI__builtin_uaddl_overflow:
1429  case Builtin::BI__builtin_uaddll_overflow:
1430  case Builtin::BI__builtin_usub_overflow:
1431  case Builtin::BI__builtin_usubl_overflow:
1432  case Builtin::BI__builtin_usubll_overflow:
1433  case Builtin::BI__builtin_umul_overflow:
1434  case Builtin::BI__builtin_umull_overflow:
1435  case Builtin::BI__builtin_umulll_overflow:
1436  case Builtin::BI__builtin_sadd_overflow:
1437  case Builtin::BI__builtin_saddl_overflow:
1438  case Builtin::BI__builtin_saddll_overflow:
1439  case Builtin::BI__builtin_ssub_overflow:
1440  case Builtin::BI__builtin_ssubl_overflow:
1441  case Builtin::BI__builtin_ssubll_overflow:
1442  case Builtin::BI__builtin_smul_overflow:
1443  case Builtin::BI__builtin_smull_overflow:
1444  case Builtin::BI__builtin_smulll_overflow: {
1445
1446    // We translate all of these builtins directly to the relevant LLVM IR node.
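        // As a sketch of the lowering (the names here are illustrative only):
        //   bool ovf = __builtin_sadd_overflow(x, y, &res);
        // becomes roughly
        //   %pair = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
        //   %res  = extractvalue {i32, i1} %pair, 0
        //   %ovf  = extractvalue {i32, i1} %pair, 1
        //   store i32 %res, i32* %resptr
        // and the i1 overflow flag is what the builtin call returns.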
1447
1448    // Scalarize our inputs.
1449    llvm::Value *X = EmitScalarExpr(E->getArg(0));
1450    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1451    std::pair<llvm::Value *, unsigned> SumOutPtr =
1452      EmitPointerWithAlignment(E->getArg(2));
1453
1454    // Decide which of the overflow intrinsics we are lowering to:
1455    llvm::Intrinsic::ID IntrinsicId;
1456    switch (BuiltinID) {
1457    default: llvm_unreachable("Unknown security overflow builtin id.");
1458    case Builtin::BI__builtin_uadd_overflow:
1459    case Builtin::BI__builtin_uaddl_overflow:
1460    case Builtin::BI__builtin_uaddll_overflow:
1461      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1462      break;
1463    case Builtin::BI__builtin_usub_overflow:
1464    case Builtin::BI__builtin_usubl_overflow:
1465    case Builtin::BI__builtin_usubll_overflow:
1466      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1467      break;
1468    case Builtin::BI__builtin_umul_overflow:
1469    case Builtin::BI__builtin_umull_overflow:
1470    case Builtin::BI__builtin_umulll_overflow:
1471      IntrinsicId = llvm::Intrinsic::umul_with_overflow;
1472      break;
1473    case Builtin::BI__builtin_sadd_overflow:
1474    case Builtin::BI__builtin_saddl_overflow:
1475    case Builtin::BI__builtin_saddll_overflow:
1476      IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
1477      break;
1478    case Builtin::BI__builtin_ssub_overflow:
1479    case Builtin::BI__builtin_ssubl_overflow:
1480    case Builtin::BI__builtin_ssubll_overflow:
1481      IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
1482      break;
1483    case Builtin::BI__builtin_smul_overflow:
1484    case Builtin::BI__builtin_smull_overflow:
1485    case Builtin::BI__builtin_smulll_overflow:
1486      IntrinsicId = llvm::Intrinsic::smul_with_overflow;
1487      break;
1488    }
1489
1490
1491    llvm::Value *Carry;
1492    llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
1493    llvm::StoreInst *SumOutStore = Builder.CreateStore(Sum, SumOutPtr.first);
1494    SumOutStore->setAlignment(SumOutPtr.second);
1495
1496    return RValue::get(Carry);
1497  }
1498  case Builtin::BI__builtin_addressof:
1499    return RValue::get(EmitLValue(E->getArg(0)).getAddress());
1500  case Builtin::BI__noop:
1501    return RValue::get(0);
1502  }
1503
1504  // If this is an alias for a lib function (e.g. __builtin_sin), emit
1505  // the call using the normal call path, but using the unmangled
1506  // version of the function name.
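      // For example, a call written as __builtin_sin(x) is emitted as an
      // ordinary call to sin().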
1507  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
1508    return emitLibraryCall(*this, FD, E,
1509                           CGM.getBuiltinLibFunction(FD, BuiltinID));
1510
1511  // If this is a predefined lib function (e.g. malloc), emit the call
1512  // using exactly the normal call path.
1513  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
1514    return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
1515
1516  // See if we have a target specific intrinsic.
1517  const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
1518  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
1519  if (const char *Prefix =
1520      llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()))
1521    IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
1522
1523  if (IntrinsicID != Intrinsic::not_intrinsic) {
1524    SmallVector<Value*, 16> Args;
1525
1526    // Find out if any arguments are required to be integer constant
1527    // expressions.
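        // ICEArguments is a bitmask: bit i is set when argument i must be an
        // integer constant expression.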
1528    unsigned ICEArguments = 0;
1529    ASTContext::GetBuiltinTypeError Error;
1530    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
1531    assert(Error == ASTContext::GE_None && "Should not codegen an error");
1532
1533    Function *F = CGM.getIntrinsic(IntrinsicID);
1534    llvm::FunctionType *FTy = F->getFunctionType();
1535
1536    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
1537      Value *ArgValue;
1538      // If this is a normal argument, just emit it as a scalar.
1539      if ((ICEArguments & (1 << i)) == 0) {
1540        ArgValue = EmitScalarExpr(E->getArg(i));
1541      } else {
1542        // If this is required to be a constant, constant fold it so that we
1543        // know that the generated intrinsic gets a ConstantInt.
1544        llvm::APSInt Result;
1545        bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
1546        assert(IsConst && "Constant arg isn't actually constant?");
1547        (void)IsConst;
1548        ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
1549      }
1550
1551      // If the intrinsic arg type is different from the builtin arg type
1552      // we need to do a bit cast.
1553      llvm::Type *PTy = FTy->getParamType(i);
1554      if (PTy != ArgValue->getType()) {
1555        assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
1556               "Must be able to losslessly bit cast to param");
1557        ArgValue = Builder.CreateBitCast(ArgValue, PTy);
1558      }
1559
1560      Args.push_back(ArgValue);
1561    }
1562
1563    Value *V = Builder.CreateCall(F, Args);
1564    QualType BuiltinRetType = E->getType();
1565
1566    llvm::Type *RetTy = VoidTy;
1567    if (!BuiltinRetType->isVoidType())
1568      RetTy = ConvertType(BuiltinRetType);
1569
1570    if (RetTy != V->getType()) {
1571      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
1572             "Must be able to losslessly bit cast result type");
1573      V = Builder.CreateBitCast(V, RetTy);
1574    }
1575
1576    return RValue::get(V);
1577  }
1578
1579  // See if we have a target specific builtin that needs to be lowered.
1580  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
1581    return RValue::get(V);
1582
1583  ErrorUnsupported(E, "builtin function");
1584
1585  // Unknown builtin, for now just dump it out and return undef.
1586  return GetUndefRValue(E->getType());
1587}
1588
1589Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
1590                                              const CallExpr *E) {
1591  switch (getTarget().getTriple().getArch()) {
1592  case llvm::Triple::aarch64:
1593    return EmitAArch64BuiltinExpr(BuiltinID, E);
1594  case llvm::Triple::arm:
1595  case llvm::Triple::thumb:
1596    return EmitARMBuiltinExpr(BuiltinID, E);
1597  case llvm::Triple::x86:
1598  case llvm::Triple::x86_64:
1599    return EmitX86BuiltinExpr(BuiltinID, E);
1600  case llvm::Triple::ppc:
1601  case llvm::Triple::ppc64:
1602  case llvm::Triple::ppc64le:
1603    return EmitPPCBuiltinExpr(BuiltinID, E);
1604  default:
1605    return 0;
1606  }
1607}
1608
1609static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
1610                                     NeonTypeFlags TypeFlags,
1611                                     bool V1Ty=false) {
1612  int IsQuad = TypeFlags.isQuad();
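      // Quad types have twice as many lanes as the corresponding 64-bit vector
      // types (e.g. Int32 maps to <2 x i32>, or <4 x i32> when the quad flag is
      // set); V1Ty forces a single-element vector instead.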
1613  switch (TypeFlags.getEltType()) {
1614  case NeonTypeFlags::Int8:
1615  case NeonTypeFlags::Poly8:
1616    return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
1617  case NeonTypeFlags::Int16:
1618  case NeonTypeFlags::Poly16:
1619  case NeonTypeFlags::Float16:
1620    return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
1621  case NeonTypeFlags::Int32:
1622    return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
1623  case NeonTypeFlags::Int64:
1624    return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
1625  case NeonTypeFlags::Float32:
1626    return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
1627  case NeonTypeFlags::Float64:
1628    return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
1629  }
1630  llvm_unreachable("Unknown vector element type!");
1631}
1632
1633Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
1634  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
1635  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
1636  return Builder.CreateShuffleVector(V, V, SV, "lane");
1637}
1638
1639Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
1640                                     const char *name,
1641                                     unsigned shift, bool rightshift) {
1642  unsigned j = 0;
1643  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1644       ai != ae; ++ai, ++j)
1645    if (shift > 0 && shift == j)
1646      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
1647    else
1648      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
1649
1650  return Builder.CreateCall(F, Ops, name);
1651}
1652
1653Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
1654                                            bool neg) {
1655  int SV = cast<ConstantInt>(V)->getSExtValue();
1656
1657  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1658  llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
1659  return llvm::ConstantVector::getSplat(VTy->getNumElements(), C);
1660}
1661
1662/// \brief Right-shift a vector by a constant.
1663Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
1664                                          llvm::Type *Ty, bool usgn,
1665                                          const char *name) {
1666  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1667
1668  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
1669  int EltSize = VTy->getScalarSizeInBits();
1670
1671  Vec = Builder.CreateBitCast(Vec, Ty);
1672
1673  // lshr/ashr are undefined when the shift amount is equal to the vector
1674  // element size.
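      // For example, with i32 lanes a shift amount of 32 would be undefined IR:
      // the unsigned case is folded to zero below, and the signed case is
      // emitted as a shift by 31 so that every result bit is the sign bit.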
1675  if (ShiftAmt == EltSize) {
1676    if (usgn) {
1677      // Right-shifting an unsigned value by its size yields 0.
1678      llvm::Constant *Zero = ConstantInt::get(VTy->getElementType(), 0);
1679      return llvm::ConstantVector::getSplat(VTy->getNumElements(), Zero);
1680    } else {
1681      // Right-shifting a signed value by its size is equivalent
1682      // to a shift of size-1.
1683      --ShiftAmt;
1684      Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
1685    }
1686  }
1687
1688  Shift = EmitNeonShiftVector(Shift, Ty, false);
1689  if (usgn)
1690    return Builder.CreateLShr(Vec, Shift, name);
1691  else
1692    return Builder.CreateAShr(Vec, Shift, name);
1693}
1694
1695/// GetPointeeAlignment - Given an expression with a pointer type, find the
1696/// alignment of the type referenced by the pointer.  Skip over implicit
1697/// casts.
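    /// For example (a sketch of the cases handled below), if the argument is a
    /// 'char *' value implicitly bitcast to 'int *', the alignment reported is
    /// that of the original char pointee (1), not the alignment suggested by
    /// the destination pointer type.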
1698std::pair<llvm::Value*, unsigned>
1699CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) {
1700  assert(Addr->getType()->isPointerType());
1701  Addr = Addr->IgnoreParens();
1702  if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Addr)) {
1703    if ((ICE->getCastKind() == CK_BitCast || ICE->getCastKind() == CK_NoOp) &&
1704        ICE->getSubExpr()->getType()->isPointerType()) {
1705      std::pair<llvm::Value*, unsigned> Ptr =
1706          EmitPointerWithAlignment(ICE->getSubExpr());
1707      Ptr.first = Builder.CreateBitCast(Ptr.first,
1708                                        ConvertType(Addr->getType()));
1709      return Ptr;
1710    } else if (ICE->getCastKind() == CK_ArrayToPointerDecay) {
1711      LValue LV = EmitLValue(ICE->getSubExpr());
1712      unsigned Align = LV.getAlignment().getQuantity();
1713      if (!Align) {
1714        // FIXME: Once LValues are fixed to always set alignment,
1715        // zap this code.
1716        QualType PtTy = ICE->getSubExpr()->getType();
1717        if (!PtTy->isIncompleteType())
1718          Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
1719        else
1720          Align = 1;
1721      }
1722      return std::make_pair(LV.getAddress(), Align);
1723    }
1724  }
1725  if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(Addr)) {
1726    if (UO->getOpcode() == UO_AddrOf) {
1727      LValue LV = EmitLValue(UO->getSubExpr());
1728      unsigned Align = LV.getAlignment().getQuantity();
1729      if (!Align) {
1730        // FIXME: Once LValues are fixed to always set alignment,
1731        // zap this code.
1732        QualType PtTy = UO->getSubExpr()->getType();
1733        if (!PtTy->isIncompleteType())
1734          Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
1735        else
1736          Align = 1;
1737      }
1738      return std::make_pair(LV.getAddress(), Align);
1739    }
1740  }
1741
1742  unsigned Align = 1;
1743  QualType PtTy = Addr->getType()->getPointeeType();
1744  if (!PtTy->isIncompleteType())
1745    Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
1746
1747  return std::make_pair(EmitScalarExpr(Addr), Align);
1748}
1749
1750static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
1751                                           unsigned BuiltinID,
1752                                           const CallExpr *E) {
1753  unsigned int Int = 0;
1754  // Scalar result generated across vectors
1755  bool AcrossVec = false;
1756  // Extend element of one-element vector
1757  bool ExtendEle = false;
1758  bool OverloadInt = false;
1759  bool OverloadCmpInt = false;
1760  bool OverloadWideInt = false;
1761  bool OverloadNarrowInt = false;
1762  const char *s = NULL;
1763
1764  SmallVector<Value *, 4> Ops;
1765  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
1766    Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
1767  }
1768
1769  // AArch64 scalar builtins are not overloaded; they do not have an extra
1770  // argument that specifies the vector type, so we need to handle each case.
1771  switch (BuiltinID) {
1772  default: break;
1773  // Scalar Add
1774  case AArch64::BI__builtin_neon_vaddd_s64:
1775    Int = Intrinsic::aarch64_neon_vaddds;
1776    s = "vaddds"; break;
1777  case AArch64::BI__builtin_neon_vaddd_u64:
1778    Int = Intrinsic::aarch64_neon_vadddu;
1779    s = "vadddu"; break;
1780  // Scalar Sub
1781  case AArch64::BI__builtin_neon_vsubd_s64:
1782    Int = Intrinsic::aarch64_neon_vsubds;
1783    s = "vsubds"; break;
1784  case AArch64::BI__builtin_neon_vsubd_u64:
1785    Int = Intrinsic::aarch64_neon_vsubdu;
1786    s = "vsubdu"; break;
1787  // Scalar Saturating Add
1788  case AArch64::BI__builtin_neon_vqaddb_s8:
1789  case AArch64::BI__builtin_neon_vqaddh_s16:
1790  case AArch64::BI__builtin_neon_vqadds_s32:
1791  case AArch64::BI__builtin_neon_vqaddd_s64:
1792    Int = Intrinsic::aarch64_neon_vqadds;
1793    s = "vqadds"; OverloadInt = true; break;
1794  case AArch64::BI__builtin_neon_vqaddb_u8:
1795  case AArch64::BI__builtin_neon_vqaddh_u16:
1796  case AArch64::BI__builtin_neon_vqadds_u32:
1797  case AArch64::BI__builtin_neon_vqaddd_u64:
1798    Int = Intrinsic::aarch64_neon_vqaddu;
1799    s = "vqaddu"; OverloadInt = true; break;
1800  // Scalar Saturating Sub
1801  case AArch64::BI__builtin_neon_vqsubb_s8:
1802  case AArch64::BI__builtin_neon_vqsubh_s16:
1803  case AArch64::BI__builtin_neon_vqsubs_s32:
1804  case AArch64::BI__builtin_neon_vqsubd_s64:
1805    Int = Intrinsic::aarch64_neon_vqsubs;
1806    s = "vqsubs"; OverloadInt = true; break;
1807  case AArch64::BI__builtin_neon_vqsubb_u8:
1808  case AArch64::BI__builtin_neon_vqsubh_u16:
1809  case AArch64::BI__builtin_neon_vqsubs_u32:
1810  case AArch64::BI__builtin_neon_vqsubd_u64:
1811    Int = Intrinsic::aarch64_neon_vqsubu;
1812    s = "vqsubu"; OverloadInt = true; break;
1813  // Scalar Shift Left
1814  case AArch64::BI__builtin_neon_vshld_s64:
1815    Int = Intrinsic::aarch64_neon_vshlds;
1816    s = "vshlds"; break;
1817  case AArch64::BI__builtin_neon_vshld_u64:
1818    Int = Intrinsic::aarch64_neon_vshldu;
1819    s = "vshldu"; break;
1820  // Scalar Saturating Shift Left
1821  case AArch64::BI__builtin_neon_vqshlb_s8:
1822  case AArch64::BI__builtin_neon_vqshlh_s16:
1823  case AArch64::BI__builtin_neon_vqshls_s32:
1824  case AArch64::BI__builtin_neon_vqshld_s64:
1825    Int = Intrinsic::aarch64_neon_vqshls;
1826    s = "vqshls"; OverloadInt = true; break;
1827  case AArch64::BI__builtin_neon_vqshlb_u8:
1828  case AArch64::BI__builtin_neon_vqshlh_u16:
1829  case AArch64::BI__builtin_neon_vqshls_u32:
1830  case AArch64::BI__builtin_neon_vqshld_u64:
1831    Int = Intrinsic::aarch64_neon_vqshlu;
1832    s = "vqshlu"; OverloadInt = true; break;
1833  // Scalar Rounding Shift Left
1834  case AArch64::BI__builtin_neon_vrshld_s64:
1835    Int = Intrinsic::aarch64_neon_vrshlds;
1836    s = "vrshlds"; break;
1837  case AArch64::BI__builtin_neon_vrshld_u64:
1838    Int = Intrinsic::aarch64_neon_vrshldu;
1839    s = "vrshldu"; break;
1840  // Scalar Saturating Rounding Shift Left
1841  case AArch64::BI__builtin_neon_vqrshlb_s8:
1842  case AArch64::BI__builtin_neon_vqrshlh_s16:
1843  case AArch64::BI__builtin_neon_vqrshls_s32:
1844  case AArch64::BI__builtin_neon_vqrshld_s64:
1845    Int = Intrinsic::aarch64_neon_vqrshls;
1846    s = "vqrshls"; OverloadInt = true; break;
1847  case AArch64::BI__builtin_neon_vqrshlb_u8:
1848  case AArch64::BI__builtin_neon_vqrshlh_u16:
1849  case AArch64::BI__builtin_neon_vqrshls_u32:
1850  case AArch64::BI__builtin_neon_vqrshld_u64:
1851    Int = Intrinsic::aarch64_neon_vqrshlu;
1852    s = "vqrshlu"; OverloadInt = true; break;
1853  // Scalar Reduce Pairwise Add
1854  case AArch64::BI__builtin_neon_vpaddd_s64:
1855    Int = Intrinsic::aarch64_neon_vpadd; s = "vpadd";
1856    break;
1857  case AArch64::BI__builtin_neon_vpadds_f32:
1858    Int = Intrinsic::aarch64_neon_vpfadd; s = "vpfadd";
1859    break;
1860  case AArch64::BI__builtin_neon_vpaddd_f64:
1861    Int = Intrinsic::aarch64_neon_vpfaddq; s = "vpfaddq";
1862    break;
1863  // Scalar Reduce Pairwise Floating Point Max
1864  case AArch64::BI__builtin_neon_vpmaxs_f32:
1865    Int = Intrinsic::aarch64_neon_vpmax; s = "vpmax";
1866    break;
1867  case AArch64::BI__builtin_neon_vpmaxqd_f64:
1868    Int = Intrinsic::aarch64_neon_vpmaxq; s = "vpmaxq";
1869    break;
1870  // Scalar Reduce Pairwise Floating Point Min
1871  case AArch64::BI__builtin_neon_vpmins_f32:
1872    Int = Intrinsic::aarch64_neon_vpmin; s = "vpmin";
1873    break;
1874  case AArch64::BI__builtin_neon_vpminqd_f64:
1875    Int = Intrinsic::aarch64_neon_vpminq; s = "vpminq";
1876    break;
1877  // Scalar Reduce Pairwise Floating Point Maxnm
1878  case AArch64::BI__builtin_neon_vpmaxnms_f32:
1879    Int = Intrinsic::aarch64_neon_vpfmaxnm; s = "vpfmaxnm";
1880    break;
1881  case AArch64::BI__builtin_neon_vpmaxnmqd_f64:
1882    Int = Intrinsic::aarch64_neon_vpfmaxnmq; s = "vpfmaxnmq";
1883    break;
1884  // Scalar Reduce Pairwise Floating Point Minnm
1885  case AArch64::BI__builtin_neon_vpminnms_f32:
1886    Int = Intrinsic::aarch64_neon_vpfminnm; s = "vpfminnm";
1887    break;
1888  case AArch64::BI__builtin_neon_vpminnmqd_f64:
1889    Int = Intrinsic::aarch64_neon_vpfminnmq; s = "vpfminnmq";
1890    break;
1891  // The following are intrinsics with scalar results generated across the vector (AcrossVec)
1892  case AArch64::BI__builtin_neon_vaddlv_s8:
1893  case AArch64::BI__builtin_neon_vaddlv_s16:
1894  case AArch64::BI__builtin_neon_vaddlvq_s8:
1895  case AArch64::BI__builtin_neon_vaddlvq_s16:
1896  case AArch64::BI__builtin_neon_vaddlvq_s32:
1897    Int = Intrinsic::aarch64_neon_saddlv;
1898    AcrossVec = true; ExtendEle = true; s = "saddlv"; break;
1899  case AArch64::BI__builtin_neon_vaddlv_u8:
1900  case AArch64::BI__builtin_neon_vaddlv_u16:
1901  case AArch64::BI__builtin_neon_vaddlvq_u8:
1902  case AArch64::BI__builtin_neon_vaddlvq_u16:
1903  case AArch64::BI__builtin_neon_vaddlvq_u32:
1904    Int = Intrinsic::aarch64_neon_uaddlv;
1905    AcrossVec = true; ExtendEle = true; s = "uaddlv"; break;
1906  case AArch64::BI__builtin_neon_vmaxv_s8:
1907  case AArch64::BI__builtin_neon_vmaxv_s16:
1908  case AArch64::BI__builtin_neon_vmaxvq_s8:
1909  case AArch64::BI__builtin_neon_vmaxvq_s16:
1910  case AArch64::BI__builtin_neon_vmaxvq_s32:
1911    Int = Intrinsic::aarch64_neon_smaxv;
1912    AcrossVec = true; ExtendEle = false; s = "smaxv"; break;
1913  case AArch64::BI__builtin_neon_vmaxv_u8:
1914  case AArch64::BI__builtin_neon_vmaxv_u16:
1915  case AArch64::BI__builtin_neon_vmaxvq_u8:
1916  case AArch64::BI__builtin_neon_vmaxvq_u16:
1917  case AArch64::BI__builtin_neon_vmaxvq_u32:
1918    Int = Intrinsic::aarch64_neon_umaxv;
1919    AcrossVec = true; ExtendEle = false; s = "umaxv"; break;
1920  case AArch64::BI__builtin_neon_vminv_s8:
1921  case AArch64::BI__builtin_neon_vminv_s16:
1922  case AArch64::BI__builtin_neon_vminvq_s8:
1923  case AArch64::BI__builtin_neon_vminvq_s16:
1924  case AArch64::BI__builtin_neon_vminvq_s32:
1925    Int = Intrinsic::aarch64_neon_sminv;
1926    AcrossVec = true; ExtendEle = false; s = "sminv"; break;
1927  case AArch64::BI__builtin_neon_vminv_u8:
1928  case AArch64::BI__builtin_neon_vminv_u16:
1929  case AArch64::BI__builtin_neon_vminvq_u8:
1930  case AArch64::BI__builtin_neon_vminvq_u16:
1931  case AArch64::BI__builtin_neon_vminvq_u32:
1932    Int = Intrinsic::aarch64_neon_uminv;
1933    AcrossVec = true; ExtendEle = false; s = "uminv"; break;
1934  case AArch64::BI__builtin_neon_vaddv_s8:
1935  case AArch64::BI__builtin_neon_vaddv_s16:
1936  case AArch64::BI__builtin_neon_vaddvq_s8:
1937  case AArch64::BI__builtin_neon_vaddvq_s16:
1938  case AArch64::BI__builtin_neon_vaddvq_s32:
1939  case AArch64::BI__builtin_neon_vaddv_u8:
1940  case AArch64::BI__builtin_neon_vaddv_u16:
1941  case AArch64::BI__builtin_neon_vaddvq_u8:
1942  case AArch64::BI__builtin_neon_vaddvq_u16:
1943  case AArch64::BI__builtin_neon_vaddvq_u32:
1944    Int = Intrinsic::aarch64_neon_vaddv;
1945    AcrossVec = true; ExtendEle = false; s = "vaddv"; break;
1946  case AArch64::BI__builtin_neon_vmaxvq_f32:
1947    Int = Intrinsic::aarch64_neon_vmaxv;
1948    AcrossVec = true; ExtendEle = false; s = "vmaxv"; break;
1949  case AArch64::BI__builtin_neon_vminvq_f32:
1950    Int = Intrinsic::aarch64_neon_vminv;
1951    AcrossVec = true; ExtendEle = false; s = "vminv"; break;
1952  case AArch64::BI__builtin_neon_vmaxnmvq_f32:
1953    Int = Intrinsic::aarch64_neon_vmaxnmv;
1954    AcrossVec = true; ExtendEle = false; s = "vmaxnmv"; break;
1955  case AArch64::BI__builtin_neon_vminnmvq_f32:
1956    Int = Intrinsic::aarch64_neon_vminnmv;
1957    AcrossVec = true; ExtendEle = false; s = "vminnmv"; break;
1958  // Scalar Integer Saturating Doubling Multiply Half High
1959  case AArch64::BI__builtin_neon_vqdmulhh_s16:
1960  case AArch64::BI__builtin_neon_vqdmulhs_s32:
1961    Int = Intrinsic::arm_neon_vqdmulh;
1962    s = "vqdmulh"; OverloadInt = true; break;
1963  // Scalar Integer Saturating Rounding Doubling Multiply Half High
1964  case AArch64::BI__builtin_neon_vqrdmulhh_s16:
1965  case AArch64::BI__builtin_neon_vqrdmulhs_s32:
1966    Int = Intrinsic::arm_neon_vqrdmulh;
1967    s = "vqrdmulh"; OverloadInt = true; break;
1968  // Scalar Floating-point Multiply Extended
1969  case AArch64::BI__builtin_neon_vmulxs_f32:
1970  case AArch64::BI__builtin_neon_vmulxd_f64:
1971    Int = Intrinsic::aarch64_neon_vmulx;
1972    s = "vmulx"; OverloadInt = true; break;
1973  // Scalar Floating-point Reciprocal Step
1974  case AArch64::BI__builtin_neon_vrecpss_f32:
1975  case AArch64::BI__builtin_neon_vrecpsd_f64:
1976    Int = Intrinsic::arm_neon_vrecps;
1977    s = "vrecps"; OverloadInt = true; break;
1978  // Scalar Floating-point Reciprocal Square Root Step
1979  case AArch64::BI__builtin_neon_vrsqrtss_f32:
1980  case AArch64::BI__builtin_neon_vrsqrtsd_f64:
1981    Int = Intrinsic::arm_neon_vrsqrts;
1982    s = "vrsqrts"; OverloadInt = true; break;
1983  // Scalar Signed Integer Convert To Floating-point
1984  case AArch64::BI__builtin_neon_vcvts_f32_s32:
1985    Int = Intrinsic::aarch64_neon_vcvtf32_s32;
1986    s = "vcvtf"; OverloadInt = false; break;
1987  case AArch64::BI__builtin_neon_vcvtd_f64_s64:
1988    Int = Intrinsic::aarch64_neon_vcvtf64_s64;
1989    s = "vcvtf"; OverloadInt = false; break;
1990  // Scalar Unsigned Integer Convert To Floating-point
1991  case AArch64::BI__builtin_neon_vcvts_f32_u32:
1992    Int = Intrinsic::aarch64_neon_vcvtf32_u32;
1993    s = "vcvtf"; OverloadInt = false; break;
1994  case AArch64::BI__builtin_neon_vcvtd_f64_u64:
1995    Int = Intrinsic::aarch64_neon_vcvtf64_u64;
1996    s = "vcvtf"; OverloadInt = false; break;
1997  // Scalar Floating-point Reciprocal Estimate
1998  case AArch64::BI__builtin_neon_vrecpes_f32:
1999  case AArch64::BI__builtin_neon_vrecped_f64:
2000    Int = Intrinsic::arm_neon_vrecpe;
2001    s = "vrecpe"; OverloadInt = true; break;
2002  // Scalar Floating-point Reciprocal Exponent
2003  case AArch64::BI__builtin_neon_vrecpxs_f32:
2004  case AArch64::BI__builtin_neon_vrecpxd_f64:
2005    Int = Intrinsic::aarch64_neon_vrecpx;
2006    s = "vrecpx"; OverloadInt = true; break;
2007  // Scalar Floating-point Reciprocal Square Root Estimate
2008  case AArch64::BI__builtin_neon_vrsqrtes_f32:
2009  case AArch64::BI__builtin_neon_vrsqrted_f64:
2010    Int = Intrinsic::arm_neon_vrsqrte;
2011    s = "vrsqrte"; OverloadInt = true; break;
2012  // Scalar Compare Equal
2013  case AArch64::BI__builtin_neon_vceqd_s64:
2014  case AArch64::BI__builtin_neon_vceqd_u64:
2015    Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
2016    OverloadCmpInt = true; break;
2017  // Scalar Compare Equal To Zero
2018  case AArch64::BI__builtin_neon_vceqzd_s64:
2019  case AArch64::BI__builtin_neon_vceqzd_u64:
2020    Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
2021    // Add implicit zero operand.
2022    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2023    OverloadCmpInt = true; break;
2024  // Scalar Compare Greater Than or Equal
2025  case AArch64::BI__builtin_neon_vcged_s64:
2026    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
2027    OverloadCmpInt = true; break;
2028  case AArch64::BI__builtin_neon_vcged_u64:
2029    Int = Intrinsic::aarch64_neon_vchs; s = "vcge";
2030    OverloadCmpInt = true; break;
2031  // Scalar Compare Greater Than or Equal To Zero
2032  case AArch64::BI__builtin_neon_vcgezd_s64:
2033    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
2034    // Add implicit zero operand.
2035    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2036    OverloadCmpInt = true; break;
2037  // Scalar Compare Greater Than
2038  case AArch64::BI__builtin_neon_vcgtd_s64:
2039    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
2040    OverloadCmpInt = true; break;
2041  case AArch64::BI__builtin_neon_vcgtd_u64:
2042    Int = Intrinsic::aarch64_neon_vchi; s = "vcgt";
2043    OverloadCmpInt = true; break;
2044  // Scalar Compare Greater Than Zero
2045  case AArch64::BI__builtin_neon_vcgtzd_s64:
2046    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
2047    // Add implicit zero operand.
2048    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2049    OverloadCmpInt = true; break;
2050  // Scalar Compare Less Than or Equal
2051  case AArch64::BI__builtin_neon_vcled_s64:
2052    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
2053    OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
2054  case AArch64::BI__builtin_neon_vcled_u64:
2055    Int = Intrinsic::aarch64_neon_vchs; s = "vchs";
2056    OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
2057  // Scalar Compare Less Than or Equal To Zero
2058  case AArch64::BI__builtin_neon_vclezd_s64:
2059    Int = Intrinsic::aarch64_neon_vclez; s = "vcle";
2060    // Add implicit zero operand.
2061    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2062    OverloadCmpInt = true; break;
2063  // Scalar Compare Less Than
2064  case AArch64::BI__builtin_neon_vcltd_s64:
2065    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
2066    OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
2067  case AArch64::BI__builtin_neon_vcltd_u64:
2068    Int = Intrinsic::aarch64_neon_vchi; s = "vchi";
2069    OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
2070  // Scalar Compare Less Than Zero
2071  case AArch64::BI__builtin_neon_vcltzd_s64:
2072    Int = Intrinsic::aarch64_neon_vcltz; s = "vclt";
2073    // Add implicit zero operand.
2074    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2075    OverloadCmpInt = true; break;
2076  // Scalar Floating-point Compare Equal
2077  case AArch64::BI__builtin_neon_vceqs_f32:
2078  case AArch64::BI__builtin_neon_vceqd_f64:
2079    Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
2080    OverloadCmpInt = true; break;
2081  // Scalar Floating-point Compare Equal To Zero
2082  case AArch64::BI__builtin_neon_vceqzs_f32:
2083  case AArch64::BI__builtin_neon_vceqzd_f64:
2084    Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
2085    // Add implicit zero operand.
2086    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2087    OverloadCmpInt = true; break;
2088  // Scalar Floating-point Compare Greater Than Or Equal
2089  case AArch64::BI__builtin_neon_vcges_f32:
2090  case AArch64::BI__builtin_neon_vcged_f64:
2091    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
2092    OverloadCmpInt = true; break;
2093  // Scalar Floating-point Compare Greater Than Or Equal To Zero
2094  case AArch64::BI__builtin_neon_vcgezs_f32:
2095  case AArch64::BI__builtin_neon_vcgezd_f64:
2096    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
2097    // Add implicit zero operand.
2098    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2099    OverloadCmpInt = true; break;
2100  // Scalar Floating-point Compare Greater Than
2101  case AArch64::BI__builtin_neon_vcgts_f32:
2102  case AArch64::BI__builtin_neon_vcgtd_f64:
2103    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
2104    OverloadCmpInt = true; break;
2105  // Scalar Floating-point Compare Greater Than Zero
2106  case AArch64::BI__builtin_neon_vcgtzs_f32:
2107  case AArch64::BI__builtin_neon_vcgtzd_f64:
2108    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
2109    // Add implicit zero operand.
2110    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2111    OverloadCmpInt = true; break;
2112  // Scalar Floating-point Compare Less Than or Equal
2113  case AArch64::BI__builtin_neon_vcles_f32:
2114  case AArch64::BI__builtin_neon_vcled_f64:
2115    Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
2116    OverloadCmpInt = true; break;
2117  // Scalar Floating-point Compare Less Than Or Equal To Zero
2118  case AArch64::BI__builtin_neon_vclezs_f32:
2119  case AArch64::BI__builtin_neon_vclezd_f64:
2120    Int = Intrinsic::aarch64_neon_vclez; s = "vcle";
2121    // Add implicit zero operand.
2122    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2123    OverloadCmpInt = true; break;
2124  // Scalar Floating-point Compare Less Than
2125  case AArch64::BI__builtin_neon_vclts_f32:
2126  case AArch64::BI__builtin_neon_vcltd_f64:
2127    Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
2128    OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
2129  // Scalar Floating-point Compare Less Than Zero
2130  case AArch64::BI__builtin_neon_vcltzs_f32:
2131  case AArch64::BI__builtin_neon_vcltzd_f64:
2132    Int = Intrinsic::aarch64_neon_vcltz; s = "vclt";
2133    // Add implicit zero operand.
2134    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
2135    OverloadCmpInt = true; break;
2136  // Scalar Floating-point Absolute Compare Greater Than Or Equal
2137  case AArch64::BI__builtin_neon_vcages_f32:
2138  case AArch64::BI__builtin_neon_vcaged_f64:
2139    Int = Intrinsic::aarch64_neon_vcage; s = "vcage";
2140    OverloadCmpInt = true; break;
2141  // Scalar Floating-point Absolute Compare Greater Than
2142  case AArch64::BI__builtin_neon_vcagts_f32:
2143  case AArch64::BI__builtin_neon_vcagtd_f64:
2144    Int = Intrinsic::aarch64_neon_vcagt; s = "vcagt";
2145    OverloadCmpInt = true; break;
2146  // Scalar Floating-point Absolute Compare Less Than Or Equal
2147  case AArch64::BI__builtin_neon_vcales_f32:
2148  case AArch64::BI__builtin_neon_vcaled_f64:
2149    Int = Intrinsic::aarch64_neon_vcage; s = "vcage";
2150    OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
2151  // Scalar Floating-point Absolute Compare Less Than
2152  case AArch64::BI__builtin_neon_vcalts_f32:
2153  case AArch64::BI__builtin_neon_vcaltd_f64:
2154    Int = Intrinsic::aarch64_neon_vcagt; s = "vcalt";
2155    OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
2156  // Scalar Compare Bitwise Test Bits
2157  case AArch64::BI__builtin_neon_vtstd_s64:
2158  case AArch64::BI__builtin_neon_vtstd_u64:
2159    Int = Intrinsic::aarch64_neon_vtstd; s = "vtst";
2160    OverloadCmpInt = true; break;
2161  // Scalar Absolute Value
2162  case AArch64::BI__builtin_neon_vabsd_s64:
2163    Int = Intrinsic::aarch64_neon_vabs;
2164    s = "vabs"; OverloadInt = false; break;
2165  // Scalar Signed Saturating Absolute Value
2166  case AArch64::BI__builtin_neon_vqabsb_s8:
2167  case AArch64::BI__builtin_neon_vqabsh_s16:
2168  case AArch64::BI__builtin_neon_vqabss_s32:
2169  case AArch64::BI__builtin_neon_vqabsd_s64:
2170    Int = Intrinsic::arm_neon_vqabs;
2171    s = "vqabs"; OverloadInt = true; break;
2172  // Scalar Negate
2173  case AArch64::BI__builtin_neon_vnegd_s64:
2174    Int = Intrinsic::aarch64_neon_vneg;
2175    s = "vneg"; OverloadInt = false; break;
2176  // Scalar Signed Saturating Negate
2177  case AArch64::BI__builtin_neon_vqnegb_s8:
2178  case AArch64::BI__builtin_neon_vqnegh_s16:
2179  case AArch64::BI__builtin_neon_vqnegs_s32:
2180  case AArch64::BI__builtin_neon_vqnegd_s64:
2181    Int = Intrinsic::arm_neon_vqneg;
2182    s = "vqneg"; OverloadInt = true; break;
2183  // Scalar Signed Saturating Accumulate of Unsigned Value
2184  case AArch64::BI__builtin_neon_vuqaddb_s8:
2185  case AArch64::BI__builtin_neon_vuqaddh_s16:
2186  case AArch64::BI__builtin_neon_vuqadds_s32:
2187  case AArch64::BI__builtin_neon_vuqaddd_s64:
2188    Int = Intrinsic::aarch64_neon_vuqadd;
2189    s = "vuqadd"; OverloadInt = true; break;
2190  // Scalar Unsigned Saturating Accumulate of Signed Value
2191  case AArch64::BI__builtin_neon_vsqaddb_u8:
2192  case AArch64::BI__builtin_neon_vsqaddh_u16:
2193  case AArch64::BI__builtin_neon_vsqadds_u32:
2194  case AArch64::BI__builtin_neon_vsqaddd_u64:
2195    Int = Intrinsic::aarch64_neon_vsqadd;
2196    s = "vsqadd"; OverloadInt = true; break;
2197  // Signed Saturating Doubling Multiply-Add Long
2198  case AArch64::BI__builtin_neon_vqdmlalh_s16:
2199  case AArch64::BI__builtin_neon_vqdmlals_s32:
2200    Int = Intrinsic::aarch64_neon_vqdmlal;
2201    s = "vqdmlal"; OverloadWideInt = true; break;
2202  // Signed Saturating Doubling Multiply-Subtract Long
2203  case AArch64::BI__builtin_neon_vqdmlslh_s16:
2204  case AArch64::BI__builtin_neon_vqdmlsls_s32:
2205    Int = Intrinsic::aarch64_neon_vqdmlsl;
2206    s = "vqdmlsl"; OverloadWideInt = true; break;
2207  // Signed Saturating Doubling Multiply Long
2208  case AArch64::BI__builtin_neon_vqdmullh_s16:
2209  case AArch64::BI__builtin_neon_vqdmulls_s32:
2210    Int = Intrinsic::aarch64_neon_vqdmull;
2211    s = "vqdmull"; OverloadWideInt = true; break;
2212  // Scalar Signed Saturating Extract Unsigned Narrow
2213  case AArch64::BI__builtin_neon_vqmovunh_s16:
2214  case AArch64::BI__builtin_neon_vqmovuns_s32:
2215  case AArch64::BI__builtin_neon_vqmovund_s64:
2216    Int = Intrinsic::arm_neon_vqmovnsu;
2217    s = "vqmovun"; OverloadNarrowInt = true; break;
2218  // Scalar Signed Saturating Extract Narrow
2219  case AArch64::BI__builtin_neon_vqmovnh_s16:
2220  case AArch64::BI__builtin_neon_vqmovns_s32:
2221  case AArch64::BI__builtin_neon_vqmovnd_s64:
2222    Int = Intrinsic::arm_neon_vqmovns;
2223    s = "vqmovn"; OverloadNarrowInt = true; break;
2224  // Scalar Unsigned Saturating Extract Narrow
2225  case AArch64::BI__builtin_neon_vqmovnh_u16:
2226  case AArch64::BI__builtin_neon_vqmovns_u32:
2227  case AArch64::BI__builtin_neon_vqmovnd_u64:
2228    Int = Intrinsic::arm_neon_vqmovnu;
2229    s = "vqmovn"; OverloadNarrowInt = true; break;
2230  // Scalar Signed Shift Right (Immediate)
2231  case AArch64::BI__builtin_neon_vshrd_n_s64:
2232    Int = Intrinsic::aarch64_neon_vshrds_n;
2233    s = "vsshr"; OverloadInt = false; break;
2234  // Scalar Unsigned Shift Right (Immediate)
2235  case AArch64::BI__builtin_neon_vshrd_n_u64:
2236    Int = Intrinsic::aarch64_neon_vshrdu_n;
2237    s = "vushr"; OverloadInt = false; break;
2238  // Scalar Signed Rounding Shift Right (Immediate)
2239  case AArch64::BI__builtin_neon_vrshrd_n_s64:
2240    Int = Intrinsic::aarch64_neon_vrshrds_n;
2241    s = "vsrshr"; OverloadInt = false; break;
2242  // Scalar Unsigned Rounding Shift Right (Immediate)
2243  case AArch64::BI__builtin_neon_vrshrd_n_u64:
2244    Int = Intrinsic::aarch64_neon_vrshrdu_n;
2245    s = "vurshr"; OverloadInt = false; break;
2246  // Scalar Signed Shift Right and Accumulate (Immediate)
2247  case AArch64::BI__builtin_neon_vsrad_n_s64:
2248    Int = Intrinsic::aarch64_neon_vsrads_n;
2249    s = "vssra"; OverloadInt = false; break;
2250  // Scalar Unsigned Shift Right and Accumulate (Immediate)
2251  case AArch64::BI__builtin_neon_vsrad_n_u64:
2252    Int = Intrinsic::aarch64_neon_vsradu_n;
2253    s = "vusra"; OverloadInt = false; break;
2254  // Scalar Signed Rounding Shift Right and Accumulate (Immediate)
2255  case AArch64::BI__builtin_neon_vrsrad_n_s64:
2256    Int = Intrinsic::aarch64_neon_vrsrads_n;
2257    s = "vsrsra"; OverloadInt = false; break;
2258  // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
2259  case AArch64::BI__builtin_neon_vrsrad_n_u64:
2260    Int = Intrinsic::aarch64_neon_vrsradu_n;
2261    s = "vursra"; OverloadInt = false; break;
2262  // Scalar Signed/Unsigned Shift Left (Immediate)
2263  case AArch64::BI__builtin_neon_vshld_n_s64:
2264  case AArch64::BI__builtin_neon_vshld_n_u64:
2265    Int = Intrinsic::aarch64_neon_vshld_n;
2266    s = "vshl"; OverloadInt = false; break;
2267  // Signed Saturating Shift Left (Immediate)
2268  case AArch64::BI__builtin_neon_vqshlb_n_s8:
2269  case AArch64::BI__builtin_neon_vqshlh_n_s16:
2270  case AArch64::BI__builtin_neon_vqshls_n_s32:
2271  case AArch64::BI__builtin_neon_vqshld_n_s64:
2272    Int = Intrinsic::aarch64_neon_vqshls_n;
2273    s = "vsqshl"; OverloadInt = true; break;
2274  // Unsigned Saturating Shift Left (Immediate)
2275  case AArch64::BI__builtin_neon_vqshlb_n_u8:
2276  case AArch64::BI__builtin_neon_vqshlh_n_u16:
2277  case AArch64::BI__builtin_neon_vqshls_n_u32:
2278  case AArch64::BI__builtin_neon_vqshld_n_u64:
2279    Int = Intrinsic::aarch64_neon_vqshlu_n;
2280    s = "vuqshl"; OverloadInt = true; break;
2281  // Signed Saturating Shift Left Unsigned (Immediate)
2282  case AArch64::BI__builtin_neon_vqshlub_n_s8:
2283  case AArch64::BI__builtin_neon_vqshluh_n_s16:
2284  case AArch64::BI__builtin_neon_vqshlus_n_s32:
2285  case AArch64::BI__builtin_neon_vqshlud_n_s64:
2286    Int = Intrinsic::aarch64_neon_vqshlus_n;
2287    s = "vsqshlu"; OverloadInt = true; break;
2288  // Shift Right And Insert (Immediate)
2289  case AArch64::BI__builtin_neon_vsrid_n_s64:
2290  case AArch64::BI__builtin_neon_vsrid_n_u64:
2291    Int = Intrinsic::aarch64_neon_vsrid_n;
2292    s = "vsri"; OverloadInt = false; break;
2293  // Shift Left And Insert (Immediate)
2294  case AArch64::BI__builtin_neon_vslid_n_s64:
2295  case AArch64::BI__builtin_neon_vslid_n_u64:
2296    Int = Intrinsic::aarch64_neon_vslid_n;
2297    s = "vsli"; OverloadInt = false; break;
2298  // Signed Saturating Shift Right Narrow (Immediate)
2299  case AArch64::BI__builtin_neon_vqshrnh_n_s16:
2300  case AArch64::BI__builtin_neon_vqshrns_n_s32:
2301  case AArch64::BI__builtin_neon_vqshrnd_n_s64:
2302    Int = Intrinsic::aarch64_neon_vsqshrn;
2303    s = "vsqshrn"; OverloadInt = true; break;
2304  // Unsigned Saturating Shift Right Narrow (Immediate)
2305  case AArch64::BI__builtin_neon_vqshrnh_n_u16:
2306  case AArch64::BI__builtin_neon_vqshrns_n_u32:
2307  case AArch64::BI__builtin_neon_vqshrnd_n_u64:
2308    Int = Intrinsic::aarch64_neon_vuqshrn;
2309    s = "vuqshrn"; OverloadInt = true; break;
2310  // Signed Saturating Rounded Shift Right Narrow (Immediate)
2311  case AArch64::BI__builtin_neon_vqrshrnh_n_s16:
2312  case AArch64::BI__builtin_neon_vqrshrns_n_s32:
2313  case AArch64::BI__builtin_neon_vqrshrnd_n_s64:
2314    Int = Intrinsic::aarch64_neon_vsqrshrn;
2315    s = "vsqrshrn"; OverloadInt = true; break;
2316  // Unsigned Saturating Rounded Shift Right Narrow (Immediate)
2317  case AArch64::BI__builtin_neon_vqrshrnh_n_u16:
2318  case AArch64::BI__builtin_neon_vqrshrns_n_u32:
2319  case AArch64::BI__builtin_neon_vqrshrnd_n_u64:
2320    Int = Intrinsic::aarch64_neon_vuqrshrn;
2321    s = "vuqrshrn"; OverloadInt = true; break;
2322  // Signed Saturating Shift Right Unsigned Narrow (Immediate)
2323  case AArch64::BI__builtin_neon_vqshrunh_n_s16:
2324  case AArch64::BI__builtin_neon_vqshruns_n_s32:
2325  case AArch64::BI__builtin_neon_vqshrund_n_s64:
2326    Int = Intrinsic::aarch64_neon_vsqshrun;
2327    s = "vsqshrun"; OverloadInt = true; break;
2328  // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
2329  case AArch64::BI__builtin_neon_vqrshrunh_n_s16:
2330  case AArch64::BI__builtin_neon_vqrshruns_n_s32:
2331  case AArch64::BI__builtin_neon_vqrshrund_n_s64:
2332    Int = Intrinsic::aarch64_neon_vsqrshrun;
2333    s = "vsqrshrun"; OverloadInt = true; break;
2334  // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
2335  case AArch64::BI__builtin_neon_vcvts_n_f32_s32:
2336    Int = Intrinsic::aarch64_neon_vcvtf32_n_s32;
2337    s = "vcvtf"; OverloadInt = false; break;
2338  case AArch64::BI__builtin_neon_vcvtd_n_f64_s64:
2339    Int = Intrinsic::aarch64_neon_vcvtf64_n_s64;
2340    s = "vcvtf"; OverloadInt = false; break;
2341  // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
2342  case AArch64::BI__builtin_neon_vcvts_n_f32_u32:
2343    Int = Intrinsic::aarch64_neon_vcvtf32_n_u32;
2344    s = "vcvtf"; OverloadInt = false; break;
2345  case AArch64::BI__builtin_neon_vcvtd_n_f64_u64:
2346    Int = Intrinsic::aarch64_neon_vcvtf64_n_u64;
2347    s = "vcvtf"; OverloadInt = false; break;
2348  }
2349
2350  if (!Int)
2351    return 0;
2352
2353  // An AArch64 scalar builtin returns a scalar type, but it may need to be
2354  // mapped to an AArch64 intrinsic that returns a one-element vector type;
2355  // the result is bitcast back to the scalar type at the end.
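      // For instance, vqaddd_s64 takes and returns i64 at the source level, but
      // the overloaded intrinsic below is instantiated on <1 x i64>; the final
      // bitcast converts the one-element vector result back to i64.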
2356  Function *F = 0;
2357  if (AcrossVec) {
2358    // Determine the argument's vector type.
2359    const Expr *Arg = E->getArg(E->getNumArgs()-1);
2360    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
2361    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2362    llvm::Type *ETy = VTy->getElementType();
2363    llvm::VectorType *RTy = llvm::VectorType::get(ETy, 1);
2364
2365    if (ExtendEle) {
2366      assert(!ETy->isFloatingPointTy());
2367      RTy = llvm::VectorType::getExtendedElementVectorType(RTy);
2368    }
2369
2370    llvm::Type *Tys[2] = {RTy, VTy};
2371    F = CGF.CGM.getIntrinsic(Int, Tys);
2372    assert(E->getNumArgs() == 1);
2373  } else if (OverloadInt) {
2374    // Determine the type of this overloaded AArch64 intrinsic
2375    llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
2376    llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
2377    assert(VTy);
2378
2379    F = CGF.CGM.getIntrinsic(Int, VTy);
2380  } else if (OverloadWideInt || OverloadNarrowInt) {
2381    // Determine the type of this overloaded AArch64 intrinsic
2382    const Expr *Arg = E->getArg(E->getNumArgs()-1);
2383    llvm::Type *Ty = CGF.ConvertType(Arg->getType());
2384    llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
2385    llvm::VectorType *RTy = OverloadWideInt ?
2386      llvm::VectorType::getExtendedElementVectorType(VTy) :
2387      llvm::VectorType::getTruncatedElementVectorType(VTy);
2388    F = CGF.CGM.getIntrinsic(Int, RTy);
2389  } else if (OverloadCmpInt) {
2390    // Determine the types of this overloaded AArch64 intrinsic
2391    SmallVector<llvm::Type *, 3> Tys;
2392    const Expr *Arg = E->getArg(E->getNumArgs()-1);
2393    llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
2394    llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
2395    Tys.push_back(VTy);
2396    Ty = CGF.ConvertType(Arg->getType());
2397    VTy = llvm::VectorType::get(Ty, 1);
2398    Tys.push_back(VTy);
2399    Tys.push_back(VTy);
2400
2401    F = CGF.CGM.getIntrinsic(Int, Tys);
2402  } else
2403    F = CGF.CGM.getIntrinsic(Int);
2404
2405  Value *Result = CGF.EmitNeonCall(F, Ops, s);
2406  llvm::Type *ResultType = CGF.ConvertType(E->getType());
2407  // Cast the one-element vector result of the AArch64 intrinsic back to the
2408  // scalar type expected by the builtin.
2409  return CGF.Builder.CreateBitCast(Result, ResultType, s);
2410}
2411
2412Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
2413                                                     const CallExpr *E) {
2414
2415  // Process AArch64 scalar builtins
2416  if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E))
2417    return Result;
2418
2419  if (BuiltinID == AArch64::BI__clear_cache) {
2420    assert(E->getNumArgs() == 2 &&
2421           "Variadic __clear_cache slipped through on AArch64");
2422
2423    const FunctionDecl *FD = E->getDirectCallee();
2424    SmallVector<Value *, 2> Ops;
2425    for (unsigned i = 0; i < E->getNumArgs(); i++)
2426      Ops.push_back(EmitScalarExpr(E->getArg(i)));
2427    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2428    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2429    StringRef Name = FD->getName();
2430    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2431  }
2432
2433  SmallVector<Value *, 4> Ops;
2434  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
2435    Ops.push_back(EmitScalarExpr(E->getArg(i)));
2436  }
2437  // Some intrinsics aren't overloaded.
2438  switch (BuiltinID) {
2439  default: break;
2440  case AArch64::BI__builtin_neon_vget_lane_i8:
2441  case AArch64::BI__builtin_neon_vget_lane_i16:
2442  case AArch64::BI__builtin_neon_vget_lane_i32:
2443  case AArch64::BI__builtin_neon_vget_lane_i64:
2444  case AArch64::BI__builtin_neon_vget_lane_f32:
2445  case AArch64::BI__builtin_neon_vget_lane_f64:
2446  case AArch64::BI__builtin_neon_vgetq_lane_i8:
2447  case AArch64::BI__builtin_neon_vgetq_lane_i16:
2448  case AArch64::BI__builtin_neon_vgetq_lane_i32:
2449  case AArch64::BI__builtin_neon_vgetq_lane_i64:
2450  case AArch64::BI__builtin_neon_vgetq_lane_f32:
2451  case AArch64::BI__builtin_neon_vgetq_lane_f64:
2452    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E);
2453  case AArch64::BI__builtin_neon_vset_lane_i8:
2454  case AArch64::BI__builtin_neon_vset_lane_i16:
2455  case AArch64::BI__builtin_neon_vset_lane_i32:
2456  case AArch64::BI__builtin_neon_vset_lane_i64:
2457  case AArch64::BI__builtin_neon_vset_lane_f32:
2458  case AArch64::BI__builtin_neon_vset_lane_f64:
2459  case AArch64::BI__builtin_neon_vsetq_lane_i8:
2460  case AArch64::BI__builtin_neon_vsetq_lane_i16:
2461  case AArch64::BI__builtin_neon_vsetq_lane_i32:
2462  case AArch64::BI__builtin_neon_vsetq_lane_i64:
2463  case AArch64::BI__builtin_neon_vsetq_lane_f32:
2464  case AArch64::BI__builtin_neon_vsetq_lane_f64:
2465    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E);
2466  }
2467
2468  // Get the last argument, which specifies the vector type.
2469  llvm::APSInt Result;
2470  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
2471  if (!Arg->isIntegerConstantExpr(Result, getContext()))
2472    return 0;
2473
2474  // Determine the type of this overloaded NEON intrinsic.
2475  NeonTypeFlags Type(Result.getZExtValue());
2476  bool usgn = Type.isUnsigned();
2477
2478  llvm::VectorType *VTy = GetNeonType(this, Type);
2479  llvm::Type *Ty = VTy;
2480  if (!Ty)
2481    return 0;
2482
2483  unsigned Int;
2484  switch (BuiltinID) {
2485  default:
2486    return 0;
2487
2488  // AArch64 builtins mapping to legacy ARM v7 builtins.
2489  // FIXME: the mapped builtins listed here correspond to those tested so far
2490  // in aarch64-neon-intrinsics.c.
2491  case AArch64::BI__builtin_neon_vmul_v:
2492    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmul_v, E);
2493  case AArch64::BI__builtin_neon_vmulq_v:
2494    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmulq_v, E);
2495  case AArch64::BI__builtin_neon_vabd_v:
2496    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabd_v, E);
2497  case AArch64::BI__builtin_neon_vabdq_v:
2498    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabdq_v, E);
2499  case AArch64::BI__builtin_neon_vfma_v:
2500    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfma_v, E);
2501  case AArch64::BI__builtin_neon_vfmaq_v:
2502    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfmaq_v, E);
2503  case AArch64::BI__builtin_neon_vbsl_v:
2504    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbsl_v, E);
2505  case AArch64::BI__builtin_neon_vbslq_v:
2506    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbslq_v, E);
2507  case AArch64::BI__builtin_neon_vrsqrts_v:
2508    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrts_v, E);
2509  case AArch64::BI__builtin_neon_vrsqrtsq_v:
2510    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrtsq_v, E);
2511  case AArch64::BI__builtin_neon_vrecps_v:
2512    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecps_v, E);
2513  case AArch64::BI__builtin_neon_vrecpsq_v:
2514    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpsq_v, E);
2515  case AArch64::BI__builtin_neon_vcage_v:
2516    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcage_v, E);
2517  case AArch64::BI__builtin_neon_vcale_v:
2518    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcale_v, E);
2519  case AArch64::BI__builtin_neon_vcaleq_v:
2520    std::swap(Ops[0], Ops[1]);
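        // Fall through: vcaleq(a, b) is emitted as vcageq(b, a) with the operands
        // swapped.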
2521  case AArch64::BI__builtin_neon_vcageq_v: {
2522    Function *F;
2523    if (VTy->getElementType()->isIntegerTy(64))
2524      F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgeq);
2525    else
2526      F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
2527    return EmitNeonCall(F, Ops, "vcage");
2528  }
2529  case AArch64::BI__builtin_neon_vcalt_v:
2530    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcalt_v, E);
2531  case AArch64::BI__builtin_neon_vcagt_v:
2532    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcagt_v, E);
2533  case AArch64::BI__builtin_neon_vcaltq_v:
2534    std::swap(Ops[0], Ops[1]);
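        // Fall through: vcaltq(a, b) is emitted as vcagtq(b, a) with the operands
        // swapped.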
2535  case AArch64::BI__builtin_neon_vcagtq_v: {
2536    Function *F;
2537    if (VTy->getElementType()->isIntegerTy(64))
2538      F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgtq);
2539    else
2540      F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
2541    return EmitNeonCall(F, Ops, "vcagt");
2542  }
2543  case AArch64::BI__builtin_neon_vtst_v:
2544    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtst_v, E);
2545  case AArch64::BI__builtin_neon_vtstq_v:
2546    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtstq_v, E);
2547  case AArch64::BI__builtin_neon_vhadd_v:
2548    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhadd_v, E);
2549  case AArch64::BI__builtin_neon_vhaddq_v:
2550    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhaddq_v, E);
2551  case AArch64::BI__builtin_neon_vhsub_v:
2552    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsub_v, E);
2553  case AArch64::BI__builtin_neon_vhsubq_v:
2554    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsubq_v, E);
2555  case AArch64::BI__builtin_neon_vrhadd_v:
2556    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhadd_v, E);
2557  case AArch64::BI__builtin_neon_vrhaddq_v:
2558    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhaddq_v, E);
2559  case AArch64::BI__builtin_neon_vqadd_v:
2560    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqadd_v, E);
2561  case AArch64::BI__builtin_neon_vqaddq_v:
2562    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqaddq_v, E);
2563  case AArch64::BI__builtin_neon_vqsub_v:
2564    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsub_v, E);
2565  case AArch64::BI__builtin_neon_vqsubq_v:
2566    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsubq_v, E);
2567  case AArch64::BI__builtin_neon_vshl_v:
2568    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_v, E);
2569  case AArch64::BI__builtin_neon_vshlq_v:
2570    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_v, E);
2571  case AArch64::BI__builtin_neon_vqshl_v:
2572    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_v, E);
2573  case AArch64::BI__builtin_neon_vqshlq_v:
2574    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_v, E);
2575  case AArch64::BI__builtin_neon_vrshl_v:
2576    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshl_v, E);
2577  case AArch64::BI__builtin_neon_vrshlq_v:
2578    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshlq_v, E);
2579  case AArch64::BI__builtin_neon_vqrshl_v:
2580    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshl_v, E);
2581  case AArch64::BI__builtin_neon_vqrshlq_v:
2582    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshlq_v, E);
2583  case AArch64::BI__builtin_neon_vaddhn_v:
2584    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vaddhn_v, E);
2585  case AArch64::BI__builtin_neon_vraddhn_v:
2586    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vraddhn_v, E);
2587  case AArch64::BI__builtin_neon_vsubhn_v:
2588    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsubhn_v, E);
2589  case AArch64::BI__builtin_neon_vrsubhn_v:
2590    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsubhn_v, E);
2591  case AArch64::BI__builtin_neon_vmull_v:
2592    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmull_v, E);
2593  case AArch64::BI__builtin_neon_vqdmull_v:
2594    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmull_v, E);
2595  case AArch64::BI__builtin_neon_vqdmlal_v:
2596    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlal_v, E);
2597  case AArch64::BI__builtin_neon_vqdmlsl_v:
2598    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlsl_v, E);
2599  case AArch64::BI__builtin_neon_vmax_v:
2600    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmax_v, E);
2601  case AArch64::BI__builtin_neon_vmaxq_v:
2602    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmaxq_v, E);
2603  case AArch64::BI__builtin_neon_vmin_v:
2604    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmin_v, E);
2605  case AArch64::BI__builtin_neon_vminq_v:
2606    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vminq_v, E);
2607  case AArch64::BI__builtin_neon_vpmax_v:
2608    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmax_v, E);
2609  case AArch64::BI__builtin_neon_vpmin_v:
2610    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmin_v, E);
2611  case AArch64::BI__builtin_neon_vpadd_v:
2612    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpadd_v, E);
2613  case AArch64::BI__builtin_neon_vqdmulh_v:
2614    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulh_v, E);
2615  case AArch64::BI__builtin_neon_vqdmulhq_v:
2616    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulhq_v, E);
2617  case AArch64::BI__builtin_neon_vqrdmulh_v:
2618    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulh_v, E);
2619  case AArch64::BI__builtin_neon_vqrdmulhq_v:
2620    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulhq_v, E);
2621
2622  // Shift by immediate
2623  case AArch64::BI__builtin_neon_vshr_n_v:
2624    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshr_n_v, E);
2625  case AArch64::BI__builtin_neon_vshrq_n_v:
2626    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshrq_n_v, E);
2627  case AArch64::BI__builtin_neon_vrshr_n_v:
2628  case AArch64::BI__builtin_neon_vrshrq_n_v:
2629    Int = usgn ? Intrinsic::aarch64_neon_vurshr
2630               : Intrinsic::aarch64_neon_vsrshr;
2631    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n");
2632  case AArch64::BI__builtin_neon_vsra_n_v:
2633    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsra_n_v, E);
2634  case AArch64::BI__builtin_neon_vsraq_n_v:
2635    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsraq_n_v, E);
2636  case AArch64::BI__builtin_neon_vrsra_n_v:
2637  case AArch64::BI__builtin_neon_vrsraq_n_v: {
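        // A sketch of the lowering (names illustrative): vrsra_n(a, b, n) becomes
        // a + vrshr_n(b, n), i.e. rounding-shift b right by n, then accumulate
        // into a.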
2638    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2639    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2640    Int = usgn ? Intrinsic::aarch64_neon_vurshr
2641               : Intrinsic::aarch64_neon_vsrshr;
2642    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
2643    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
2644  }
2645  case AArch64::BI__builtin_neon_vshl_n_v:
2646    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_n_v, E);
2647  case AArch64::BI__builtin_neon_vshlq_n_v:
2648    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_n_v, E);
2649  case AArch64::BI__builtin_neon_vqshl_n_v:
2650    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_n_v, E);
2651  case AArch64::BI__builtin_neon_vqshlq_n_v:
2652    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_n_v, E);
2653  case AArch64::BI__builtin_neon_vqshlu_n_v:
2654  case AArch64::BI__builtin_neon_vqshluq_n_v:
2655    Int = Intrinsic::aarch64_neon_vsqshlu;
2656    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n");
2657  case AArch64::BI__builtin_neon_vsri_n_v:
2658  case AArch64::BI__builtin_neon_vsriq_n_v:
2659    Int = Intrinsic::aarch64_neon_vsri;
2660    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsri_n");
2661  case AArch64::BI__builtin_neon_vsli_n_v:
2662  case AArch64::BI__builtin_neon_vsliq_n_v:
2663    Int = Intrinsic::aarch64_neon_vsli;
2664    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsli_n");
2665  case AArch64::BI__builtin_neon_vshll_n_v: {
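        // No intrinsic is needed here: widen the narrow source, then shift left by
        // the immediate.  A sketch of the emitted IR (types are illustrative):
        //   %ext = sext/zext <8 x i8> %a to <8 x i16>
        //   %res = shl <8 x i16> %ext, <i16 n, i16 n, ...>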
2666    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
2667    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2668    if (usgn)
2669      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
2670    else
2671      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
2672    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
2673    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
2674  }
2675  case AArch64::BI__builtin_neon_vshrn_n_v: {
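        // Lowered as a wide right shift followed by a truncate.  A sketch of the
        // emitted IR (types are illustrative):
        //   %shr = lshr/ashr <4 x i32> %a, <i32 n, i32 n, ...>
        //   %res = trunc <4 x i32> %shr to <4 x i16>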
2676    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
2677    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2678    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
2679    if (usgn)
2680      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
2681    else
2682      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
2683    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
2684  }
2685  case AArch64::BI__builtin_neon_vqshrun_n_v:
2686    Int = Intrinsic::aarch64_neon_vsqshrun;
2687    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
2688  case AArch64::BI__builtin_neon_vrshrn_n_v:
2689    Int = Intrinsic::aarch64_neon_vrshrn;
2690    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
2691  case AArch64::BI__builtin_neon_vqrshrun_n_v:
2692    Int = Intrinsic::aarch64_neon_vsqrshrun;
2693    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
2694  case AArch64::BI__builtin_neon_vqshrn_n_v:
2695    Int = usgn ? Intrinsic::aarch64_neon_vuqshrn
2696               : Intrinsic::aarch64_neon_vsqshrn;
2697    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
2698  case AArch64::BI__builtin_neon_vqrshrn_n_v:
2699    Int = usgn ? Intrinsic::aarch64_neon_vuqrshrn
2700               : Intrinsic::aarch64_neon_vsqrshrn;
2701    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
2702
2703  // Convert
2704  case AArch64::BI__builtin_neon_vmovl_v:
2705    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmovl_v, E);
2706  case AArch64::BI__builtin_neon_vcvt_n_f32_v:
2707    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_f32_v, E);
2708  case AArch64::BI__builtin_neon_vcvtq_n_f32_v:
2709    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_f32_v, E);
2710  case AArch64::BI__builtin_neon_vcvtq_n_f64_v: {
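        // Reuse the ARM fixed-point conversion intrinsics, overloaded here on the
        // <2 x double> result type and the 64-bit integer source vector.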
2711    llvm::Type *FloatTy =
2712        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
2713    llvm::Type *Tys[2] = { FloatTy, Ty };
2714    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
2715               : Intrinsic::arm_neon_vcvtfxs2fp;
2716    Function *F = CGM.getIntrinsic(Int, Tys);
2717    return EmitNeonCall(F, Ops, "vcvt_n");
2718  }
2719  case AArch64::BI__builtin_neon_vcvt_n_s32_v:
2720    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_s32_v, E);
2721  case AArch64::BI__builtin_neon_vcvtq_n_s32_v:
2722    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_s32_v, E);
2723  case AArch64::BI__builtin_neon_vcvt_n_u32_v:
2724    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_u32_v, E);
2725  case AArch64::BI__builtin_neon_vcvtq_n_u32_v:
2726    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_u32_v, E);
2727  case AArch64::BI__builtin_neon_vcvtq_n_s64_v:
2728  case AArch64::BI__builtin_neon_vcvtq_n_u64_v: {
2729    llvm::Type *FloatTy =
2730        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
2731    llvm::Type *Tys[2] = { Ty, FloatTy };
2732    Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
2733               : Intrinsic::arm_neon_vcvtfp2fxs;
2734    Function *F = CGM.getIntrinsic(Int, Tys);
2735    return EmitNeonCall(F, Ops, "vcvt_n");
2736  }
2737
2738  // Load/Store
2739  case AArch64::BI__builtin_neon_vld1_v:
2740    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1_v, E);
2741  case AArch64::BI__builtin_neon_vld1q_v:
2742    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1q_v, E);
2743  case AArch64::BI__builtin_neon_vld2_v:
2744    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2_v, E);
2745  case AArch64::BI__builtin_neon_vld2q_v:
2746    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_v, E);
2747  case AArch64::BI__builtin_neon_vld3_v:
2748    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3_v, E);
2749  case AArch64::BI__builtin_neon_vld3q_v:
2750    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3q_v, E);
2751  case AArch64::BI__builtin_neon_vld4_v:
2752    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4_v, E);
2753  case AArch64::BI__builtin_neon_vld4q_v:
2754    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4q_v, E);
2755  case AArch64::BI__builtin_neon_vst1_v:
2756    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst1_v, E);
2757  case AArch64::BI__builtin_neon_vst1q_v:
2758    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst1q_v, E);
2759  case AArch64::BI__builtin_neon_vst2_v:
2760    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2_v, E);
2761  case AArch64::BI__builtin_neon_vst2q_v:
2762    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2q_v, E);
2763  case AArch64::BI__builtin_neon_vst3_v:
2764    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3_v, E);
2765  case AArch64::BI__builtin_neon_vst3q_v:
2766    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3q_v, E);
2767  case AArch64::BI__builtin_neon_vst4_v:
2768    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_v, E);
2769  case AArch64::BI__builtin_neon_vst4q_v:
2770    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_v, E);
2771
2772  // AArch64-only builtins
2773  case AArch64::BI__builtin_neon_vfma_lane_v:
2774  case AArch64::BI__builtin_neon_vfmaq_laneq_v: {
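        // Splat the selected lane of operand 2 across the vector, then emit
        // llvm.fma with the accumulator (operand 0) in the addend position.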
2775    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
2776    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2777    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2778
2779    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2780    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
2781    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
2782  }
2783  case AArch64::BI__builtin_neon_vfmaq_lane_v: {
2784    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
2785    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2786    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2787
2788    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2789    llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
2790                                            VTy->getNumElements() / 2);
2791    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
2792    Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
2793                                               cast<ConstantInt>(Ops[3]));
2794    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
2795
2796    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
2797  }
2798  case AArch64::BI__builtin_neon_vfma_laneq_v: {
2799    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
2800    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2801    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2802
2803    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2804    llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
2805                                            VTy->getNumElements() * 2);
2806    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
2807    Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
2808                                               cast<ConstantInt>(Ops[3]));
2809    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
2810
2811    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
2812  }
2813  case AArch64::BI__builtin_neon_vfms_v:
2814  case AArch64::BI__builtin_neon_vfmsq_v: {
2815    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
2816    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2817    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2818    Ops[1] = Builder.CreateFNeg(Ops[1]);
2819    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2820
2821    // LLVM's fma intrinsic puts the accumulator in the last position, but the
2822    // AArch64 intrinsic has it first.
2823    return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
2824  }
2825  case AArch64::BI__builtin_neon_vmaxnm_v:
2826  case AArch64::BI__builtin_neon_vmaxnmq_v: {
2827    Int = Intrinsic::aarch64_neon_vmaxnm;
2828    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
2829  }
2830  case AArch64::BI__builtin_neon_vminnm_v:
2831  case AArch64::BI__builtin_neon_vminnmq_v: {
2832    Int = Intrinsic::aarch64_neon_vminnm;
2833    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
2834  }
2835  case AArch64::BI__builtin_neon_vpmaxnm_v:
2836  case AArch64::BI__builtin_neon_vpmaxnmq_v: {
2837    Int = Intrinsic::aarch64_neon_vpmaxnm;
2838    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
2839  }
2840  case AArch64::BI__builtin_neon_vpminnm_v:
2841  case AArch64::BI__builtin_neon_vpminnmq_v: {
2842    Int = Intrinsic::aarch64_neon_vpminnm;
2843    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
2844  }
2845  case AArch64::BI__builtin_neon_vpmaxq_v: {
2846    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
2847    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
2848  }
2849  case AArch64::BI__builtin_neon_vpminq_v: {
2850    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
2851    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
2852  }
2853  case AArch64::BI__builtin_neon_vpaddq_v: {
2854    Int = Intrinsic::arm_neon_vpadd;
2855    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpadd");
2856  }
2857  case AArch64::BI__builtin_neon_vmulx_v:
2858  case AArch64::BI__builtin_neon_vmulxq_v: {
2859    Int = Intrinsic::aarch64_neon_vmulx;
2860    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
2861  }
2862  }
2863}
2864
2865Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
2866                                           const CallExpr *E) {
2867  if (BuiltinID == ARM::BI__clear_cache) {
2868    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
2869    const FunctionDecl *FD = E->getDirectCallee();
2870    SmallVector<Value*, 2> Ops;
2871    for (unsigned i = 0; i < 2; i++)
2872      Ops.push_back(EmitScalarExpr(E->getArg(i)));
2873    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
2874    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
2875    StringRef Name = FD->getName();
2876    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
2877  }
2878
2879  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
2880      (BuiltinID == ARM::BI__builtin_arm_ldrex &&
2881       getContext().getTypeSize(E->getType()) == 64)) {
2882    Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
2883
2884    Value *LdPtr = EmitScalarExpr(E->getArg(0));
2885    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
2886                                    "ldrexd");
2887
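        // Reassemble the two 32-bit halves returned by ldrexd into a single 64-bit
        // value: zero-extend both, shift one half up by 32, and or them together.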
2888    Value *Val0 = Builder.CreateExtractValue(Val, 1);
2889    Value *Val1 = Builder.CreateExtractValue(Val, 0);
2890    Val0 = Builder.CreateZExt(Val0, Int64Ty);
2891    Val1 = Builder.CreateZExt(Val1, Int64Ty);
2892
2893    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
2894    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
2895    Val = Builder.CreateOr(Val, Val1);
2896    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
2897  }
2898
2899  if (BuiltinID == ARM::BI__builtin_arm_ldrex) {
2900    Value *LoadAddr = EmitScalarExpr(E->getArg(0));
2901
2902    QualType Ty = E->getType();
2903    llvm::Type *RealResTy = ConvertType(Ty);
2904    llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
2905                                                  getContext().getTypeSize(Ty));
2906    LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
2907
2908    Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrex, LoadAddr->getType());
2909    Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
2910
2911    if (RealResTy->isPointerTy())
2912      return Builder.CreateIntToPtr(Val, RealResTy);
2913    else {
2914      Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
2915      return Builder.CreateBitCast(Val, RealResTy);
2916    }
2917  }
2918
2919  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
2920      (BuiltinID == ARM::BI__builtin_arm_strex &&
2921       getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
2922    Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd);
2923    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL);
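        // Split the 64-bit value into two i32 halves by spilling it to a stack
        // temporary and reloading it as { i32, i32 }, then pass both halves plus
        // the target address to the strexd intrinsic.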
2924
2925    Value *Tmp = CreateMemTemp(E->getArg(0)->getType());
2926    Value *Val = EmitScalarExpr(E->getArg(0));
2927    Builder.CreateStore(Val, Tmp);
2928
2929    Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
2930    Val = Builder.CreateLoad(LdPtr);
2931
2932    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
2933    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
2934    Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
2935    return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
2936  }
2937
2938  if (BuiltinID == ARM::BI__builtin_arm_strex) {
2939    Value *StoreVal = EmitScalarExpr(E->getArg(0));
2940    Value *StoreAddr = EmitScalarExpr(E->getArg(1));
2941
2942    QualType Ty = E->getArg(0)->getType();
2943    llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
2944                                                 getContext().getTypeSize(Ty));
2945    StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
2946
2947    if (StoreVal->getType()->isPointerTy())
2948      StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
2949    else {
2950      StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
2951      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
2952    }
2953
2954    Function *F = CGM.getIntrinsic(Intrinsic::arm_strex, StoreAddr->getType());
2955    return Builder.CreateCall2(F, StoreVal, StoreAddr, "strex");
2956  }
2957
2958  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
2959    Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
2960    return Builder.CreateCall(F);
2961  }
2962
2963  if (BuiltinID == ARM::BI__builtin_arm_sevl) {
2964    Function *F = CGM.getIntrinsic(Intrinsic::arm_sevl);
2965    return Builder.CreateCall(F);
2966  }
2967
2968  // CRC32
2969  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
2970  switch (BuiltinID) {
2971  case ARM::BI__builtin_arm_crc32b:
2972    CRCIntrinsicID = Intrinsic::arm_crc32b; break;
2973  case ARM::BI__builtin_arm_crc32cb:
2974    CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
2975  case ARM::BI__builtin_arm_crc32h:
2976    CRCIntrinsicID = Intrinsic::arm_crc32h; break;
2977  case ARM::BI__builtin_arm_crc32ch:
2978    CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
2979  case ARM::BI__builtin_arm_crc32w:
2980  case ARM::BI__builtin_arm_crc32d:
2981    CRCIntrinsicID = Intrinsic::arm_crc32w; break;
2982  case ARM::BI__builtin_arm_crc32cw:
2983  case ARM::BI__builtin_arm_crc32cd:
2984    CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
2985  }
2986
2987  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
2988    Value *Arg0 = EmitScalarExpr(E->getArg(0));
2989    Value *Arg1 = EmitScalarExpr(E->getArg(1));
2990
2991    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
2992    // intrinsics, hence we need different codegen for these cases.
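        // Roughly: crc32d(crc, x) == crc32w(crc32w(crc, lo32(x)), hi32(x)), where
        // lo32/hi32 are the low and high 32-bit halves of the 64-bit operand.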
2993    if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
2994        BuiltinID == ARM::BI__builtin_arm_crc32cd) {
2995      Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
2996      Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
2997      Value *Arg1b = Builder.CreateLShr(Arg1, C1);
2998      Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
2999
3000      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3001      Value *Res = Builder.CreateCall2(F, Arg0, Arg1a);
3002      return Builder.CreateCall2(F, Res, Arg1b);
3003    } else {
3004      Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
3005
3006      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3007      return Builder.CreateCall2(F, Arg0, Arg1);
3008    }
3009  }
3010
3011  SmallVector<Value*, 4> Ops;
3012  llvm::Value *Align = 0;
3013  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
3014    if (i == 0) {
3015      switch (BuiltinID) {
3016      case ARM::BI__builtin_neon_vld1_v:
3017      case ARM::BI__builtin_neon_vld1q_v:
3018      case ARM::BI__builtin_neon_vld1q_lane_v:
3019      case ARM::BI__builtin_neon_vld1_lane_v:
3020      case ARM::BI__builtin_neon_vld1_dup_v:
3021      case ARM::BI__builtin_neon_vld1q_dup_v:
3022      case ARM::BI__builtin_neon_vst1_v:
3023      case ARM::BI__builtin_neon_vst1q_v:
3024      case ARM::BI__builtin_neon_vst1q_lane_v:
3025      case ARM::BI__builtin_neon_vst1_lane_v:
3026      case ARM::BI__builtin_neon_vst2_v:
3027      case ARM::BI__builtin_neon_vst2q_v:
3028      case ARM::BI__builtin_neon_vst2_lane_v:
3029      case ARM::BI__builtin_neon_vst2q_lane_v:
3030      case ARM::BI__builtin_neon_vst3_v:
3031      case ARM::BI__builtin_neon_vst3q_v:
3032      case ARM::BI__builtin_neon_vst3_lane_v:
3033      case ARM::BI__builtin_neon_vst3q_lane_v:
3034      case ARM::BI__builtin_neon_vst4_v:
3035      case ARM::BI__builtin_neon_vst4q_v:
3036      case ARM::BI__builtin_neon_vst4_lane_v:
3037      case ARM::BI__builtin_neon_vst4q_lane_v:
3038        // Get the alignment for the argument in addition to the value;
3039        // we'll use it later.
3040        std::pair<llvm::Value*, unsigned> Src =
3041            EmitPointerWithAlignment(E->getArg(0));
3042        Ops.push_back(Src.first);
3043        Align = Builder.getInt32(Src.second);
3044        continue;
3045      }
3046    }
3047    if (i == 1) {
3048      switch (BuiltinID) {
3049      case ARM::BI__builtin_neon_vld2_v:
3050      case ARM::BI__builtin_neon_vld2q_v:
3051      case ARM::BI__builtin_neon_vld3_v:
3052      case ARM::BI__builtin_neon_vld3q_v:
3053      case ARM::BI__builtin_neon_vld4_v:
3054      case ARM::BI__builtin_neon_vld4q_v:
3055      case ARM::BI__builtin_neon_vld2_lane_v:
3056      case ARM::BI__builtin_neon_vld2q_lane_v:
3057      case ARM::BI__builtin_neon_vld3_lane_v:
3058      case ARM::BI__builtin_neon_vld3q_lane_v:
3059      case ARM::BI__builtin_neon_vld4_lane_v:
3060      case ARM::BI__builtin_neon_vld4q_lane_v:
3061      case ARM::BI__builtin_neon_vld2_dup_v:
3062      case ARM::BI__builtin_neon_vld3_dup_v:
3063      case ARM::BI__builtin_neon_vld4_dup_v:
3064        // Get the alignment for the argument in addition to the value;
3065        // we'll use it later.
3066        std::pair<llvm::Value*, unsigned> Src =
3067            EmitPointerWithAlignment(E->getArg(1));
3068        Ops.push_back(Src.first);
3069        Align = Builder.getInt32(Src.second);
3070        continue;
3071      }
3072    }
3073    Ops.push_back(EmitScalarExpr(E->getArg(i)));
3074  }
3075
3076  // vget_lane and vset_lane are not overloaded and do not have an extra
3077  // argument that specifies the vector type.
3078  switch (BuiltinID) {
3079  default: break;
3080  case ARM::BI__builtin_neon_vget_lane_i8:
3081  case ARM::BI__builtin_neon_vget_lane_i16:
3082  case ARM::BI__builtin_neon_vget_lane_i32:
3083  case ARM::BI__builtin_neon_vget_lane_i64:
3084  case ARM::BI__builtin_neon_vget_lane_f32:
3085  case ARM::BI__builtin_neon_vgetq_lane_i8:
3086  case ARM::BI__builtin_neon_vgetq_lane_i16:
3087  case ARM::BI__builtin_neon_vgetq_lane_i32:
3088  case ARM::BI__builtin_neon_vgetq_lane_i64:
3089  case ARM::BI__builtin_neon_vgetq_lane_f32:
3090    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
3091                                        "vget_lane");
3092  case ARM::BI__builtin_neon_vset_lane_i8:
3093  case ARM::BI__builtin_neon_vset_lane_i16:
3094  case ARM::BI__builtin_neon_vset_lane_i32:
3095  case ARM::BI__builtin_neon_vset_lane_i64:
3096  case ARM::BI__builtin_neon_vset_lane_f32:
3097  case ARM::BI__builtin_neon_vsetq_lane_i8:
3098  case ARM::BI__builtin_neon_vsetq_lane_i16:
3099  case ARM::BI__builtin_neon_vsetq_lane_i32:
3100  case ARM::BI__builtin_neon_vsetq_lane_i64:
3101  case ARM::BI__builtin_neon_vsetq_lane_f32:
3102    Ops.push_back(EmitScalarExpr(E->getArg(2)));
3103    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
3104  }
3105
3106  // Get the last argument, which specifies the vector type.
3107  llvm::APSInt Result;
3108  const Expr *Arg = E->getArg(E->getNumArgs()-1);
3109  if (!Arg->isIntegerConstantExpr(Result, getContext()))
3110    return 0;
3111
3112  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
3113      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
3114    // Determine the overloaded type of this builtin.
3115    llvm::Type *Ty;
3116    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
3117      Ty = FloatTy;
3118    else
3119      Ty = DoubleTy;
3120
3121    // Determine whether this is an unsigned conversion or not.
3122    bool usgn = Result.getZExtValue() == 1;
3123    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3124
3125    // Call the appropriate intrinsic.
3126    Function *F = CGM.getIntrinsic(Int, Ty);
3127    return Builder.CreateCall(F, Ops, "vcvtr");
3128  }
3129
3130  // Determine the type of this overloaded NEON intrinsic.
3131  NeonTypeFlags Type(Result.getZExtValue());
3132  bool usgn = Type.isUnsigned();
3133  bool quad = Type.isQuad();
3134  bool rightShift = false;
3135
3136  llvm::VectorType *VTy = GetNeonType(this, Type);
3137  llvm::Type *Ty = VTy;
3138  if (!Ty)
3139    return 0;
3140
3141  unsigned Int;
3142  switch (BuiltinID) {
3143  default: return 0;
3144  case ARM::BI__builtin_neon_vbsl_v:
3145  case ARM::BI__builtin_neon_vbslq_v:
3146    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty),
3147                        Ops, "vbsl");
3148  case ARM::BI__builtin_neon_vabd_v:
3149  case ARM::BI__builtin_neon_vabdq_v:
3150    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
3151    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
3152  case ARM::BI__builtin_neon_vabs_v:
3153  case ARM::BI__builtin_neon_vabsq_v:
3154    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty),
3155                        Ops, "vabs");
3156  case ARM::BI__builtin_neon_vaddhn_v: {
3157    llvm::VectorType *SrcTy =
3158        llvm::VectorType::getExtendedElementVectorType(VTy);
3159
3160    // %sum = add <4 x i32> %lhs, %rhs
3161    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3162    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3163    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3164
3165    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3166    Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(),
3167                                       SrcTy->getScalarSizeInBits() / 2);
3168    ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt);
3169    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3170
3171    // %res = trunc <4 x i32> %high to <4 x i16>
3172    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3173  }
3174  case ARM::BI__builtin_neon_vcale_v:
3175    std::swap(Ops[0], Ops[1]);
3176  case ARM::BI__builtin_neon_vcage_v: {
3177    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged);
3178    return EmitNeonCall(F, Ops, "vcage");
3179  }
3180  case ARM::BI__builtin_neon_vcaleq_v:
3181    std::swap(Ops[0], Ops[1]);
3182  case ARM::BI__builtin_neon_vcageq_v: {
3183    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
3184    return EmitNeonCall(F, Ops, "vcage");
3185  }
3186  case ARM::BI__builtin_neon_vcalt_v:
3187    std::swap(Ops[0], Ops[1]);
3188  case ARM::BI__builtin_neon_vcagt_v: {
3189    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd);
3190    return EmitNeonCall(F, Ops, "vcagt");
3191  }
3192  case ARM::BI__builtin_neon_vcaltq_v:
3193    std::swap(Ops[0], Ops[1]);
3194  case ARM::BI__builtin_neon_vcagtq_v: {
3195    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
3196    return EmitNeonCall(F, Ops, "vcagt");
3197  }
3198  case ARM::BI__builtin_neon_vcls_v:
3199  case ARM::BI__builtin_neon_vclsq_v: {
3200    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
3201    return EmitNeonCall(F, Ops, "vcls");
3202  }
3203  case ARM::BI__builtin_neon_vclz_v:
3204  case ARM::BI__builtin_neon_vclzq_v: {
3205    // Generate the target-independent ctlz intrinsic; it takes a second
3206    // argument indicating whether clz of zero is undefined (on ARM it is not).
3207    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty);
3208    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3209    return EmitNeonCall(F, Ops, "vclz");
3210  }
3211  case ARM::BI__builtin_neon_vcnt_v:
3212  case ARM::BI__builtin_neon_vcntq_v: {
3213    // Generate the target-independent ctpop intrinsic.
3214    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty);
3215    return EmitNeonCall(F, Ops, "vctpop");
3216  }
3217  case ARM::BI__builtin_neon_vcvt_f16_v: {
3218    assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
3219           "unexpected vcvt_f16_v builtin");
3220    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
3221    return EmitNeonCall(F, Ops, "vcvt");
3222  }
3223  case ARM::BI__builtin_neon_vcvt_f32_f16: {
3224    assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
3225           "unexpected vcvt_f32_f16 builtin");
3226    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
3227    return EmitNeonCall(F, Ops, "vcvt");
3228  }
3229  case ARM::BI__builtin_neon_vcvt_f32_v:
3230  case ARM::BI__builtin_neon_vcvtq_f32_v:
3231    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3232    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
3233    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3234                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3235  case ARM::BI__builtin_neon_vcvt_s32_v:
3236  case ARM::BI__builtin_neon_vcvt_u32_v:
3237  case ARM::BI__builtin_neon_vcvtq_s32_v:
3238  case ARM::BI__builtin_neon_vcvtq_u32_v: {
3239    llvm::Type *FloatTy =
3240      GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
3241    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
3242    return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3243                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3244  }
3245  case ARM::BI__builtin_neon_vcvt_n_f32_v:
3246  case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
3247    llvm::Type *FloatTy =
3248      GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
3249    llvm::Type *Tys[2] = { FloatTy, Ty };
3250    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
3251               : Intrinsic::arm_neon_vcvtfxs2fp;
3252    Function *F = CGM.getIntrinsic(Int, Tys);
3253    return EmitNeonCall(F, Ops, "vcvt_n");
3254  }
3255  case ARM::BI__builtin_neon_vcvt_n_s32_v:
3256  case ARM::BI__builtin_neon_vcvt_n_u32_v:
3257  case ARM::BI__builtin_neon_vcvtq_n_s32_v:
3258  case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
3259    llvm::Type *FloatTy =
3260      GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
3261    llvm::Type *Tys[2] = { Ty, FloatTy };
3262    Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
3263               : Intrinsic::arm_neon_vcvtfp2fxs;
3264    Function *F = CGM.getIntrinsic(Int, Tys);
3265    return EmitNeonCall(F, Ops, "vcvt_n");
3266  }
3267  case ARM::BI__builtin_neon_vext_v:
3268  case ARM::BI__builtin_neon_vextq_v: {
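        // vext extracts a vector that starts at the immediate lane of the
        // concatenation of its two inputs; this maps onto a shufflevector whose
        // mask is <CV, CV+1, ..., CV+N-1>.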
3269    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3270    SmallVector<Constant*, 16> Indices;
3271    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3272      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
3273
3274    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3275    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3276    Value *SV = llvm::ConstantVector::get(Indices);
3277    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
3278  }
3279  case ARM::BI__builtin_neon_vhadd_v:
3280  case ARM::BI__builtin_neon_vhaddq_v:
3281    Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
3282    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd");
3283  case ARM::BI__builtin_neon_vhsub_v:
3284  case ARM::BI__builtin_neon_vhsubq_v:
3285    Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
3286    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub");
3287  case ARM::BI__builtin_neon_vld1_v:
3288  case ARM::BI__builtin_neon_vld1q_v:
3289    Ops.push_back(Align);
3290    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty),
3291                        Ops, "vld1");
3292  case ARM::BI__builtin_neon_vld1q_lane_v:
3293    // Handle 64-bit integer elements as a special case.  Use shuffles of
3294    // one-element vectors to avoid poor code for i64 in the backend.
3295    if (VTy->getElementType()->isIntegerTy(64)) {
3296      // Extract the other lane.
3297      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3298      int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
3299      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
3300      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3301      // Load the value as a one-element vector.
3302      Ty = llvm::VectorType::get(VTy->getElementType(), 1);
3303      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
3304      Value *Ld = Builder.CreateCall2(F, Ops[0], Align);
3305      // Combine them.
3306      SmallVector<Constant*, 2> Indices;
3307      Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane));
3308      Indices.push_back(ConstantInt::get(Int32Ty, Lane));
3309      SV = llvm::ConstantVector::get(Indices);
3310      return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
3311    }
3312    // fall through
3313  case ARM::BI__builtin_neon_vld1_lane_v: {
3314    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3315    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3316    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3317    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
3318    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
3319    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
3320  }
3321  case ARM::BI__builtin_neon_vld1_dup_v:
3322  case ARM::BI__builtin_neon_vld1q_dup_v: {
3323    Value *V = UndefValue::get(Ty);
3324    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3325    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3326    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
3327    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
3328    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
3329    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3330    return EmitNeonSplat(Ops[0], CI);
3331  }
3332  case ARM::BI__builtin_neon_vld2_v:
3333  case ARM::BI__builtin_neon_vld2q_v: {
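        // The vldN intrinsics return a struct of N vectors, while the builtin takes
        // a result pointer as its first argument; emit the load, then store the
        // returned struct through that pointer.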
3334    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty);
3335    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
3336    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3337    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3338    return Builder.CreateStore(Ops[1], Ops[0]);
3339  }
3340  case ARM::BI__builtin_neon_vld3_v:
3341  case ARM::BI__builtin_neon_vld3q_v: {
3342    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty);
3343    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
3344    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3345    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3346    return Builder.CreateStore(Ops[1], Ops[0]);
3347  }
3348  case ARM::BI__builtin_neon_vld4_v:
3349  case ARM::BI__builtin_neon_vld4q_v: {
3350    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty);
3351    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
3352    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3353    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3354    return Builder.CreateStore(Ops[1], Ops[0]);
3355  }
3356  case ARM::BI__builtin_neon_vld2_lane_v:
3357  case ARM::BI__builtin_neon_vld2q_lane_v: {
3358    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty);
3359    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3360    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
3361    Ops.push_back(Align);
3362    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
3363    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3364    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3365    return Builder.CreateStore(Ops[1], Ops[0]);
3366  }
3367  case ARM::BI__builtin_neon_vld3_lane_v:
3368  case ARM::BI__builtin_neon_vld3q_lane_v: {
3369    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty);
3370    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3371    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
3372    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
3373    Ops.push_back(Align);
3374    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
3375    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3376    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3377    return Builder.CreateStore(Ops[1], Ops[0]);
3378  }
3379  case ARM::BI__builtin_neon_vld4_lane_v:
3380  case ARM::BI__builtin_neon_vld4q_lane_v: {
3381    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty);
3382    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3383    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
3384    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
3385    Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
3386    Ops.push_back(Align);
3387    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
3388    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3389    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3390    return Builder.CreateStore(Ops[1], Ops[0]);
3391  }
3392  case ARM::BI__builtin_neon_vld2_dup_v:
3393  case ARM::BI__builtin_neon_vld3_dup_v:
3394  case ARM::BI__builtin_neon_vld4_dup_v: {
3395    // Handle 64-bit elements as a special case.  There is no "dup" needed.
3396    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
3397      switch (BuiltinID) {
3398      case ARM::BI__builtin_neon_vld2_dup_v:
3399        Int = Intrinsic::arm_neon_vld2;
3400        break;
3401      case ARM::BI__builtin_neon_vld3_dup_v:
3402        Int = Intrinsic::arm_neon_vld3;
3403        break;
3404      case ARM::BI__builtin_neon_vld4_dup_v:
3405        Int = Intrinsic::arm_neon_vld4;
3406        break;
3407      default: llvm_unreachable("unknown vld_dup intrinsic?");
3408      }
3409      Function *F = CGM.getIntrinsic(Int, Ty);
3410      Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
3411      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3412      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3413      return Builder.CreateStore(Ops[1], Ops[0]);
3414    }
3415    switch (BuiltinID) {
3416    case ARM::BI__builtin_neon_vld2_dup_v:
3417      Int = Intrinsic::arm_neon_vld2lane;
3418      break;
3419    case ARM::BI__builtin_neon_vld3_dup_v:
3420      Int = Intrinsic::arm_neon_vld3lane;
3421      break;
3422    case ARM::BI__builtin_neon_vld4_dup_v:
3423      Int = Intrinsic::arm_neon_vld4lane;
3424      break;
3425    default: llvm_unreachable("unknown vld_dup intrinsic?");
3426    }
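        // Load one element into lane 0 with the vldNlane intrinsic (passing undef
        // for the existing vector operands), then splat lane 0 across each result
        // vector to get the dup semantics.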
3427    Function *F = CGM.getIntrinsic(Int, Ty);
3428    llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
3429
3430    SmallVector<Value*, 6> Args;
3431    Args.push_back(Ops[1]);
3432    Args.append(STy->getNumElements(), UndefValue::get(Ty));
3433
3434    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
3435    Args.push_back(CI);
3436    Args.push_back(Align);
3437
3438    Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
3439    // Splat lane 0 to all elements in each vector of the result.
3440    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
3441      Value *Val = Builder.CreateExtractValue(Ops[1], i);
3442      Value *Elt = Builder.CreateBitCast(Val, Ty);
3443      Elt = EmitNeonSplat(Elt, CI);
3444      Elt = Builder.CreateBitCast(Elt, Val->getType());
3445      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
3446    }
3447    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3448    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3449    return Builder.CreateStore(Ops[1], Ops[0]);
3450  }
3451  case ARM::BI__builtin_neon_vmax_v:
3452  case ARM::BI__builtin_neon_vmaxq_v:
3453    Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
3454    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
3455  case ARM::BI__builtin_neon_vmin_v:
3456  case ARM::BI__builtin_neon_vminq_v:
3457    Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
3458    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
3459  case ARM::BI__builtin_neon_vmovl_v: {
3460    llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3461    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
3462    if (usgn)
3463      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
3464    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
3465  }
3466  case ARM::BI__builtin_neon_vmovn_v: {
3467    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3468    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
3469    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
3470  }
3471  case ARM::BI__builtin_neon_vmul_v:
3472  case ARM::BI__builtin_neon_vmulq_v:
3473    assert(Type.isPoly() && "vmul builtin only supported for polynomial types");
3474    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
3475                        Ops, "vmul");
3476  case ARM::BI__builtin_neon_vmull_v:
3477    // FIXME: the integer vmull operations could be emitted in terms of pure
3478    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
3479    // hoisting the exts outside loops. Until global ISel comes along that can
3480    // see through such movement, this leads to bad CodeGen. So we need an
3481    // intrinsic for now.
3482    Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
3483    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
3484    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
3485  case ARM::BI__builtin_neon_vfma_v:
3486  case ARM::BI__builtin_neon_vfmaq_v: {
3487    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3488    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3489    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3490    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3491
3492    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
3493    return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
3494  }
3495  case ARM::BI__builtin_neon_vpadal_v:
3496  case ARM::BI__builtin_neon_vpadalq_v: {
3497    Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
3498    // The source operand type has twice as many elements of half the size.
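        // (For example, a <4 x i32> result is paired with an <8 x i16> source.)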
3499    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3500    llvm::Type *EltTy =
3501      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3502    llvm::Type *NarrowTy =
3503      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3504    llvm::Type *Tys[2] = { Ty, NarrowTy };
3505    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
3506  }
3507  case ARM::BI__builtin_neon_vpadd_v:
3508    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty),
3509                        Ops, "vpadd");
3510  case ARM::BI__builtin_neon_vpaddl_v:
3511  case ARM::BI__builtin_neon_vpaddlq_v: {
3512    Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
3513    // The source operand type has twice as many elements of half the size.
3514    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3515    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3516    llvm::Type *NarrowTy =
3517      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3518    llvm::Type *Tys[2] = { Ty, NarrowTy };
3519    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
3520  }
3521  case ARM::BI__builtin_neon_vpmax_v:
3522    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
3523    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
3524  case ARM::BI__builtin_neon_vpmin_v:
3525    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
3526    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
3527  case ARM::BI__builtin_neon_vqabs_v:
3528  case ARM::BI__builtin_neon_vqabsq_v:
3529    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
3530                        Ops, "vqabs");
3531  case ARM::BI__builtin_neon_vqadd_v:
3532  case ARM::BI__builtin_neon_vqaddq_v:
3533    Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
3534    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
3535  case ARM::BI__builtin_neon_vqdmlal_v: {
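        // There is no fused intrinsic: vqdmlal(a, b, c) is emitted as
        // vqadd(a, vqdmull(b, c)).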
3536    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
3537    Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
3538                              MulOps, "vqdmlal");
3539
3540    SmallVector<Value *, 2> AddOps;
3541    AddOps.push_back(Ops[0]);
3542    AddOps.push_back(Mul);
3543    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqadds, Ty),
3544                        AddOps, "vqdmlal");
3545  }
3546  case ARM::BI__builtin_neon_vqdmlsl_v: {
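        // Likewise, vqdmlsl(a, b, c) is emitted as vqsub(a, vqdmull(b, c)).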
3547    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
3548    Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
3549                              MulOps, "vqdmlsl");
3550
3551    SmallVector<Value *, 2> SubOps;
3552    SubOps.push_back(Ops[0]);
3553    SubOps.push_back(Mul);
3554    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqsubs, Ty),
3555                        SubOps, "vqdmlsl");
3556  }
3557  case ARM::BI__builtin_neon_vqdmulh_v:
3558  case ARM::BI__builtin_neon_vqdmulhq_v:
3559    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty),
3560                        Ops, "vqdmulh");
3561  case ARM::BI__builtin_neon_vqdmull_v:
3562    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
3563                        Ops, "vqdmull");
3564  case ARM::BI__builtin_neon_vqmovn_v:
3565    Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
3566    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
3567  case ARM::BI__builtin_neon_vqmovun_v:
3568    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
3569                        Ops, "vqmovun");
3570  case ARM::BI__builtin_neon_vqneg_v:
3571  case ARM::BI__builtin_neon_vqnegq_v:
3572    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
3573                        Ops, "vqneg");
3574  case ARM::BI__builtin_neon_vqrdmulh_v:
3575  case ARM::BI__builtin_neon_vqrdmulhq_v:
3576    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty),
3577                        Ops, "vqrdmulh");
3578  case ARM::BI__builtin_neon_vqrshl_v:
3579  case ARM::BI__builtin_neon_vqrshlq_v:
3580    Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
3581    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl");
3582  case ARM::BI__builtin_neon_vqrshrn_n_v:
3583    Int =
3584      usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
3585    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
3586                        1, true);
3587  case ARM::BI__builtin_neon_vqrshrun_n_v:
3588    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
3589                        Ops, "vqrshrun_n", 1, true);
3590  case ARM::BI__builtin_neon_vqshl_v:
3591  case ARM::BI__builtin_neon_vqshlq_v:
3592    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
3593    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl");
3594  case ARM::BI__builtin_neon_vqshl_n_v:
3595  case ARM::BI__builtin_neon_vqshlq_n_v:
3596    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
3597    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
3598                        1, false);
3599  case ARM::BI__builtin_neon_vqshlu_n_v:
3600  case ARM::BI__builtin_neon_vqshluq_n_v:
3601    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty),
3602                        Ops, "vqshlu", 1, false);
3603  case ARM::BI__builtin_neon_vqshrn_n_v:
3604    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
3605    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
3606                        1, true);
3607  case ARM::BI__builtin_neon_vqshrun_n_v:
3608    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
3609                        Ops, "vqshrun_n", 1, true);
3610  case ARM::BI__builtin_neon_vqsub_v:
3611  case ARM::BI__builtin_neon_vqsubq_v:
3612    Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
3613    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub");
3614  case ARM::BI__builtin_neon_vraddhn_v:
3615    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty),
3616                        Ops, "vraddhn");
3617  case ARM::BI__builtin_neon_vrecpe_v:
3618  case ARM::BI__builtin_neon_vrecpeq_v:
3619    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
3620                        Ops, "vrecpe");
3621  case ARM::BI__builtin_neon_vrecps_v:
3622  case ARM::BI__builtin_neon_vrecpsq_v:
3623    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty),
3624                        Ops, "vrecps");
3625  case ARM::BI__builtin_neon_vrhadd_v:
3626  case ARM::BI__builtin_neon_vrhaddq_v:
3627    Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
3628    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd");
3629  case ARM::BI__builtin_neon_vrshl_v:
3630  case ARM::BI__builtin_neon_vrshlq_v:
3631    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3632    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl");
3633  case ARM::BI__builtin_neon_vrshrn_n_v:
3634    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
3635                        Ops, "vrshrn_n", 1, true);
3636  case ARM::BI__builtin_neon_vrshr_n_v:
3637  case ARM::BI__builtin_neon_vrshrq_n_v:
3638    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3639    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
3640  case ARM::BI__builtin_neon_vrsqrte_v:
3641  case ARM::BI__builtin_neon_vrsqrteq_v:
3642    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty),
3643                        Ops, "vrsqrte");
3644  case ARM::BI__builtin_neon_vrsqrts_v:
3645  case ARM::BI__builtin_neon_vrsqrtsq_v:
3646    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty),
3647                        Ops, "vrsqrts");
3648  case ARM::BI__builtin_neon_vrsra_n_v:
3649  case ARM::BI__builtin_neon_vrsraq_n_v:
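    // vrsra: rounding shift right of the second operand by an immediate,
    // accumulated into the first operand.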
3650    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3651    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3652    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
3653    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3654    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
3655    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
3656  case ARM::BI__builtin_neon_vrsubhn_v:
3657    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty),
3658                        Ops, "vrsubhn");
3659  case ARM::BI__builtin_neon_vshl_v:
3660  case ARM::BI__builtin_neon_vshlq_v:
3661    Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
3662    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl");
3663  case ARM::BI__builtin_neon_vshll_n_v:
3664    Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
3665    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1);
3666  case ARM::BI__builtin_neon_vshl_n_v:
3667  case ARM::BI__builtin_neon_vshlq_n_v:
3668    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
3669    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
3670                             "vshl_n");
3671  case ARM::BI__builtin_neon_vshrn_n_v:
3672    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty),
3673                        Ops, "vshrn_n", 1, true);
3674  case ARM::BI__builtin_neon_vshr_n_v:
3675  case ARM::BI__builtin_neon_vshrq_n_v:
3676    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, usgn, "vshr_n");
3677  case ARM::BI__builtin_neon_vsri_n_v:
3678  case ARM::BI__builtin_neon_vsriq_n_v:
3679    rightShift = true;
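    // fall through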
3680  case ARM::BI__builtin_neon_vsli_n_v:
3681  case ARM::BI__builtin_neon_vsliq_n_v:
3682    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
3683    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
3684                        Ops, "vsli_n");
3685  case ARM::BI__builtin_neon_vsra_n_v:
3686  case ARM::BI__builtin_neon_vsraq_n_v:
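    // vsra: shift right by an immediate, then add the result to the first operand.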
3687    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3688    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
3689    return Builder.CreateAdd(Ops[0], Ops[1]);
3690  case ARM::BI__builtin_neon_vst1_v:
3691  case ARM::BI__builtin_neon_vst1q_v:
3692    Ops.push_back(Align);
3693    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty),
3694                        Ops, "");
3695  case ARM::BI__builtin_neon_vst1q_lane_v:
3696    // Handle 64-bit integer elements as a special case.  Use a shuffle to get
3697    // a one-element vector and avoid poor code for i64 in the backend.
3698    if (VTy->getElementType()->isIntegerTy(64)) {
3699      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3700      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
3701      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3702      Ops[2] = Align;
3703      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
3704                                                 Ops[1]->getType()), Ops);
3705    }
3706    // fall through
3707  case ARM::BI__builtin_neon_vst1_lane_v: {
3708    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3709    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
3710    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3711    StoreInst *St = Builder.CreateStore(Ops[1],
3712                                        Builder.CreateBitCast(Ops[0], Ty));
3713    St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
3714    return St;
3715  }
3716  case ARM::BI__builtin_neon_vst2_v:
3717  case ARM::BI__builtin_neon_vst2q_v:
3718    Ops.push_back(Align);
3719    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty),
3720                        Ops, "");
3721  case ARM::BI__builtin_neon_vst2_lane_v:
3722  case ARM::BI__builtin_neon_vst2q_lane_v:
3723    Ops.push_back(Align);
3724    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty),
3725                        Ops, "");
3726  case ARM::BI__builtin_neon_vst3_v:
3727  case ARM::BI__builtin_neon_vst3q_v:
3728    Ops.push_back(Align);
3729    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty),
3730                        Ops, "");
3731  case ARM::BI__builtin_neon_vst3_lane_v:
3732  case ARM::BI__builtin_neon_vst3q_lane_v:
3733    Ops.push_back(Align);
3734    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty),
3735                        Ops, "");
3736  case ARM::BI__builtin_neon_vst4_v:
3737  case ARM::BI__builtin_neon_vst4q_v:
3738    Ops.push_back(Align);
3739    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty),
3740                        Ops, "");
3741  case ARM::BI__builtin_neon_vst4_lane_v:
3742  case ARM::BI__builtin_neon_vst4q_lane_v:
3743    Ops.push_back(Align);
3744    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty),
3745                        Ops, "");
3746  case ARM::BI__builtin_neon_vsubhn_v: {
3747    llvm::VectorType *SrcTy =
3748        llvm::VectorType::getExtendedElementVectorType(VTy);
3749
3750    // %diff = sub <4 x i32> %lhs, %rhs
3751    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3752    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3753    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
3754
3755    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
3756    Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(),
3757                                       SrcTy->getScalarSizeInBits() / 2);
3758    ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt);
3759    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
3760
3761    // %res = trunc <4 x i32> %high to <4 x i16>
3762    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
3763  }
3764  case ARM::BI__builtin_neon_vtbl1_v:
3765    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
3766                        Ops, "vtbl1");
3767  case ARM::BI__builtin_neon_vtbl2_v:
3768    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
3769                        Ops, "vtbl2");
3770  case ARM::BI__builtin_neon_vtbl3_v:
3771    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
3772                        Ops, "vtbl3");
3773  case ARM::BI__builtin_neon_vtbl4_v:
3774    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
3775                        Ops, "vtbl4");
3776  case ARM::BI__builtin_neon_vtbx1_v:
3777    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
3778                        Ops, "vtbx1");
3779  case ARM::BI__builtin_neon_vtbx2_v:
3780    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
3781                        Ops, "vtbx2");
3782  case ARM::BI__builtin_neon_vtbx3_v:
3783    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
3784                        Ops, "vtbx3");
3785  case ARM::BI__builtin_neon_vtbx4_v:
3786    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
3787                        Ops, "vtbx4");
3788  case ARM::BI__builtin_neon_vtst_v:
3789  case ARM::BI__builtin_neon_vtstq_v: {
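    // VTST sets a lane to all ones when (a & b) is nonzero: AND the inputs,
    // compare not-equal to zero, and sign-extend the i1 results.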
3790    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3791    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3792    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
3793    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
3794                                ConstantAggregateZero::get(Ty));
3795    return Builder.CreateSExt(Ops[0], Ty, "vtst");
3796  }
3797  case ARM::BI__builtin_neon_vtrn_v:
3798  case ARM::BI__builtin_neon_vtrnq_v: {
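    // vtrn transposes pairs of lanes and stores both results through the
    // pointer in Ops[0], e.g. for <4 x i16>:
    //   result[0] = shufflevector a, b, <0, 4, 2, 6>
    //   result[1] = shufflevector a, b, <1, 5, 3, 7>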
3799    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3800    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3801    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3802    Value *SV = 0;
3803
3804    for (unsigned vi = 0; vi != 2; ++vi) {
3805      SmallVector<Constant*, 16> Indices;
3806      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3807        Indices.push_back(Builder.getInt32(i+vi));
3808        Indices.push_back(Builder.getInt32(i+e+vi));
3809      }
3810      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
3811      SV = llvm::ConstantVector::get(Indices);
3812      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
3813      SV = Builder.CreateStore(SV, Addr);
3814    }
3815    return SV;
3816  }
3817  case ARM::BI__builtin_neon_vuzp_v:
3818  case ARM::BI__builtin_neon_vuzpq_v: {
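    // vuzp de-interleaves the two inputs, e.g. for <4 x i16>:
    //   result[0] = shufflevector a, b, <0, 2, 4, 6>   ; even lanes
    //   result[1] = shufflevector a, b, <1, 3, 5, 7>   ; odd lanes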
3819    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3820    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3821    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3822    Value *SV = 0;
3823
3824    for (unsigned vi = 0; vi != 2; ++vi) {
3825      SmallVector<Constant*, 16> Indices;
3826      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3827        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
3828
3829      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
3830      SV = llvm::ConstantVector::get(Indices);
3831      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
3832      SV = Builder.CreateStore(SV, Addr);
3833    }
3834    return SV;
3835  }
3836  case ARM::BI__builtin_neon_vzip_v:
3837  case ARM::BI__builtin_neon_vzipq_v: {
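    // vzip interleaves the two inputs, e.g. for <4 x i16>:
    //   result[0] = shufflevector a, b, <0, 4, 1, 5>   ; low halves
    //   result[1] = shufflevector a, b, <2, 6, 3, 7>   ; high halves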
3838    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3839    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3840    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3841    Value *SV = 0;
3842
3843    for (unsigned vi = 0; vi != 2; ++vi) {
3844      SmallVector<Constant*, 16> Indices;
3845      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3846        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
3847        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
3848      }
3849      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
3850      SV = llvm::ConstantVector::get(Indices);
3851      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
3852      SV = Builder.CreateStore(SV, Addr);
3853    }
3854    return SV;
3855  }
3856  }
3857}
3858
3859llvm::Value *CodeGenFunction::
3860BuildVector(ArrayRef<llvm::Value*> Ops) {
3861  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
3862         "Not a power-of-two sized vector!");
3863  bool AllConstants = true;
3864  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
3865    AllConstants &= isa<Constant>(Ops[i]);
3866
3867  // If this is a constant vector, create a ConstantVector.
3868  if (AllConstants) {
3869    SmallVector<llvm::Constant*, 16> CstOps;
3870    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
3871      CstOps.push_back(cast<Constant>(Ops[i]));
3872    return llvm::ConstantVector::get(CstOps);
3873  }
3874
3875  // Otherwise, insertelement the values to build the vector.
3876  Value *Result =
3877    llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
3878
3879  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
3880    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
3881
3882  return Result;
3883}
3884
3885Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
3886                                           const CallExpr *E) {
3887  SmallVector<Value*, 4> Ops;
3888
3889  // Find out if any arguments are required to be integer constant expressions.
3890  unsigned ICEArguments = 0;
3891  ASTContext::GetBuiltinTypeError Error;
3892  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3893  assert(Error == ASTContext::GE_None && "Should not codegen an error");
3894
3895  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
3896    // If this is a normal argument, just emit it as a scalar.
3897    if ((ICEArguments & (1 << i)) == 0) {
3898      Ops.push_back(EmitScalarExpr(E->getArg(i)));
3899      continue;
3900    }
3901
3902    // If this is required to be a constant, constant fold it so that we know
3903    // that the generated intrinsic gets a ConstantInt.
3904    llvm::APSInt Result;
3905    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
3906    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
3907    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
3908  }
3909
3910  switch (BuiltinID) {
3911  default: return 0;
3912  case X86::BI__builtin_ia32_vec_init_v8qi:
3913  case X86::BI__builtin_ia32_vec_init_v4hi:
3914  case X86::BI__builtin_ia32_vec_init_v2si:
3915    return Builder.CreateBitCast(BuildVector(Ops),
3916                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
3917  case X86::BI__builtin_ia32_vec_ext_v2si:
3918    return Builder.CreateExtractElement(Ops[0],
3919                                  llvm::ConstantInt::get(Ops[1]->getType(), 0));
3920  case X86::BI__builtin_ia32_ldmxcsr: {
3921    Value *Tmp = CreateMemTemp(E->getArg(0)->getType());
3922    Builder.CreateStore(Ops[0], Tmp);
3923    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
3924                              Builder.CreateBitCast(Tmp, Int8PtrTy));
3925  }
3926  case X86::BI__builtin_ia32_stmxcsr: {
3927    Value *Tmp = CreateMemTemp(E->getType());
3928    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
3929                       Builder.CreateBitCast(Tmp, Int8PtrTy));
3930    return Builder.CreateLoad(Tmp, "stmxcsr");
3931  }
3932  case X86::BI__builtin_ia32_storehps:
3933  case X86::BI__builtin_ia32_storelps: {
3934    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
3935    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
3936
3937    // cast the value to <2 x i64>
3938    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
3939
3940    // extract element 0 (storelps) or element 1 (storehps)
3941    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
3942    llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
3943    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
3944
3945    // cast pointer to i64 & store
3946    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
3947    return Builder.CreateStore(Ops[1], Ops[0]);
3948  }
3949  case X86::BI__builtin_ia32_palignr: {
3950    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
3951
3952    // If palignr is shifting the pair of input vectors less than 9 bytes,
3953    // emit a shuffle instruction.
3954    if (shiftVal <= 8) {
3955      SmallVector<llvm::Constant*, 8> Indices;
3956      for (unsigned i = 0; i != 8; ++i)
3957        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
3958
3959      Value* SV = llvm::ConstantVector::get(Indices);
3960      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
3961    }
3962
3963    // If palignr is shifting the pair of input vectors more than 8 but less
3964    // than 16 bytes, emit a logical right shift of the destination.
3965    if (shiftVal < 16) {
3966      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
3967      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);
3968
3969      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
3970      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);
3971
3972      // shift the v1i64 destination right with the MMX psrl.q intrinsic
3973      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
3974      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
3975    }
3976
3977    // If palignr is shifting the pair of vectors by 16 bytes or more, emit zero.
3978    return llvm::Constant::getNullValue(ConvertType(E->getType()));
3979  }
3980  case X86::BI__builtin_ia32_palignr128: {
3981    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
3982
3983    // If palignr is shifting the pair of input vectors less than 17 bytes,
3984    // emit a shuffle instruction.
3985    if (shiftVal <= 16) {
3986      SmallVector<llvm::Constant*, 16> Indices;
3987      for (unsigned i = 0; i != 16; ++i)
3988        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
3989
3990      Value* SV = llvm::ConstantVector::get(Indices);
3991      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
3992    }
3993
3994    // If palignr is shifting the pair of input vectors more than 16 but less
3995    // than 32 bytes, emit a logical right shift of the destination.
3996    if (shiftVal < 32) {
3997      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
3998
3999      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
4000      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
4001
4002      // emit the shift as a call to the SSE2 psrl.dq intrinsic
4003      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
4004      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
4005    }
4006
4007    // If palignr is shifting the pair of vectors by 32 bytes or more, emit zero.
4008    return llvm::Constant::getNullValue(ConvertType(E->getType()));
4009  }
4010  case X86::BI__builtin_ia32_palignr256: {
4011    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
4012
4013    // If palignr is shifting the pair of input vectors less than 17 bytes,
4014    // emit a shuffle instruction.
4015    if (shiftVal <= 16) {
4016      SmallVector<llvm::Constant*, 32> Indices;
4017      // 256-bit palignr operates on each 128-bit lane independently, so build
4017      // the shuffle mask lane by lane.
4018      for (unsigned l = 0; l != 2; ++l) {
4019        unsigned LaneStart = l * 16;
4020        unsigned LaneEnd = (l+1) * 16;
4021        for (unsigned i = 0; i != 16; ++i) {
4022          unsigned Idx = shiftVal + i + LaneStart;
4023          if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand
4024          Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx));
4025        }
4026      }
4027
4028      Value* SV = llvm::ConstantVector::get(Indices);
4029      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
4030    }
4031
4032    // If palignr is shifting the pair of input vectors more than 16 but less
4033    // than 32 bytes, emit a logical right shift of the destination.
4034    if (shiftVal < 32) {
4035      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4);
4036
4037      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
4038      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
4039
4040      // emit the shift as a call to the AVX2 psrl.dq intrinsic
4041      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq);
4042      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
4043    }
4044
4045    // If palignr is shifting the pair of vectors by 32 bytes or more, emit zero.
4046    return llvm::Constant::getNullValue(ConvertType(E->getType()));
4047  }
4048  case X86::BI__builtin_ia32_movntps:
4049  case X86::BI__builtin_ia32_movntps256:
4050  case X86::BI__builtin_ia32_movntpd:
4051  case X86::BI__builtin_ia32_movntpd256:
4052  case X86::BI__builtin_ia32_movntdq:
4053  case X86::BI__builtin_ia32_movntdq256:
4054  case X86::BI__builtin_ia32_movnti:
4055  case X86::BI__builtin_ia32_movnti64: {
4056    llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
4057                                           Builder.getInt32(1));
4058
4059    // Convert the type of the pointer to a pointer to the stored type.
4060    Value *BC = Builder.CreateBitCast(Ops[0],
4061                                llvm::PointerType::getUnqual(Ops[1]->getType()),
4062                                      "cast");
4063    StoreInst *SI = Builder.CreateStore(Ops[1], BC);
4064    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
4065
4066    // If the operand is an integer, we can't assume alignment. Otherwise,
4067    // assume natural alignment.
4068    QualType ArgTy = E->getArg(1)->getType();
4069    unsigned Align;
4070    if (ArgTy->isIntegerType())
4071      Align = 1;
4072    else
4073      Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
4074    SI->setAlignment(Align);
4075    return SI;
4076  }
4077  // 3DNow!
4078  case X86::BI__builtin_ia32_pswapdsf:
4079  case X86::BI__builtin_ia32_pswapdsi: {
4080    const char *name = 0;
4081    Intrinsic::ID ID = Intrinsic::not_intrinsic;
4082    switch(BuiltinID) {
4083    default: llvm_unreachable("Unsupported intrinsic!");
4084    case X86::BI__builtin_ia32_pswapdsf:
4085    case X86::BI__builtin_ia32_pswapdsi:
4086      name = "pswapd";
4087      ID = Intrinsic::x86_3dnowa_pswapd;
4088      break;
4089    }
4090    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
4091    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
4092    llvm::Function *F = CGM.getIntrinsic(ID);
4093    return Builder.CreateCall(F, Ops, name);
4094  }
4095  case X86::BI__builtin_ia32_rdrand16_step:
4096  case X86::BI__builtin_ia32_rdrand32_step:
4097  case X86::BI__builtin_ia32_rdrand64_step:
4098  case X86::BI__builtin_ia32_rdseed16_step:
4099  case X86::BI__builtin_ia32_rdseed32_step:
4100  case X86::BI__builtin_ia32_rdseed64_step: {
4101    Intrinsic::ID ID;
4102    switch (BuiltinID) {
4103    default: llvm_unreachable("Unsupported intrinsic!");
4104    case X86::BI__builtin_ia32_rdrand16_step:
4105      ID = Intrinsic::x86_rdrand_16;
4106      break;
4107    case X86::BI__builtin_ia32_rdrand32_step:
4108      ID = Intrinsic::x86_rdrand_32;
4109      break;
4110    case X86::BI__builtin_ia32_rdrand64_step:
4111      ID = Intrinsic::x86_rdrand_64;
4112      break;
4113    case X86::BI__builtin_ia32_rdseed16_step:
4114      ID = Intrinsic::x86_rdseed_16;
4115      break;
4116    case X86::BI__builtin_ia32_rdseed32_step:
4117      ID = Intrinsic::x86_rdseed_32;
4118      break;
4119    case X86::BI__builtin_ia32_rdseed64_step:
4120      ID = Intrinsic::x86_rdseed_64;
4121      break;
4122    }
4123
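    // The rdrand/rdseed intrinsics return { random value, i32 success flag };
    // store the value through the pointer operand and return the flag.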
4124    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
4125    Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]);
4126    return Builder.CreateExtractValue(Call, 1);
4127  }
4128  // AVX2 broadcast
4129  case X86::BI__builtin_ia32_vbroadcastsi256: {
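    // Spill the 128-bit argument to a temporary so it can be passed by address
    // to the vbroadcasti128 intrinsic, which takes a pointer operand.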
4130    Value *VecTmp = CreateMemTemp(E->getArg(0)->getType());
4131    Builder.CreateStore(Ops[0], VecTmp);
4132    Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128);
4133    return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy));
4134  }
4135  }
4136}
4137
4138
4139Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
4140                                           const CallExpr *E) {
4141  SmallVector<Value*, 4> Ops;
4142
4143  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
4144    Ops.push_back(EmitScalarExpr(E->getArg(i)));
4145
4146  Intrinsic::ID ID = Intrinsic::not_intrinsic;
4147
4148  switch (BuiltinID) {
4149  default: return 0;
4150
4151  // vec_ld, vec_lvsl, vec_lvsr
4152  case PPC::BI__builtin_altivec_lvx:
4153  case PPC::BI__builtin_altivec_lvxl:
4154  case PPC::BI__builtin_altivec_lvebx:
4155  case PPC::BI__builtin_altivec_lvehx:
4156  case PPC::BI__builtin_altivec_lvewx:
4157  case PPC::BI__builtin_altivec_lvsl:
4158  case PPC::BI__builtin_altivec_lvsr:
4159  {
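    // These builtins take (offset, pointer); fold them into a single
    // effective-address operand for the intrinsic.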
4160    Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
4161
4162    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
4163    Ops.pop_back();
4164
4165    switch (BuiltinID) {
4166    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
4167    case PPC::BI__builtin_altivec_lvx:
4168      ID = Intrinsic::ppc_altivec_lvx;
4169      break;
4170    case PPC::BI__builtin_altivec_lvxl:
4171      ID = Intrinsic::ppc_altivec_lvxl;
4172      break;
4173    case PPC::BI__builtin_altivec_lvebx:
4174      ID = Intrinsic::ppc_altivec_lvebx;
4175      break;
4176    case PPC::BI__builtin_altivec_lvehx:
4177      ID = Intrinsic::ppc_altivec_lvehx;
4178      break;
4179    case PPC::BI__builtin_altivec_lvewx:
4180      ID = Intrinsic::ppc_altivec_lvewx;
4181      break;
4182    case PPC::BI__builtin_altivec_lvsl:
4183      ID = Intrinsic::ppc_altivec_lvsl;
4184      break;
4185    case PPC::BI__builtin_altivec_lvsr:
4186      ID = Intrinsic::ppc_altivec_lvsr;
4187      break;
4188    }
4189    llvm::Function *F = CGM.getIntrinsic(ID);
4190    return Builder.CreateCall(F, Ops, "");
4191  }
4192
4193  // vec_st
4194  case PPC::BI__builtin_altivec_stvx:
4195  case PPC::BI__builtin_altivec_stvxl:
4196  case PPC::BI__builtin_altivec_stvebx:
4197  case PPC::BI__builtin_altivec_stvehx:
4198  case PPC::BI__builtin_altivec_stvewx:
4199  {
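    // The store builtins take (value, offset, pointer); likewise fold offset
    // and pointer into one effective address.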
4200    Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
4201    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
4202    Ops.pop_back();
4203
4204    switch (BuiltinID) {
4205    default: llvm_unreachable("Unsupported st intrinsic!");
4206    case PPC::BI__builtin_altivec_stvx:
4207      ID = Intrinsic::ppc_altivec_stvx;
4208      break;
4209    case PPC::BI__builtin_altivec_stvxl:
4210      ID = Intrinsic::ppc_altivec_stvxl;
4211      break;
4212    case PPC::BI__builtin_altivec_stvebx:
4213      ID = Intrinsic::ppc_altivec_stvebx;
4214      break;
4215    case PPC::BI__builtin_altivec_stvehx:
4216      ID = Intrinsic::ppc_altivec_stvehx;
4217      break;
4218    case PPC::BI__builtin_altivec_stvewx:
4219      ID = Intrinsic::ppc_altivec_stvewx;
4220      break;
4221    }
4222    llvm::Function *F = CGM.getIntrinsic(ID);
4223    return Builder.CreateCall(F, Ops, "");
4224  }
4225  }
4226}
4227