CGBuiltin.cpp revision a841c19f7860393d6319bf40e9d662284462771d
1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This contains code to emit Builtin calls as LLVM code.
11//
12//===----------------------------------------------------------------------===//
13
14#include "TargetInfo.h"
15#include "CodeGenFunction.h"
16#include "CodeGenModule.h"
17#include "CGObjCRuntime.h"
18#include "clang/Basic/TargetInfo.h"
19#include "clang/AST/ASTContext.h"
20#include "clang/AST/Decl.h"
21#include "clang/Basic/TargetBuiltins.h"
22#include "llvm/Intrinsics.h"
23#include "llvm/Target/TargetData.h"
24
25using namespace clang;
26using namespace CodeGen;
27using namespace llvm;
28
29/// getBuiltinLibFunction - Given a builtin id for a function like
30/// "__builtin_fabsf", return a Function* for "fabsf".
31llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
32                                                  unsigned BuiltinID) {
33  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
34
35  // Get the name, skipping over the __builtin_ prefix (if necessary).
36  StringRef Name;
37  GlobalDecl D(FD);
38
39  // If the builtin has been declared explicitly with an assembler label,
40  // use the mangled name. This differs from the plain label on platforms
41  // that prefix labels.
42  if (FD->hasAttr<AsmLabelAttr>())
43    Name = getMangledName(D);
44  else
45    Name = Context.BuiltinInfo.GetName(BuiltinID) + 10;
46
47  llvm::FunctionType *Ty =
48    cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
49
50  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
51}
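// For example, for BI__builtin_fabsf this returns (creating it if needed) a
// declaration of "fabsf" with the builtin's function type; "__builtin_" is 10
// characters long, which is why GetName(BuiltinID) + 10 above yields the bare
// library name.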
52
53/// Emit the conversions required to turn the given value into an
54/// integer of the given size.
55static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
56                        QualType T, llvm::IntegerType *IntType) {
57  V = CGF.EmitToMemory(V, T);
58
59  if (V->getType()->isPointerTy())
60    return CGF.Builder.CreatePtrToInt(V, IntType);
61
62  assert(V->getType() == IntType);
63  return V;
64}
65
66static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
67                          QualType T, llvm::Type *ResultType) {
68  V = CGF.EmitFromMemory(V, T);
69
70  if (ResultType->isPointerTy())
71    return CGF.Builder.CreateIntToPtr(V, ResultType);
72
73  assert(V->getType() == ResultType);
74  return V;
75}
76
77/// Utility to insert an atomic instruction based on the AtomicRMWInst::BinOp
78/// kind and the expression node.
79static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
80                               llvm::AtomicRMWInst::BinOp Kind,
81                               const CallExpr *E) {
82  QualType T = E->getType();
83  assert(E->getArg(0)->getType()->isPointerType());
84  assert(CGF.getContext().hasSameUnqualifiedType(T,
85                                  E->getArg(0)->getType()->getPointeeType()));
86  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
87
88  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
89  unsigned AddrSpace =
90    cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
91
92  llvm::IntegerType *IntType =
93    llvm::IntegerType::get(CGF.getLLVMContext(),
94                           CGF.getContext().getTypeSize(T));
95  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
96
97  llvm::Value *Args[2];
98  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
99  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
100  llvm::Type *ValueType = Args[1]->getType();
101  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
102
103  llvm::Value *Result =
104      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
105                                  llvm::SequentiallyConsistent);
106  Result = EmitFromInt(CGF, Result, T, ValueType);
107  return RValue::get(Result);
108}
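// For example, __sync_fetch_and_add(p, v) comes through here and becomes a
// sequentially consistent "atomicrmw add"; the value returned is the one that
// was in memory *before* the operation.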
109
110/// Utility to insert an atomic instruction based on the AtomicRMWInst::BinOp
111/// kind and the expression node, where the return value is the result of the
112/// operation (the new value, rather than the value previously in memory).
113static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
114                                   llvm::AtomicRMWInst::BinOp Kind,
115                                   const CallExpr *E,
116                                   Instruction::BinaryOps Op) {
117  QualType T = E->getType();
118  assert(E->getArg(0)->getType()->isPointerType());
119  assert(CGF.getContext().hasSameUnqualifiedType(T,
120                                  E->getArg(0)->getType()->getPointeeType()));
121  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
122
123  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
124  unsigned AddrSpace =
125    cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
126
127  llvm::IntegerType *IntType =
128    llvm::IntegerType::get(CGF.getLLVMContext(),
129                           CGF.getContext().getTypeSize(T));
130  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
131
132  llvm::Value *Args[2];
133  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
134  llvm::Type *ValueType = Args[1]->getType();
135  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
136  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
137
138  llvm::Value *Result =
139      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
140                                  llvm::SequentiallyConsistent);
141  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
142  Result = EmitFromInt(CGF, Result, T, ValueType);
143  return RValue::get(Result);
144}
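// For example, __sync_add_and_fetch(p, v) emits the same atomicrmw as above
// and then re-applies the operation to the old value, so the result handed
// back is the *new* value.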
145
146/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
147/// which must be a scalar floating point type.
148static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
149  const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
150  assert(ValTyP && "isn't scalar fp type!");
151
152  StringRef FnName;
153  switch (ValTyP->getKind()) {
154  default: llvm_unreachable("Isn't a scalar fp type!");
155  case BuiltinType::Float:      FnName = "fabsf"; break;
156  case BuiltinType::Double:     FnName = "fabs"; break;
157  case BuiltinType::LongDouble: FnName = "fabsl"; break;
158  }
159
160  // The prototype is something that takes and returns whatever V's type is.
161  llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), V->getType(),
162                                                   false);
163  llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);
164
165  return CGF.Builder.CreateCall(Fn, V, "abs");
166}
167
168static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
169                              const CallExpr *E, llvm::Value *calleeValue) {
170  return CGF.EmitCall(E->getCallee()->getType(), calleeValue,
171                      ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn);
172}
173
174RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
175                                        unsigned BuiltinID, const CallExpr *E) {
176  // See if we can constant fold this builtin.  If so, don't emit it at all.
177  Expr::EvalResult Result;
178  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
179      !Result.hasSideEffects()) {
180    if (Result.Val.isInt())
181      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
182                                                Result.Val.getInt()));
183    if (Result.Val.isFloat())
184      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
185                                               Result.Val.getFloat()));
186  }
187
188  switch (BuiltinID) {
189  default: break;  // Handle intrinsics and libm functions below.
190  case Builtin::BI__builtin___CFStringMakeConstantString:
191  case Builtin::BI__builtin___NSStringMakeConstantString:
192    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
193  case Builtin::BI__builtin_stdarg_start:
194  case Builtin::BI__builtin_va_start:
195  case Builtin::BI__builtin_va_end: {
196    Value *ArgValue = EmitVAListRef(E->getArg(0));
197    llvm::Type *DestType = Int8PtrTy;
198    if (ArgValue->getType() != DestType)
199      ArgValue = Builder.CreateBitCast(ArgValue, DestType,
200                                       ArgValue->getName().data());
201
202    Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
203      Intrinsic::vaend : Intrinsic::vastart;
204    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
205  }
206  case Builtin::BI__builtin_va_copy: {
207    Value *DstPtr = EmitVAListRef(E->getArg(0));
208    Value *SrcPtr = EmitVAListRef(E->getArg(1));
209
210    llvm::Type *Type = Int8PtrTy;
211
212    DstPtr = Builder.CreateBitCast(DstPtr, Type);
213    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
214    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
215                                           DstPtr, SrcPtr));
216  }
217  case Builtin::BI__builtin_abs:
218  case Builtin::BI__builtin_labs:
219  case Builtin::BI__builtin_llabs: {
220    Value *ArgValue = EmitScalarExpr(E->getArg(0));
221
222    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
223    Value *CmpResult =
224    Builder.CreateICmpSGE(ArgValue,
225                          llvm::Constant::getNullValue(ArgValue->getType()),
226                                                            "abscond");
227    Value *Result =
228      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
229
230    return RValue::get(Result);
231  }
232  case Builtin::BI__builtin_ctzs:
233  case Builtin::BI__builtin_ctz:
234  case Builtin::BI__builtin_ctzl:
235  case Builtin::BI__builtin_ctzll: {
236    Value *ArgValue = EmitScalarExpr(E->getArg(0));
237
238    llvm::Type *ArgType = ArgValue->getType();
239    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
240
241    llvm::Type *ResultType = ConvertType(E->getType());
242    Value *ZeroUndef = Builder.getInt1(Target.isCLZForZeroUndef());
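    // The i1 operand tells LLVM whether a zero input yields an undefined
    // result; targets whose clz/ctz builtins are undefined for zero let us
    // pass true here.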
243    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
244    if (Result->getType() != ResultType)
245      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
246                                     "cast");
247    return RValue::get(Result);
248  }
249  case Builtin::BI__builtin_clzs:
250  case Builtin::BI__builtin_clz:
251  case Builtin::BI__builtin_clzl:
252  case Builtin::BI__builtin_clzll: {
253    Value *ArgValue = EmitScalarExpr(E->getArg(0));
254
255    llvm::Type *ArgType = ArgValue->getType();
256    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
257
258    llvm::Type *ResultType = ConvertType(E->getType());
259    Value *ZeroUndef = Builder.getInt1(Target.isCLZForZeroUndef());
260    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
261    if (Result->getType() != ResultType)
262      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
263                                     "cast");
264    return RValue::get(Result);
265  }
266  case Builtin::BI__builtin_ffs:
267  case Builtin::BI__builtin_ffsl:
268  case Builtin::BI__builtin_ffsll: {
269    // ffs(x) -> x ? cttz(x) + 1 : 0
270    Value *ArgValue = EmitScalarExpr(E->getArg(0));
271
272    llvm::Type *ArgType = ArgValue->getType();
273    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
274
275    llvm::Type *ResultType = ConvertType(E->getType());
276    Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue,
277                                                       Builder.getTrue()),
278                                   llvm::ConstantInt::get(ArgType, 1));
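    // cttz may assume a non-zero input here (second operand is 'true') because
    // the x == 0 case is handled explicitly by the select below.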
279    Value *Zero = llvm::Constant::getNullValue(ArgType);
280    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
281    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
282    if (Result->getType() != ResultType)
283      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
284                                     "cast");
285    return RValue::get(Result);
286  }
287  case Builtin::BI__builtin_parity:
288  case Builtin::BI__builtin_parityl:
289  case Builtin::BI__builtin_parityll: {
290    // parity(x) -> ctpop(x) & 1
291    Value *ArgValue = EmitScalarExpr(E->getArg(0));
292
293    llvm::Type *ArgType = ArgValue->getType();
294    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
295
296    llvm::Type *ResultType = ConvertType(E->getType());
297    Value *Tmp = Builder.CreateCall(F, ArgValue);
298    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
299    if (Result->getType() != ResultType)
300      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
301                                     "cast");
302    return RValue::get(Result);
303  }
304  case Builtin::BI__builtin_popcount:
305  case Builtin::BI__builtin_popcountl:
306  case Builtin::BI__builtin_popcountll: {
307    Value *ArgValue = EmitScalarExpr(E->getArg(0));
308
309    llvm::Type *ArgType = ArgValue->getType();
310    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
311
312    llvm::Type *ResultType = ConvertType(E->getType());
313    Value *Result = Builder.CreateCall(F, ArgValue);
314    if (Result->getType() != ResultType)
315      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
316                                     "cast");
317    return RValue::get(Result);
318  }
319  case Builtin::BI__builtin_expect: {
320    Value *ArgValue = EmitScalarExpr(E->getArg(0));
321    llvm::Type *ArgType = ArgValue->getType();
322
323    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
324    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
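    // e.g. if (__builtin_expect(x, 0)) lowers to a call to llvm.expect, which
    // later optimization passes can turn into branch-probability hints.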
325
326    Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue,
327                                        "expval");
328    return RValue::get(Result);
329  }
330  case Builtin::BI__builtin_bswap32:
331  case Builtin::BI__builtin_bswap64: {
332    Value *ArgValue = EmitScalarExpr(E->getArg(0));
333    llvm::Type *ArgType = ArgValue->getType();
334    Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
335    return RValue::get(Builder.CreateCall(F, ArgValue));
336  }
337  case Builtin::BI__builtin_object_size: {
338    // We rely on constant folding to deal with expressions with side effects.
339    assert(!E->getArg(0)->HasSideEffects(getContext()) &&
340           "should have been constant folded");
341
342    // We pass this builtin onto the optimizer so that it can
343    // figure out the object size in more complex cases.
344    llvm::Type *ResType = ConvertType(E->getType());
345
346    // LLVM only supports 0 and 2, so make sure that we pass that along
347    // as a boolean.
348    Value *Ty = EmitScalarExpr(E->getArg(1));
349    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
350    assert(CI);
351    uint64_t val = CI->getZExtValue();
352    CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);
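    // Bit 1 of the gcc-style type argument selects between the maximum (0/1)
    // and minimum (2/3) remaining object size, which is the only distinction
    // llvm.objectsize's i1 operand can express.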
353
354    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType);
355    return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI));
356  }
357  case Builtin::BI__builtin_prefetch: {
358    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
359    // FIXME: Technically these constants should be of type 'int', yes?
360    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
361      llvm::ConstantInt::get(Int32Ty, 0);
362    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
363      llvm::ConstantInt::get(Int32Ty, 3);
364    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
365    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
366    return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data));
367  }
368  case Builtin::BI__builtin_readcyclecounter: {
369    Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
370    return RValue::get(Builder.CreateCall(F));
371  }
372  case Builtin::BI__builtin_trap: {
373    Value *F = CGM.getIntrinsic(Intrinsic::trap);
374    return RValue::get(Builder.CreateCall(F));
375  }
376  case Builtin::BI__builtin_unreachable: {
377    if (CatchUndefined)
378      EmitBranch(getTrapBB());
379    else
380      Builder.CreateUnreachable();
381
382    // We do need to preserve an insertion point.
383    EmitBlock(createBasicBlock("unreachable.cont"));
384
385    return RValue::get(0);
386  }
387
388  case Builtin::BI__builtin_powi:
389  case Builtin::BI__builtin_powif:
390  case Builtin::BI__builtin_powil: {
391    Value *Base = EmitScalarExpr(E->getArg(0));
392    Value *Exponent = EmitScalarExpr(E->getArg(1));
393    llvm::Type *ArgType = Base->getType();
394    Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
395    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
396  }
397
398  case Builtin::BI__builtin_isgreater:
399  case Builtin::BI__builtin_isgreaterequal:
400  case Builtin::BI__builtin_isless:
401  case Builtin::BI__builtin_islessequal:
402  case Builtin::BI__builtin_islessgreater:
403  case Builtin::BI__builtin_isunordered: {
404    // Ordered comparisons: we know the arguments to these are matching scalar
405    // floating point values.
406    Value *LHS = EmitScalarExpr(E->getArg(0));
407    Value *RHS = EmitScalarExpr(E->getArg(1));
408
409    switch (BuiltinID) {
410    default: llvm_unreachable("Unknown ordered comparison");
411    case Builtin::BI__builtin_isgreater:
412      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
413      break;
414    case Builtin::BI__builtin_isgreaterequal:
415      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
416      break;
417    case Builtin::BI__builtin_isless:
418      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
419      break;
420    case Builtin::BI__builtin_islessequal:
421      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
422      break;
423    case Builtin::BI__builtin_islessgreater:
424      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
425      break;
426    case Builtin::BI__builtin_isunordered:
427      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
428      break;
429    }
430    // ZExt bool to int type.
431    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
432  }
433  case Builtin::BI__builtin_isnan: {
434    Value *V = EmitScalarExpr(E->getArg(0));
435    V = Builder.CreateFCmpUNO(V, V, "cmp");
436    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
437  }
438
439  case Builtin::BI__builtin_isinf: {
440    // isinf(x) --> fabs(x) == infinity
441    Value *V = EmitScalarExpr(E->getArg(0));
442    V = EmitFAbs(*this, V, E->getArg(0)->getType());
443
444    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
445    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
446  }
447
448  // TODO: BI__builtin_isinf_sign
449  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0
450
451  case Builtin::BI__builtin_isnormal: {
452    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
453    Value *V = EmitScalarExpr(E->getArg(0));
454    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
455
456    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
457    Value *IsLessThanInf =
458      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
459    APFloat Smallest = APFloat::getSmallestNormalized(
460                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
461    Value *IsNormal =
462      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
463                            "isnormal");
464    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
465    V = Builder.CreateAnd(V, IsNormal, "and");
466    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
467  }
468
469  case Builtin::BI__builtin_isfinite: {
470    // isfinite(x) --> x == x && fabs(x) != infinity;
471    Value *V = EmitScalarExpr(E->getArg(0));
472    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
473
474    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
475    Value *IsNotInf =
476      Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
477
478    V = Builder.CreateAnd(Eq, IsNotInf, "and");
479    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
480  }
481
482  case Builtin::BI__builtin_fpclassify: {
483    Value *V = EmitScalarExpr(E->getArg(5));
484    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
485
486    // Create Result
487    BasicBlock *Begin = Builder.GetInsertBlock();
488    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
489    Builder.SetInsertPoint(End);
490    PHINode *Result =
491      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
492                        "fpclassify_result");
493
494    // if (V==0) return FP_ZERO
495    Builder.SetInsertPoint(Begin);
496    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
497                                          "iszero");
498    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
499    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
500    Builder.CreateCondBr(IsZero, End, NotZero);
501    Result->addIncoming(ZeroLiteral, Begin);
502
503    // if (V != V) return FP_NAN
504    Builder.SetInsertPoint(NotZero);
505    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
506    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
507    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
508    Builder.CreateCondBr(IsNan, End, NotNan);
509    Result->addIncoming(NanLiteral, NotZero);
510
511    // if (fabs(V) == infinity) return FP_INFINITY
512    Builder.SetInsertPoint(NotNan);
513    Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
514    Value *IsInf =
515      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
516                            "isinf");
517    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
518    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
519    Builder.CreateCondBr(IsInf, End, NotInf);
520    Result->addIncoming(InfLiteral, NotNan);
521
522    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
523    Builder.SetInsertPoint(NotInf);
524    APFloat Smallest = APFloat::getSmallestNormalized(
525        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
526    Value *IsNormal =
527      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
528                            "isnormal");
529    Value *NormalResult =
530      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
531                           EmitScalarExpr(E->getArg(3)));
532    Builder.CreateBr(End);
533    Result->addIncoming(NormalResult, NotInf);
534
535    // return Result
536    Builder.SetInsertPoint(End);
537    return RValue::get(Result);
538  }
539
540  case Builtin::BIalloca:
541  case Builtin::BI__builtin_alloca: {
542    Value *Size = EmitScalarExpr(E->getArg(0));
543    return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
544  }
545  case Builtin::BIbzero:
546  case Builtin::BI__builtin_bzero: {
547    Value *Address = EmitScalarExpr(E->getArg(0));
548    Value *SizeVal = EmitScalarExpr(E->getArg(1));
549    unsigned Align = GetPointeeAlignment(E->getArg(0));
550    Builder.CreateMemSet(Address, Builder.getInt8(0), SizeVal, Align, false);
551    return RValue::get(Address);
552  }
553  case Builtin::BImemcpy:
554  case Builtin::BI__builtin_memcpy: {
555    Value *Address = EmitScalarExpr(E->getArg(0));
556    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
557    Value *SizeVal = EmitScalarExpr(E->getArg(2));
558    unsigned Align = std::min(GetPointeeAlignment(E->getArg(0)),
559                              GetPointeeAlignment(E->getArg(1)));
560    Builder.CreateMemCpy(Address, SrcAddr, SizeVal, Align, false);
561    return RValue::get(Address);
562  }
563
564  case Builtin::BI__builtin___memcpy_chk: {
565    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
566    llvm::APSInt Size, DstSize;
567    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
568        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
569      break;
570    if (Size.ugt(DstSize))
571      break;
572    Value *Dest = EmitScalarExpr(E->getArg(0));
573    Value *Src = EmitScalarExpr(E->getArg(1));
574    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
575    unsigned Align = std::min(GetPointeeAlignment(E->getArg(0)),
576                              GetPointeeAlignment(E->getArg(1)));
577    Builder.CreateMemCpy(Dest, Src, SizeVal, Align, false);
578    return RValue::get(Dest);
579  }
580
581  case Builtin::BI__builtin_objc_memmove_collectable: {
582    Value *Address = EmitScalarExpr(E->getArg(0));
583    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
584    Value *SizeVal = EmitScalarExpr(E->getArg(2));
585    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
586                                                  Address, SrcAddr, SizeVal);
587    return RValue::get(Address);
588  }
589
590  case Builtin::BI__builtin___memmove_chk: {
591    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
592    llvm::APSInt Size, DstSize;
593    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
594        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
595      break;
596    if (Size.ugt(DstSize))
597      break;
598    Value *Dest = EmitScalarExpr(E->getArg(0));
599    Value *Src = EmitScalarExpr(E->getArg(1));
600    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
601    unsigned Align = std::min(GetPointeeAlignment(E->getArg(0)),
602                              GetPointeeAlignment(E->getArg(1)));
603    Builder.CreateMemMove(Dest, Src, SizeVal, Align, false);
604    return RValue::get(Dest);
605  }
606
607  case Builtin::BImemmove:
608  case Builtin::BI__builtin_memmove: {
609    Value *Address = EmitScalarExpr(E->getArg(0));
610    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
611    Value *SizeVal = EmitScalarExpr(E->getArg(2));
612    unsigned Align = std::min(GetPointeeAlignment(E->getArg(0)),
613                              GetPointeeAlignment(E->getArg(1)));
614    Builder.CreateMemMove(Address, SrcAddr, SizeVal, Align, false);
615    return RValue::get(Address);
616  }
617  case Builtin::BImemset:
618  case Builtin::BI__builtin_memset: {
619    Value *Address = EmitScalarExpr(E->getArg(0));
620    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
621                                         Builder.getInt8Ty());
622    Value *SizeVal = EmitScalarExpr(E->getArg(2));
623    unsigned Align = GetPointeeAlignment(E->getArg(0));
624    Builder.CreateMemSet(Address, ByteVal, SizeVal, Align, false);
625    return RValue::get(Address);
626  }
627  case Builtin::BI__builtin___memset_chk: {
628    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
629    llvm::APSInt Size, DstSize;
630    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
631        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
632      break;
633    if (Size.ugt(DstSize))
634      break;
635    Value *Address = EmitScalarExpr(E->getArg(0));
636    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
637                                         Builder.getInt8Ty());
638    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
639    unsigned Align = GetPointeeAlignment(E->getArg(0));
640    Builder.CreateMemSet(Address, ByteVal, SizeVal, Align, false);
641
642    return RValue::get(Address);
643  }
644  case Builtin::BI__builtin_dwarf_cfa: {
645    // The offset in bytes from the first argument to the CFA.
646    //
647    // Why on earth is this in the frontend?  Is there any reason at
648    // all that the backend can't reasonably determine this while
649    // lowering llvm.eh.dwarf.cfa()?
650    //
651    // TODO: If there's a satisfactory reason, add a target hook for
652    // this instead of hard-coding 0, which is correct for most targets.
653    int32_t Offset = 0;
654
655    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
656    return RValue::get(Builder.CreateCall(F,
657                                      llvm::ConstantInt::get(Int32Ty, Offset)));
658  }
659  case Builtin::BI__builtin_return_address: {
660    Value *Depth = EmitScalarExpr(E->getArg(0));
661    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
662    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
663    return RValue::get(Builder.CreateCall(F, Depth));
664  }
665  case Builtin::BI__builtin_frame_address: {
666    Value *Depth = EmitScalarExpr(E->getArg(0));
667    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
668    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
669    return RValue::get(Builder.CreateCall(F, Depth));
670  }
671  case Builtin::BI__builtin_extract_return_addr: {
672    Value *Address = EmitScalarExpr(E->getArg(0));
673    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
674    return RValue::get(Result);
675  }
676  case Builtin::BI__builtin_frob_return_addr: {
677    Value *Address = EmitScalarExpr(E->getArg(0));
678    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
679    return RValue::get(Result);
680  }
681  case Builtin::BI__builtin_dwarf_sp_column: {
682    llvm::IntegerType *Ty
683      = cast<llvm::IntegerType>(ConvertType(E->getType()));
684    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
685    if (Column == -1) {
686      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
687      return RValue::get(llvm::UndefValue::get(Ty));
688    }
689    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
690  }
691  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
692    Value *Address = EmitScalarExpr(E->getArg(0));
693    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
694      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
695    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
696  }
697  case Builtin::BI__builtin_eh_return: {
698    Value *Int = EmitScalarExpr(E->getArg(0));
699    Value *Ptr = EmitScalarExpr(E->getArg(1));
700
701    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
702    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
703           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
704    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
705                                  ? Intrinsic::eh_return_i32
706                                  : Intrinsic::eh_return_i64);
707    Builder.CreateCall2(F, Int, Ptr);
708    Builder.CreateUnreachable();
709
710    // We do need to preserve an insertion point.
711    EmitBlock(createBasicBlock("builtin_eh_return.cont"));
712
713    return RValue::get(0);
714  }
715  case Builtin::BI__builtin_unwind_init: {
716    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
717    return RValue::get(Builder.CreateCall(F));
718  }
719  case Builtin::BI__builtin_extend_pointer: {
720    // Extends a pointer to the size of an _Unwind_Word, which is
721    // uint64_t on all platforms.  Generally this gets poked into a
722    // register and eventually used as an address, so if the
723    // addressing registers are wider than pointers and the platform
724    // doesn't implicitly ignore high-order bits when doing
725    // addressing, we need to make sure we zext / sext based on
726    // the platform's expectations.
727    //
728    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
729
730    // Cast the pointer to intptr_t.
731    Value *Ptr = EmitScalarExpr(E->getArg(0));
732    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
733
734    // If that's 64 bits, we're done.
735    if (IntPtrTy->getBitWidth() == 64)
736      return RValue::get(Result);
737
738    // Otherwise, ask the target codegen hooks what to do.
739    if (getTargetHooks().extendPointerWithSExt())
740      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
741    else
742      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
743  }
744  case Builtin::BI__builtin_setjmp: {
745    // Buffer is a void**.
746    Value *Buf = EmitScalarExpr(E->getArg(0));
747
748    // Store the frame pointer to the setjmp buffer.
749    Value *FrameAddr =
750      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
751                         ConstantInt::get(Int32Ty, 0));
752    Builder.CreateStore(FrameAddr, Buf);
753
754    // Store the stack pointer to the setjmp buffer.
755    Value *StackAddr =
756      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
757    Value *StackSaveSlot =
758      Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
759    Builder.CreateStore(StackAddr, StackSaveSlot);
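    // Slot 0 of the buffer now holds the frame pointer and slot 2 the stack
    // pointer; slot 1 is presumably left for llvm.eh.sjlj.setjmp itself to
    // fill in (e.g. with the address to resume at on longjmp).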
760
761    // Call LLVM's EH setjmp, which is lightweight.
762    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
763    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
764    return RValue::get(Builder.CreateCall(F, Buf));
765  }
766  case Builtin::BI__builtin_longjmp: {
767    Value *Buf = EmitScalarExpr(E->getArg(0));
768    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
769
770    // Call LLVM's EH longjmp, which is lightweight.
771    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
772
773    // longjmp doesn't return; mark this as unreachable.
774    Builder.CreateUnreachable();
775
776    // We do need to preserve an insertion point.
777    EmitBlock(createBasicBlock("longjmp.cont"));
778
779    return RValue::get(0);
780  }
781  case Builtin::BI__sync_fetch_and_add:
782  case Builtin::BI__sync_fetch_and_sub:
783  case Builtin::BI__sync_fetch_and_or:
784  case Builtin::BI__sync_fetch_and_and:
785  case Builtin::BI__sync_fetch_and_xor:
786  case Builtin::BI__sync_add_and_fetch:
787  case Builtin::BI__sync_sub_and_fetch:
788  case Builtin::BI__sync_and_and_fetch:
789  case Builtin::BI__sync_or_and_fetch:
790  case Builtin::BI__sync_xor_and_fetch:
791  case Builtin::BI__sync_val_compare_and_swap:
792  case Builtin::BI__sync_bool_compare_and_swap:
793  case Builtin::BI__sync_lock_test_and_set:
794  case Builtin::BI__sync_lock_release:
795  case Builtin::BI__sync_swap:
796    llvm_unreachable("Shouldn't make it through sema");
797  case Builtin::BI__sync_fetch_and_add_1:
798  case Builtin::BI__sync_fetch_and_add_2:
799  case Builtin::BI__sync_fetch_and_add_4:
800  case Builtin::BI__sync_fetch_and_add_8:
801  case Builtin::BI__sync_fetch_and_add_16:
802    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
803  case Builtin::BI__sync_fetch_and_sub_1:
804  case Builtin::BI__sync_fetch_and_sub_2:
805  case Builtin::BI__sync_fetch_and_sub_4:
806  case Builtin::BI__sync_fetch_and_sub_8:
807  case Builtin::BI__sync_fetch_and_sub_16:
808    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
809  case Builtin::BI__sync_fetch_and_or_1:
810  case Builtin::BI__sync_fetch_and_or_2:
811  case Builtin::BI__sync_fetch_and_or_4:
812  case Builtin::BI__sync_fetch_and_or_8:
813  case Builtin::BI__sync_fetch_and_or_16:
814    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
815  case Builtin::BI__sync_fetch_and_and_1:
816  case Builtin::BI__sync_fetch_and_and_2:
817  case Builtin::BI__sync_fetch_and_and_4:
818  case Builtin::BI__sync_fetch_and_and_8:
819  case Builtin::BI__sync_fetch_and_and_16:
820    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
821  case Builtin::BI__sync_fetch_and_xor_1:
822  case Builtin::BI__sync_fetch_and_xor_2:
823  case Builtin::BI__sync_fetch_and_xor_4:
824  case Builtin::BI__sync_fetch_and_xor_8:
825  case Builtin::BI__sync_fetch_and_xor_16:
826    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
827
828  // Clang extensions: not overloaded yet.
829  case Builtin::BI__sync_fetch_and_min:
830    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
831  case Builtin::BI__sync_fetch_and_max:
832    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
833  case Builtin::BI__sync_fetch_and_umin:
834    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
835  case Builtin::BI__sync_fetch_and_umax:
836    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
837
838  case Builtin::BI__sync_add_and_fetch_1:
839  case Builtin::BI__sync_add_and_fetch_2:
840  case Builtin::BI__sync_add_and_fetch_4:
841  case Builtin::BI__sync_add_and_fetch_8:
842  case Builtin::BI__sync_add_and_fetch_16:
843    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
844                                llvm::Instruction::Add);
845  case Builtin::BI__sync_sub_and_fetch_1:
846  case Builtin::BI__sync_sub_and_fetch_2:
847  case Builtin::BI__sync_sub_and_fetch_4:
848  case Builtin::BI__sync_sub_and_fetch_8:
849  case Builtin::BI__sync_sub_and_fetch_16:
850    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
851                                llvm::Instruction::Sub);
852  case Builtin::BI__sync_and_and_fetch_1:
853  case Builtin::BI__sync_and_and_fetch_2:
854  case Builtin::BI__sync_and_and_fetch_4:
855  case Builtin::BI__sync_and_and_fetch_8:
856  case Builtin::BI__sync_and_and_fetch_16:
857    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
858                                llvm::Instruction::And);
859  case Builtin::BI__sync_or_and_fetch_1:
860  case Builtin::BI__sync_or_and_fetch_2:
861  case Builtin::BI__sync_or_and_fetch_4:
862  case Builtin::BI__sync_or_and_fetch_8:
863  case Builtin::BI__sync_or_and_fetch_16:
864    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
865                                llvm::Instruction::Or);
866  case Builtin::BI__sync_xor_and_fetch_1:
867  case Builtin::BI__sync_xor_and_fetch_2:
868  case Builtin::BI__sync_xor_and_fetch_4:
869  case Builtin::BI__sync_xor_and_fetch_8:
870  case Builtin::BI__sync_xor_and_fetch_16:
871    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
872                                llvm::Instruction::Xor);
873
874  case Builtin::BI__sync_val_compare_and_swap_1:
875  case Builtin::BI__sync_val_compare_and_swap_2:
876  case Builtin::BI__sync_val_compare_and_swap_4:
877  case Builtin::BI__sync_val_compare_and_swap_8:
878  case Builtin::BI__sync_val_compare_and_swap_16: {
879    QualType T = E->getType();
880    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
881    unsigned AddrSpace =
882      cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
883
884    llvm::IntegerType *IntType =
885      llvm::IntegerType::get(getLLVMContext(),
886                             getContext().getTypeSize(T));
887    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
888
889    Value *Args[3];
890    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
891    Args[1] = EmitScalarExpr(E->getArg(1));
892    llvm::Type *ValueType = Args[1]->getType();
893    Args[1] = EmitToInt(*this, Args[1], T, IntType);
894    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
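    // cmpxchg yields the value that was in memory before the operation; for
    // __sync_val_compare_and_swap that old value is the builtin's result.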
895
896    Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
897                                                llvm::SequentiallyConsistent);
898    Result = EmitFromInt(*this, Result, T, ValueType);
899    return RValue::get(Result);
900  }
901
902  case Builtin::BI__sync_bool_compare_and_swap_1:
903  case Builtin::BI__sync_bool_compare_and_swap_2:
904  case Builtin::BI__sync_bool_compare_and_swap_4:
905  case Builtin::BI__sync_bool_compare_and_swap_8:
906  case Builtin::BI__sync_bool_compare_and_swap_16: {
907    QualType T = E->getArg(1)->getType();
908    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
909    unsigned AddrSpace =
910      cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
911
912    llvm::IntegerType *IntType =
913      llvm::IntegerType::get(getLLVMContext(),
914                             getContext().getTypeSize(T));
915    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
916
917    Value *Args[3];
918    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
919    Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
920    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
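    // The boolean result is whether the swap happened, i.e. whether the value
    // previously in memory equaled the expected value in Args[1].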
921
922    Value *OldVal = Args[1];
923    Value *PrevVal = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
924                                                 llvm::SequentiallyConsistent);
925    Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
926    // zext bool to int.
927    Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
928    return RValue::get(Result);
929  }
930
931  case Builtin::BI__sync_swap_1:
932  case Builtin::BI__sync_swap_2:
933  case Builtin::BI__sync_swap_4:
934  case Builtin::BI__sync_swap_8:
935  case Builtin::BI__sync_swap_16:
936    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
937
938  case Builtin::BI__sync_lock_test_and_set_1:
939  case Builtin::BI__sync_lock_test_and_set_2:
940  case Builtin::BI__sync_lock_test_and_set_4:
941  case Builtin::BI__sync_lock_test_and_set_8:
942  case Builtin::BI__sync_lock_test_and_set_16:
943    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
944
945  case Builtin::BI__sync_lock_release_1:
946  case Builtin::BI__sync_lock_release_2:
947  case Builtin::BI__sync_lock_release_4:
948  case Builtin::BI__sync_lock_release_8:
949  case Builtin::BI__sync_lock_release_16: {
950    Value *Ptr = EmitScalarExpr(E->getArg(0));
951    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
952    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
953    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
954                                             StoreSize.getQuantity() * 8);
955    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
956    llvm::StoreInst *Store =
957      Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
958    Store->setAlignment(StoreSize.getQuantity());
959    Store->setAtomic(llvm::Release);
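    // __sync_lock_release is documented (in the GCC builtins manual) as
    // writing 0 to the lock with release semantics, which is exactly the
    // atomic release store built above.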
960    return RValue::get(0);
961  }
962
963  case Builtin::BI__sync_synchronize: {
964    // We assume this is supposed to correspond to a C++0x-style
965    // sequentially-consistent fence (i.e. this is only usable for
966    // synchronization, not device I/O or anything like that). This intrinsic
967    // is really badly designed in the sense that in theory, there isn't
968    // any way to safely use it... but in practice, it mostly works
969    // to use it with non-atomic loads and stores to get acquire/release
970    // semantics.
971    Builder.CreateFence(llvm::SequentiallyConsistent);
972    return RValue::get(0);
973  }
974
975  case Builtin::BI__c11_atomic_is_lock_free:
976  case Builtin::BI__atomic_is_lock_free: {
977    // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
978    // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
979    // _Atomic(T) is always properly-aligned.
980    const char *LibCallName = "__atomic_is_lock_free";
981    CallArgList Args;
982    Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
983             getContext().getSizeType());
984    if (BuiltinID == Builtin::BI__atomic_is_lock_free)
985      Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
986               getContext().VoidPtrTy);
987    else
988      Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
989               getContext().VoidPtrTy);
990    const CGFunctionInfo &FuncInfo =
991        CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
992                                               FunctionType::ExtInfo(),
993                                               RequiredArgs::All);
994    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
995    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
996    return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
997  }
998
999  case Builtin::BI__atomic_test_and_set: {
1000    // Look at the argument type to determine whether this is a volatile
1001    // operation. The parameter type is always volatile.
1002    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1003    bool Volatile =
1004        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1005
1006    Value *Ptr = EmitScalarExpr(E->getArg(0));
1007    unsigned AddrSpace =
1008        cast<llvm::PointerType>(Ptr->getType())->getAddressSpace();
1009    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1010    Value *NewVal = Builder.getInt8(1);
1011    Value *Order = EmitScalarExpr(E->getArg(1));
1012    if (isa<llvm::ConstantInt>(Order)) {
1013      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1014      AtomicRMWInst *Result = 0;
1015      switch (ord) {
1016      case 0:  // memory_order_relaxed
1017      default: // invalid order
1018        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1019                                         Ptr, NewVal,
1020                                         llvm::Monotonic);
1021        break;
1022      case 1:  // memory_order_consume
1023      case 2:  // memory_order_acquire
1024        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1025                                         Ptr, NewVal,
1026                                         llvm::Acquire);
1027        break;
1028      case 3:  // memory_order_release
1029        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1030                                         Ptr, NewVal,
1031                                         llvm::Release);
1032        break;
1033      case 4:  // memory_order_acq_rel
1034        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1035                                         Ptr, NewVal,
1036                                         llvm::AcquireRelease);
1037        break;
1038      case 5:  // memory_order_seq_cst
1039        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1040                                         Ptr, NewVal,
1041                                         llvm::SequentiallyConsistent);
1042        break;
1043      }
1044      Result->setVolatile(Volatile);
1045      return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1046    }
1047
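    // The memory order is not a compile-time constant, so branch over it at
    // run time: one atomicrmw xchg per legal ordering, merged via a PHI.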
1048    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1049
1050    llvm::BasicBlock *BBs[5] = {
1051      createBasicBlock("monotonic", CurFn),
1052      createBasicBlock("acquire", CurFn),
1053      createBasicBlock("release", CurFn),
1054      createBasicBlock("acqrel", CurFn),
1055      createBasicBlock("seqcst", CurFn)
1056    };
1057    llvm::AtomicOrdering Orders[5] = {
1058      llvm::Monotonic, llvm::Acquire, llvm::Release,
1059      llvm::AcquireRelease, llvm::SequentiallyConsistent
1060    };
1061
1062    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1063    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1064
1065    Builder.SetInsertPoint(ContBB);
1066    PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1067
1068    for (unsigned i = 0; i < 5; ++i) {
1069      Builder.SetInsertPoint(BBs[i]);
1070      AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1071                                                   Ptr, NewVal, Orders[i]);
1072      RMW->setVolatile(Volatile);
1073      Result->addIncoming(RMW, BBs[i]);
1074      Builder.CreateBr(ContBB);
1075    }
1076
1077    SI->addCase(Builder.getInt32(0), BBs[0]);
1078    SI->addCase(Builder.getInt32(1), BBs[1]);
1079    SI->addCase(Builder.getInt32(2), BBs[1]);
1080    SI->addCase(Builder.getInt32(3), BBs[2]);
1081    SI->addCase(Builder.getInt32(4), BBs[3]);
1082    SI->addCase(Builder.getInt32(5), BBs[4]);
1083
1084    Builder.SetInsertPoint(ContBB);
1085    return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1086  }
1087
1088  case Builtin::BI__atomic_clear: {
1089    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1090    bool Volatile =
1091        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1092
1093    Value *Ptr = EmitScalarExpr(E->getArg(0));
1094    unsigned AddrSpace =
1095        cast<llvm::PointerType>(Ptr->getType())->getAddressSpace();
1096    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1097    Value *NewVal = Builder.getInt8(0);
1098    Value *Order = EmitScalarExpr(E->getArg(1));
1099    if (isa<llvm::ConstantInt>(Order)) {
1100      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1101      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1102      Store->setAlignment(1);
1103      switch (ord) {
1104      case 0:  // memory_order_relaxed
1105      default: // invalid order
1106        Store->setOrdering(llvm::Monotonic);
1107        break;
1108      case 3:  // memory_order_release
1109        Store->setOrdering(llvm::Release);
1110        break;
1111      case 5:  // memory_order_seq_cst
1112        Store->setOrdering(llvm::SequentiallyConsistent);
1113        break;
1114      }
1115      return RValue::get(0);
1116    }
1117
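    // Non-constant memory order: switch over the orderings that are valid for
    // a store (relaxed, release, seq_cst) and emit one atomic store apiece.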
1118    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1119
1120    llvm::BasicBlock *BBs[3] = {
1121      createBasicBlock("monotonic", CurFn),
1122      createBasicBlock("release", CurFn),
1123      createBasicBlock("seqcst", CurFn)
1124    };
1125    llvm::AtomicOrdering Orders[3] = {
1126      llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
1127    };
1128
1129    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1130    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1131
1132    for (unsigned i = 0; i < 3; ++i) {
1133      Builder.SetInsertPoint(BBs[i]);
1134      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1135      Store->setAlignment(1);
1136      Store->setOrdering(Orders[i]);
1137      Builder.CreateBr(ContBB);
1138    }
1139
1140    SI->addCase(Builder.getInt32(0), BBs[0]);
1141    SI->addCase(Builder.getInt32(3), BBs[1]);
1142    SI->addCase(Builder.getInt32(5), BBs[2]);
1143
1144    Builder.SetInsertPoint(ContBB);
1145    return RValue::get(0);
1146  }
1147
1148  case Builtin::BI__atomic_thread_fence:
1149  case Builtin::BI__atomic_signal_fence:
1150  case Builtin::BI__c11_atomic_thread_fence:
1151  case Builtin::BI__c11_atomic_signal_fence: {
1152    llvm::SynchronizationScope Scope;
1153    if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1154        BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1155      Scope = llvm::SingleThread;
1156    else
1157      Scope = llvm::CrossThread;
1158    Value *Order = EmitScalarExpr(E->getArg(0));
1159    if (isa<llvm::ConstantInt>(Order)) {
1160      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1161      switch (ord) {
1162      case 0:  // memory_order_relaxed
1163      default: // invalid order
1164        break;
1165      case 1:  // memory_order_consume
1166      case 2:  // memory_order_acquire
1167        Builder.CreateFence(llvm::Acquire, Scope);
1168        break;
1169      case 3:  // memory_order_release
1170        Builder.CreateFence(llvm::Release, Scope);
1171        break;
1172      case 4:  // memory_order_acq_rel
1173        Builder.CreateFence(llvm::AcquireRelease, Scope);
1174        break;
1175      case 5:  // memory_order_seq_cst
1176        Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1177        break;
1178      }
1179      return RValue::get(0);
1180    }
1181
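    // Non-constant memory order: emit a switch over the ordering; relaxed (or
    // an invalid value) falls straight through to the continuation block with
    // no fence emitted.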
1182    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1183    AcquireBB = createBasicBlock("acquire", CurFn);
1184    ReleaseBB = createBasicBlock("release", CurFn);
1185    AcqRelBB = createBasicBlock("acqrel", CurFn);
1186    SeqCstBB = createBasicBlock("seqcst", CurFn);
1187    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1188
1189    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1190    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1191
1192    Builder.SetInsertPoint(AcquireBB);
1193    Builder.CreateFence(llvm::Acquire, Scope);
1194    Builder.CreateBr(ContBB);
1195    SI->addCase(Builder.getInt32(1), AcquireBB);
1196    SI->addCase(Builder.getInt32(2), AcquireBB);
1197
1198    Builder.SetInsertPoint(ReleaseBB);
1199    Builder.CreateFence(llvm::Release, Scope);
1200    Builder.CreateBr(ContBB);
1201    SI->addCase(Builder.getInt32(3), ReleaseBB);
1202
1203    Builder.SetInsertPoint(AcqRelBB);
1204    Builder.CreateFence(llvm::AcquireRelease, Scope);
1205    Builder.CreateBr(ContBB);
1206    SI->addCase(Builder.getInt32(4), AcqRelBB);
1207
1208    Builder.SetInsertPoint(SeqCstBB);
1209    Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1210    Builder.CreateBr(ContBB);
1211    SI->addCase(Builder.getInt32(5), SeqCstBB);
1212
1213    Builder.SetInsertPoint(ContBB);
1214    return RValue::get(0);
1215  }
1216
1217    // Library functions with special handling.
1218  case Builtin::BIsqrt:
1219  case Builtin::BIsqrtf:
1220  case Builtin::BIsqrtl: {
1221    // TODO: there is currently no set of optimizer flags
1222    // sufficient for us to rewrite sqrt to @llvm.sqrt.
1223    // -fmath-errno=0 is not good enough; we need finiteness.
1224    // We could probably precondition the call with an ult
1225    // against 0, but is that worth the complexity?
1226    break;
1227  }
1228
1229  case Builtin::BIpow:
1230  case Builtin::BIpowf:
1231  case Builtin::BIpowl: {
1232    // Rewrite pow to an intrinsic if allowed.
1233    if (!FD->hasAttr<ConstAttr>())
1234      break;
1235    Value *Base = EmitScalarExpr(E->getArg(0));
1236    Value *Exponent = EmitScalarExpr(E->getArg(1));
1237    llvm::Type *ArgType = Base->getType();
1238    Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1239    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
1240  }
1241
1242  case Builtin::BIfma:
1243  case Builtin::BIfmaf:
1244  case Builtin::BIfmal:
1245  case Builtin::BI__builtin_fma:
1246  case Builtin::BI__builtin_fmaf:
1247  case Builtin::BI__builtin_fmal: {
1248    // Rewrite fma to intrinsic.
1249    Value *FirstArg = EmitScalarExpr(E->getArg(0));
1250    llvm::Type *ArgType = FirstArg->getType();
1251    Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1252    return RValue::get(Builder.CreateCall3(F, FirstArg,
1253                                              EmitScalarExpr(E->getArg(1)),
1254                                              EmitScalarExpr(E->getArg(2))));
1255  }
1256
1257  case Builtin::BI__builtin_signbit:
1258  case Builtin::BI__builtin_signbitf:
1259  case Builtin::BI__builtin_signbitl: {
1260    LLVMContext &C = CGM.getLLVMContext();
1261
1262    Value *Arg = EmitScalarExpr(E->getArg(0));
1263    llvm::Type *ArgTy = Arg->getType();
1264    if (ArgTy->isPPC_FP128Ty())
1265      break; // FIXME: I'm not sure what the right implementation is here.
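    // Reinterpret the float's bits as a same-width integer; the value is
    // negative exactly when the sign bit is set.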
1266    int ArgWidth = ArgTy->getPrimitiveSizeInBits();
1267    llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
1268    Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
1269    Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
1270    Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
1271    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
1272  }
1273  case Builtin::BI__builtin_annotation: {
1274    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1275    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1276                                      AnnVal->getType());
1277
1278    // Get the annotation string, go through casts. Sema requires this to be a
1279    // non-wide string literal, possibly wrapped in casts, so the cast<> is safe.
1280    const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1281    llvm::StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1282    return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1283  }
1284  }
1285
1286  // If this is an alias for a lib function (e.g. __builtin_sin), emit
1287  // the call using the normal call path, but using the unmangled
1288  // version of the function name.
1289  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
1290    return emitLibraryCall(*this, FD, E,
1291                           CGM.getBuiltinLibFunction(FD, BuiltinID));
1292
1293  // If this is a predefined lib function (e.g. malloc), emit the call
1294  // using exactly the normal call path.
1295  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
1296    return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
1297
1298  // See if we have a target specific intrinsic.
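  // For example, a GCC-style builtin such as __builtin_arm_qadd can map to the
  // llvm.arm.qadd intrinsic through its GCCBuiltin annotation; the actual
  // mappings live in the target's Intrinsics*.td files.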
1299  const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
1300  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
1301  if (const char *Prefix =
1302      llvm::Triple::getArchTypePrefix(Target.getTriple().getArch()))
1303    IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
1304
1305  if (IntrinsicID != Intrinsic::not_intrinsic) {
1306    SmallVector<Value*, 16> Args;
1307
1308    // Find out if any arguments are required to be integer constant
1309    // expressions.
1310    unsigned ICEArguments = 0;
1311    ASTContext::GetBuiltinTypeError Error;
1312    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
1313    assert(Error == ASTContext::GE_None && "Should not codegen an error");
1314
1315    Function *F = CGM.getIntrinsic(IntrinsicID);
1316    llvm::FunctionType *FTy = F->getFunctionType();
1317
1318    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
1319      Value *ArgValue;
1320      // If this is a normal argument, just emit it as a scalar.
1321      if ((ICEArguments & (1 << i)) == 0) {
1322        ArgValue = EmitScalarExpr(E->getArg(i));
1323      } else {
1324        // If this is required to be a constant, constant fold it so that we
1325        // know that the generated intrinsic gets a ConstantInt.
1326        llvm::APSInt Result;
1327        bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
1328        assert(IsConst && "Constant arg isn't actually constant?");
1329        (void)IsConst;
1330        ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
1331      }
1332
1333      // If the intrinsic arg type is different from the builtin arg type
1334      // we need to do a bit cast.
1335      llvm::Type *PTy = FTy->getParamType(i);
1336      if (PTy != ArgValue->getType()) {
1337        assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
1338               "Must be able to losslessly bit cast to param");
1339        ArgValue = Builder.CreateBitCast(ArgValue, PTy);
1340      }
1341
1342      Args.push_back(ArgValue);
1343    }
1344
1345    Value *V = Builder.CreateCall(F, Args);
1346    QualType BuiltinRetType = E->getType();
1347
1348    llvm::Type *RetTy = VoidTy;
1349    if (!BuiltinRetType->isVoidType())
1350      RetTy = ConvertType(BuiltinRetType);
1351
1352    if (RetTy != V->getType()) {
1353      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
1354             "Must be able to losslessly bit cast result type");
1355      V = Builder.CreateBitCast(V, RetTy);
1356    }
1357
1358    return RValue::get(V);
1359  }
1360
1361  // See if we have a target specific builtin that needs to be lowered.
1362  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
1363    return RValue::get(V);
1364
1365  ErrorUnsupported(E, "builtin function");
1366
1367  // Unknown builtin: we already emitted a diagnostic, so just return undef.
1368  if (hasAggregateLLVMType(E->getType()))
1369    return RValue::getAggregate(CreateMemTemp(E->getType()));
1370  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1371}
1372
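/// EmitTargetBuiltinExpr - Dispatch a target-specific builtin to the
/// per-target emitter selected by the target triple's architecture.  Returns
/// 0 if this target has no dedicated handler.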
1373Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
1374                                              const CallExpr *E) {
1375  switch (Target.getTriple().getArch()) {
1376  case llvm::Triple::arm:
1377  case llvm::Triple::thumb:
1378    return EmitARMBuiltinExpr(BuiltinID, E);
1379  case llvm::Triple::x86:
1380  case llvm::Triple::x86_64:
1381    return EmitX86BuiltinExpr(BuiltinID, E);
1382  case llvm::Triple::ppc:
1383  case llvm::Triple::ppc64:
1384    return EmitPPCBuiltinExpr(BuiltinID, E);
1385  default:
1386    return 0;
1387  }
1388}
1389
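/// GetNeonType - Map a NeonTypeFlags encoding onto the corresponding LLVM
/// vector type.  The quad flag doubles the element count; for example, Int16
/// yields <4 x i16> for a D register and <8 x i16> for a Q register.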
1390static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
1391                                     NeonTypeFlags TypeFlags) {
1392  int IsQuad = TypeFlags.isQuad();
1393  switch (TypeFlags.getEltType()) {
1394  case NeonTypeFlags::Int8:
1395  case NeonTypeFlags::Poly8:
1396    return llvm::VectorType::get(CGF->Int8Ty, 8 << IsQuad);
1397  case NeonTypeFlags::Int16:
1398  case NeonTypeFlags::Poly16:
1399  case NeonTypeFlags::Float16:
1400    return llvm::VectorType::get(CGF->Int16Ty, 4 << IsQuad);
1401  case NeonTypeFlags::Int32:
1402    return llvm::VectorType::get(CGF->Int32Ty, 2 << IsQuad);
1403  case NeonTypeFlags::Int64:
1404    return llvm::VectorType::get(CGF->Int64Ty, 1 << IsQuad);
1405  case NeonTypeFlags::Float32:
1406    return llvm::VectorType::get(CGF->FloatTy, 2 << IsQuad);
1407  }
1408  llvm_unreachable("Invalid NeonTypeFlags element type!");
1409}
1410
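/// EmitNeonSplat - Splat the lane selected by the constant C across all lanes
/// of V using a shufflevector whose mask is a splat of C; for a <4 x i32>
/// input and C == 0 the mask is <0, 0, 0, 0>.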
1411Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
1412  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
1413  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
1414  return Builder.CreateShuffleVector(V, V, SV, "lane");
1415}
1416
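/// EmitNeonCall - Emit a call to the NEON intrinsic F, bitcasting each operand
/// to the intrinsic's corresponding parameter type.  If 'shift' is nonzero,
/// that operand is instead expanded into a splat vector of shift amounts
/// (negated when 'rightshift' is set) via EmitNeonShiftVector.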
1417Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
1418                                     const char *name,
1419                                     unsigned shift, bool rightshift) {
1420  unsigned j = 0;
1421  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1422       ai != ae; ++ai, ++j)
1423    if (shift > 0 && shift == j)
1424      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
1425    else
1426      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
1427
1428  return Builder.CreateCall(F, Ops, name);
1429}
1430
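/// EmitNeonShiftVector - Expand a constant scalar shift amount into a constant
/// splat vector with Ty's element type; a shift of 3 with 'neg' set becomes a
/// splat of -3, as used for the right-shift forms.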
1431Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
1432                                            bool neg) {
1433  int SV = cast<ConstantInt>(V)->getSExtValue();
1434
1435  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1436  llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
1437  return llvm::ConstantVector::getSplat(VTy->getNumElements(), C);
1438}
1439
1440/// GetPointeeAlignment - Given an expression with a pointer type, find the
1441/// alignment of the type referenced by the pointer.  Skip over implicit
1442/// casts.
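/// For example, for an argument of type int* this typically returns the ABI
/// alignment of int (4 on most targets); the result feeds the alignment
/// operand of the NEON load/store intrinsics below.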
1443unsigned CodeGenFunction::GetPointeeAlignment(const Expr *Addr) {
1444  unsigned Align = 1;
1445  // Check if the type is a pointer.  The implicit cast operand might not be.
1446  while (Addr->getType()->isPointerType()) {
1447    QualType PtTy = Addr->getType()->getPointeeType();
1448
1449    // Can't get alignment of incomplete types.
1450    if (!PtTy->isIncompleteType()) {
1451      unsigned NewA = getContext().getTypeAlignInChars(PtTy).getQuantity();
1452      if (NewA > Align)
1453        Align = NewA;
1454    }
1455
1456    // If the address is an implicit cast, repeat with the cast operand.
1457    if (const ImplicitCastExpr *CastAddr = dyn_cast<ImplicitCastExpr>(Addr)) {
1458      Addr = CastAddr->getSubExpr();
1459      continue;
1460    }
1461    break;
1462  }
1463  return Align;
1464}
1465
1466/// GetPointeeAlignmentValue - Given an expression with a pointer type, find
1467/// the alignment of the type referenced by the pointer.  Skip over implicit
1468/// casts.  Return the alignment as an llvm::Value.
1469Value *CodeGenFunction::GetPointeeAlignmentValue(const Expr *Addr) {
1470  return llvm::ConstantInt::get(Int32Ty, GetPointeeAlignment(Addr));
1471}
1472
1473Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
1474                                           const CallExpr *E) {
1475  if (BuiltinID == ARM::BI__clear_cache) {
1476    const FunctionDecl *FD = E->getDirectCallee();
1477    // Oddly, people sometimes write this call without arguments and gcc accepts
1478    // it; it's also marked as varargs in the description file.
1479    SmallVector<Value*, 2> Ops;
1480    for (unsigned i = 0; i < E->getNumArgs(); i++)
1481      Ops.push_back(EmitScalarExpr(E->getArg(i)));
1482    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
1483    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
1484    StringRef Name = FD->getName();
1485    return Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
1486  }
1487
1488  if (BuiltinID == ARM::BI__builtin_arm_ldrexd) {
1489    Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
1490
1491    Value *LdPtr = EmitScalarExpr(E->getArg(0));
1492    Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
1493
1494    Value *Val0 = Builder.CreateExtractValue(Val, 1);
1495    Value *Val1 = Builder.CreateExtractValue(Val, 0);
1496    Val0 = Builder.CreateZExt(Val0, Int64Ty);
1497    Val1 = Builder.CreateZExt(Val1, Int64Ty);
1498
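    // Combine the two 32-bit halves into a single i64 result.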
1499    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
1500    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
1501    return Builder.CreateOr(Val, Val1);
1502  }
1503
1504  if (BuiltinID == ARM::BI__builtin_arm_strexd) {
1505    Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd);
1506    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL);
1507
1508    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
1509    Value *Tmp = Builder.CreateAlloca(Int64Ty, One);
1510    Value *Val = EmitScalarExpr(E->getArg(0));
1511    Builder.CreateStore(Val, Tmp);
1512
1513    Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
1514    Val = Builder.CreateLoad(LdPtr);
1515
1516    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
1517    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
1518    Value *StPtr = EmitScalarExpr(E->getArg(1));
1519    return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
1520  }
1521
1522  SmallVector<Value*, 4> Ops;
1523  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
1524    Ops.push_back(EmitScalarExpr(E->getArg(i)));
1525
1526  // vget_lane and vset_lane are not overloaded and do not have an extra
1527  // argument that specifies the vector type.
1528  switch (BuiltinID) {
1529  default: break;
1530  case ARM::BI__builtin_neon_vget_lane_i8:
1531  case ARM::BI__builtin_neon_vget_lane_i16:
1532  case ARM::BI__builtin_neon_vget_lane_i32:
1533  case ARM::BI__builtin_neon_vget_lane_i64:
1534  case ARM::BI__builtin_neon_vget_lane_f32:
1535  case ARM::BI__builtin_neon_vgetq_lane_i8:
1536  case ARM::BI__builtin_neon_vgetq_lane_i16:
1537  case ARM::BI__builtin_neon_vgetq_lane_i32:
1538  case ARM::BI__builtin_neon_vgetq_lane_i64:
1539  case ARM::BI__builtin_neon_vgetq_lane_f32:
1540    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
1541                                        "vget_lane");
1542  case ARM::BI__builtin_neon_vset_lane_i8:
1543  case ARM::BI__builtin_neon_vset_lane_i16:
1544  case ARM::BI__builtin_neon_vset_lane_i32:
1545  case ARM::BI__builtin_neon_vset_lane_i64:
1546  case ARM::BI__builtin_neon_vset_lane_f32:
1547  case ARM::BI__builtin_neon_vsetq_lane_i8:
1548  case ARM::BI__builtin_neon_vsetq_lane_i16:
1549  case ARM::BI__builtin_neon_vsetq_lane_i32:
1550  case ARM::BI__builtin_neon_vsetq_lane_i64:
1551  case ARM::BI__builtin_neon_vsetq_lane_f32:
1552    Ops.push_back(EmitScalarExpr(E->getArg(2)));
1553    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
1554  }
1555
1556  // Get the last argument, which specifies the vector type.
1557  llvm::APSInt Result;
1558  const Expr *Arg = E->getArg(E->getNumArgs()-1);
1559  if (!Arg->isIntegerConstantExpr(Result, getContext()))
1560    return 0;
1561
1562  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
1563      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
1564    // Determine the overloaded type of this builtin.
1565    llvm::Type *Ty;
1566    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
1567      Ty = FloatTy;
1568    else
1569      Ty = DoubleTy;
1570
1571    // Determine whether this is an unsigned conversion or not.
1572    bool usgn = Result.getZExtValue() == 1;
1573    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
1574
1575    // Call the appropriate intrinsic.
1576    Function *F = CGM.getIntrinsic(Int, Ty);
1577    return Builder.CreateCall(F, Ops, "vcvtr");
1578  }
1579
1580  // Determine the type of this overloaded NEON intrinsic.
1581  NeonTypeFlags Type(Result.getZExtValue());
1582  bool usgn = Type.isUnsigned();
1583  bool quad = Type.isQuad();
1584  bool rightShift = false;
1585
1586  llvm::VectorType *VTy = GetNeonType(this, Type);
1587  llvm::Type *Ty = VTy;
1588  if (!Ty)
1589    return 0;
1590
1591  unsigned Int;
1592  switch (BuiltinID) {
1593  default: return 0;
1594  case ARM::BI__builtin_neon_vabd_v:
1595  case ARM::BI__builtin_neon_vabdq_v:
1596    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
1597    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
1598  case ARM::BI__builtin_neon_vabs_v:
1599  case ARM::BI__builtin_neon_vabsq_v:
1600    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty),
1601                        Ops, "vabs");
1602  case ARM::BI__builtin_neon_vaddhn_v:
1603    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, Ty),
1604                        Ops, "vaddhn");
1605  case ARM::BI__builtin_neon_vcale_v:
1606    std::swap(Ops[0], Ops[1]);
1607  case ARM::BI__builtin_neon_vcage_v: {
1608    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged);
1609    return EmitNeonCall(F, Ops, "vcage");
1610  }
1611  case ARM::BI__builtin_neon_vcaleq_v:
1612    std::swap(Ops[0], Ops[1]);
1613  case ARM::BI__builtin_neon_vcageq_v: {
1614    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
1615    return EmitNeonCall(F, Ops, "vcage");
1616  }
1617  case ARM::BI__builtin_neon_vcalt_v:
1618    std::swap(Ops[0], Ops[1]);
1619  case ARM::BI__builtin_neon_vcagt_v: {
1620    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd);
1621    return EmitNeonCall(F, Ops, "vcagt");
1622  }
1623  case ARM::BI__builtin_neon_vcaltq_v:
1624    std::swap(Ops[0], Ops[1]);
1625  case ARM::BI__builtin_neon_vcagtq_v: {
1626    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
1627    return EmitNeonCall(F, Ops, "vcagt");
1628  }
1629  case ARM::BI__builtin_neon_vcls_v:
1630  case ARM::BI__builtin_neon_vclsq_v: {
1631    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
1632    return EmitNeonCall(F, Ops, "vcls");
1633  }
1634  case ARM::BI__builtin_neon_vclz_v:
1635  case ARM::BI__builtin_neon_vclzq_v: {
1636    // Generate target-independent intrinsic; also need to add second argument
1637    // for whether or not clz of zero is undefined; on ARM it isn't.
1638    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty);
1639    Ops.push_back(Builder.getInt1(Target.isCLZForZeroUndef()));
1640    return EmitNeonCall(F, Ops, "vclz");
1641  }
1642  case ARM::BI__builtin_neon_vcnt_v:
1643  case ARM::BI__builtin_neon_vcntq_v: {
1644    // Generate a target-independent intrinsic (ctpop).
1645    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty);
1646    return EmitNeonCall(F, Ops, "vctpop");
1647  }
1648  case ARM::BI__builtin_neon_vcvt_f16_v: {
1649    assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
1650           "unexpected vcvt_f16_v builtin");
1651    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
1652    return EmitNeonCall(F, Ops, "vcvt");
1653  }
1654  case ARM::BI__builtin_neon_vcvt_f32_f16: {
1655    assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
1656           "unexpected vcvt_f32_f16 builtin");
1657    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
1658    return EmitNeonCall(F, Ops, "vcvt");
1659  }
1660  case ARM::BI__builtin_neon_vcvt_f32_v:
1661  case ARM::BI__builtin_neon_vcvtq_f32_v:
1662    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1663    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
1664    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1665                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1666  case ARM::BI__builtin_neon_vcvt_s32_v:
1667  case ARM::BI__builtin_neon_vcvt_u32_v:
1668  case ARM::BI__builtin_neon_vcvtq_s32_v:
1669  case ARM::BI__builtin_neon_vcvtq_u32_v: {
1670    llvm::Type *FloatTy =
1671      GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
1672    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
1673    return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1674                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1675  }
1676  case ARM::BI__builtin_neon_vcvt_n_f32_v:
1677  case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
1678    llvm::Type *FloatTy =
1679      GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
1680    llvm::Type *Tys[2] = { FloatTy, Ty };
1681    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
1682               : Intrinsic::arm_neon_vcvtfxs2fp;
1683    Function *F = CGM.getIntrinsic(Int, Tys);
1684    return EmitNeonCall(F, Ops, "vcvt_n");
1685  }
1686  case ARM::BI__builtin_neon_vcvt_n_s32_v:
1687  case ARM::BI__builtin_neon_vcvt_n_u32_v:
1688  case ARM::BI__builtin_neon_vcvtq_n_s32_v:
1689  case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
1690    llvm::Type *FloatTy =
1691      GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
1692    llvm::Type *Tys[2] = { Ty, FloatTy };
1693    Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
1694               : Intrinsic::arm_neon_vcvtfp2fxs;
1695    Function *F = CGM.getIntrinsic(Int, Tys);
1696    return EmitNeonCall(F, Ops, "vcvt_n");
1697  }
1698  case ARM::BI__builtin_neon_vext_v:
1699  case ARM::BI__builtin_neon_vextq_v: {
1700    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
1701    SmallVector<Constant*, 16> Indices;
1702    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1703      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
1704
1705    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1706    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1707    Value *SV = llvm::ConstantVector::get(Indices);
1708    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
1709  }
1710  case ARM::BI__builtin_neon_vhadd_v:
1711  case ARM::BI__builtin_neon_vhaddq_v:
1712    Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
1713    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd");
1714  case ARM::BI__builtin_neon_vhsub_v:
1715  case ARM::BI__builtin_neon_vhsubq_v:
1716    Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
1717    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub");
1718  case ARM::BI__builtin_neon_vld1_v:
1719  case ARM::BI__builtin_neon_vld1q_v:
1720    Ops.push_back(GetPointeeAlignmentValue(E->getArg(0)));
1721    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty),
1722                        Ops, "vld1");
1723  case ARM::BI__builtin_neon_vld1_lane_v:
1724  case ARM::BI__builtin_neon_vld1q_lane_v: {
1725    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1726    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1727    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1728    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
1729    Value *Align = GetPointeeAlignmentValue(E->getArg(0));
1730    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
1731    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
1732  }
1733  case ARM::BI__builtin_neon_vld1_dup_v:
1734  case ARM::BI__builtin_neon_vld1q_dup_v: {
1735    Value *V = UndefValue::get(Ty);
1736    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1737    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1738    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
1739    Value *Align = GetPointeeAlignmentValue(E->getArg(0));
1740    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
1741    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1742    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
1743    return EmitNeonSplat(Ops[0], CI);
1744  }
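  // For the vldN structure loads below, Ops[0] is the address of the result
  // aggregate and Ops[1] is the source pointer; the intrinsic's aggregate
  // result is stored back through Ops[0].  Roughly (illustrative IR):
  //   %v = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %p, i32 %align)
  // followed by a store of %v through the bitcast result pointer.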
1745  case ARM::BI__builtin_neon_vld2_v:
1746  case ARM::BI__builtin_neon_vld2q_v: {
1747    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty);
1748    Value *Align = GetPointeeAlignmentValue(E->getArg(1));
1749    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
1750    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1751    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1752    return Builder.CreateStore(Ops[1], Ops[0]);
1753  }
1754  case ARM::BI__builtin_neon_vld3_v:
1755  case ARM::BI__builtin_neon_vld3q_v: {
1756    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty);
1757    Value *Align = GetPointeeAlignmentValue(E->getArg(1));
1758    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
1759    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1760    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1761    return Builder.CreateStore(Ops[1], Ops[0]);
1762  }
1763  case ARM::BI__builtin_neon_vld4_v:
1764  case ARM::BI__builtin_neon_vld4q_v: {
1765    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty);
1766    Value *Align = GetPointeeAlignmentValue(E->getArg(1));
1767    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
1768    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1769    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1770    return Builder.CreateStore(Ops[1], Ops[0]);
1771  }
1772  case ARM::BI__builtin_neon_vld2_lane_v:
1773  case ARM::BI__builtin_neon_vld2q_lane_v: {
1774    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty);
1775    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1776    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1777    Ops.push_back(GetPointeeAlignmentValue(E->getArg(1)));
1778    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
1779    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1780    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1781    return Builder.CreateStore(Ops[1], Ops[0]);
1782  }
1783  case ARM::BI__builtin_neon_vld3_lane_v:
1784  case ARM::BI__builtin_neon_vld3q_lane_v: {
1785    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty);
1786    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1787    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1788    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
1789    Ops.push_back(GetPointeeAlignmentValue(E->getArg(1)));
1790    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
1791    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1792    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1793    return Builder.CreateStore(Ops[1], Ops[0]);
1794  }
1795  case ARM::BI__builtin_neon_vld4_lane_v:
1796  case ARM::BI__builtin_neon_vld4q_lane_v: {
1797    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty);
1798    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1799    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1800    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
1801    Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
1802    Ops.push_back(GetPointeeAlignmentValue(E->getArg(1)));
1803    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
1804    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1805    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1806    return Builder.CreateStore(Ops[1], Ops[0]);
1807  }
1808  case ARM::BI__builtin_neon_vld2_dup_v:
1809  case ARM::BI__builtin_neon_vld3_dup_v:
1810  case ARM::BI__builtin_neon_vld4_dup_v: {
1811    // Handle 64-bit elements as a special case.  There is no "dup" needed.
1812    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
1813      switch (BuiltinID) {
1814      case ARM::BI__builtin_neon_vld2_dup_v:
1815        Int = Intrinsic::arm_neon_vld2;
1816        break;
1817      case ARM::BI__builtin_neon_vld3_dup_v:
1818        Int = Intrinsic::arm_neon_vld3;
1819        break;
1820      case ARM::BI__builtin_neon_vld4_dup_v:
1821        Int = Intrinsic::arm_neon_vld4;
1822        break;
1823      default: llvm_unreachable("unknown vld_dup intrinsic?");
1824      }
1825      Function *F = CGM.getIntrinsic(Int, Ty);
1826      Value *Align = GetPointeeAlignmentValue(E->getArg(1));
1827      Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
1828      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1829      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1830      return Builder.CreateStore(Ops[1], Ops[0]);
1831    }
1832    switch (BuiltinID) {
1833    case ARM::BI__builtin_neon_vld2_dup_v:
1834      Int = Intrinsic::arm_neon_vld2lane;
1835      break;
1836    case ARM::BI__builtin_neon_vld3_dup_v:
1837      Int = Intrinsic::arm_neon_vld3lane;
1838      break;
1839    case ARM::BI__builtin_neon_vld4_dup_v:
1840      Int = Intrinsic::arm_neon_vld4lane;
1841      break;
1842    default: llvm_unreachable("unknown vld_dup intrinsic?");
1843    }
1844    Function *F = CGM.getIntrinsic(Int, Ty);
1845    llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
1846
1847    SmallVector<Value*, 6> Args;
1848    Args.push_back(Ops[1]);
1849    Args.append(STy->getNumElements(), UndefValue::get(Ty));
1850
1851    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1852    Args.push_back(CI);
1853    Args.push_back(GetPointeeAlignmentValue(E->getArg(1)));
1854
1855    Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
1856    // Splat lane 0 to all elements in each vector of the result.
1857    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1858      Value *Val = Builder.CreateExtractValue(Ops[1], i);
1859      Value *Elt = Builder.CreateBitCast(Val, Ty);
1860      Elt = EmitNeonSplat(Elt, CI);
1861      Elt = Builder.CreateBitCast(Elt, Val->getType());
1862      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
1863    }
1864    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1865    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1866    return Builder.CreateStore(Ops[1], Ops[0]);
1867  }
1868  case ARM::BI__builtin_neon_vmax_v:
1869  case ARM::BI__builtin_neon_vmaxq_v:
1870    Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
1871    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
1872  case ARM::BI__builtin_neon_vmin_v:
1873  case ARM::BI__builtin_neon_vminq_v:
1874    Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
1875    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
1876  case ARM::BI__builtin_neon_vmovl_v: {
1877    llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
1878    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1879    if (usgn)
1880      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
1881    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
1882  }
1883  case ARM::BI__builtin_neon_vmovn_v: {
1884    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
1885    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
1886    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
1887  }
1888  case ARM::BI__builtin_neon_vmul_v:
1889  case ARM::BI__builtin_neon_vmulq_v:
1890    assert(Type.isPoly() && "vmul builtin only supported for polynomial types");
1891    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
1892                        Ops, "vmul");
1893  case ARM::BI__builtin_neon_vmull_v:
1894    Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
1895    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
1896    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
1897  case ARM::BI__builtin_neon_vpadal_v:
1898  case ARM::BI__builtin_neon_vpadalq_v: {
1899    Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
1900    // The source operand type has twice as many elements of half the size.
1901    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1902    llvm::Type *EltTy =
1903      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1904    llvm::Type *NarrowTy =
1905      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
1906    llvm::Type *Tys[2] = { Ty, NarrowTy };
1907    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
1908  }
1909  case ARM::BI__builtin_neon_vpadd_v:
1910    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty),
1911                        Ops, "vpadd");
1912  case ARM::BI__builtin_neon_vpaddl_v:
1913  case ARM::BI__builtin_neon_vpaddlq_v: {
1914    Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
1915    // The source operand type has twice as many elements of half the size.
1916    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
1917    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
1918    llvm::Type *NarrowTy =
1919      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
1920    llvm::Type *Tys[2] = { Ty, NarrowTy };
1921    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
1922  }
1923  case ARM::BI__builtin_neon_vpmax_v:
1924    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
1925    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
1926  case ARM::BI__builtin_neon_vpmin_v:
1927    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
1928    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
1929  case ARM::BI__builtin_neon_vqabs_v:
1930  case ARM::BI__builtin_neon_vqabsq_v:
1931    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
1932                        Ops, "vqabs");
1933  case ARM::BI__builtin_neon_vqadd_v:
1934  case ARM::BI__builtin_neon_vqaddq_v:
1935    Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
1936    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
1937  case ARM::BI__builtin_neon_vqdmlal_v:
1938    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, Ty),
1939                        Ops, "vqdmlal");
1940  case ARM::BI__builtin_neon_vqdmlsl_v:
1941    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, Ty),
1942                        Ops, "vqdmlsl");
1943  case ARM::BI__builtin_neon_vqdmulh_v:
1944  case ARM::BI__builtin_neon_vqdmulhq_v:
1945    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty),
1946                        Ops, "vqdmulh");
1947  case ARM::BI__builtin_neon_vqdmull_v:
1948    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
1949                        Ops, "vqdmull");
1950  case ARM::BI__builtin_neon_vqmovn_v:
1951    Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
1952    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
1953  case ARM::BI__builtin_neon_vqmovun_v:
1954    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
1955                        Ops, "vqmovun");
1956  case ARM::BI__builtin_neon_vqneg_v:
1957  case ARM::BI__builtin_neon_vqnegq_v:
1958    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
1959                        Ops, "vqneg");
1960  case ARM::BI__builtin_neon_vqrdmulh_v:
1961  case ARM::BI__builtin_neon_vqrdmulhq_v:
1962    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty),
1963                        Ops, "vqrdmulh");
1964  case ARM::BI__builtin_neon_vqrshl_v:
1965  case ARM::BI__builtin_neon_vqrshlq_v:
1966    Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
1967    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl");
1968  case ARM::BI__builtin_neon_vqrshrn_n_v:
1969    Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
1970    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
1971                        1, true);
1972  case ARM::BI__builtin_neon_vqrshrun_n_v:
1973    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
1974                        Ops, "vqrshrun_n", 1, true);
1975  case ARM::BI__builtin_neon_vqshl_v:
1976  case ARM::BI__builtin_neon_vqshlq_v:
1977    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
1978    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl");
1979  case ARM::BI__builtin_neon_vqshl_n_v:
1980  case ARM::BI__builtin_neon_vqshlq_n_v:
1981    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
1982    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
1983                        1, false);
1984  case ARM::BI__builtin_neon_vqshlu_n_v:
1985  case ARM::BI__builtin_neon_vqshluq_n_v:
1986    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty),
1987                        Ops, "vqshlu", 1, false);
1988  case ARM::BI__builtin_neon_vqshrn_n_v:
1989    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
1990    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
1991                        1, true);
1992  case ARM::BI__builtin_neon_vqshrun_n_v:
1993    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
1994                        Ops, "vqshrun_n", 1, true);
1995  case ARM::BI__builtin_neon_vqsub_v:
1996  case ARM::BI__builtin_neon_vqsubq_v:
1997    Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
1998    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub");
1999  case ARM::BI__builtin_neon_vraddhn_v:
2000    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty),
2001                        Ops, "vraddhn");
2002  case ARM::BI__builtin_neon_vrecpe_v:
2003  case ARM::BI__builtin_neon_vrecpeq_v:
2004    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
2005                        Ops, "vrecpe");
2006  case ARM::BI__builtin_neon_vrecps_v:
2007  case ARM::BI__builtin_neon_vrecpsq_v:
2008    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty),
2009                        Ops, "vrecps");
2010  case ARM::BI__builtin_neon_vrhadd_v:
2011  case ARM::BI__builtin_neon_vrhaddq_v:
2012    Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
2013    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd");
2014  case ARM::BI__builtin_neon_vrshl_v:
2015  case ARM::BI__builtin_neon_vrshlq_v:
2016    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2017    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl");
2018  case ARM::BI__builtin_neon_vrshrn_n_v:
2019    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
2020                        Ops, "vrshrn_n", 1, true);
2021  case ARM::BI__builtin_neon_vrshr_n_v:
2022  case ARM::BI__builtin_neon_vrshrq_n_v:
2023    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2024    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
2025  case ARM::BI__builtin_neon_vrsqrte_v:
2026  case ARM::BI__builtin_neon_vrsqrteq_v:
2027    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty),
2028                        Ops, "vrsqrte");
2029  case ARM::BI__builtin_neon_vrsqrts_v:
2030  case ARM::BI__builtin_neon_vrsqrtsq_v:
2031    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty),
2032                        Ops, "vrsqrts");
2033  case ARM::BI__builtin_neon_vrsra_n_v:
2034  case ARM::BI__builtin_neon_vrsraq_n_v:
2035    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2036    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2037    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
2038    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
2039    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
2040    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
2041  case ARM::BI__builtin_neon_vrsubhn_v:
2042    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty),
2043                        Ops, "vrsubhn");
2044  case ARM::BI__builtin_neon_vshl_v:
2045  case ARM::BI__builtin_neon_vshlq_v:
2046    Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
2047    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl");
2048  case ARM::BI__builtin_neon_vshll_n_v:
2049    Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
2050    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1);
2051  case ARM::BI__builtin_neon_vshl_n_v:
2052  case ARM::BI__builtin_neon_vshlq_n_v:
2053    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
2054    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], "vshl_n");
2055  case ARM::BI__builtin_neon_vshrn_n_v:
2056    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty),
2057                        Ops, "vshrn_n", 1, true);
2058  case ARM::BI__builtin_neon_vshr_n_v:
2059  case ARM::BI__builtin_neon_vshrq_n_v:
2060    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2061    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
2062    if (usgn)
2063      return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
2064    else
2065      return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
2066  case ARM::BI__builtin_neon_vsri_n_v:
2067  case ARM::BI__builtin_neon_vsriq_n_v:
2068    rightShift = true;
2069  case ARM::BI__builtin_neon_vsli_n_v:
2070  case ARM::BI__builtin_neon_vsliq_n_v:
2071    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
2072    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
2073                        Ops, "vsli_n");
2074  case ARM::BI__builtin_neon_vsra_n_v:
2075  case ARM::BI__builtin_neon_vsraq_n_v:
2076    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2077    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2078    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
2079    if (usgn)
2080      Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
2081    else
2082      Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
2083    return Builder.CreateAdd(Ops[0], Ops[1]);
2084  case ARM::BI__builtin_neon_vst1_v:
2085  case ARM::BI__builtin_neon_vst1q_v:
2086    Ops.push_back(GetPointeeAlignmentValue(E->getArg(0)));
2087    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty),
2088                        Ops, "");
2089  case ARM::BI__builtin_neon_vst1_lane_v:
2090  case ARM::BI__builtin_neon_vst1q_lane_v: {
2091    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2092    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
2093    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
2094    StoreInst *St = Builder.CreateStore(Ops[1],
2095                                        Builder.CreateBitCast(Ops[0], Ty));
2096    Value *Align = GetPointeeAlignmentValue(E->getArg(0));
2097    St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
2098    return St;
2099  }
2100  case ARM::BI__builtin_neon_vst2_v:
2101  case ARM::BI__builtin_neon_vst2q_v:
2102    Ops.push_back(GetPointeeAlignmentValue(E->getArg(0)));
2103    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty),
2104                        Ops, "");
2105  case ARM::BI__builtin_neon_vst2_lane_v:
2106  case ARM::BI__builtin_neon_vst2q_lane_v:
2107    Ops.push_back(GetPointeeAlignmentValue(E->getArg(0)));
2108    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty),
2109                        Ops, "");
2110  case ARM::BI__builtin_neon_vst3_v:
2111  case ARM::BI__builtin_neon_vst3q_v:
2112    Ops.push_back(GetPointeeAlignmentValue(E->getArg(0)));
2113    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty),
2114                        Ops, "");
2115  case ARM::BI__builtin_neon_vst3_lane_v:
2116  case ARM::BI__builtin_neon_vst3q_lane_v:
2117    Ops.push_back(GetPointeeAlignmentValue(E->getArg(0)));
2118    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty),
2119                        Ops, "");
2120  case ARM::BI__builtin_neon_vst4_v:
2121  case ARM::BI__builtin_neon_vst4q_v:
2122    Ops.push_back(GetPointeeAlignmentValue(E->getArg(0)));
2123    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty),
2124                        Ops, "");
2125  case ARM::BI__builtin_neon_vst4_lane_v:
2126  case ARM::BI__builtin_neon_vst4q_lane_v:
2127    Ops.push_back(GetPointeeAlignmentValue(E->getArg(0)));
2128    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty),
2129                        Ops, "");
2130  case ARM::BI__builtin_neon_vsubhn_v:
2131    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, Ty),
2132                        Ops, "vsubhn");
2133  case ARM::BI__builtin_neon_vtbl1_v:
2134    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
2135                        Ops, "vtbl1");
2136  case ARM::BI__builtin_neon_vtbl2_v:
2137    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
2138                        Ops, "vtbl2");
2139  case ARM::BI__builtin_neon_vtbl3_v:
2140    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
2141                        Ops, "vtbl3");
2142  case ARM::BI__builtin_neon_vtbl4_v:
2143    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
2144                        Ops, "vtbl4");
2145  case ARM::BI__builtin_neon_vtbx1_v:
2146    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
2147                        Ops, "vtbx1");
2148  case ARM::BI__builtin_neon_vtbx2_v:
2149    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
2150                        Ops, "vtbx2");
2151  case ARM::BI__builtin_neon_vtbx3_v:
2152    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
2153                        Ops, "vtbx3");
2154  case ARM::BI__builtin_neon_vtbx4_v:
2155    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
2156                        Ops, "vtbx4");
2157  case ARM::BI__builtin_neon_vtst_v:
2158  case ARM::BI__builtin_neon_vtstq_v: {
2159    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
2160    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2161    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
2162    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
2163                                ConstantAggregateZero::get(Ty));
2164    return Builder.CreateSExt(Ops[0], Ty, "vtst");
2165  }
2166  case ARM::BI__builtin_neon_vtrn_v:
2167  case ARM::BI__builtin_neon_vtrnq_v: {
2168    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
2169    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2170    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2171    Value *SV = 0;
2172
2173    for (unsigned vi = 0; vi != 2; ++vi) {
2174      SmallVector<Constant*, 16> Indices;
2175      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2176        Indices.push_back(Builder.getInt32(i+vi));
2177        Indices.push_back(Builder.getInt32(i+e+vi));
2178      }
2179      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
2180      SV = llvm::ConstantVector::get(Indices);
2181      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
2182      SV = Builder.CreateStore(SV, Addr);
2183    }
2184    return SV;
2185  }
2186  case ARM::BI__builtin_neon_vuzp_v:
2187  case ARM::BI__builtin_neon_vuzpq_v: {
2188    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
2189    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2190    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2191    Value *SV = 0;
2192
2193    for (unsigned vi = 0; vi != 2; ++vi) {
2194      SmallVector<Constant*, 16> Indices;
2195      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
2196        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
2197
2198      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
2199      SV = llvm::ConstantVector::get(Indices);
2200      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
2201      SV = Builder.CreateStore(SV, Addr);
2202    }
2203    return SV;
2204  }
2205  case ARM::BI__builtin_neon_vzip_v:
2206  case ARM::BI__builtin_neon_vzipq_v: {
2207    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
2208    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
2209    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
2210    Value *SV = 0;
2211
2212    for (unsigned vi = 0; vi != 2; ++vi) {
2213      SmallVector<Constant*, 16> Indices;
2214      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
2215        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
2216        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
2217      }
2218      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
2219      SV = llvm::ConstantVector::get(Indices);
2220      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
2221      SV = Builder.CreateStore(SV, Addr);
2222    }
2223    return SV;
2224  }
2225  }
2226}
2227
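/// BuildVector - Build an LLVM vector from a power-of-two-sized list of scalar
/// operands.  If every operand is a Constant the result is a single
/// ConstantVector; otherwise the elements are inserted one at a time, e.g.
/// four non-constant i32 values become a chain of insertelement instructions
/// into an undef <4 x i32>.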
2228llvm::Value *CodeGenFunction::
2229BuildVector(ArrayRef<llvm::Value*> Ops) {
2230  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
2231         "Not a power-of-two sized vector!");
2232  bool AllConstants = true;
2233  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
2234    AllConstants &= isa<Constant>(Ops[i]);
2235
2236  // If this is a constant vector, create a ConstantVector.
2237  if (AllConstants) {
2238    SmallVector<llvm::Constant*, 16> CstOps;
2239    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2240      CstOps.push_back(cast<Constant>(Ops[i]));
2241    return llvm::ConstantVector::get(CstOps);
2242  }
2243
2244  // Otherwise, insertelement the values to build the vector.
2245  Value *Result =
2246    llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
2247
2248  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
2249    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
2250
2251  return Result;
2252}
2253
2254Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
2255                                           const CallExpr *E) {
2256  SmallVector<Value*, 4> Ops;
2257
2258  // Find out if any arguments are required to be integer constant expressions.
2259  unsigned ICEArguments = 0;
2260  ASTContext::GetBuiltinTypeError Error;
2261  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2262  assert(Error == ASTContext::GE_None && "Should not codegen an error");
2263
2264  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
2265    // If this is a normal argument, just emit it as a scalar.
2266    if ((ICEArguments & (1 << i)) == 0) {
2267      Ops.push_back(EmitScalarExpr(E->getArg(i)));
2268      continue;
2269    }
2270
2271    // If this is required to be a constant, constant fold it so that we know
2272    // that the generated intrinsic gets a ConstantInt.
2273    llvm::APSInt Result;
2274    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
2275    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
2276    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
2277  }
2278
2279  switch (BuiltinID) {
2280  default: return 0;
2281  case X86::BI__builtin_ia32_vec_init_v8qi:
2282  case X86::BI__builtin_ia32_vec_init_v4hi:
2283  case X86::BI__builtin_ia32_vec_init_v2si:
2284    return Builder.CreateBitCast(BuildVector(Ops),
2285                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
2286  case X86::BI__builtin_ia32_vec_ext_v2si:
2287    return Builder.CreateExtractElement(Ops[0],
2288                                  llvm::ConstantInt::get(Ops[1]->getType(), 0));
2289  case X86::BI__builtin_ia32_ldmxcsr: {
2290    llvm::Type *PtrTy = Int8PtrTy;
2291    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
2292    Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
2293    Builder.CreateStore(Ops[0], Tmp);
2294    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
2295                              Builder.CreateBitCast(Tmp, PtrTy));
2296  }
2297  case X86::BI__builtin_ia32_stmxcsr: {
2298    llvm::Type *PtrTy = Int8PtrTy;
2299    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
2300    Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
2301    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
2302                       Builder.CreateBitCast(Tmp, PtrTy));
2303    return Builder.CreateLoad(Tmp, "stmxcsr");
2304  }
2305  case X86::BI__builtin_ia32_storehps:
2306  case X86::BI__builtin_ia32_storelps: {
2307    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
2308    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2309
2310    // Cast the value to v2i64.
2311    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
2312
2313    // Extract element 0 (storelps) or 1 (storehps).
2314    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
2315    llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
2316    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
2317
2318    // Cast the pointer to i64* and store the element.
2319    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
2320    return Builder.CreateStore(Ops[1], Ops[0]);
2321  }
2322  case X86::BI__builtin_ia32_palignr: {
2323    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2324
2325    // If palignr is shifting the pair of input vectors less than 9 bytes,
2326    // emit a shuffle instruction.
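    // For example, a shift of 4 bytes selects bytes 4..11 of the concatenated
    // pair, i.e. a shuffle mask of <4, 5, 6, 7, 8, 9, 10, 11>.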
2327    if (shiftVal <= 8) {
2328      SmallVector<llvm::Constant*, 8> Indices;
2329      for (unsigned i = 0; i != 8; ++i)
2330        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
2331
2332      Value* SV = llvm::ConstantVector::get(Indices);
2333      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2334    }
2335
2336    // If palignr is shifting the pair of input vectors more than 8 but less
2337    // than 16 bytes, emit a logical right shift of the destination.
2338    if (shiftVal < 16) {
2339      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
2340      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);
2341
2342      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2343      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);
2344
2345      // Emit the logical right shift as an MMX intrinsic call.
2346      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
2347      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
2348    }
2349
2350    // If palignr is shifting the pair of vectors 16 bytes or more, emit zero.
2351    return llvm::Constant::getNullValue(ConvertType(E->getType()));
2352  }
2353  case X86::BI__builtin_ia32_palignr128: {
2354    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2355
2356    // If palignr is shifting the pair of input vectors less than 17 bytes,
2357    // emit a shuffle instruction.
2358    if (shiftVal <= 16) {
2359      SmallVector<llvm::Constant*, 16> Indices;
2360      for (unsigned i = 0; i != 16; ++i)
2361        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
2362
2363      Value* SV = llvm::ConstantVector::get(Indices);
2364      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2365    }
2366
2367    // If palignr is shifting the pair of input vectors more than 16 but less
2368    // than 32 bytes, emit a logical right shift of the destination.
2369    if (shiftVal < 32) {
2370      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2371
2372      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2373      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
2374
2375      // Emit the logical right shift as an SSE2 intrinsic call.
2376      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
2377      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
2378    }
2379
2380    // If palignr is shifting the pair of vectors 32 bytes or more, emit zero.
2381    return llvm::Constant::getNullValue(ConvertType(E->getType()));
2382  }
2383  case X86::BI__builtin_ia32_palignr256: {
2384    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2385
2386    // If palignr is shifting the pair of input vectors less than 17 bytes,
2387    // emit a shuffle instruction.
2388    if (shiftVal <= 16) {
2389      SmallVector<llvm::Constant*, 32> Indices;
2390      // 256-bit palignr operates on 128-bit lanes, so handle each lane separately.
2391      for (unsigned l = 0; l != 2; ++l) {
2392        unsigned LaneStart = l * 16;
2393        unsigned LaneEnd = (l+1) * 16;
2394        for (unsigned i = 0; i != 16; ++i) {
2395          unsigned Idx = shiftVal + i + LaneStart;
2396          if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand
2397          Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx));
2398        }
2399      }
2400
2401      Value* SV = llvm::ConstantVector::get(Indices);
2402      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2403    }
2404
2405    // If palignr is shifting the pair of input vectors more than 16 but less
2406    // than 32 bytes, emit a logical right shift of the destination.
2407    if (shiftVal < 32) {
2408      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4);
2409
2410      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2411      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
2412
2413      // Emit the logical right shift as an AVX2 intrinsic call.
2414      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq);
2415      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
2416    }
2417
2418    // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
2419    return llvm::Constant::getNullValue(ConvertType(E->getType()));
2420  }
2421  case X86::BI__builtin_ia32_movntps:
2422  case X86::BI__builtin_ia32_movntps256:
2423  case X86::BI__builtin_ia32_movntpd:
2424  case X86::BI__builtin_ia32_movntpd256:
2425  case X86::BI__builtin_ia32_movntdq:
2426  case X86::BI__builtin_ia32_movntdq256:
2427  case X86::BI__builtin_ia32_movnti: {
2428    llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
2429                                           Builder.getInt32(1));
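    // Tag the store below with !nontemporal metadata so the backend can select
    // a non-temporal (streaming) store such as movntps/movntdq.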
2430
2431    // Convert the type of the pointer to a pointer to the stored type.
2432    Value *BC = Builder.CreateBitCast(Ops[0],
2433                                llvm::PointerType::getUnqual(Ops[1]->getType()),
2434                                      "cast");
2435    StoreInst *SI = Builder.CreateStore(Ops[1], BC);
2436    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
2437    SI->setAlignment(16);
2438    return SI;
2439  }
2440  // 3DNow!
2441  case X86::BI__builtin_ia32_pswapdsf:
2442  case X86::BI__builtin_ia32_pswapdsi: {
2443    const char *name = 0;
2444    Intrinsic::ID ID = Intrinsic::not_intrinsic;
2445    switch(BuiltinID) {
2446    default: llvm_unreachable("Unsupported intrinsic!");
2447    case X86::BI__builtin_ia32_pswapdsf:
2448    case X86::BI__builtin_ia32_pswapdsi:
2449      name = "pswapd";
2450      ID = Intrinsic::x86_3dnowa_pswapd;
2451      break;
2452    }
2453    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
2454    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
2455    llvm::Function *F = CGM.getIntrinsic(ID);
2456    return Builder.CreateCall(F, Ops, name);
2457  }
2458  case X86::BI__builtin_ia32_rdrand16_step:
2459  case X86::BI__builtin_ia32_rdrand32_step:
2460  case X86::BI__builtin_ia32_rdrand64_step: {
2461    Intrinsic::ID ID;
2462    switch (BuiltinID) {
2463    default: llvm_unreachable("Unsupported intrinsic!");
2464    case X86::BI__builtin_ia32_rdrand16_step:
2465      ID = Intrinsic::x86_rdrand_16;
2466      break;
2467    case X86::BI__builtin_ia32_rdrand32_step:
2468      ID = Intrinsic::x86_rdrand_32;
2469      break;
2470    case X86::BI__builtin_ia32_rdrand64_step:
2471      ID = Intrinsic::x86_rdrand_64;
2472      break;
2473    }
2474
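    // The rdrand intrinsics return { random value, success flag }: store the
    // value through the pointer argument and return the flag.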
2475    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
2476    Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]);
2477    return Builder.CreateExtractValue(Call, 1);
2478  }
2479  }
2480}
2481
2482
2483Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
2484                                           const CallExpr *E) {
2485  SmallVector<Value*, 4> Ops;
2486
2487  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
2488    Ops.push_back(EmitScalarExpr(E->getArg(i)));
2489
2490  Intrinsic::ID ID = Intrinsic::not_intrinsic;
2491
2492  switch (BuiltinID) {
2493  default: return 0;
2494
2495  // vec_ld, vec_lvsl, vec_lvsr
2496  case PPC::BI__builtin_altivec_lvx:
2497  case PPC::BI__builtin_altivec_lvxl:
2498  case PPC::BI__builtin_altivec_lvebx:
2499  case PPC::BI__builtin_altivec_lvehx:
2500  case PPC::BI__builtin_altivec_lvewx:
2501  case PPC::BI__builtin_altivec_lvsl:
2502  case PPC::BI__builtin_altivec_lvsr:
2503  {
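    // These builtins take (offset, pointer); bitcast the pointer to i8* and
    // fold the offset in with a GEP so the intrinsic receives a single
    // effective address.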
2504    Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
2505
2506    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
2507    Ops.pop_back();
2508
2509    switch (BuiltinID) {
2510    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
2511    case PPC::BI__builtin_altivec_lvx:
2512      ID = Intrinsic::ppc_altivec_lvx;
2513      break;
2514    case PPC::BI__builtin_altivec_lvxl:
2515      ID = Intrinsic::ppc_altivec_lvxl;
2516      break;
2517    case PPC::BI__builtin_altivec_lvebx:
2518      ID = Intrinsic::ppc_altivec_lvebx;
2519      break;
2520    case PPC::BI__builtin_altivec_lvehx:
2521      ID = Intrinsic::ppc_altivec_lvehx;
2522      break;
2523    case PPC::BI__builtin_altivec_lvewx:
2524      ID = Intrinsic::ppc_altivec_lvewx;
2525      break;
2526    case PPC::BI__builtin_altivec_lvsl:
2527      ID = Intrinsic::ppc_altivec_lvsl;
2528      break;
2529    case PPC::BI__builtin_altivec_lvsr:
2530      ID = Intrinsic::ppc_altivec_lvsr;
2531      break;
2532    }
2533    llvm::Function *F = CGM.getIntrinsic(ID);
2534    return Builder.CreateCall(F, Ops, "");
2535  }
2536
2537  // vec_st
2538  case PPC::BI__builtin_altivec_stvx:
2539  case PPC::BI__builtin_altivec_stvxl:
2540  case PPC::BI__builtin_altivec_stvebx:
2541  case PPC::BI__builtin_altivec_stvehx:
2542  case PPC::BI__builtin_altivec_stvewx:
2543  {
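    // Likewise for the stores: fold the (offset, pointer) pair into a single
    // address before invoking the intrinsic.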
2544    Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
2545    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
2546    Ops.pop_back();
2547
2548    switch (BuiltinID) {
2549    default: llvm_unreachable("Unsupported st intrinsic!");
2550    case PPC::BI__builtin_altivec_stvx:
2551      ID = Intrinsic::ppc_altivec_stvx;
2552      break;
2553    case PPC::BI__builtin_altivec_stvxl:
2554      ID = Intrinsic::ppc_altivec_stvxl;
2555      break;
2556    case PPC::BI__builtin_altivec_stvebx:
2557      ID = Intrinsic::ppc_altivec_stvebx;
2558      break;
2559    case PPC::BI__builtin_altivec_stvehx:
2560      ID = Intrinsic::ppc_altivec_stvehx;
2561      break;
2562    case PPC::BI__builtin_altivec_stvewx:
2563      ID = Intrinsic::ppc_altivec_stvewx;
2564      break;
2565    }
2566    llvm::Function *F = CGM.getIntrinsic(ID);
2567    return Builder.CreateCall(F, Ops, "");
2568  }
2569  }
2570}
2571