CGBuiltin.cpp revision 1944ec188408aff1931c62c79a069e30f2549ec2
//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "TargetInfo.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "CGObjCRuntime.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/Intrinsics.h"
#include "llvm/Target/TargetData.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm;

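/// EmitMemoryBarrier - Emit a call to the llvm.memory.barrier intrinsic with
/// the given inter-access ordering flags; 'Device' indicates whether the
/// barrier should also apply to device (uncached) memory.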
static void EmitMemoryBarrier(CodeGenFunction &CGF,
                              bool LoadLoad, bool LoadStore,
                              bool StoreLoad, bool StoreStore,
                              bool Device) {
  Value *True = llvm::ConstantInt::getTrue(CGF.getLLVMContext());
  Value *False = llvm::ConstantInt::getFalse(CGF.getLLVMContext());
  Value *C[5] = { LoadLoad ? True : False,
                  LoadStore ? True : False,
                  StoreLoad ? True : False,
                  StoreStore ? True : False,
                  Device ? True : False };
  CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::memory_barrier),
                         C, C + 5);
}

static Value *EmitCastToInt(CodeGenFunction &CGF,
                            const llvm::Type *ToType, Value *Val) {
  if (Val->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(Val, ToType);

  assert(Val->getType()->isIntegerTy() &&
         "Used a non-integer and non-pointer type with atomic builtin");
  assert(Val->getType()->getScalarSizeInBits() <=
         ToType->getScalarSizeInBits() && "Integer type too small");
  return CGF.Builder.CreateSExtOrBitCast(Val, ToType);
}

static Value *EmitCastFromInt(CodeGenFunction &CGF, QualType ToQualType,
                              Value *Val) {
  const llvm::Type *ToType = CGF.ConvertType(ToQualType);
  if (ToType->isPointerTy()) {
    return CGF.Builder.CreateIntToPtr(Val, ToType);
  }
  assert(Val->getType()->isIntegerTy() &&
         "Used a non-integer and non-pointer type with atomic builtin");
  assert(Val->getType()->getScalarSizeInBits() >=
         ToType->getScalarSizeInBits() && "Integer type too small");
  return CGF.Builder.CreateTruncOrBitCast(Val, ToType);
}

// The atomic builtins are also full memory barriers. This is a utility for
// wrapping a call to the builtins with memory barriers.
static Value *EmitCallWithBarrier(CodeGenFunction &CGF, Value *Fn,
                                  Value **ArgBegin, Value **ArgEnd) {
  // FIXME: We need a target hook for whether this applies to device memory or
  // not.
  bool Device = true;

  // Create barriers both before and after the call.
  EmitMemoryBarrier(CGF, true, true, true, true, Device);
  Value *Result = CGF.Builder.CreateCall(Fn, ArgBegin, ArgEnd);
  EmitMemoryBarrier(CGF, true, true, true, true, Device);
  return Result;
}

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               Intrinsic::ID Id, const CallExpr *E) {
  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace =
    cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
  const llvm::Type *ValueType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(E->getType()));
  const llvm::Type *PtrType = ValueType->getPointerTo(AddrSpace);
  const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
  Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes, 2);

  Value *Args[2] = { CGF.Builder.CreateBitCast(DestPtr, PtrType),
                     EmitCastToInt(CGF, ValueType,
                                   CGF.EmitScalarExpr(E->getArg(1))) };
  return RValue::get(EmitCastFromInt(CGF, E->getType(),
                                     EmitCallWithBarrier(CGF, AtomF, Args,
                                                         Args + 2)));
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   Intrinsic::ID Id, const CallExpr *E,
                                   Instruction::BinaryOps Op) {
  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace =
    cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

  const llvm::Type *ValueType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(E->getType()));
  const llvm::Type *PtrType = ValueType->getPointerTo(AddrSpace);
  const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
  Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes, 2);

  Value *Args[2] = { CGF.Builder.CreateBitCast(DestPtr, PtrType),
                     EmitCastToInt(CGF, ValueType,
                                   CGF.EmitScalarExpr(E->getArg(1))) };
  Value *Result = EmitCallWithBarrier(CGF, AtomF, Args, Args + 2);
  return RValue::get(EmitCastFromInt(CGF, E->getType(),
                                     CGF.Builder.CreateBinOp(Op, Result,
                                                             Args[1])));
}

/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
/// which must be a scalar floating point type.
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
  const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
  assert(ValTyP && "isn't scalar fp type!");

  StringRef FnName;
  switch (ValTyP->getKind()) {
  default: assert(0 && "Isn't a scalar fp type!");
  case BuiltinType::Float:      FnName = "fabsf"; break;
  case BuiltinType::Double:     FnName = "fabs"; break;
  case BuiltinType::LongDouble: FnName = "fabsl"; break;
  }

  // The prototype is something that takes and returns whatever V's type is.
  std::vector<const llvm::Type*> Args;
  Args.push_back(V->getType());
  llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), Args, false);
  llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);

  return CGF.Builder.CreateCall(Fn, V, "abs");
}

RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                        unsigned BuiltinID, const CallExpr *E) {
  // See if we can constant fold this builtin.  If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->Evaluate(Result, CGM.getContext())) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(VMContext,
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(ConstantFP::get(VMContext, Result.Val.getFloat()));
  }

  switch (BuiltinID) {
  default: break;  // Handle intrinsics and libm functions below.
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__builtin_va_end: {
    Value *ArgValue = EmitVAListRef(E->getArg(0));
    const llvm::Type *DestType = llvm::Type::getInt8PtrTy(VMContext);
    if (ArgValue->getType() != DestType)
      ArgValue = Builder.CreateBitCast(ArgValue, DestType,
                                       ArgValue->getName().data());

    Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
      Intrinsic::vaend : Intrinsic::vastart;
    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
  }
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0));
    Value *SrcPtr = EmitVAListRef(E->getArg(1));

    const llvm::Type *Type = llvm::Type::getInt8PtrTy(VMContext);

    DstPtr = Builder.CreateBitCast(DstPtr, Type);
    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
                                           DstPtr, SrcPtr));
  }
  case Builtin::BI__builtin_abs: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
      Builder.CreateICmpSGE(ArgValue,
                            llvm::Constant::getNullValue(ArgValue->getType()),
                            "abscond");
    Value *Result =
      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");

    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateAdd(Builder.CreateCall(F, ArgValue, "tmp"),
                                   llvm::ConstantInt::get(ArgType, 1), "tmp");
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue, "tmp");
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1),
                                      "tmp");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_expect: {
    // FIXME: pass expect through to LLVM
    if (E->getArg(1)->HasSideEffects(getContext()))
      (void)EmitScalarExpr(E->getArg(1));
    return RValue::get(EmitScalarExpr(E->getArg(0)));
  }
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::bswap, &ArgType, 1);
    return RValue::get(Builder.CreateCall(F, ArgValue, "tmp"));
  }
  case Builtin::BI__builtin_object_size: {
    // We pass this builtin onto the optimizer so that it can
    // figure out the object size in more complex cases.
    const llvm::Type *ResType[] = {
      ConvertType(E->getType())
    };

    // LLVM only supports 0 and 2; make sure we pass that along as a boolean.
    Value *Ty = EmitScalarExpr(E->getArg(1));
    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
    assert(CI);
    uint64_t val = CI->getZExtValue();
    CI = ConstantInt::get(llvm::Type::getInt1Ty(VMContext), (val & 0x2) >> 1);

    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType, 1);
    return RValue::get(Builder.CreateCall2(F,
                                           EmitScalarExpr(E->getArg(0)),
                                           CI));
  }
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch, 0, 0);
    return RValue::get(Builder.CreateCall3(F, Address, RW, Locality));
  }
  case Builtin::BI__builtin_trap: {
    Value *F = CGM.getIntrinsic(Intrinsic::trap, 0, 0);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_unreachable: {
    if (CatchUndefined && HaveInsertPoint())
      EmitBranch(getTrapBB());
    Value *V = Builder.CreateUnreachable();
    Builder.ClearInsertionPoint();
    return RValue::get(V);
  }

  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    const llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::powi, &ArgType, 1);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp"));
  }

  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: assert(0 && "Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()),
                                          "tmp"));
  }
  case Builtin::BI__builtin_isnan: {
    Value *V = EmitScalarExpr(E->getArg(0));
    V = Builder.CreateFCmpUNO(V, V, "cmp");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp"));
  }

  case Builtin::BI__builtin_isinf: {
    // isinf(x) --> fabs(x) == infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    V = EmitFAbs(*this, V, E->getArg(0)->getType());

    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp"));
  }

  // TODO: BI__builtin_isinf_sign
  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0

  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsLessThanInf =
      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isfinite: {
    // isfinite(x) --> x == x && fabs(x) != infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsNotInf =
      Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");

    V = Builder.CreateAnd(Eq, IsNotInf, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_fpclassify: {
    Value *V = EmitScalarExpr(E->getArg(5));
    const llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()),
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }

  case Builtin::BIalloca:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    return RValue::get(Builder.CreateAlloca(llvm::Type::getInt8Ty(VMContext),
                                            Size, "tmp"));
  }
  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    Builder.CreateCall5(CGM.getMemSetFn(Address->getType(), SizeVal->getType()),
                   Address,
                   llvm::ConstantInt::get(llvm::Type::getInt8Ty(VMContext), 0),
                   SizeVal,
                   llvm::ConstantInt::get(Int32Ty, 1),
                   llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
    return RValue::get(Address);
  }
  case Builtin::BImemcpy:
  case Builtin::BI__builtin_memcpy: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateCall5(CGM.getMemCpyFn(Address->getType(), SrcAddr->getType(),
                                        SizeVal->getType()),
                  Address, SrcAddr, SizeVal,
                  llvm::ConstantInt::get(Int32Ty, 1),
                  llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
    return RValue::get(Address);
  }

  case Builtin::BI__builtin_objc_memmove_collectable: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
                                                  Address, SrcAddr, SizeVal);
    return RValue::get(Address);
  }

  case Builtin::BImemmove:
  case Builtin::BI__builtin_memmove: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateCall5(CGM.getMemMoveFn(Address->getType(), SrcAddr->getType(),
                                         SizeVal->getType()),
                  Address, SrcAddr, SizeVal,
                  llvm::ConstantInt::get(Int32Ty, 1),
                  llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
    return RValue::get(Address);
  }
  case Builtin::BImemset:
  case Builtin::BI__builtin_memset: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateCall5(CGM.getMemSetFn(Address->getType(), SizeVal->getType()),
                  Address,
                  Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                      llvm::Type::getInt8Ty(VMContext)),
                  SizeVal,
                  llvm::ConstantInt::get(Int32Ty, 1),
                  llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
    return RValue::get(Address);
  }
  case Builtin::BI__builtin_dwarf_cfa: {
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend?  Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa, 0, 0);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
  case Builtin::BI__builtin_return_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp");
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress, 0, 0);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_frame_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp");
    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress, 0, 0);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_extract_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_frob_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dwarf_sp_column: {
    const llvm::IntegerType *Ty
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
    if (Column == -1) {
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
      return RValue::get(llvm::UndefValue::get(Ty));
    }
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  }
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_eh_return: {
    Value *Int = EmitScalarExpr(E->getArg(0));
    Value *Ptr = EmitScalarExpr(E->getArg(1));

    const llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                  ? Intrinsic::eh_return_i32
                                  : Intrinsic::eh_return_i64,
                                0, 0);
    Builder.CreateCall2(F, Int, Ptr);
    Value *V = Builder.CreateUnreachable();
    Builder.ClearInsertionPoint();
    return RValue::get(V);
  }
  case Builtin::BI__builtin_unwind_init: {
    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init, 0, 0);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_extend_pointer: {
    // Extends a pointer to the size of an _Unwind_Word, which is
    // uint64_t on all platforms.  Generally this gets poked into a
    // register and eventually used as an address, so if the
    // addressing registers are wider than pointers and the platform
    // doesn't implicitly ignore high-order bits when doing
    // addressing, we need to make sure we zext / sext based on
    // the platform's expectations.
    //
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html

    LLVMContext &C = CGM.getLLVMContext();

    // Cast the pointer to intptr_t.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    const llvm::IntegerType *IntPtrTy = CGM.getTargetData().getIntPtrType(C);
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");

    // If that's 64 bits, we're done.
    if (IntPtrTy->getBitWidth() == 64)
      return RValue::get(Result);

    // Otherwise, ask the codegen data what to do.
    if (getTargetHooks().extendPointerWithSExt())
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
    else
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  }
  case Builtin::BI__builtin_setjmp: {
    // Buffer is a void**.
    Value *Buf = EmitScalarExpr(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    Value *StackAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Value *StackSaveSlot =
      Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, llvm::Type::getInt8PtrTy(VMContext));
    return RValue::get(Builder.CreateCall(F, Buf));
  }
  case Builtin::BI__builtin_longjmp: {
    Value *Buf = EmitScalarExpr(E->getArg(0));
    Buf = Builder.CreateBitCast(Buf, llvm::Type::getInt8PtrTy(VMContext));

    // Call LLVM's EH longjmp, which is lightweight.
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);

    // longjmp doesn't return; mark this as unreachable
    Value *V = Builder.CreateUnreachable();
    Builder.ClearInsertionPoint();
    return RValue::get(V);
  }
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
    assert(0 && "Shouldn't make it through sema");
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_and, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_xor, E);

  // Clang extensions: not overloaded yet.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umax, E);

  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_and, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_xor, E,
                                llvm::Instruction::Xor);

  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16: {
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace =
      cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
    const llvm::Type *ValueType =
      llvm::IntegerType::get(VMContext,
                             getContext().getTypeSize(E->getType()));
    const llvm::Type *PtrType = ValueType->getPointerTo(AddrSpace);
    const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
    Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap,
                                    IntrinsicTypes, 2);

    Value *Args[3] = { Builder.CreateBitCast(DestPtr, PtrType),
                       EmitCastToInt(*this, ValueType,
                                     EmitScalarExpr(E->getArg(1))),
                       EmitCastToInt(*this, ValueType,
                                     EmitScalarExpr(E->getArg(2))) };
    return RValue::get(EmitCastFromInt(*this, E->getType(),
                                       EmitCallWithBarrier(*this, AtomF, Args,
                                                           Args + 3)));
  }

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16: {
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace =
      cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
    const llvm::Type *ValueType =
      llvm::IntegerType::get(VMContext,
        getContext().getTypeSize(E->getArg(1)->getType()));
    const llvm::Type *PtrType = ValueType->getPointerTo(AddrSpace);
    const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
    Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap,
                                    IntrinsicTypes, 2);

    Value *Args[3] = { Builder.CreateBitCast(DestPtr, PtrType),
                       EmitCastToInt(*this, ValueType,
                                     EmitScalarExpr(E->getArg(1))),
                       EmitCastToInt(*this, ValueType,
                                     EmitScalarExpr(E->getArg(2))) };
    Value *OldVal = Args[1];
    Value *PrevVal = EmitCallWithBarrier(*this, AtomF, Args, Args + 3);
    Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
    // zext bool to int.
    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
  }

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_swap, E);

  case Builtin::BI__sync_lock_release_1:
  case Builtin::BI__sync_lock_release_2:
  case Builtin::BI__sync_lock_release_4:
  case Builtin::BI__sync_lock_release_8:
  case Builtin::BI__sync_lock_release_16: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    const llvm::Type *ElTy =
      cast<llvm::PointerType>(Ptr->getType())->getElementType();
    llvm::StoreInst *Store =
      Builder.CreateStore(llvm::Constant::getNullValue(ElTy), Ptr);
    Store->setVolatile(true);
    return RValue::get(0);
  }

  case Builtin::BI__sync_synchronize: {
    // We assume, like gcc appears to, that this only applies to cached memory.
    EmitMemoryBarrier(*this, true, true, true, true, false);
    return RValue::get(0);
  }

  case Builtin::BI__builtin_llvm_memory_barrier: {
    Value *C[5] = {
      EmitScalarExpr(E->getArg(0)),
      EmitScalarExpr(E->getArg(1)),
      EmitScalarExpr(E->getArg(2)),
      EmitScalarExpr(E->getArg(3)),
      EmitScalarExpr(E->getArg(4))
    };
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::memory_barrier), C, C + 5);
    return RValue::get(0);
  }

    // Library functions with special handling.
  case Builtin::BIsqrt:
  case Builtin::BIsqrtf:
  case Builtin::BIsqrtl: {
    // TODO: there is currently no set of optimizer flags
    // sufficient for us to rewrite sqrt to @llvm.sqrt.
    // -fmath-errno=0 is not good enough; we need finiteness.
    // We could probably precondition the call with an ult
    // against 0, but is that worth the complexity?
    break;
  }

  case Builtin::BIpow:
  case Builtin::BIpowf:
  case Builtin::BIpowl: {
    // Rewrite pow to an intrinsic call if allowed.
    if (!FD->hasAttr<ConstAttr>())
      break;
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    const llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::pow, &ArgType, 1);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp"));
  }

  case Builtin::BI__builtin_signbit:
  case Builtin::BI__builtin_signbitf:
  case Builtin::BI__builtin_signbitl: {
    LLVMContext &C = CGM.getLLVMContext();

    Value *Arg = EmitScalarExpr(E->getArg(0));
    const llvm::Type *ArgTy = Arg->getType();
    if (ArgTy->isPPC_FP128Ty())
      break; // FIXME: I'm not sure what the right implementation is here.
    int ArgWidth = ArgTy->getPrimitiveSizeInBits();
    const llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
    Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
    Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
    Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
  }
  }

  // If this is an alias for a libm function (e.g. __builtin_sin) turn it into
  // that function.
  if (getContext().BuiltinInfo.isLibFunction(BuiltinID) ||
      getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
    return EmitCall(E->getCallee()->getType(),
                    CGM.getBuiltinLibFunction(FD, BuiltinID),
                    ReturnValueSlot(),
                    E->arg_begin(), E->arg_end());

  // See if we have a target specific intrinsic.
  const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
  if (const char *Prefix =
      llvm::Triple::getArchTypePrefix(Target.getTriple().getArch()))
    IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);

  if (IntrinsicID != Intrinsic::not_intrinsic) {
    SmallVector<Value*, 16> Args;

    // Find out if any arguments are required to be integer constant
    // expressions.
    unsigned ICEArguments = 0;
    ASTContext::GetBuiltinTypeError Error;
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
    assert(Error == ASTContext::GE_None && "Should not codegen an error");

    Function *F = CGM.getIntrinsic(IntrinsicID);
    const llvm::FunctionType *FTy = F->getFunctionType();

    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
      Value *ArgValue;
      // If this is a normal argument, just emit it as a scalar.
      if ((ICEArguments & (1 << i)) == 0) {
        ArgValue = EmitScalarExpr(E->getArg(i));
      } else {
        // If this is required to be a constant, constant fold it so that we
        // know that the generated intrinsic gets a ConstantInt.
        llvm::APSInt Result;
        bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
        assert(IsConst && "Constant arg isn't actually constant?");
        (void)IsConst;
        ArgValue = llvm::ConstantInt::get(VMContext, Result);
      }

      // If the intrinsic arg type is different from the builtin arg type
      // we need to do a bit cast.
      const llvm::Type *PTy = FTy->getParamType(i);
      if (PTy != ArgValue->getType()) {
        assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
               "Must be able to losslessly bit cast to param");
        ArgValue = Builder.CreateBitCast(ArgValue, PTy);
      }

      Args.push_back(ArgValue);
    }

    Value *V = Builder.CreateCall(F, Args.data(), Args.data() + Args.size());
    QualType BuiltinRetType = E->getType();

    const llvm::Type *RetTy = llvm::Type::getVoidTy(VMContext);
    if (!BuiltinRetType->isVoidType()) RetTy = ConvertType(BuiltinRetType);

    if (RetTy != V->getType()) {
      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
             "Must be able to losslessly bit cast result type");
      V = Builder.CreateBitCast(V, RetTy);
    }

    return RValue::get(V);
  }

  // See if we have a target specific builtin that needs to be lowered.
  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
    return RValue::get(V);

  ErrorUnsupported(E, "builtin function");

  // Unknown builtin, for now just dump it out and return undef.
  if (hasAggregateLLVMType(E->getType()))
    return RValue::getAggregate(CreateMemTemp(E->getType()));
  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
}

Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  switch (Target.getTriple().getArch()) {
  case llvm::Triple::arm:
  case llvm::Triple::thumb:
    return EmitARMBuiltinExpr(BuiltinID, E);
  case llvm::Triple::x86:
  case llvm::Triple::x86_64:
    return EmitX86BuiltinExpr(BuiltinID, E);
  case llvm::Triple::ppc:
  case llvm::Triple::ppc64:
    return EmitPPCBuiltinExpr(BuiltinID, E);
  default:
    return 0;
  }
}

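/// GetNeonType - Map a NEON element-type code (the low 3 bits of the type
/// argument passed to the overloaded NEON builtins) to the corresponding LLVM
/// vector type.  As used elsewhere in this file, codes 5 and 6 appear to be
/// the polynomial variants of the i8/i16 cases and 7 the half-float case, all
/// of which share the integer layouts; 'q' selects a 128-bit (quad) vector
/// instead of a 64-bit one.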
const llvm::VectorType *GetNeonType(LLVMContext &C, unsigned type, bool q) {
  switch (type) {
    default: break;
    case 0:
    case 5: return llvm::VectorType::get(llvm::Type::getInt8Ty(C), 8 << (int)q);
    case 6:
    case 7:
    case 1: return llvm::VectorType::get(llvm::Type::getInt16Ty(C),4 << (int)q);
    case 2: return llvm::VectorType::get(llvm::Type::getInt32Ty(C),2 << (int)q);
    case 3: return llvm::VectorType::get(llvm::Type::getInt64Ty(C),1 << (int)q);
    case 4: return llvm::VectorType::get(llvm::Type::getFloatTy(C),2 << (int)q);
  };
  return 0;
}

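/// EmitNeonSplat - Build a shufflevector that replicates lane C of V across
/// every element of the result; if 'widen' is set, the result has twice as
/// many elements as V (splatting into a wider vector).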
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C, bool widen) {
  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
  if (widen)
    nElts <<= 1;
  SmallVector<Constant*, 16> Indices(nElts, C);
  Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}

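/// EmitNeonCall - Emit a call to an overloaded NEON intrinsic, bitcasting each
/// operand to the intrinsic's expected parameter type.  If 'shift' is nonzero,
/// operand 'shift' is expanded into a constant shift-amount vector (negated
/// for right shifts); if 'splat' is set, the trailing extra operand is a lane
/// number used to splat the operand before it and is then dropped.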
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
                                     const char *name, bool splat,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j)
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);

  if (splat) {
    Ops[j-1] = EmitNeonSplat(Ops[j-1], cast<Constant>(Ops[j]));
    Ops.resize(j);
  }
  return Builder.CreateCall(F, Ops.begin(), Ops.end(), name);
}

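/// EmitNeonShiftVector - Build a constant vector in which every element is the
/// given shift amount; for right shifts ('neg') the amount is negated, since
/// the NEON shift intrinsics encode right shifts as negative counts.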
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, const llvm::Type *Ty,
                                            bool neg) {
  ConstantInt *CI = cast<ConstantInt>(V);
  int SV = CI->getSExtValue();

  const llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
  llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
  SmallVector<llvm::Constant*, 16> CV(VTy->getNumElements(), C);
  return llvm::ConstantVector::get(CV.begin(), CV.size());
}

/// GetPointeeAlignment - Given an expression with a pointer type, find the
/// alignment of the type referenced by the pointer.  Skip over implicit
/// casts.
static Value *GetPointeeAlignment(CodeGenFunction &CGF, const Expr *Addr) {
  unsigned Align = 1;
  // Check if the type is a pointer.  The implicit cast operand might not be.
  while (Addr->getType()->isPointerType()) {
    QualType PtTy = Addr->getType()->getPointeeType();
    unsigned NewA = CGF.getContext().getTypeAlignInChars(PtTy).getQuantity();
    if (NewA > Align)
      Align = NewA;

    // If the address is an implicit cast, repeat with the cast operand.
    if (const ImplicitCastExpr *CastAddr = dyn_cast<ImplicitCastExpr>(Addr)) {
      Addr = CastAddr->getSubExpr();
      continue;
    }
    break;
  }
  return llvm::ConstantInt::get(CGF.Int32Ty, Align);
}

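/// EmitARMBuiltinExpr - For the overloaded NEON builtins handled below, the
/// last argument is an integer constant expression encoding the element type
/// in its low bits plus 'unsigned' (0x08) and 'quad' (0x10) flags; it is
/// decoded here and mapped to an LLVM vector type with GetNeonType above.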
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  if (BuiltinID == ARM::BI__clear_cache) {
    const FunctionDecl *FD = E->getDirectCallee();
    Value *a = EmitScalarExpr(E->getArg(0));
    Value *b = EmitScalarExpr(E->getArg(1));
    const llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    const llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    llvm::StringRef Name = FD->getName();
    return Builder.CreateCall2(CGM.CreateRuntimeFunction(FTy, Name),
                               a, b);
  }

  llvm::SmallVector<Value*, 4> Ops;
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  llvm::APSInt Result;
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
  if (!Arg->isIntegerConstantExpr(Result, getContext()))
    return 0;

  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
    // Determine the overloaded type of this builtin.
    const llvm::Type *Ty;
    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
      Ty = llvm::Type::getFloatTy(VMContext);
    else
      Ty = llvm::Type::getDoubleTy(VMContext);

    // Determine whether this is an unsigned conversion or not.
    bool usgn = Result.getZExtValue() == 1;
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;

    // Call the appropriate intrinsic.
    Function *F = CGM.getIntrinsic(Int, &Ty, 1);
    return Builder.CreateCall(F, Ops.begin(), Ops.end(), "vcvtr");
  }

  // Determine the type of this overloaded NEON intrinsic.
  unsigned type = Result.getZExtValue();
  bool usgn = type & 0x08;
  bool quad = type & 0x10;
  bool poly = (type & 0x7) == 5 || (type & 0x7) == 6;
  bool splat = false;

  const llvm::VectorType *VTy = GetNeonType(VMContext, type & 0x7, quad);
  const llvm::Type *Ty = VTy;
  if (!Ty)
    return 0;

  unsigned Int;
  switch (BuiltinID) {
  default: return 0;
  case ARM::BI__builtin_neon_vaba_v:
  case ARM::BI__builtin_neon_vabaq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    SmallVector<Value*, 2> Args;
    Args.push_back(Ops[1]);
    Args.push_back(Ops[2]);
    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Args, "vaba");
    return Builder.CreateAdd(Ops[0], Ops[1], "vaba");
  }
  case ARM::BI__builtin_neon_vabal_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    SmallVector<Value*, 2> Args;
    Args.push_back(Ops[1]);
    Args.push_back(Ops[2]);
    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Int, &DTy, 1), Args, "vabal");
    Ops[1] = Builder.CreateZExt(Ops[1], Ty);
    return Builder.CreateAdd(Ops[0], Ops[1], "vabal");
  }
  case ARM::BI__builtin_neon_vabd_v:
  case ARM::BI__builtin_neon_vabdq_v:
    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabd");
  case ARM::BI__builtin_neon_vabdl_v: {
    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, &DTy, 1), Ops, "vabdl");
    return Builder.CreateZExt(Ops[0], Ty, "vabdl");
  }
  case ARM::BI__builtin_neon_vabs_v:
  case ARM::BI__builtin_neon_vabsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, &Ty, 1),
                        Ops, "vabs");
  case ARM::BI__builtin_neon_vaddhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, &Ty, 1),
                        Ops, "vaddhn");
  case ARM::BI__builtin_neon_vaddl_v: {
    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
    if (usgn) {
      Ops[0] = Builder.CreateZExt(Ops[0], Ty);
      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
    } else {
      Ops[0] = Builder.CreateSExt(Ops[0], Ty);
      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
    }
    return Builder.CreateAdd(Ops[0], Ops[1], "vaddl");
  }
  case ARM::BI__builtin_neon_vaddw_v: {
    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
    if (usgn)
      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
    else
      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
    return Builder.CreateAdd(Ops[0], Ops[1], "vaddw");
  }
1218  case ARM::BI__builtin_neon_vcale_v:
1219    std::swap(Ops[0], Ops[1]);
1220  case ARM::BI__builtin_neon_vcage_v: {
1221    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged, &Ty, 1);
1222    return EmitNeonCall(F, Ops, "vcage");
1223  }
1224  case ARM::BI__builtin_neon_vcaleq_v:
1225    std::swap(Ops[0], Ops[1]);
1226  case ARM::BI__builtin_neon_vcageq_v: {
1227    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq, &Ty, 1);
1228    return EmitNeonCall(F, Ops, "vcage");
1229  }
1230  case ARM::BI__builtin_neon_vcalt_v:
1231    std::swap(Ops[0], Ops[1]);
1232  case ARM::BI__builtin_neon_vcagt_v: {
1233    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd, &Ty, 1);
1234    return EmitNeonCall(F, Ops, "vcagt");
1235  }
1236  case ARM::BI__builtin_neon_vcaltq_v:
1237    std::swap(Ops[0], Ops[1]);
1238  case ARM::BI__builtin_neon_vcagtq_v: {
1239    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq, &Ty, 1);
1240    return EmitNeonCall(F, Ops, "vcagt");
1241  }
1242  case ARM::BI__builtin_neon_vcls_v:
1243  case ARM::BI__builtin_neon_vclsq_v: {
1244    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, &Ty, 1);
1245    return EmitNeonCall(F, Ops, "vcls");
1246  }
1247  case ARM::BI__builtin_neon_vclz_v:
1248  case ARM::BI__builtin_neon_vclzq_v: {
1249    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, &Ty, 1);
1250    return EmitNeonCall(F, Ops, "vclz");
1251  }
1252  case ARM::BI__builtin_neon_vcnt_v:
1253  case ARM::BI__builtin_neon_vcntq_v: {
1254    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, &Ty, 1);
1255    return EmitNeonCall(F, Ops, "vcnt");
1256  }
1257  // FIXME: intrinsics for f16<->f32 convert missing from ARM target.
1258  case ARM::BI__builtin_neon_vcvt_f32_v:
1259  case ARM::BI__builtin_neon_vcvtq_f32_v: {
1260    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1261    Ty = GetNeonType(VMContext, 4, quad);
1262    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1263                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1264  }
1265  case ARM::BI__builtin_neon_vcvt_s32_v:
1266  case ARM::BI__builtin_neon_vcvt_u32_v:
1267  case ARM::BI__builtin_neon_vcvtq_s32_v:
1268  case ARM::BI__builtin_neon_vcvtq_u32_v: {
1269    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(VMContext, 4, quad));
1270    return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1271                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1272  }
1273  case ARM::BI__builtin_neon_vcvt_n_f32_v:
1274  case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
1275    const llvm::Type *Tys[2] = { GetNeonType(VMContext, 4, quad), Ty };
1276    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp : Intrinsic::arm_neon_vcvtfxs2fp;
1277    Function *F = CGM.getIntrinsic(Int, Tys, 2);
1278    return EmitNeonCall(F, Ops, "vcvt_n");
1279  }
1280  case ARM::BI__builtin_neon_vcvt_n_s32_v:
1281  case ARM::BI__builtin_neon_vcvt_n_u32_v:
1282  case ARM::BI__builtin_neon_vcvtq_n_s32_v:
1283  case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
1284    const llvm::Type *Tys[2] = { Ty, GetNeonType(VMContext, 4, quad) };
1285    Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu : Intrinsic::arm_neon_vcvtfp2fxs;
1286    Function *F = CGM.getIntrinsic(Int, Tys, 2);
1287    return EmitNeonCall(F, Ops, "vcvt_n");
1288  }
1289  case ARM::BI__builtin_neon_vdup_lane_v:
1290    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1291    return EmitNeonSplat(Ops[0], cast<Constant>(Ops[1]));
1292  case ARM::BI__builtin_neon_vdupq_lane_v:
1293    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1294    return EmitNeonSplat(Ops[0], cast<Constant>(Ops[1]), true);
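  // vext extracts VTy->getNumElements() consecutive elements from the
  // concatenation of the two sources, starting at the constant lane index, so
  // it lowers to a single shufflevector.  For a 4-element vector with index 1
  // the shuffle mask is <1, 2, 3, 4>.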
1295  case ARM::BI__builtin_neon_vext_v:
1296  case ARM::BI__builtin_neon_vextq_v: {
1297    ConstantInt *C = cast<ConstantInt>(Ops[2]);
1298    int CV = C->getSExtValue();
1299    SmallVector<Constant*, 16> Indices;
1300    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1301      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
1302
1303    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1304    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1305    Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1306    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
1307  }
1308  case ARM::BI__builtin_neon_vget_lane_i8:
1309  case ARM::BI__builtin_neon_vget_lane_i16:
1310  case ARM::BI__builtin_neon_vget_lane_i32:
1311  case ARM::BI__builtin_neon_vget_lane_i64:
1312  case ARM::BI__builtin_neon_vget_lane_f32:
1313  case ARM::BI__builtin_neon_vgetq_lane_i8:
1314  case ARM::BI__builtin_neon_vgetq_lane_i16:
1315  case ARM::BI__builtin_neon_vgetq_lane_i32:
1316  case ARM::BI__builtin_neon_vgetq_lane_i64:
1317  case ARM::BI__builtin_neon_vgetq_lane_f32:
1318    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
1319                                        "vget_lane");
1320  case ARM::BI__builtin_neon_vhadd_v:
1321  case ARM::BI__builtin_neon_vhaddq_v:
1322    Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
1323    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhadd");
1324  case ARM::BI__builtin_neon_vhsub_v:
1325  case ARM::BI__builtin_neon_vhsubq_v:
1326    Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
1327    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhsub");
1328  case ARM::BI__builtin_neon_vld1_v:
1329  case ARM::BI__builtin_neon_vld1q_v:
1330    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1331    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, &Ty, 1),
1332                        Ops, "vld1");
1333  case ARM::BI__builtin_neon_vld1_lane_v:
1334  case ARM::BI__builtin_neon_vld1q_lane_v:
1335    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1336    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1337    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1338    Ops[0] = Builder.CreateLoad(Ops[0]);
1339    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
1340  case ARM::BI__builtin_neon_vld1_dup_v:
1341  case ARM::BI__builtin_neon_vld1q_dup_v: {
1342    Value *V = UndefValue::get(Ty);
1343    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1344    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1345    Ops[0] = Builder.CreateLoad(Ops[0]);
1346    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1347    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
1348    return EmitNeonSplat(Ops[0], CI);
1349  }
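  // vld2/vld3/vld4 return a struct of vectors; the intrinsic's aggregate
  // result is stored through the pointer passed in Ops[0].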
1350  case ARM::BI__builtin_neon_vld2_v:
1351  case ARM::BI__builtin_neon_vld2q_v: {
1352    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, &Ty, 1);
1353    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1354    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
1355    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1356    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1357    return Builder.CreateStore(Ops[1], Ops[0]);
1358  }
1359  case ARM::BI__builtin_neon_vld3_v:
1360  case ARM::BI__builtin_neon_vld3q_v: {
1361    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, &Ty, 1);
1362    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1363    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
1364    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1365    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1366    return Builder.CreateStore(Ops[1], Ops[0]);
1367  }
1368  case ARM::BI__builtin_neon_vld4_v:
1369  case ARM::BI__builtin_neon_vld4q_v: {
1370    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, &Ty, 1);
1371    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1372    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
1373    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1374    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1375    return Builder.CreateStore(Ops[1], Ops[0]);
1376  }
1377  case ARM::BI__builtin_neon_vld2_lane_v:
1378  case ARM::BI__builtin_neon_vld2q_lane_v: {
1379    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, &Ty, 1);
1380    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1381    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1382    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1383    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld2_lane");
1384    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1385    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1386    return Builder.CreateStore(Ops[1], Ops[0]);
1387  }
1388  case ARM::BI__builtin_neon_vld3_lane_v:
1389  case ARM::BI__builtin_neon_vld3q_lane_v: {
1390    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, &Ty, 1);
1391    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1392    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1393    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
1394    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1395    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld3_lane");
1396    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1397    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1398    return Builder.CreateStore(Ops[1], Ops[0]);
1399  }
1400  case ARM::BI__builtin_neon_vld4_lane_v:
1401  case ARM::BI__builtin_neon_vld4q_lane_v: {
1402    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, &Ty, 1);
1403    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1404    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1405    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
1406    Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
1407    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1408    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld4_lane");
1409    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1410    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1411    return Builder.CreateStore(Ops[1], Ops[0]);
1412  }
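  // There is no dedicated vldN_dup intrinsic, so load one element into lane 0
  // of otherwise-undef vectors with the matching vldNlane intrinsic, then
  // splat lane 0 across each vector of the aggregate result.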
1413  case ARM::BI__builtin_neon_vld2_dup_v:
1414  case ARM::BI__builtin_neon_vld3_dup_v:
1415  case ARM::BI__builtin_neon_vld4_dup_v: {
1416    switch (BuiltinID) {
1417    case ARM::BI__builtin_neon_vld2_dup_v:
1418      Int = Intrinsic::arm_neon_vld2lane;
1419      break;
1420    case ARM::BI__builtin_neon_vld3_dup_v:
1421      Int = Intrinsic::arm_neon_vld3lane;
1422      break;
1423    case ARM::BI__builtin_neon_vld4_dup_v:
1424      Int = Intrinsic::arm_neon_vld4lane;
1425      break;
1426    default: assert(0 && "unknown vld_dup intrinsic?");
1427    }
1428    Function *F = CGM.getIntrinsic(Int, &Ty, 1);
1429    const llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
1430
1431    SmallVector<Value*, 6> Args;
1432    Args.push_back(Ops[1]);
1433    Args.append(STy->getNumElements(), UndefValue::get(Ty));
1434
1435    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1436    Args.push_back(CI);
1437    Args.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1438
1439    Ops[1] = Builder.CreateCall(F, Args.begin(), Args.end(), "vld_dup");
1440    // Splat lane 0 to all elements in each vector of the result.
1441    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1442      Value *Val = Builder.CreateExtractValue(Ops[1], i);
1443      Value *Elt = Builder.CreateBitCast(Val, Ty);
1444      Elt = EmitNeonSplat(Elt, CI);
1445      Elt = Builder.CreateBitCast(Elt, Val->getType());
1446      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
1447    }
1448    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1449    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1450    return Builder.CreateStore(Ops[1], Ops[0]);
1451  }
1452  case ARM::BI__builtin_neon_vmax_v:
1453  case ARM::BI__builtin_neon_vmaxq_v:
1454    Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
1455    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmax");
1456  case ARM::BI__builtin_neon_vmin_v:
1457  case ARM::BI__builtin_neon_vminq_v:
1458    Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
1459    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmin");
1460  case ARM::BI__builtin_neon_vmlal_lane_v: {
1461    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1462    Ops[2] = Builder.CreateBitCast(Ops[2], DTy);
1463    Ops[2] = EmitNeonSplat(Ops[2], cast<Constant>(Ops[3]));
1464  }
1465  case ARM::BI__builtin_neon_vmlal_v: {
1466    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1467    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1468    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
1469    Ops[2] = Builder.CreateBitCast(Ops[2], DTy);
1470    if (usgn) {
1471      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1472      Ops[2] = Builder.CreateZExt(Ops[2], Ty);
1473    } else {
1474      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1475      Ops[2] = Builder.CreateSExt(Ops[2], Ty);
1476    }
1477    Ops[1] = Builder.CreateMul(Ops[1], Ops[2]);
1478    return Builder.CreateAdd(Ops[0], Ops[1], "vmlal");
1479  }
1480  case ARM::BI__builtin_neon_vmlsl_lane_v: {
1481    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1482    Ops[2] = Builder.CreateBitCast(Ops[2], DTy);
1483    Ops[2] = EmitNeonSplat(Ops[2], cast<Constant>(Ops[3]));
1484  }
1485  case ARM::BI__builtin_neon_vmlsl_v: {
1486    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1487    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1488    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
1489    Ops[2] = Builder.CreateBitCast(Ops[2], DTy);
1490    if (usgn) {
1491      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1492      Ops[2] = Builder.CreateZExt(Ops[2], Ty);
1493    } else {
1494      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1495      Ops[2] = Builder.CreateSExt(Ops[2], Ty);
1496    }
1497    Ops[1] = Builder.CreateMul(Ops[1], Ops[2]);
1498    return Builder.CreateSub(Ops[0], Ops[1], "vmlsl");
1499  }
1500  case ARM::BI__builtin_neon_vmovl_v: {
1501    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1502    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1503    if (usgn)
1504      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
1505    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
1506  }
1507  case ARM::BI__builtin_neon_vmovn_v: {
1508    const llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
1509    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
1510    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
1511  }
1512  case ARM::BI__builtin_neon_vmull_lane_v: {
1513    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1514    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
1515    Ops[1] = EmitNeonSplat(Ops[1], cast<Constant>(Ops[2]));
1516  }
1517  case ARM::BI__builtin_neon_vmull_v: {
1518    if (poly)
1519      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmullp, &Ty, 1),
1520                          Ops, "vmull");
1521    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1522    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1523    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
1524    if (usgn) {
1525      Ops[0] = Builder.CreateZExt(Ops[0], Ty);
1526      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1527    } else {
1528      Ops[0] = Builder.CreateSExt(Ops[0], Ty);
1529      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1530    }
1531    return Builder.CreateMul(Ops[0], Ops[1], "vmull");
1532  }
1533  case ARM::BI__builtin_neon_vpadal_v:
1534  case ARM::BI__builtin_neon_vpadalq_v:
1535    Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
1536    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpadal");
1537  case ARM::BI__builtin_neon_vpadd_v:
1538    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, &Ty, 1),
1539                        Ops, "vpadd");
1540  case ARM::BI__builtin_neon_vpaddl_v:
1541  case ARM::BI__builtin_neon_vpaddlq_v:
1542    Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
1543    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpaddl");
1544  case ARM::BI__builtin_neon_vpmax_v:
1545    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
1546    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmax");
1547  case ARM::BI__builtin_neon_vpmin_v:
1548    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
1549    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmin");
1550  case ARM::BI__builtin_neon_vqabs_v:
1551  case ARM::BI__builtin_neon_vqabsq_v:
1552    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, &Ty, 1),
1553                        Ops, "vqabs");
1554  case ARM::BI__builtin_neon_vqadd_v:
1555  case ARM::BI__builtin_neon_vqaddq_v:
1556    Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
1557    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqadd");
1558  case ARM::BI__builtin_neon_vqdmlal_lane_v:
1559    splat = true;
1560  case ARM::BI__builtin_neon_vqdmlal_v:
1561    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, &Ty, 1),
1562                        Ops, "vqdmlal", splat);
1563  case ARM::BI__builtin_neon_vqdmlsl_lane_v:
1564    splat = true;
1565  case ARM::BI__builtin_neon_vqdmlsl_v:
1566    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, &Ty, 1),
1567                        Ops, "vqdmlsl", splat);
1568  case ARM::BI__builtin_neon_vqdmulh_lane_v:
1569  case ARM::BI__builtin_neon_vqdmulhq_lane_v:
1570    splat = true;
1571  case ARM::BI__builtin_neon_vqdmulh_v:
1572  case ARM::BI__builtin_neon_vqdmulhq_v:
1573    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, &Ty, 1),
1574                        Ops, "vqdmulh", splat);
1575  case ARM::BI__builtin_neon_vqdmull_lane_v:
1576    splat = true;
1577  case ARM::BI__builtin_neon_vqdmull_v:
1578    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, &Ty, 1),
1579                        Ops, "vqdmull", splat);
1580  case ARM::BI__builtin_neon_vqmovn_v:
1581    Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
1582    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqmovn");
1583  case ARM::BI__builtin_neon_vqmovun_v:
1584    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, &Ty, 1),
1585                        Ops, "vqmovun");
1586  case ARM::BI__builtin_neon_vqneg_v:
1587    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, &Ty, 1),
1588                        Ops, "vqneg");
1589  case ARM::BI__builtin_neon_vqrdmulh_lane_v:
1590  case ARM::BI__builtin_neon_vqrdmulhq_lane_v:
1591    splat = true;
1592  case ARM::BI__builtin_neon_vqrdmulh_v:
1593  case ARM::BI__builtin_neon_vqrdmulhq_v:
1594    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, &Ty, 1),
1595                        Ops, "vqrdmulh", splat);
1596  case ARM::BI__builtin_neon_vqrshl_v:
1597  case ARM::BI__builtin_neon_vqrshlq_v:
1598    Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
1599    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqrshl");
1600  case ARM::BI__builtin_neon_vqrshrn_n_v:
1601    Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
1602    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqrshrn_n", false,
1603                        1, true);
1604  case ARM::BI__builtin_neon_vqrshrun_n_v:
1605    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, &Ty, 1),
1606                        Ops, "vqrshrun_n", false, 1, true);
1607  case ARM::BI__builtin_neon_vqshl_v:
1608  case ARM::BI__builtin_neon_vqshlq_v:
1609    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
1610    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshl");
1611  case ARM::BI__builtin_neon_vqshl_n_v:
1612  case ARM::BI__builtin_neon_vqshlq_n_v:
1613    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
1614    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshl_n", false,
1615                        1, false);
1616  case ARM::BI__builtin_neon_vqshlu_n_v:
1617  case ARM::BI__builtin_neon_vqshluq_n_v:
1618    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, &Ty, 1),
1619                        Ops, "vqshlu_n", false, 1, false);
1620  case ARM::BI__builtin_neon_vqshrn_n_v:
1621    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
1622    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshrn_n", false,
1623                        1, true);
1624  case ARM::BI__builtin_neon_vqshrun_n_v:
1625    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, &Ty, 1),
1626                        Ops, "vqshrun_n", false, 1, true);
1627  case ARM::BI__builtin_neon_vqsub_v:
1628  case ARM::BI__builtin_neon_vqsubq_v:
1629    Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
1630    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqsub");
1631  case ARM::BI__builtin_neon_vraddhn_v:
1632    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, &Ty, 1),
1633                        Ops, "vraddhn");
1634  case ARM::BI__builtin_neon_vrecpe_v:
1635  case ARM::BI__builtin_neon_vrecpeq_v:
1636    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, &Ty, 1),
1637                        Ops, "vrecpe");
1638  case ARM::BI__builtin_neon_vrecps_v:
1639  case ARM::BI__builtin_neon_vrecpsq_v:
1640    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, &Ty, 1),
1641                        Ops, "vrecps");
1642  case ARM::BI__builtin_neon_vrhadd_v:
1643  case ARM::BI__builtin_neon_vrhaddq_v:
1644    Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
1645    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrhadd");
1646  case ARM::BI__builtin_neon_vrshl_v:
1647  case ARM::BI__builtin_neon_vrshlq_v:
1648    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1649    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrshl");
1650  case ARM::BI__builtin_neon_vrshrn_n_v:
1651    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, &Ty, 1),
1652                        Ops, "vrshrn_n", false, 1, true);
1653  case ARM::BI__builtin_neon_vrshr_n_v:
1654  case ARM::BI__builtin_neon_vrshrq_n_v:
1655    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1656    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrshr_n", false,
1657                        1, true);
1658  case ARM::BI__builtin_neon_vrsqrte_v:
1659  case ARM::BI__builtin_neon_vrsqrteq_v:
1660    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, &Ty, 1),
1661                        Ops, "vrsqrte");
1662  case ARM::BI__builtin_neon_vrsqrts_v:
1663  case ARM::BI__builtin_neon_vrsqrtsq_v:
1664    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, &Ty, 1),
1665                        Ops, "vrsqrts");
1666  case ARM::BI__builtin_neon_vrsra_n_v:
1667  case ARM::BI__builtin_neon_vrsraq_n_v:
1668    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1669    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1670    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
1671    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1672    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, &Ty, 1), Ops[1], Ops[2]);
1673    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
1674  case ARM::BI__builtin_neon_vrsubhn_v:
1675    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, &Ty, 1),
1676                        Ops, "vrsubhn");
1677  case ARM::BI__builtin_neon_vset_lane_i8:
1678  case ARM::BI__builtin_neon_vset_lane_i16:
1679  case ARM::BI__builtin_neon_vset_lane_i32:
1680  case ARM::BI__builtin_neon_vset_lane_i64:
1681  case ARM::BI__builtin_neon_vset_lane_f32:
1682  case ARM::BI__builtin_neon_vsetq_lane_i8:
1683  case ARM::BI__builtin_neon_vsetq_lane_i16:
1684  case ARM::BI__builtin_neon_vsetq_lane_i32:
1685  case ARM::BI__builtin_neon_vsetq_lane_i64:
1686  case ARM::BI__builtin_neon_vsetq_lane_f32:
1687    Ops.push_back(EmitScalarExpr(E->getArg(2)));
1688    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
1689  case ARM::BI__builtin_neon_vshl_v:
1690  case ARM::BI__builtin_neon_vshlq_v:
1691    Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
1692    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vshl");
1693  case ARM::BI__builtin_neon_vshll_n_v:
1694    Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
1695    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vshll", false, 1);
1696  case ARM::BI__builtin_neon_vshl_n_v:
1697  case ARM::BI__builtin_neon_vshlq_n_v:
1698    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1699    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], "vshl_n");
1700  case ARM::BI__builtin_neon_vshrn_n_v:
1701    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, &Ty, 1),
1702                        Ops, "vshrn_n", false, 1, true);
1703  case ARM::BI__builtin_neon_vshr_n_v:
1704  case ARM::BI__builtin_neon_vshrq_n_v:
1705    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1706    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1707    if (usgn)
1708      return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
1709    else
1710      return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
1711  case ARM::BI__builtin_neon_vsri_n_v:
1712  case ARM::BI__builtin_neon_vsriq_n_v:
1713    poly = true;
1714  case ARM::BI__builtin_neon_vsli_n_v:
1715  case ARM::BI__builtin_neon_vsliq_n_v:
1716    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, poly);
1717    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, &Ty, 1),
1718                        Ops, "vsli_n");
1719  case ARM::BI__builtin_neon_vsra_n_v:
1720  case ARM::BI__builtin_neon_vsraq_n_v:
1721    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1722    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1723    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
1724    if (usgn)
1725      Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
1726    else
1727      Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
1728    return Builder.CreateAdd(Ops[0], Ops[1]);
1729  case ARM::BI__builtin_neon_vst1_v:
1730  case ARM::BI__builtin_neon_vst1q_v:
1731    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1732    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, &Ty, 1),
1733                        Ops, "");
1734  case ARM::BI__builtin_neon_vst1_lane_v:
1735  case ARM::BI__builtin_neon_vst1q_lane_v:
1736    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1737    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
1738    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1739    return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
1740  case ARM::BI__builtin_neon_vst2_v:
1741  case ARM::BI__builtin_neon_vst2q_v:
1742    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1743    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, &Ty, 1),
1744                        Ops, "");
1745  case ARM::BI__builtin_neon_vst2_lane_v:
1746  case ARM::BI__builtin_neon_vst2q_lane_v:
1747    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1748    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, &Ty, 1),
1749                        Ops, "");
1750  case ARM::BI__builtin_neon_vst3_v:
1751  case ARM::BI__builtin_neon_vst3q_v:
1752    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1753    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, &Ty, 1),
1754                        Ops, "");
1755  case ARM::BI__builtin_neon_vst3_lane_v:
1756  case ARM::BI__builtin_neon_vst3q_lane_v:
1757    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1758    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, &Ty, 1),
1759                        Ops, "");
1760  case ARM::BI__builtin_neon_vst4_v:
1761  case ARM::BI__builtin_neon_vst4q_v:
1762    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1763    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, &Ty, 1),
1764                        Ops, "");
1765  case ARM::BI__builtin_neon_vst4_lane_v:
1766  case ARM::BI__builtin_neon_vst4q_lane_v:
1767    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1768    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, &Ty, 1),
1769                        Ops, "");
1770  case ARM::BI__builtin_neon_vsubhn_v:
1771    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, &Ty, 1),
1772                        Ops, "vsubhn");
1773  case ARM::BI__builtin_neon_vsubl_v: {
1774    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1775    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1776    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
1777    if (usgn) {
1778      Ops[0] = Builder.CreateZExt(Ops[0], Ty);
1779      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1780    } else {
1781      Ops[0] = Builder.CreateSExt(Ops[0], Ty);
1782      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1783    }
1784    return Builder.CreateSub(Ops[0], Ops[1], "vsubl");
1785  }
1786  case ARM::BI__builtin_neon_vsubw_v: {
1787    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1788    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1789    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
1790    if (usgn)
1791      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1792    else
1793      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1794    return Builder.CreateSub(Ops[0], Ops[1], "vsubw");
1795  }
1796  case ARM::BI__builtin_neon_vtbl1_v:
1797    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
1798                        Ops, "vtbl1");
1799  case ARM::BI__builtin_neon_vtbl2_v:
1800    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
1801                        Ops, "vtbl2");
1802  case ARM::BI__builtin_neon_vtbl3_v:
1803    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
1804                        Ops, "vtbl3");
1805  case ARM::BI__builtin_neon_vtbl4_v:
1806    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
1807                        Ops, "vtbl4");
1808  case ARM::BI__builtin_neon_vtbx1_v:
1809    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
1810                        Ops, "vtbx1");
1811  case ARM::BI__builtin_neon_vtbx2_v:
1812    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
1813                        Ops, "vtbx2");
1814  case ARM::BI__builtin_neon_vtbx3_v:
1815    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
1816                        Ops, "vtbx3");
1817  case ARM::BI__builtin_neon_vtbx4_v:
1818    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
1819                        Ops, "vtbx4");
1820  case ARM::BI__builtin_neon_vtst_v:
1821  case ARM::BI__builtin_neon_vtstq_v: {
1822    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1823    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1824    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
1825    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
1826                                ConstantAggregateZero::get(Ty));
1827    return Builder.CreateSExt(Ops[0], Ty, "vtst");
1828  }
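  // vtrn returns two vectors through the pointer in Ops[0]; each one is a
  // shufflevector of the two inputs.  For 4-element vectors the masks are
  // <0, 4, 2, 6> and <1, 5, 3, 7>, i.e. a 2x2 element transpose.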
1829  case ARM::BI__builtin_neon_vtrn_v:
1830  case ARM::BI__builtin_neon_vtrnq_v: {
1831    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1832    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1833    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1834    Value *SV;
1835
1836    for (unsigned vi = 0; vi != 2; ++vi) {
1837      SmallVector<Constant*, 16> Indices;
1838      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1839        Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
1840        Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
1841      }
1842      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1843      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1844      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
1845      SV = Builder.CreateStore(SV, Addr);
1846    }
1847    return SV;
1848  }
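  // vuzp de-interleaves: the first result takes the even elements of the
  // concatenated inputs and the second the odd ones, e.g. masks <0, 2, 4, 6>
  // and <1, 3, 5, 7> for 4-element vectors.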
1849  case ARM::BI__builtin_neon_vuzp_v:
1850  case ARM::BI__builtin_neon_vuzpq_v: {
1851    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1852    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1853    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1854    Value *SV;
1855
1856    for (unsigned vi = 0; vi != 2; ++vi) {
1857      SmallVector<Constant*, 16> Indices;
1858      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1859        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
1860
1861      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1862      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1863      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
1864      SV = Builder.CreateStore(SV, Addr);
1865    }
1866    return SV;
1867  }
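  // vzip interleaves the two inputs half by half, e.g. masks <0, 4, 1, 5> and
  // <2, 6, 3, 7> for 4-element vectors.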
1868  case ARM::BI__builtin_neon_vzip_v:
1869  case ARM::BI__builtin_neon_vzipq_v: {
1870    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1871    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1872    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1873    Value *SV;
1874
1875    for (unsigned vi = 0; vi != 2; ++vi) {
1876      SmallVector<Constant*, 16> Indices;
1877      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1878        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
1879        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
1880      }
1881      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1882      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1883      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
1884      SV = Builder.CreateStore(SV, Addr);
1885    }
1886    return SV;
1887  }
1888  }
1889}
1890
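/// BuildVector - Build a vector from a list of scalar operands.  If all the
/// operands are constants the result is a ConstantVector; otherwise the
/// vector is assembled with a chain of insertelement instructions.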
1891llvm::Value *CodeGenFunction::
1892BuildVector(const llvm::SmallVectorImpl<llvm::Value*> &Ops) {
1893  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
1894         "Not a power-of-two sized vector!");
1895  bool AllConstants = true;
1896  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
1897    AllConstants &= isa<Constant>(Ops[i]);
1898
1899  // If this is a constant vector, create a ConstantVector.
1900  if (AllConstants) {
1901    std::vector<llvm::Constant*> CstOps;
1902    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
1903      CstOps.push_back(cast<Constant>(Ops[i]));
1904    return llvm::ConstantVector::get(CstOps);
1905  }
1906
1907  // Otherwise, insertelement the values to build the vector.
1908  Value *Result =
1909    llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
1910
1911  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
1912    Result = Builder.CreateInsertElement(Result, Ops[i],
1913               llvm::ConstantInt::get(llvm::Type::getInt32Ty(VMContext), i));
1914
1915  return Result;
1916}
1917
1918Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1919                                           const CallExpr *E) {
1920  llvm::SmallVector<Value*, 4> Ops;
1921
1922  // Find out if any arguments are required to be integer constant expressions.
1923  unsigned ICEArguments = 0;
1924  ASTContext::GetBuiltinTypeError Error;
1925  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
1926  assert(Error == ASTContext::GE_None && "Should not codegen an error");
1927
1928  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
1929    // If this is a normal argument, just emit it as a scalar.
1930    if ((ICEArguments & (1 << i)) == 0) {
1931      Ops.push_back(EmitScalarExpr(E->getArg(i)));
1932      continue;
1933    }
1934
1935    // If this is required to be a constant, constant fold it so that we know
1936    // that the generated intrinsic gets a ConstantInt.
1937    llvm::APSInt Result;
1938    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
1939    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
1940    Ops.push_back(llvm::ConstantInt::get(VMContext, Result));
1941  }
1942
1943  switch (BuiltinID) {
1944  default: return 0;
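  // SSE2 shifts by an immediate are lowered to the corresponding
  // shift-by-vector intrinsics: the i32 count is zero-extended to i64,
  // inserted into lane 0 of a v2i64, and bitcast to the operand's type.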
1945  case X86::BI__builtin_ia32_pslldi128:
1946  case X86::BI__builtin_ia32_psllqi128:
1947  case X86::BI__builtin_ia32_psllwi128:
1948  case X86::BI__builtin_ia32_psradi128:
1949  case X86::BI__builtin_ia32_psrawi128:
1950  case X86::BI__builtin_ia32_psrldi128:
1951  case X86::BI__builtin_ia32_psrlqi128:
1952  case X86::BI__builtin_ia32_psrlwi128: {
1953    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
1954    const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
1955    llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
1956    Ops[1] = Builder.CreateInsertElement(llvm::UndefValue::get(Ty),
1957                                         Ops[1], Zero, "insert");
1958    Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType(), "bitcast");
1959    const char *name = 0;
1960    Intrinsic::ID ID = Intrinsic::not_intrinsic;
1961
1962    switch (BuiltinID) {
1963    default: assert(0 && "Unsupported shift intrinsic!");
1964    case X86::BI__builtin_ia32_pslldi128:
1965      name = "pslldi";
1966      ID = Intrinsic::x86_sse2_psll_d;
1967      break;
1968    case X86::BI__builtin_ia32_psllqi128:
1969      name = "psllqi";
1970      ID = Intrinsic::x86_sse2_psll_q;
1971      break;
1972    case X86::BI__builtin_ia32_psllwi128:
1973      name = "psllwi";
1974      ID = Intrinsic::x86_sse2_psll_w;
1975      break;
1976    case X86::BI__builtin_ia32_psradi128:
1977      name = "psradi";
1978      ID = Intrinsic::x86_sse2_psra_d;
1979      break;
1980    case X86::BI__builtin_ia32_psrawi128:
1981      name = "psrawi";
1982      ID = Intrinsic::x86_sse2_psra_w;
1983      break;
1984    case X86::BI__builtin_ia32_psrldi128:
1985      name = "psrldi";
1986      ID = Intrinsic::x86_sse2_psrl_d;
1987      break;
1988    case X86::BI__builtin_ia32_psrlqi128:
1989      name = "psrlqi";
1990      ID = Intrinsic::x86_sse2_psrl_q;
1991      break;
1992    case X86::BI__builtin_ia32_psrlwi128:
1993      name = "psrlwi";
1994      ID = Intrinsic::x86_sse2_psrl_w;
1995      break;
1996    }
1997    llvm::Function *F = CGM.getIntrinsic(ID);
1998    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
1999  }
2000  case X86::BI__builtin_ia32_vec_init_v8qi:
2001  case X86::BI__builtin_ia32_vec_init_v4hi:
2002  case X86::BI__builtin_ia32_vec_init_v2si:
2003    return Builder.CreateBitCast(BuildVector(Ops),
2004                                 llvm::Type::getX86_MMXTy(VMContext));
2005  case X86::BI__builtin_ia32_vec_ext_v2si:
2006    return Builder.CreateExtractElement(Ops[0],
2007                                  llvm::ConstantInt::get(Ops[1]->getType(), 0));
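  // The MMX shift-by-immediate builtins are handled the same way, except the
  // count is carried in a v1i64 operand of the x86_mmx shift intrinsics.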
2008  case X86::BI__builtin_ia32_pslldi:
2009  case X86::BI__builtin_ia32_psllqi:
2010  case X86::BI__builtin_ia32_psllwi:
2011  case X86::BI__builtin_ia32_psradi:
2012  case X86::BI__builtin_ia32_psrawi:
2013  case X86::BI__builtin_ia32_psrldi:
2014  case X86::BI__builtin_ia32_psrlqi:
2015  case X86::BI__builtin_ia32_psrlwi: {
2016    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
2017    const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 1);
2018    Ops[1] = Builder.CreateBitCast(Ops[1], Ty, "bitcast");
2019    const char *name = 0;
2020    Intrinsic::ID ID = Intrinsic::not_intrinsic;
2021
2022    switch (BuiltinID) {
2023    default: assert(0 && "Unsupported shift intrinsic!");
2024    case X86::BI__builtin_ia32_pslldi:
2025      name = "pslldi";
2026      ID = Intrinsic::x86_mmx_psll_d;
2027      break;
2028    case X86::BI__builtin_ia32_psllqi:
2029      name = "psllqi";
2030      ID = Intrinsic::x86_mmx_psll_q;
2031      break;
2032    case X86::BI__builtin_ia32_psllwi:
2033      name = "psllwi";
2034      ID = Intrinsic::x86_mmx_psll_w;
2035      break;
2036    case X86::BI__builtin_ia32_psradi:
2037      name = "psradi";
2038      ID = Intrinsic::x86_mmx_psra_d;
2039      break;
2040    case X86::BI__builtin_ia32_psrawi:
2041      name = "psrawi";
2042      ID = Intrinsic::x86_mmx_psra_w;
2043      break;
2044    case X86::BI__builtin_ia32_psrldi:
2045      name = "psrldi";
2046      ID = Intrinsic::x86_mmx_psrl_d;
2047      break;
2048    case X86::BI__builtin_ia32_psrlqi:
2049      name = "psrlqi";
2050      ID = Intrinsic::x86_mmx_psrl_q;
2051      break;
2052    case X86::BI__builtin_ia32_psrlwi:
2053      name = "psrlwi";
2054      ID = Intrinsic::x86_mmx_psrl_w;
2055      break;
2056    }
2057    llvm::Function *F = CGM.getIntrinsic(ID);
2058    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
2059  }
2060  case X86::BI__builtin_ia32_cmpps: {
2061    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps);
2062    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpps");
2063  }
2064  case X86::BI__builtin_ia32_cmpss: {
2065    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ss);
2066    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpss");
2067  }
2068  case X86::BI__builtin_ia32_ldmxcsr: {
2069    const llvm::Type *PtrTy = llvm::Type::getInt8PtrTy(VMContext);
2070    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
2071    Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp");
2072    Builder.CreateStore(Ops[0], Tmp);
2073    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
2074                              Builder.CreateBitCast(Tmp, PtrTy));
2075  }
2076  case X86::BI__builtin_ia32_stmxcsr: {
2077    const llvm::Type *PtrTy = llvm::Type::getInt8PtrTy(VMContext);
2078    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
2079    Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp");
2080    One = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
2081                             Builder.CreateBitCast(Tmp, PtrTy));
2082    return Builder.CreateLoad(Tmp, "stmxcsr");
2083  }
2084  case X86::BI__builtin_ia32_cmppd: {
2085    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_pd);
2086    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmppd");
2087  }
2088  case X86::BI__builtin_ia32_cmpsd: {
2089    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_sd);
2090    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpsd");
2091  }
2092  case X86::BI__builtin_ia32_storehps:
2093  case X86::BI__builtin_ia32_storelps: {
2094    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
2095    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2096
2097    // Cast the value to v2i64.
2098    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
2099
2100    // Extract element 0 (storelps) or 1 (storehps).
2101    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
2102    llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
2103    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
2104
2105    // Cast the pointer to i64* and store.
2106    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
2107    return Builder.CreateStore(Ops[1], Ops[0]);
2108  }
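  // palignr concatenates the two operands (Ops[1] supplies the low bytes,
  // Ops[0] the high bytes) and extracts an 8-byte window at the byte offset
  // given by Ops[2].  While the window fits in the concatenation this is a
  // shufflevector, e.g. an offset of 3 uses the mask <3, 4, 5, 6, 7, 8, 9, 10>.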
2109  case X86::BI__builtin_ia32_palignr: {
2110    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2111
2112    // If palignr is shifting the pair of input vectors less than 9 bytes,
2113    // emit a shuffle instruction.
2114    if (shiftVal <= 8) {
2115      llvm::SmallVector<llvm::Constant*, 8> Indices;
2116      for (unsigned i = 0; i != 8; ++i)
2117        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
2118
2119      Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
2120      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2121    }
2122
2123    // If palignr is shifting the pair of input vectors more than 8 but less
2124    // than 16 bytes, emit a logical right shift of the destination.
2125    if (shiftVal < 16) {
2126      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
2127      const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);
2128
2129      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2130      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);
2131
2132      // Call the MMX psrl.q intrinsic to do the shift.
2133      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
2134      return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
2135    }
2136
2137    // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
2138    return llvm::Constant::getNullValue(ConvertType(E->getType()));
2139  }
2140  case X86::BI__builtin_ia32_palignr128: {
2141    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2142
2143    // If palignr is shifting the pair of input vectors less than 17 bytes,
2144    // emit a shuffle instruction.
2145    if (shiftVal <= 16) {
2146      llvm::SmallVector<llvm::Constant*, 16> Indices;
2147      for (unsigned i = 0; i != 16; ++i)
2148        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
2149
2150      Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
2151      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2152    }
2153
2154    // If palignr is shifting the pair of input vectors more than 16 but less
2155    // than 32 bytes, emit a logical right shift of the destination.
2156    if (shiftVal < 32) {
2157      const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2158
2159      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2160      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
2161
2162      // Call the SSE2 psrl.dq intrinsic to do the shift.
2163      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
2164      return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
2165    }
2166
2167    // If palignr is shifting the pair of vectors 32 bytes or more, emit zero.
2168    return llvm::Constant::getNullValue(ConvertType(E->getType()));
2169  }
2170  }
2171}
2172
2173Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
2174                                           const CallExpr *E) {
2175  llvm::SmallVector<Value*, 4> Ops;
2176
2177  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
2178    Ops.push_back(EmitScalarExpr(E->getArg(i)));
2179
2180  Intrinsic::ID ID = Intrinsic::not_intrinsic;
2181
2182  switch (BuiltinID) {
2183  default: return 0;
2184
2185  // vec_ld, vec_lvsl, vec_lvsr
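  // These builtins take (offset, pointer) while the AltiVec intrinsics take a
  // single byte address, so fold the offset into an i8* GEP and drop the
  // extra operand before calling the intrinsic.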
2186  case PPC::BI__builtin_altivec_lvx:
2187  case PPC::BI__builtin_altivec_lvxl:
2188  case PPC::BI__builtin_altivec_lvebx:
2189  case PPC::BI__builtin_altivec_lvehx:
2190  case PPC::BI__builtin_altivec_lvewx:
2191  case PPC::BI__builtin_altivec_lvsl:
2192  case PPC::BI__builtin_altivec_lvsr:
2193  {
2194    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::Type::getInt8PtrTy(VMContext));
2195
2196    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0], "tmp");
2197    Ops.pop_back();
2198
2199    switch (BuiltinID) {
2200    default: assert(0 && "Unsupported ld/lvsl/lvsr intrinsic!");
2201    case PPC::BI__builtin_altivec_lvx:
2202      ID = Intrinsic::ppc_altivec_lvx;
2203      break;
2204    case PPC::BI__builtin_altivec_lvxl:
2205      ID = Intrinsic::ppc_altivec_lvxl;
2206      break;
2207    case PPC::BI__builtin_altivec_lvebx:
2208      ID = Intrinsic::ppc_altivec_lvebx;
2209      break;
2210    case PPC::BI__builtin_altivec_lvehx:
2211      ID = Intrinsic::ppc_altivec_lvehx;
2212      break;
2213    case PPC::BI__builtin_altivec_lvewx:
2214      ID = Intrinsic::ppc_altivec_lvewx;
2215      break;
2216    case PPC::BI__builtin_altivec_lvsl:
2217      ID = Intrinsic::ppc_altivec_lvsl;
2218      break;
2219    case PPC::BI__builtin_altivec_lvsr:
2220      ID = Intrinsic::ppc_altivec_lvsr;
2221      break;
2222    }
2223    llvm::Function *F = CGM.getIntrinsic(ID);
2224    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "");
2225  }
2226
2227  // vec_st
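  // Likewise for the stores: the value to store stays in Ops[0], the offset
  // is folded into the byte address, and the extra pointer operand is dropped.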
2228  case PPC::BI__builtin_altivec_stvx:
2229  case PPC::BI__builtin_altivec_stvxl:
2230  case PPC::BI__builtin_altivec_stvebx:
2231  case PPC::BI__builtin_altivec_stvehx:
2232  case PPC::BI__builtin_altivec_stvewx:
2233  {
2234    Ops[2] = Builder.CreateBitCast(Ops[2], llvm::Type::getInt8PtrTy(VMContext));
2235    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1], "tmp");
2236    Ops.pop_back();
2237
2238    switch (BuiltinID) {
2239    default: assert(0 && "Unsupported st intrinsic!");
2240    case PPC::BI__builtin_altivec_stvx:
2241      ID = Intrinsic::ppc_altivec_stvx;
2242      break;
2243    case PPC::BI__builtin_altivec_stvxl:
2244      ID = Intrinsic::ppc_altivec_stvxl;
2245      break;
2246    case PPC::BI__builtin_altivec_stvebx:
2247      ID = Intrinsic::ppc_altivec_stvebx;
2248      break;
2249    case PPC::BI__builtin_altivec_stvehx:
2250      ID = Intrinsic::ppc_altivec_stvehx;
2251      break;
2252    case PPC::BI__builtin_altivec_stvewx:
2253      ID = Intrinsic::ppc_altivec_stvewx;
2254      break;
2255    }
2256    llvm::Function *F = CGM.getIntrinsic(ID);
2257    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "");
2258  }
2259  }
2260  return 0;
2261}
2262