CGBuiltin.cpp revision 2a674e8e443b7a3e77957078248fb52b3b1ec321
1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This contains code to emit Builtin calls as LLVM code.
11//
12//===----------------------------------------------------------------------===//
13
14#include "TargetInfo.h"
15#include "CodeGenFunction.h"
16#include "CodeGenModule.h"
17#include "CGObjCRuntime.h"
18#include "clang/Basic/TargetInfo.h"
19#include "clang/AST/APValue.h"
20#include "clang/AST/ASTContext.h"
21#include "clang/AST/Decl.h"
22#include "clang/Basic/TargetBuiltins.h"
23#include "llvm/Intrinsics.h"
24#include "llvm/Target/TargetData.h"
25using namespace clang;
26using namespace CodeGen;
27using namespace llvm;
28
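// A rough usage sketch: a full barrier on ordinary cached memory is requested
// as
//   EmitMemoryBarrier(CGF, /*LoadLoad*/true, /*LoadStore*/true,
//                     /*StoreLoad*/true, /*StoreStore*/true, /*Device*/false);
// (with CGF being the current CodeGenFunction), which lowers to a single call
// to the llvm.memory.barrier intrinsic taking five i1 flags.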
29static void EmitMemoryBarrier(CodeGenFunction &CGF,
30                              bool LoadLoad, bool LoadStore,
31                              bool StoreLoad, bool StoreStore,
32                              bool Device) {
33  Value *True = llvm::ConstantInt::getTrue(CGF.getLLVMContext());
34  Value *False = llvm::ConstantInt::getFalse(CGF.getLLVMContext());
35  Value *C[5] = { LoadLoad ? True : False,
36                  LoadStore ? True : False,
37                  StoreLoad ? True : False,
38                  StoreStore ? True : False,
39                  Device ? True : False };
40  CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::memory_barrier),
41                         C, C + 5);
42}
43
44static Value *EmitCastToInt(CodeGenFunction &CGF,
45                            const llvm::Type *ToType, Value *Val) {
46  if (Val->getType()->isPointerTy()) {
47    return CGF.Builder.CreatePtrToInt(Val, ToType);
48  }
49  assert(Val->getType()->isIntegerTy() &&
50         "Used a non-integer and non-pointer type with atomic builtin");
51  assert(Val->getType()->getScalarSizeInBits() <=
52         ToType->getScalarSizeInBits() && "Integer type too small");
53  return CGF.Builder.CreateSExtOrBitCast(Val, ToType);
54}
55
56static Value *EmitCastFromInt(CodeGenFunction &CGF, QualType ToQualType,
57                              Value *Val) {
58  const llvm::Type *ToType = CGF.ConvertType(ToQualType);
59  if (ToType->isPointerTy()) {
60    return CGF.Builder.CreateIntToPtr(Val, ToType);
61  }
62  assert(Val->getType()->isIntegerTy() &&
63         "Used a non-integer and non-pointer type with atomic builtin");
64  assert(Val->getType()->getScalarSizeInBits() >=
65         ToType->getScalarSizeInBits() && "Integer type too small");
66  return CGF.Builder.CreateTruncOrBitCast(Val, ToType);
67}
68
69// The atomic builtins are also full memory barriers. This is a utility for
70// wrapping a call to the builtins with memory barriers.
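// Roughly, for one of the fetch-and-add builtins the emitted IR has the shape
// (using the overloaded llvm.atomic.* spellings in use at this revision,
// with %ptr and %val as placeholder names):
//   call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
//   %old = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %ptr, i32 %val)
//   call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)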
71static Value *EmitCallWithBarrier(CodeGenFunction &CGF, Value *Fn,
72                                  Value **ArgBegin, Value **ArgEnd) {
73  // FIXME: We need a target hook for whether this applies to device memory or
74  // not.
75  bool Device = true;
76
77  // Create barriers both before and after the call.
78  EmitMemoryBarrier(CGF, true, true, true, true, Device);
79  Value *Result = CGF.Builder.CreateCall(Fn, ArgBegin, ArgEnd);
80  EmitMemoryBarrier(CGF, true, true, true, true, Device);
81  return Result;
82}
83
84/// Utility to insert an atomic instruction based on Intrinsic::ID
85/// and the expression node.
86static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
87                               Intrinsic::ID Id, const CallExpr *E) {
88  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
89  unsigned AddrSpace =
90    cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
91  const llvm::Type *ValueType =
92    llvm::IntegerType::get(CGF.getLLVMContext(),
93                           CGF.getContext().getTypeSize(E->getType()));
94  const llvm::Type *PtrType = ValueType->getPointerTo(AddrSpace);
95  const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
96  Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes, 2);
97
98  Value *Args[2] = { CGF.Builder.CreateBitCast(DestPtr, PtrType),
99                     EmitCastToInt(CGF, ValueType,
100                                   CGF.EmitScalarExpr(E->getArg(1))) };
101  return RValue::get(EmitCastFromInt(CGF, E->getType(),
102                                     EmitCallWithBarrier(CGF, AtomF, Args,
103                                                         Args + 2)));
104}
105
106/// Utility to insert an atomic instruction based on Intrinsic::ID and
107/// the expression node, where the return value is the result of the
108/// operation.
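/// For example, __sync_add_and_fetch(p, v) is lowered as the corresponding
/// fetch-and-add followed by re-applying the addition, which is equivalent to
/// __sync_fetch_and_add(p, v) + v; the sub/and/or/xor forms work the same way.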
109static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
110                                   Intrinsic::ID Id, const CallExpr *E,
111                                   Instruction::BinaryOps Op) {
112  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
113  unsigned AddrSpace =
114    cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
115
116  const llvm::Type *ValueType =
117    llvm::IntegerType::get(CGF.getLLVMContext(),
118                           CGF.getContext().getTypeSize(E->getType()));
119  const llvm::Type *PtrType = ValueType->getPointerTo(AddrSpace);
120  const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
121  Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes, 2);
122
123  Value *Args[2] = { CGF.Builder.CreateBitCast(DestPtr, PtrType),
124                     EmitCastToInt(CGF, ValueType,
125                                   CGF.EmitScalarExpr(E->getArg(1))) };
126  Value *Result = EmitCallWithBarrier(CGF, AtomF, Args, Args + 2);
127  return RValue::get(EmitCastFromInt(CGF, E->getType(),
128                                     CGF.Builder.CreateBinOp(Op, Result,
129                                                             Args[1])));
130}
131
132/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
133/// which must be a scalar floating point type.
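/// For a 'double' argument, for instance, this emits an ordinary call to the
/// libm "fabs" declaration (roughly "%abs = call double @fabs(double %x)")
/// rather than an LLVM intrinsic; the prototype is built from V's own type.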
134static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
135  const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
136  assert(ValTyP && "isn't scalar fp type!");
137
138  StringRef FnName;
139  switch (ValTyP->getKind()) {
140  default: assert(0 && "Isn't a scalar fp type!");
141  case BuiltinType::Float:      FnName = "fabsf"; break;
142  case BuiltinType::Double:     FnName = "fabs"; break;
143  case BuiltinType::LongDouble: FnName = "fabsl"; break;
144  }
145
146  // The prototype is something that takes and returns whatever V's type is.
147  std::vector<const llvm::Type*> Args;
148  Args.push_back(V->getType());
149  llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), Args, false);
150  llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);
151
152  return CGF.Builder.CreateCall(Fn, V, "abs");
153}
154
155RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
156                                        unsigned BuiltinID, const CallExpr *E) {
157  // See if we can constant fold this builtin.  If so, don't emit it at all.
158  Expr::EvalResult Result;
159  if (E->Evaluate(Result, CGM.getContext())) {
160    if (Result.Val.isInt())
161      return RValue::get(llvm::ConstantInt::get(VMContext,
162                                                Result.Val.getInt()));
163    else if (Result.Val.isFloat())
164      return RValue::get(ConstantFP::get(VMContext, Result.Val.getFloat()));
165  }
166
167  switch (BuiltinID) {
168  default: break;  // Handle intrinsics and libm functions below.
169  case Builtin::BI__builtin___CFStringMakeConstantString:
170  case Builtin::BI__builtin___NSStringMakeConstantString:
171    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
172  case Builtin::BI__builtin_stdarg_start:
173  case Builtin::BI__builtin_va_start:
174  case Builtin::BI__builtin_va_end: {
175    Value *ArgValue = EmitVAListRef(E->getArg(0));
176    const llvm::Type *DestType = llvm::Type::getInt8PtrTy(VMContext);
177    if (ArgValue->getType() != DestType)
178      ArgValue = Builder.CreateBitCast(ArgValue, DestType,
179                                       ArgValue->getName().data());
180
181    Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
182      Intrinsic::vaend : Intrinsic::vastart;
183    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
184  }
185  case Builtin::BI__builtin_va_copy: {
186    Value *DstPtr = EmitVAListRef(E->getArg(0));
187    Value *SrcPtr = EmitVAListRef(E->getArg(1));
188
189    const llvm::Type *Type = llvm::Type::getInt8PtrTy(VMContext);
190
191    DstPtr = Builder.CreateBitCast(DstPtr, Type);
192    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
193    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
194                                           DstPtr, SrcPtr));
195  }
196  case Builtin::BI__builtin_abs: {
197    Value *ArgValue = EmitScalarExpr(E->getArg(0));
198
199    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
200    Value *CmpResult =
201      Builder.CreateICmpSGE(ArgValue,
202                            llvm::Constant::getNullValue(ArgValue->getType()),
203                            "abscond");
204    Value *Result =
205      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
206
207    return RValue::get(Result);
208  }
209  case Builtin::BI__builtin_ctz:
210  case Builtin::BI__builtin_ctzl:
211  case Builtin::BI__builtin_ctzll: {
212    Value *ArgValue = EmitScalarExpr(E->getArg(0));
213
214    const llvm::Type *ArgType = ArgValue->getType();
215    Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1);
216
217    const llvm::Type *ResultType = ConvertType(E->getType());
218    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
219    if (Result->getType() != ResultType)
220      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
221                                     "cast");
222    return RValue::get(Result);
223  }
224  case Builtin::BI__builtin_clz:
225  case Builtin::BI__builtin_clzl:
226  case Builtin::BI__builtin_clzll: {
227    Value *ArgValue = EmitScalarExpr(E->getArg(0));
228
229    const llvm::Type *ArgType = ArgValue->getType();
230    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, &ArgType, 1);
231
232    const llvm::Type *ResultType = ConvertType(E->getType());
233    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
234    if (Result->getType() != ResultType)
235      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
236                                     "cast");
237    return RValue::get(Result);
238  }
239  case Builtin::BI__builtin_ffs:
240  case Builtin::BI__builtin_ffsl:
241  case Builtin::BI__builtin_ffsll: {
242    // ffs(x) -> x ? cttz(x) + 1 : 0
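    // For example, ffs(8) == 4 because cttz(8) == 3, and the select below
    // makes ffs(0) return 0, as required.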
243    Value *ArgValue = EmitScalarExpr(E->getArg(0));
244
245    const llvm::Type *ArgType = ArgValue->getType();
246    Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1);
247
248    const llvm::Type *ResultType = ConvertType(E->getType());
249    Value *Tmp = Builder.CreateAdd(Builder.CreateCall(F, ArgValue, "tmp"),
250                                   llvm::ConstantInt::get(ArgType, 1), "tmp");
251    Value *Zero = llvm::Constant::getNullValue(ArgType);
252    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
253    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
254    if (Result->getType() != ResultType)
255      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
256                                     "cast");
257    return RValue::get(Result);
258  }
259  case Builtin::BI__builtin_parity:
260  case Builtin::BI__builtin_parityl:
261  case Builtin::BI__builtin_parityll: {
262    // parity(x) -> ctpop(x) & 1
263    Value *ArgValue = EmitScalarExpr(E->getArg(0));
264
265    const llvm::Type *ArgType = ArgValue->getType();
266    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, &ArgType, 1);
267
268    const llvm::Type *ResultType = ConvertType(E->getType());
269    Value *Tmp = Builder.CreateCall(F, ArgValue, "tmp");
270    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1),
271                                      "tmp");
272    if (Result->getType() != ResultType)
273      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
274                                     "cast");
275    return RValue::get(Result);
276  }
277  case Builtin::BI__builtin_popcount:
278  case Builtin::BI__builtin_popcountl:
279  case Builtin::BI__builtin_popcountll: {
280    Value *ArgValue = EmitScalarExpr(E->getArg(0));
281
282    const llvm::Type *ArgType = ArgValue->getType();
283    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, &ArgType, 1);
284
285    const llvm::Type *ResultType = ConvertType(E->getType());
286    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
287    if (Result->getType() != ResultType)
288      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
289                                     "cast");
290    return RValue::get(Result);
291  }
292  case Builtin::BI__builtin_expect: {
293    // FIXME: pass expect through to LLVM
294    if (E->getArg(1)->HasSideEffects(getContext()))
295      (void)EmitScalarExpr(E->getArg(1));
296    return RValue::get(EmitScalarExpr(E->getArg(0)));
297  }
298  case Builtin::BI__builtin_bswap32:
299  case Builtin::BI__builtin_bswap64: {
300    Value *ArgValue = EmitScalarExpr(E->getArg(0));
301    const llvm::Type *ArgType = ArgValue->getType();
302    Value *F = CGM.getIntrinsic(Intrinsic::bswap, &ArgType, 1);
303    return RValue::get(Builder.CreateCall(F, ArgValue, "tmp"));
304  }
305  case Builtin::BI__builtin_object_size: {
306    // We pass this builtin onto the optimizer so that it can
307    // figure out the object size in more complex cases.
308    const llvm::Type *ResType[] = {
309      ConvertType(E->getType())
310    };
311
312    // LLVM only supports 0 and 2, so make sure that we pass that along
313    // as a boolean.
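    // For reference, GCC's __builtin_object_size types 0/1 request a maximum
    // remaining size and types 2/3 a minimum, so only bit 1 of the constant
    // matters to the intrinsic below.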
314    Value *Ty = EmitScalarExpr(E->getArg(1));
315    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
316    assert(CI);
317    uint64_t val = CI->getZExtValue();
318    CI = ConstantInt::get(llvm::Type::getInt1Ty(VMContext), (val & 0x2) >> 1);
319
320    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType, 1);
321    return RValue::get(Builder.CreateCall2(F,
322                                           EmitScalarExpr(E->getArg(0)),
323                                           CI));
324  }
325  case Builtin::BI__builtin_prefetch: {
326    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
327    // FIXME: Technically these constants should be of type 'int', yes?
328    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
329      llvm::ConstantInt::get(Int32Ty, 0);
330    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
331      llvm::ConstantInt::get(Int32Ty, 3);
332    Value *F = CGM.getIntrinsic(Intrinsic::prefetch, 0, 0);
333    return RValue::get(Builder.CreateCall3(F, Address, RW, Locality));
334  }
335  case Builtin::BI__builtin_trap: {
336    Value *F = CGM.getIntrinsic(Intrinsic::trap, 0, 0);
337    return RValue::get(Builder.CreateCall(F));
338  }
339  case Builtin::BI__builtin_unreachable: {
340    if (CatchUndefined && HaveInsertPoint())
341      EmitBranch(getTrapBB());
342    Value *V = Builder.CreateUnreachable();
343    Builder.ClearInsertionPoint();
344    return RValue::get(V);
345  }
346
347  case Builtin::BI__builtin_powi:
348  case Builtin::BI__builtin_powif:
349  case Builtin::BI__builtin_powil: {
350    Value *Base = EmitScalarExpr(E->getArg(0));
351    Value *Exponent = EmitScalarExpr(E->getArg(1));
352    const llvm::Type *ArgType = Base->getType();
353    Value *F = CGM.getIntrinsic(Intrinsic::powi, &ArgType, 1);
354    return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp"));
355  }
356
357  case Builtin::BI__builtin_isgreater:
358  case Builtin::BI__builtin_isgreaterequal:
359  case Builtin::BI__builtin_isless:
360  case Builtin::BI__builtin_islessequal:
361  case Builtin::BI__builtin_islessgreater:
362  case Builtin::BI__builtin_isunordered: {
363    // Ordered comparisons: we know the arguments to these are matching scalar
364    // floating point values.
365    Value *LHS = EmitScalarExpr(E->getArg(0));
366    Value *RHS = EmitScalarExpr(E->getArg(1));
367
368    switch (BuiltinID) {
369    default: assert(0 && "Unknown ordered comparison");
370    case Builtin::BI__builtin_isgreater:
371      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
372      break;
373    case Builtin::BI__builtin_isgreaterequal:
374      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
375      break;
376    case Builtin::BI__builtin_isless:
377      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
378      break;
379    case Builtin::BI__builtin_islessequal:
380      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
381      break;
382    case Builtin::BI__builtin_islessgreater:
383      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
384      break;
385    case Builtin::BI__builtin_isunordered:
386      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
387      break;
388    }
389    // ZExt bool to int type.
390    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()),
391                                          "tmp"));
392  }
393  case Builtin::BI__builtin_isnan: {
394    Value *V = EmitScalarExpr(E->getArg(0));
395    V = Builder.CreateFCmpUNO(V, V, "cmp");
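    // An unordered self-comparison is true exactly when the operand is a NaN.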
396    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp"));
397  }
398
399  case Builtin::BI__builtin_isinf: {
400    // isinf(x) --> fabs(x) == infinity
401    Value *V = EmitScalarExpr(E->getArg(0));
402    V = EmitFAbs(*this, V, E->getArg(0)->getType());
403
404    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
405    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp"));
406  }
407
408  // TODO: BI__builtin_isinf_sign
409  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0
410
411  case Builtin::BI__builtin_isnormal: {
412    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
413    Value *V = EmitScalarExpr(E->getArg(0));
414    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
415
416    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
417    Value *IsLessThanInf =
418      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
419    APFloat Smallest = APFloat::getSmallestNormalized(
420                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
421    Value *IsNormal =
422      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
423                            "isnormal");
424    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
425    V = Builder.CreateAnd(V, IsNormal, "and");
426    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
427  }
428
429  case Builtin::BI__builtin_isfinite: {
430    // isfinite(x) --> x == x && fabs(x) != infinity
431    Value *V = EmitScalarExpr(E->getArg(0));
432    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
433
434    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
435    Value *IsNotInf =
436      Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
437
438    V = Builder.CreateAnd(Eq, IsNotInf, "and");
439    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
440  }
441
442  case Builtin::BI__builtin_fpclassify: {
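    // Overview of this lowering: the value being classified is argument 5 and
    // the returned literals are arguments 0..4, mirroring the builtin's
    // fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x)
    // argument order; each check below branches to the End block as soon as
    // one of the classes matches.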
443    Value *V = EmitScalarExpr(E->getArg(5));
444    const llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
445
446    // Create Result
447    BasicBlock *Begin = Builder.GetInsertBlock();
448    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
449    Builder.SetInsertPoint(End);
450    PHINode *Result =
451      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()),
452                        "fpclassify_result");
453
454    // if (V==0) return FP_ZERO
455    Builder.SetInsertPoint(Begin);
456    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
457                                          "iszero");
458    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
459    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
460    Builder.CreateCondBr(IsZero, End, NotZero);
461    Result->addIncoming(ZeroLiteral, Begin);
462
463    // if (V != V) return FP_NAN
464    Builder.SetInsertPoint(NotZero);
465    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
466    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
467    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
468    Builder.CreateCondBr(IsNan, End, NotNan);
469    Result->addIncoming(NanLiteral, NotZero);
470
471    // if (fabs(V) == infinity) return FP_INFINITY
472    Builder.SetInsertPoint(NotNan);
473    Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
474    Value *IsInf =
475      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
476                            "isinf");
477    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
478    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
479    Builder.CreateCondBr(IsInf, End, NotInf);
480    Result->addIncoming(InfLiteral, NotNan);
481
482    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
483    Builder.SetInsertPoint(NotInf);
484    APFloat Smallest = APFloat::getSmallestNormalized(
485        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
486    Value *IsNormal =
487      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
488                            "isnormal");
489    Value *NormalResult =
490      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
491                           EmitScalarExpr(E->getArg(3)));
492    Builder.CreateBr(End);
493    Result->addIncoming(NormalResult, NotInf);
494
495    // return Result
496    Builder.SetInsertPoint(End);
497    return RValue::get(Result);
498  }
499
500  case Builtin::BIalloca:
501  case Builtin::BI__builtin_alloca: {
502    Value *Size = EmitScalarExpr(E->getArg(0));
503    return RValue::get(Builder.CreateAlloca(llvm::Type::getInt8Ty(VMContext), Size, "tmp"));
504  }
505  case Builtin::BIbzero:
506  case Builtin::BI__builtin_bzero: {
507    Value *Address = EmitScalarExpr(E->getArg(0));
508    Value *SizeVal = EmitScalarExpr(E->getArg(1));
509    Builder.CreateCall5(CGM.getMemSetFn(Address->getType(), SizeVal->getType()),
510                   Address,
511                   llvm::ConstantInt::get(llvm::Type::getInt8Ty(VMContext), 0),
512                   SizeVal,
513                   llvm::ConstantInt::get(Int32Ty, 1),
514                   llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
515    return RValue::get(Address);
516  }
517  case Builtin::BImemcpy:
518  case Builtin::BI__builtin_memcpy: {
519    Value *Address = EmitScalarExpr(E->getArg(0));
520    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
521    Value *SizeVal = EmitScalarExpr(E->getArg(2));
522    Builder.CreateCall5(CGM.getMemCpyFn(Address->getType(), SrcAddr->getType(),
523                                        SizeVal->getType()),
524                  Address, SrcAddr, SizeVal,
525                  llvm::ConstantInt::get(Int32Ty, 1),
526                  llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
527    return RValue::get(Address);
528  }
529
530  case Builtin::BI__builtin_objc_memmove_collectable: {
531    Value *Address = EmitScalarExpr(E->getArg(0));
532    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
533    Value *SizeVal = EmitScalarExpr(E->getArg(2));
534    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
535                                                  Address, SrcAddr, SizeVal);
536    return RValue::get(Address);
537  }
538
539  case Builtin::BImemmove:
540  case Builtin::BI__builtin_memmove: {
541    Value *Address = EmitScalarExpr(E->getArg(0));
542    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
543    Value *SizeVal = EmitScalarExpr(E->getArg(2));
544    Builder.CreateCall5(CGM.getMemMoveFn(Address->getType(), SrcAddr->getType(),
545                                         SizeVal->getType()),
546                  Address, SrcAddr, SizeVal,
547                  llvm::ConstantInt::get(Int32Ty, 1),
548                  llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
549    return RValue::get(Address);
550  }
551  case Builtin::BImemset:
552  case Builtin::BI__builtin_memset: {
553    Value *Address = EmitScalarExpr(E->getArg(0));
554    Value *SizeVal = EmitScalarExpr(E->getArg(2));
555    Builder.CreateCall5(CGM.getMemSetFn(Address->getType(), SizeVal->getType()),
556                  Address,
557                  Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
558                                      llvm::Type::getInt8Ty(VMContext)),
559                  SizeVal,
560                  llvm::ConstantInt::get(Int32Ty, 1),
561                  llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
562    return RValue::get(Address);
563  }
564  case Builtin::BI__builtin_dwarf_cfa: {
565    // The offset in bytes from the first argument to the CFA.
566    //
567    // Why on earth is this in the frontend?  Is there any reason at
568    // all that the backend can't reasonably determine this while
569    // lowering llvm.eh.dwarf.cfa()?
570    //
571    // TODO: If there's a satisfactory reason, add a target hook for
572    // this instead of hard-coding 0, which is correct for most targets.
573    int32_t Offset = 0;
574
575    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa, 0, 0);
576    return RValue::get(Builder.CreateCall(F,
577                                      llvm::ConstantInt::get(Int32Ty, Offset)));
578  }
579  case Builtin::BI__builtin_return_address: {
580    Value *Depth = EmitScalarExpr(E->getArg(0));
581    Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp");
582    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress, 0, 0);
583    return RValue::get(Builder.CreateCall(F, Depth));
584  }
585  case Builtin::BI__builtin_frame_address: {
586    Value *Depth = EmitScalarExpr(E->getArg(0));
587    Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp");
588    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress, 0, 0);
589    return RValue::get(Builder.CreateCall(F, Depth));
590  }
591  case Builtin::BI__builtin_extract_return_addr: {
592    Value *Address = EmitScalarExpr(E->getArg(0));
593    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
594    return RValue::get(Result);
595  }
596  case Builtin::BI__builtin_frob_return_addr: {
597    Value *Address = EmitScalarExpr(E->getArg(0));
598    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
599    return RValue::get(Result);
600  }
601  case Builtin::BI__builtin_dwarf_sp_column: {
602    const llvm::IntegerType *Ty
603      = cast<llvm::IntegerType>(ConvertType(E->getType()));
604    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
605    if (Column == -1) {
606      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
607      return RValue::get(llvm::UndefValue::get(Ty));
608    }
609    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
610  }
611  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
612    Value *Address = EmitScalarExpr(E->getArg(0));
613    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
614      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
615    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
616  }
617  case Builtin::BI__builtin_eh_return: {
618    Value *Int = EmitScalarExpr(E->getArg(0));
619    Value *Ptr = EmitScalarExpr(E->getArg(1));
620
621    const llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
622    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
623           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
624    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
625                                  ? Intrinsic::eh_return_i32
626                                  : Intrinsic::eh_return_i64,
627                                0, 0);
628    Builder.CreateCall2(F, Int, Ptr);
629    Value *V = Builder.CreateUnreachable();
630    Builder.ClearInsertionPoint();
631    return RValue::get(V);
632  }
633  case Builtin::BI__builtin_unwind_init: {
634    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init, 0, 0);
635    return RValue::get(Builder.CreateCall(F));
636  }
637  case Builtin::BI__builtin_extend_pointer: {
638    // Extends a pointer to the size of an _Unwind_Word, which is
639    // uint64_t on all platforms.  Generally this gets poked into a
640    // register and eventually used as an address, so if the
641    // addressing registers are wider than pointers and the platform
642    // doesn't implicitly ignore high-order bits when doing
643    // addressing, we need to make sure we zext / sext based on
644    // the platform's expectations.
645    //
646    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
647
648    LLVMContext &C = CGM.getLLVMContext();
649
650    // Cast the pointer to intptr_t.
651    Value *Ptr = EmitScalarExpr(E->getArg(0));
652    const llvm::IntegerType *IntPtrTy = CGM.getTargetData().getIntPtrType(C);
653    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
654
655    // If that's 64 bits, we're done.
656    if (IntPtrTy->getBitWidth() == 64)
657      return RValue::get(Result);
658
659    // Otherwise, ask the codegen data what to do.
660    if (getTargetHooks().extendPointerWithSExt())
661      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
662    else
663      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
664  }
665  case Builtin::BI__builtin_setjmp: {
666    // Buffer is a void**.
667    Value *Buf = EmitScalarExpr(E->getArg(0));
668
669    // Store the frame pointer to the setjmp buffer.
670    Value *FrameAddr =
671      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
672                         ConstantInt::get(Int32Ty, 0));
673    Builder.CreateStore(FrameAddr, Buf);
674
675    // Store the stack pointer to the setjmp buffer.
676    Value *StackAddr =
677      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
678    Value *StackSaveSlot =
679      Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
680    Builder.CreateStore(StackAddr, StackSaveSlot);
681
682    // Call LLVM's EH setjmp, which is lightweight.
683    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
684    Buf = Builder.CreateBitCast(Buf, llvm::Type::getInt8PtrTy(VMContext));
685    return RValue::get(Builder.CreateCall(F, Buf));
686  }
687  case Builtin::BI__builtin_longjmp: {
688    Value *Buf = EmitScalarExpr(E->getArg(0));
689    Buf = Builder.CreateBitCast(Buf, llvm::Type::getInt8PtrTy(VMContext));
690
691    // Call LLVM's EH longjmp, which is lightweight.
692    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
693
694    // longjmp doesn't return; mark this as unreachable
695    Value *V = Builder.CreateUnreachable();
696    Builder.ClearInsertionPoint();
697    return RValue::get(V);
698  }
699  case Builtin::BI__sync_fetch_and_add:
700  case Builtin::BI__sync_fetch_and_sub:
701  case Builtin::BI__sync_fetch_and_or:
702  case Builtin::BI__sync_fetch_and_and:
703  case Builtin::BI__sync_fetch_and_xor:
704  case Builtin::BI__sync_add_and_fetch:
705  case Builtin::BI__sync_sub_and_fetch:
706  case Builtin::BI__sync_and_and_fetch:
707  case Builtin::BI__sync_or_and_fetch:
708  case Builtin::BI__sync_xor_and_fetch:
709  case Builtin::BI__sync_val_compare_and_swap:
710  case Builtin::BI__sync_bool_compare_and_swap:
711  case Builtin::BI__sync_lock_test_and_set:
712  case Builtin::BI__sync_lock_release:
713    assert(0 && "Shouldn't make it through sema");
714  case Builtin::BI__sync_fetch_and_add_1:
715  case Builtin::BI__sync_fetch_and_add_2:
716  case Builtin::BI__sync_fetch_and_add_4:
717  case Builtin::BI__sync_fetch_and_add_8:
718  case Builtin::BI__sync_fetch_and_add_16:
719    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_add, E);
720  case Builtin::BI__sync_fetch_and_sub_1:
721  case Builtin::BI__sync_fetch_and_sub_2:
722  case Builtin::BI__sync_fetch_and_sub_4:
723  case Builtin::BI__sync_fetch_and_sub_8:
724  case Builtin::BI__sync_fetch_and_sub_16:
725    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_sub, E);
726  case Builtin::BI__sync_fetch_and_or_1:
727  case Builtin::BI__sync_fetch_and_or_2:
728  case Builtin::BI__sync_fetch_and_or_4:
729  case Builtin::BI__sync_fetch_and_or_8:
730  case Builtin::BI__sync_fetch_and_or_16:
731    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_or, E);
732  case Builtin::BI__sync_fetch_and_and_1:
733  case Builtin::BI__sync_fetch_and_and_2:
734  case Builtin::BI__sync_fetch_and_and_4:
735  case Builtin::BI__sync_fetch_and_and_8:
736  case Builtin::BI__sync_fetch_and_and_16:
737    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_and, E);
738  case Builtin::BI__sync_fetch_and_xor_1:
739  case Builtin::BI__sync_fetch_and_xor_2:
740  case Builtin::BI__sync_fetch_and_xor_4:
741  case Builtin::BI__sync_fetch_and_xor_8:
742  case Builtin::BI__sync_fetch_and_xor_16:
743    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_xor, E);
744
745  // Clang extensions: not overloaded yet.
746  case Builtin::BI__sync_fetch_and_min:
747    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_min, E);
748  case Builtin::BI__sync_fetch_and_max:
749    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_max, E);
750  case Builtin::BI__sync_fetch_and_umin:
751    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umin, E);
752  case Builtin::BI__sync_fetch_and_umax:
753    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umax, E);
754
755  case Builtin::BI__sync_add_and_fetch_1:
756  case Builtin::BI__sync_add_and_fetch_2:
757  case Builtin::BI__sync_add_and_fetch_4:
758  case Builtin::BI__sync_add_and_fetch_8:
759  case Builtin::BI__sync_add_and_fetch_16:
760    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_add, E,
761                                llvm::Instruction::Add);
762  case Builtin::BI__sync_sub_and_fetch_1:
763  case Builtin::BI__sync_sub_and_fetch_2:
764  case Builtin::BI__sync_sub_and_fetch_4:
765  case Builtin::BI__sync_sub_and_fetch_8:
766  case Builtin::BI__sync_sub_and_fetch_16:
767    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_sub, E,
768                                llvm::Instruction::Sub);
769  case Builtin::BI__sync_and_and_fetch_1:
770  case Builtin::BI__sync_and_and_fetch_2:
771  case Builtin::BI__sync_and_and_fetch_4:
772  case Builtin::BI__sync_and_and_fetch_8:
773  case Builtin::BI__sync_and_and_fetch_16:
774    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_and, E,
775                                llvm::Instruction::And);
776  case Builtin::BI__sync_or_and_fetch_1:
777  case Builtin::BI__sync_or_and_fetch_2:
778  case Builtin::BI__sync_or_and_fetch_4:
779  case Builtin::BI__sync_or_and_fetch_8:
780  case Builtin::BI__sync_or_and_fetch_16:
781    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_or, E,
782                                llvm::Instruction::Or);
783  case Builtin::BI__sync_xor_and_fetch_1:
784  case Builtin::BI__sync_xor_and_fetch_2:
785  case Builtin::BI__sync_xor_and_fetch_4:
786  case Builtin::BI__sync_xor_and_fetch_8:
787  case Builtin::BI__sync_xor_and_fetch_16:
788    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_xor, E,
789                                llvm::Instruction::Xor);
790
791  case Builtin::BI__sync_val_compare_and_swap_1:
792  case Builtin::BI__sync_val_compare_and_swap_2:
793  case Builtin::BI__sync_val_compare_and_swap_4:
794  case Builtin::BI__sync_val_compare_and_swap_8:
795  case Builtin::BI__sync_val_compare_and_swap_16: {
796    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
797    unsigned AddrSpace =
798      cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
799    const llvm::Type *ValueType =
800      llvm::IntegerType::get(getLLVMContext(),
801                             getContext().getTypeSize(E->getType()));
802    const llvm::Type *PtrType = ValueType->getPointerTo(AddrSpace);
803    const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
804    Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap,
805                                    IntrinsicTypes, 2);
806
807    Value *Args[3] = { Builder.CreateBitCast(DestPtr, PtrType),
808                       EmitCastToInt(*this, ValueType,
809                                     EmitScalarExpr(E->getArg(1))),
810                       EmitCastToInt(*this, ValueType,
811                                     EmitScalarExpr(E->getArg(2))) };
812    return RValue::get(EmitCastFromInt(*this, E->getType(),
813                                       EmitCallWithBarrier(*this, AtomF, Args,
814                                                           Args + 3)));
815  }
816
817  case Builtin::BI__sync_bool_compare_and_swap_1:
818  case Builtin::BI__sync_bool_compare_and_swap_2:
819  case Builtin::BI__sync_bool_compare_and_swap_4:
820  case Builtin::BI__sync_bool_compare_and_swap_8:
821  case Builtin::BI__sync_bool_compare_and_swap_16: {
822    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
823    unsigned AddrSpace =
824      cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();
825    const llvm::Type *ValueType =
826      llvm::IntegerType::get(getLLVMContext(),
827        getContext().getTypeSize(E->getArg(1)->getType()));
828    const llvm::Type *PtrType = ValueType->getPointerTo(AddrSpace);
829    const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
830    Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap,
831                                    IntrinsicTypes, 2);
832
833    Value *Args[3] = { Builder.CreateBitCast(DestPtr, PtrType),
834                       EmitCastToInt(*this, ValueType,
835                                     EmitScalarExpr(E->getArg(1))),
836                       EmitCastToInt(*this, ValueType,
837                                     EmitScalarExpr(E->getArg(2))) };
838    Value *OldVal = Args[1];
839    Value *PrevVal = EmitCallWithBarrier(*this, AtomF, Args, Args + 3);
840    Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
841    // zext bool to int.
842    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
843  }
844
845  case Builtin::BI__sync_lock_test_and_set_1:
846  case Builtin::BI__sync_lock_test_and_set_2:
847  case Builtin::BI__sync_lock_test_and_set_4:
848  case Builtin::BI__sync_lock_test_and_set_8:
849  case Builtin::BI__sync_lock_test_and_set_16:
850    return EmitBinaryAtomic(*this, Intrinsic::atomic_swap, E);
851
852  case Builtin::BI__sync_lock_release_1:
853  case Builtin::BI__sync_lock_release_2:
854  case Builtin::BI__sync_lock_release_4:
855  case Builtin::BI__sync_lock_release_8:
856  case Builtin::BI__sync_lock_release_16: {
857    Value *Ptr = EmitScalarExpr(E->getArg(0));
858    const llvm::Type *ElTy =
859      cast<llvm::PointerType>(Ptr->getType())->getElementType();
860    llvm::StoreInst *Store =
861      Builder.CreateStore(llvm::Constant::getNullValue(ElTy), Ptr);
862    Store->setVolatile(true);
863    return RValue::get(0);
864  }
865
866  case Builtin::BI__sync_synchronize: {
867    // We assume, as gcc appears to, that this only applies to cached memory.
868    EmitMemoryBarrier(*this, true, true, true, true, false);
869    return RValue::get(0);
870  }
871
872  case Builtin::BI__builtin_llvm_memory_barrier: {
873    Value *C[5] = {
874      EmitScalarExpr(E->getArg(0)),
875      EmitScalarExpr(E->getArg(1)),
876      EmitScalarExpr(E->getArg(2)),
877      EmitScalarExpr(E->getArg(3)),
878      EmitScalarExpr(E->getArg(4))
879    };
880    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::memory_barrier), C, C + 5);
881    return RValue::get(0);
882  }
883
884    // Library functions with special handling.
885  case Builtin::BIsqrt:
886  case Builtin::BIsqrtf:
887  case Builtin::BIsqrtl: {
888    // TODO: there is currently no set of optimizer flags
889    // sufficient for us to rewrite sqrt to @llvm.sqrt.
890    // -fmath-errno=0 is not good enough; we need finiteness.
891    // We could probably precondition the call with an ult
892    // against 0, but is that worth the complexity?
893    break;
894  }
895
896  case Builtin::BIpow:
897  case Builtin::BIpowf:
898  case Builtin::BIpowl: {
899    // Rewrite sqrt to intrinsic if allowed.
900    if (!FD->hasAttr<ConstAttr>())
901      break;
902    Value *Base = EmitScalarExpr(E->getArg(0));
903    Value *Exponent = EmitScalarExpr(E->getArg(1));
904    const llvm::Type *ArgType = Base->getType();
905    Value *F = CGM.getIntrinsic(Intrinsic::pow, &ArgType, 1);
906    return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp"));
907  }
908
909  case Builtin::BI__builtin_signbit:
910  case Builtin::BI__builtin_signbitf:
911  case Builtin::BI__builtin_signbitl: {
912    LLVMContext &C = CGM.getLLVMContext();
913
914    Value *Arg = EmitScalarExpr(E->getArg(0));
915    const llvm::Type *ArgTy = Arg->getType();
916    if (ArgTy->isPPC_FP128Ty())
917      break; // FIXME: I'm not sure what the right implementation is here.
918    int ArgWidth = ArgTy->getPrimitiveSizeInBits();
919    const llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
920    Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
921    Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
922    Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
923    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
924  }
925  }
926
927  // If this is an alias for a libm function (e.g. __builtin_sin) turn it into
928  // that function.
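  // For example, a call to __builtin_sin simply becomes a call to the normal
  // libm "sin" function; no special lowering happens here.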
929  if (getContext().BuiltinInfo.isLibFunction(BuiltinID) ||
930      getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
931    return EmitCall(E->getCallee()->getType(),
932                    CGM.getBuiltinLibFunction(FD, BuiltinID),
933                    ReturnValueSlot(),
934                    E->arg_begin(), E->arg_end());
935
936  // See if we have a target specific intrinsic.
937  const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
938  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
939  if (const char *Prefix =
940      llvm::Triple::getArchTypePrefix(Target.getTriple().getArch()))
941    IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
942
943  if (IntrinsicID != Intrinsic::not_intrinsic) {
944    SmallVector<Value*, 16> Args;
945
946    Function *F = CGM.getIntrinsic(IntrinsicID);
947    const llvm::FunctionType *FTy = F->getFunctionType();
948
949    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
950      Value *ArgValue = EmitScalarExpr(E->getArg(i));
951
952      // If the intrinsic arg type is different from the builtin arg type
953      // we need to do a bit cast.
954      const llvm::Type *PTy = FTy->getParamType(i);
955      if (PTy != ArgValue->getType()) {
956        assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
957               "Must be able to losslessly bit cast to param");
958        ArgValue = Builder.CreateBitCast(ArgValue, PTy);
959      }
960
961      Args.push_back(ArgValue);
962    }
963
964    Value *V = Builder.CreateCall(F, Args.data(), Args.data() + Args.size());
965    QualType BuiltinRetType = E->getType();
966
967    const llvm::Type *RetTy = llvm::Type::getVoidTy(VMContext);
968    if (!BuiltinRetType->isVoidType()) RetTy = ConvertType(BuiltinRetType);
969
970    if (RetTy != V->getType()) {
971      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
972             "Must be able to losslessly bit cast result type");
973      V = Builder.CreateBitCast(V, RetTy);
974    }
975
976    return RValue::get(V);
977  }
978
979  // See if we have a target specific builtin that needs to be lowered.
980  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
981    return RValue::get(V);
982
983  ErrorUnsupported(E, "builtin function");
984
985  // Unknown builtin, for now just dump it out and return undef.
986  if (hasAggregateLLVMType(E->getType()))
987    return RValue::getAggregate(CreateMemTemp(E->getType()));
988  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
989}
990
991Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
992                                              const CallExpr *E) {
993  switch (Target.getTriple().getArch()) {
994  case llvm::Triple::arm:
995  case llvm::Triple::thumb:
996    return EmitARMBuiltinExpr(BuiltinID, E);
997  case llvm::Triple::x86:
998  case llvm::Triple::x86_64:
999    return EmitX86BuiltinExpr(BuiltinID, E);
1000  case llvm::Triple::ppc:
1001  case llvm::Triple::ppc64:
1002    return EmitPPCBuiltinExpr(BuiltinID, E);
1003  default:
1004    return 0;
1005  }
1006}
1007
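// A sketch of the type encoding decoded here and in EmitARMBuiltinExpr below,
// as reconstructed from the checks on it: bits 0-2 select the element type
// (0 = i8, 1 = i16, 2 = i32, 3 = i64, 4 = f32, 5 = poly8, 6 = poly16, 7 = f16,
// with the polynomial and half-float cases sharing the i8/i16 lane types),
// bit 3 (0x08) marks an unsigned operation and bit 4 (0x10) selects a quad
// (128-bit) vector instead of a 64-bit one.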
1008const llvm::VectorType *GetNeonType(LLVMContext &C, unsigned type, bool q) {
1009  switch (type) {
1010    default: break;
1011    case 0:
1012    case 5: return llvm::VectorType::get(llvm::Type::getInt8Ty(C), 8 << (int)q);
1013    case 6:
1014    case 7:
1015    case 1: return llvm::VectorType::get(llvm::Type::getInt16Ty(C),4 << (int)q);
1016    case 2: return llvm::VectorType::get(llvm::Type::getInt32Ty(C),2 << (int)q);
1017    case 3: return llvm::VectorType::get(llvm::Type::getInt64Ty(C),1 << (int)q);
1018    case 4: return llvm::VectorType::get(llvm::Type::getFloatTy(C),2 << (int)q);
1019  };
1020  return 0;
1021}
1022
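// For illustration, splatting lane 1 of a <4 x i16> value this way produces
//   shufflevector <4 x i16> %v, <4 x i16> %v,
//                 <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// (value names are placeholders); with 'widen' set, the mask and therefore the
// result have twice as many elements as the input vector.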
1023Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C, bool widen) {
1024  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
1025  if (widen)
1026    nElts <<= 1;
1027  SmallVector<Constant*, 16> Indices(nElts, C);
1028  Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1029  return Builder.CreateShuffleVector(V, V, SV, "lane");
1030}
1031
1032Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
1033                                     const char *name, bool splat,
1034                                     unsigned shift, bool rightshift) {
1035  unsigned j = 0;
1036  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1037       ai != ae; ++ai, ++j)
1038    if (shift > 0 && shift == j)
1039      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
1040    else
1041      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
1042
1043  if (splat) {
1044    Ops[j-1] = EmitNeonSplat(Ops[j-1], cast<Constant>(Ops[j]));
1045    Ops.resize(j);
1046  }
1047  return Builder.CreateCall(F, Ops.begin(), Ops.end(), name);
1048}
1049
1050Value *CodeGenFunction::EmitNeonShiftVector(Value *V, const llvm::Type *Ty,
1051                                            bool neg) {
1052  ConstantInt *CI = cast<ConstantInt>(V);
1053  int SV = CI->getSExtValue();
1054
1055  const llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1056  llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
1057  SmallVector<llvm::Constant*, 16> CV(VTy->getNumElements(), C);
1058  return llvm::ConstantVector::get(CV.begin(), CV.size());
1059}
1060
1061/// GetPointeeAlignment - Given an expression with a pointer type, find the
1062/// alignment of the type referenced by the pointer.  Skip over implicit
1063/// casts.
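/// For instance, given an expression of type 'int *' this typically yields an
/// i32 constant 4; the result is used as the explicit alignment operand of the
/// NEON load/store intrinsics emitted below.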
1064static Value *GetPointeeAlignment(CodeGenFunction &CGF, const Expr *Addr) {
1065  unsigned Align = 1;
1066  // Check if the type is a pointer.  The implicit cast operand might not be.
1067  while (Addr->getType()->isPointerType()) {
1068    QualType PtTy = Addr->getType()->getPointeeType();
1069    unsigned NewA = CGF.getContext().getTypeAlignInChars(PtTy).getQuantity();
1070    if (NewA > Align)
1071      Align = NewA;
1072
1073    // If the address is an implicit cast, repeat with the cast operand.
1074    if (const ImplicitCastExpr *CastAddr = dyn_cast<ImplicitCastExpr>(Addr)) {
1075      Addr = CastAddr->getSubExpr();
1076      continue;
1077    }
1078    break;
1079  }
1080  return llvm::ConstantInt::get(CGF.Int32Ty, Align);
1081}
1082
1083Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
1084                                           const CallExpr *E) {
1085  if (BuiltinID == ARM::BI__clear_cache) {
1086    const FunctionDecl *FD = E->getDirectCallee();
1087    Value *a = EmitScalarExpr(E->getArg(0));
1088    Value *b = EmitScalarExpr(E->getArg(1));
1089    const llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
1090    const llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
1091    llvm::StringRef Name = FD->getName();
1092    return Builder.CreateCall2(CGM.CreateRuntimeFunction(FTy, Name),
1093                               a, b);
1094  }
1095
1096  llvm::SmallVector<Value*, 4> Ops;
1097  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
1098    Ops.push_back(EmitScalarExpr(E->getArg(i)));
1099
1100  llvm::APSInt Result;
1101  const Expr *Arg = E->getArg(E->getNumArgs()-1);
1102  if (!Arg->isIntegerConstantExpr(Result, getContext()))
1103    return 0;
1104
1105  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
1106      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
1107    // Determine the overloaded type of this builtin.
1108    const llvm::Type *Ty;
1109    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
1110      Ty = llvm::Type::getFloatTy(VMContext);
1111    else
1112      Ty = llvm::Type::getDoubleTy(VMContext);
1113
1114    // Determine whether this is an unsigned conversion or not.
1115    bool usgn = Result.getZExtValue() == 1;
1116    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
1117
1118    // Call the appropriate intrinsic.
1119    Function *F = CGM.getIntrinsic(Int, &Ty, 1);
1120    return Builder.CreateCall(F, Ops.begin(), Ops.end(), "vcvtr");
1121  }
1122
1123  // Determine the type of this overloaded NEON intrinsic.
1124  unsigned type = Result.getZExtValue();
1125  bool usgn = type & 0x08;
1126  bool quad = type & 0x10;
1127  bool poly = (type & 0x7) == 5 || (type & 0x7) == 6;
1128  bool splat = false;
1129
1130  const llvm::VectorType *VTy = GetNeonType(VMContext, type & 0x7, quad);
1131  const llvm::Type *Ty = VTy;
1132  if (!Ty)
1133    return 0;
1134
1135  unsigned Int;
1136  switch (BuiltinID) {
1137  default: return 0;
1138  case ARM::BI__builtin_neon_vaba_v:
1139  case ARM::BI__builtin_neon_vabaq_v: {
1140    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1141    SmallVector<Value*, 2> Args;
1142    Args.push_back(Ops[1]);
1143    Args.push_back(Ops[2]);
1144    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
1145    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Args, "vaba");
1146    return Builder.CreateAdd(Ops[0], Ops[1], "vaba");
1147  }
1148  case ARM::BI__builtin_neon_vabal_v: {
1149    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1150    SmallVector<Value*, 2> Args;
1151    Args.push_back(Ops[1]);
1152    Args.push_back(Ops[2]);
1153    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
1154    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1155    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Int, &DTy, 1), Args, "vabal");
1156    Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1157    return Builder.CreateAdd(Ops[0], Ops[1], "vabal");
1158  }
1159  case ARM::BI__builtin_neon_vabd_v:
1160  case ARM::BI__builtin_neon_vabdq_v:
1161    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
1162    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabd");
1163  case ARM::BI__builtin_neon_vabdl_v: {
1164    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
1165    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1166    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, &DTy, 1), Ops, "vabdl");
1167    return Builder.CreateZExt(Ops[0], Ty, "vabdl");
1168  }
1169  case ARM::BI__builtin_neon_vabs_v:
1170  case ARM::BI__builtin_neon_vabsq_v:
1171    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, &Ty, 1),
1172                        Ops, "vabs");
1173  case ARM::BI__builtin_neon_vaddhn_v:
1174    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, &Ty, 1),
1175                        Ops, "vaddhn");
1176  case ARM::BI__builtin_neon_vaddl_v: {
1177    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1178    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1179    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
1180    if (usgn) {
1181      Ops[0] = Builder.CreateZExt(Ops[0], Ty);
1182      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1183    } else {
1184      Ops[0] = Builder.CreateSExt(Ops[0], Ty);
1185      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1186    }
1187    return Builder.CreateAdd(Ops[0], Ops[1], "vaddl");
1188  }
1189  case ARM::BI__builtin_neon_vaddw_v: {
1190    const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
1191    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1192    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
1193    if (usgn)
1194      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1195    else
1196      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1197    return Builder.CreateAdd(Ops[0], Ops[1], "vaddw");
1198  }
1199  case ARM::BI__builtin_neon_vcale_v:
1200    std::swap(Ops[0], Ops[1]);
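    // Fall through after swapping the operands: vcale(a, b) is vcage(b, a).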
1201  case ARM::BI__builtin_neon_vcage_v: {
1202    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged, &Ty, 1);
1203    return EmitNeonCall(F, Ops, "vcage");
1204  }
1205  case ARM::BI__builtin_neon_vcaleq_v:
1206    std::swap(Ops[0], Ops[1]);
1207  case ARM::BI__builtin_neon_vcageq_v: {
1208    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq, &Ty, 1);
1209    return EmitNeonCall(F, Ops, "vcage");
1210  }
1211  case ARM::BI__builtin_neon_vcalt_v:
1212    std::swap(Ops[0], Ops[1]);
1213  case ARM::BI__builtin_neon_vcagt_v: {
1214    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd, &Ty, 1);
1215    return EmitNeonCall(F, Ops, "vcagt");
1216  }
1217  case ARM::BI__builtin_neon_vcaltq_v:
1218    std::swap(Ops[0], Ops[1]);
1219  case ARM::BI__builtin_neon_vcagtq_v: {
1220    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq, &Ty, 1);
1221    return EmitNeonCall(F, Ops, "vcagt");
1222  }
1223  case ARM::BI__builtin_neon_vcls_v:
1224  case ARM::BI__builtin_neon_vclsq_v: {
1225    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, &Ty, 1);
1226    return EmitNeonCall(F, Ops, "vcls");
1227  }
1228  case ARM::BI__builtin_neon_vclz_v:
1229  case ARM::BI__builtin_neon_vclzq_v: {
1230    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, &Ty, 1);
1231    return EmitNeonCall(F, Ops, "vclz");
1232  }
1233  case ARM::BI__builtin_neon_vcnt_v:
1234  case ARM::BI__builtin_neon_vcntq_v: {
1235    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, &Ty, 1);
1236    return EmitNeonCall(F, Ops, "vcnt");
1237  }
1238  // FIXME: f16<->f32 conversion intrinsics are missing from the ARM target.
1239  case ARM::BI__builtin_neon_vcvt_f32_v:
1240  case ARM::BI__builtin_neon_vcvtq_f32_v: {
1241    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1242    Ty = GetNeonType(VMContext, 4, quad);
1243    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
1244                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
1245  }
1246  case ARM::BI__builtin_neon_vcvt_s32_v:
1247  case ARM::BI__builtin_neon_vcvt_u32_v:
1248  case ARM::BI__builtin_neon_vcvtq_s32_v:
1249  case ARM::BI__builtin_neon_vcvtq_u32_v: {
1250    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(VMContext, 4, quad));
1251    return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
1252                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
1253  }
1254  case ARM::BI__builtin_neon_vcvt_n_f32_v:
1255  case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
1256    const llvm::Type *Tys[2] = { GetNeonType(VMContext, 4, quad), Ty };
1257    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp : Intrinsic::arm_neon_vcvtfxs2fp;
1258    Function *F = CGM.getIntrinsic(Int, Tys, 2);
1259    return EmitNeonCall(F, Ops, "vcvt_n");
1260  }
1261  case ARM::BI__builtin_neon_vcvt_n_s32_v:
1262  case ARM::BI__builtin_neon_vcvt_n_u32_v:
1263  case ARM::BI__builtin_neon_vcvtq_n_s32_v:
1264  case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
1265    const llvm::Type *Tys[2] = { Ty, GetNeonType(VMContext, 4, quad) };
1266    Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu : Intrinsic::arm_neon_vcvtfp2fxs;
1267    Function *F = CGM.getIntrinsic(Int, Tys, 2);
1268    return EmitNeonCall(F, Ops, "vcvt_n");
1269  }
1270  case ARM::BI__builtin_neon_vdup_lane_v:
1271    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1272    return EmitNeonSplat(Ops[0], cast<Constant>(Ops[1]));
1273  case ARM::BI__builtin_neon_vdupq_lane_v:
1274    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1275    return EmitNeonSplat(Ops[0], cast<Constant>(Ops[1]), true);
1276  case ARM::BI__builtin_neon_vext_v:
1277  case ARM::BI__builtin_neon_vextq_v: {
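        // VEXT concatenates the two source vectors and extracts a contiguous
        // run of elements starting at the lane given by the immediate, which
        // maps directly onto a shufflevector with indices CV .. CV+N-1.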
1278    ConstantInt *C = cast<ConstantInt>(Ops[2]);
1279    int CV = C->getSExtValue();
1280    SmallVector<Constant*, 16> Indices;
1281    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1282      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
1283
1284    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1285    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1286    Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1287    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
1288  }
1289  case ARM::BI__builtin_neon_vget_lane_i8:
1290  case ARM::BI__builtin_neon_vget_lane_i16:
1291  case ARM::BI__builtin_neon_vget_lane_i32:
1292  case ARM::BI__builtin_neon_vget_lane_i64:
1293  case ARM::BI__builtin_neon_vget_lane_f32:
1294  case ARM::BI__builtin_neon_vgetq_lane_i8:
1295  case ARM::BI__builtin_neon_vgetq_lane_i16:
1296  case ARM::BI__builtin_neon_vgetq_lane_i32:
1297  case ARM::BI__builtin_neon_vgetq_lane_i64:
1298  case ARM::BI__builtin_neon_vgetq_lane_f32:
1299    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
1300                                        "vget_lane");
1301  case ARM::BI__builtin_neon_vhadd_v:
1302  case ARM::BI__builtin_neon_vhaddq_v:
1303    Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
1304    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhadd");
1305  case ARM::BI__builtin_neon_vhsub_v:
1306  case ARM::BI__builtin_neon_vhsubq_v:
1307    Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
1308    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhsub");
1309  case ARM::BI__builtin_neon_vld1_v:
1310  case ARM::BI__builtin_neon_vld1q_v:
1311    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1312    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, &Ty, 1),
1313                        Ops, "vld1");
1314  case ARM::BI__builtin_neon_vld1_lane_v:
1315  case ARM::BI__builtin_neon_vld1q_lane_v:
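        // vld1_lane loads a single scalar from memory and inserts it into the
        // requested lane of the vector passed in as the second operand.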
1316    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1317    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1318    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1319    Ops[0] = Builder.CreateLoad(Ops[0]);
1320    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
1321  case ARM::BI__builtin_neon_vld1_dup_v:
1322  case ARM::BI__builtin_neon_vld1q_dup_v: {
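        // vld1_dup loads one scalar, inserts it into lane 0 of an undef
        // vector, and then splats that lane across all elements.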
1323    Value *V = UndefValue::get(Ty);
1324    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
1325    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1326    Ops[0] = Builder.CreateLoad(Ops[0]);
1327    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1328    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
1329    return EmitNeonSplat(Ops[0], CI);
1330  }
1331  case ARM::BI__builtin_neon_vld2_v:
1332  case ARM::BI__builtin_neon_vld2q_v: {
1333    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, &Ty, 1);
1334    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1335    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
1336    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1337    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1338    return Builder.CreateStore(Ops[1], Ops[0]);
1339  }
1340  case ARM::BI__builtin_neon_vld3_v:
1341  case ARM::BI__builtin_neon_vld3q_v: {
1342    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, &Ty, 1);
1343    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1344    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
1345    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1346    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1347    return Builder.CreateStore(Ops[1], Ops[0]);
1348  }
1349  case ARM::BI__builtin_neon_vld4_v:
1350  case ARM::BI__builtin_neon_vld4q_v: {
1351    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, &Ty, 1);
1352    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
1353    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
1354    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1355    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1356    return Builder.CreateStore(Ops[1], Ops[0]);
1357  }
1358  case ARM::BI__builtin_neon_vld2_lane_v:
1359  case ARM::BI__builtin_neon_vld2q_lane_v: {
1360    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, &Ty, 1);
1361    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1362    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1363    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1364    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld2_lane");
1365    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1366    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1367    return Builder.CreateStore(Ops[1], Ops[0]);
1368  }
1369  case ARM::BI__builtin_neon_vld3_lane_v:
1370  case ARM::BI__builtin_neon_vld3q_lane_v: {
1371    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, &Ty, 1);
1372    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1373    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1374    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
1375    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1376    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld3_lane");
1377    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1378    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1379    return Builder.CreateStore(Ops[1], Ops[0]);
1380  }
1381  case ARM::BI__builtin_neon_vld4_lane_v:
1382  case ARM::BI__builtin_neon_vld4q_lane_v: {
1383    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, &Ty, 1);
1384    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1385    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
1386    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
1387    Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
1388    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1389    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld4_lane");
1390    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1391    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1392    return Builder.CreateStore(Ops[1], Ops[0]);
1393  }
1394  case ARM::BI__builtin_neon_vld2_dup_v:
1395  case ARM::BI__builtin_neon_vld3_dup_v:
1396  case ARM::BI__builtin_neon_vld4_dup_v: {
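        // There are no separate vldN_dup intrinsics; load lane 0 with the
        // corresponding vldNlane intrinsic into undef vectors and then splat
        // lane 0 of each vector in the result.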
1397    switch (BuiltinID) {
1398    case ARM::BI__builtin_neon_vld2_dup_v:
1399      Int = Intrinsic::arm_neon_vld2lane;
1400      break;
1401    case ARM::BI__builtin_neon_vld3_dup_v:
1402      Int = Intrinsic::arm_neon_vld3lane;
1403      break;
1404    case ARM::BI__builtin_neon_vld4_dup_v:
1405      Int = Intrinsic::arm_neon_vld4lane;
1406      break;
1407    default: assert(0 && "unknown vld_dup intrinsic?");
1408    }
1409    Function *F = CGM.getIntrinsic(Int, &Ty, 1);
1410    const llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
1411
1412    SmallVector<Value*, 6> Args;
1413    Args.push_back(Ops[1]);
1414    Args.append(STy->getNumElements(), UndefValue::get(Ty));
1415
1416    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
1417    Args.push_back(CI);
1418    Args.push_back(GetPointeeAlignment(*this, E->getArg(1)));
1419
1420    Ops[1] = Builder.CreateCall(F, Args.begin(), Args.end(), "vld_dup");
1421    // Splat lane 0 to all elements in each vector of the result.
1422    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1423      Value *Val = Builder.CreateExtractValue(Ops[1], i);
1424      Value *Elt = Builder.CreateBitCast(Val, Ty);
1425      Elt = EmitNeonSplat(Elt, CI);
1426      Elt = Builder.CreateBitCast(Elt, Val->getType());
1427      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
1428    }
1429    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1430    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1431    return Builder.CreateStore(Ops[1], Ops[0]);
1432  }
1433  case ARM::BI__builtin_neon_vmax_v:
1434  case ARM::BI__builtin_neon_vmaxq_v:
1435    Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
1436    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmax");
1437  case ARM::BI__builtin_neon_vmin_v:
1438  case ARM::BI__builtin_neon_vminq_v:
1439    Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
1440    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmin");
1441  case ARM::BI__builtin_neon_vmlal_lane_v: {
1442    const llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
1443    Ops[2] = Builder.CreateBitCast(Ops[2], DTy);
1444    Ops[2] = EmitNeonSplat(Ops[2], cast<Constant>(Ops[3]));
1445  }
1446  case ARM::BI__builtin_neon_vmlal_v: {
1447    const llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
1448    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1449    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
1450    Ops[2] = Builder.CreateBitCast(Ops[2], DTy);
1451    if (usgn) {
1452      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1453      Ops[2] = Builder.CreateZExt(Ops[2], Ty);
1454    } else {
1455      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1456      Ops[2] = Builder.CreateSExt(Ops[2], Ty);
1457    }
1458    Ops[1] = Builder.CreateMul(Ops[1], Ops[2]);
1459    return Builder.CreateAdd(Ops[0], Ops[1], "vmlal");
1460  }
1461  case ARM::BI__builtin_neon_vmlsl_lane_v: {
1462    const llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
1463    Ops[2] = Builder.CreateBitCast(Ops[2], DTy);
1464    Ops[2] = EmitNeonSplat(Ops[2], cast<Constant>(Ops[3]));
1465  }
1466  case ARM::BI__builtin_neon_vmlsl_v: {
1467    const llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
1468    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1469    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
1470    Ops[2] = Builder.CreateBitCast(Ops[2], DTy);
1471    if (usgn) {
1472      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1473      Ops[2] = Builder.CreateZExt(Ops[2], Ty);
1474    } else {
1475      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1476      Ops[2] = Builder.CreateSExt(Ops[2], Ty);
1477    }
1478    Ops[1] = Builder.CreateMul(Ops[1], Ops[2]);
1479    return Builder.CreateSub(Ops[0], Ops[1], "vmlsl");
1480  }
1481  case ARM::BI__builtin_neon_vmovl_v: {
1482    const llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
1483    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1484    if (usgn)
1485      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
1486    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
1487  }
1488  case ARM::BI__builtin_neon_vmovn_v: {
1489    const llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
1490    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
1491    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
1492  }
1493  case ARM::BI__builtin_neon_vmull_lane_v: {
1494    const llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
1495    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
1496    Ops[1] = EmitNeonSplat(Ops[1], cast<Constant>(Ops[2]));
1497  }
1498  case ARM::BI__builtin_neon_vmull_v: {
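        // Polynomial types must use the vmullp intrinsic; for integer types a
        // long multiply is just a widening of both operands followed by an
        // ordinary vector multiply.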
1499    if (poly)
1500      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmullp, &Ty, 1),
1501                          Ops, "vmull");
1502    const llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
1503    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1504    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
1505    if (usgn) {
1506      Ops[0] = Builder.CreateZExt(Ops[0], Ty);
1507      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1508    } else {
1509      Ops[0] = Builder.CreateSExt(Ops[0], Ty);
1510      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1511    }
1512    return Builder.CreateMul(Ops[0], Ops[1], "vmull");
1513  }
1514  case ARM::BI__builtin_neon_vpadal_v:
1515  case ARM::BI__builtin_neon_vpadalq_v:
1516    Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
1517    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpadal");
1518  case ARM::BI__builtin_neon_vpadd_v:
1519    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, &Ty, 1),
1520                        Ops, "vpadd");
1521  case ARM::BI__builtin_neon_vpaddl_v:
1522  case ARM::BI__builtin_neon_vpaddlq_v:
1523    Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
1524    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpaddl");
1525  case ARM::BI__builtin_neon_vpmax_v:
1526    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
1527    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmax");
1528  case ARM::BI__builtin_neon_vpmin_v:
1529    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
1530    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmin");
1531  case ARM::BI__builtin_neon_vqabs_v:
1532  case ARM::BI__builtin_neon_vqabsq_v:
1533    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, &Ty, 1),
1534                        Ops, "vqabs");
1535  case ARM::BI__builtin_neon_vqadd_v:
1536  case ARM::BI__builtin_neon_vqaddq_v:
1537    Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
1538    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqadd");
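      // For the _lane variants below, the 'splat' flag makes EmitNeonCall
      // broadcast the selected lane across a full vector before the intrinsic
      // is emitted.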
1539  case ARM::BI__builtin_neon_vqdmlal_lane_v:
1540    splat = true;
1541  case ARM::BI__builtin_neon_vqdmlal_v:
1542    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, &Ty, 1),
1543                        Ops, "vqdmlal", splat);
1544  case ARM::BI__builtin_neon_vqdmlsl_lane_v:
1545    splat = true;
1546  case ARM::BI__builtin_neon_vqdmlsl_v:
1547    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, &Ty, 1),
1548                        Ops, "vqdmlsl", splat);
1549  case ARM::BI__builtin_neon_vqdmulh_lane_v:
1550  case ARM::BI__builtin_neon_vqdmulhq_lane_v:
1551    splat = true;
1552  case ARM::BI__builtin_neon_vqdmulh_v:
1553  case ARM::BI__builtin_neon_vqdmulhq_v:
1554    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, &Ty, 1),
1555                        Ops, "vqdmulh", splat);
1556  case ARM::BI__builtin_neon_vqdmull_lane_v:
1557    splat = true;
1558  case ARM::BI__builtin_neon_vqdmull_v:
1559    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, &Ty, 1),
1560                        Ops, "vqdmull", splat);
1561  case ARM::BI__builtin_neon_vqmovn_v:
1562    Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
1563    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqmovn");
1564  case ARM::BI__builtin_neon_vqmovun_v:
1565    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, &Ty, 1),
1566                        Ops, "vqmovun");
1567  case ARM::BI__builtin_neon_vqneg_v:
1568    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, &Ty, 1),
1569                        Ops, "vqneg");
1570  case ARM::BI__builtin_neon_vqrdmulh_lane_v:
1571  case ARM::BI__builtin_neon_vqrdmulhq_lane_v:
1572    splat = true;
1573  case ARM::BI__builtin_neon_vqrdmulh_v:
1574  case ARM::BI__builtin_neon_vqrdmulhq_v:
1575    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, &Ty, 1),
1576                        Ops, "vqrdmulh", splat);
1577  case ARM::BI__builtin_neon_vqrshl_v:
1578  case ARM::BI__builtin_neon_vqrshlq_v:
1579    Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
1580    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqrshl");
1581  case ARM::BI__builtin_neon_vqrshrn_n_v:
1582    Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
1583    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqrshrn_n", false,
1584                        1, true);
1585  case ARM::BI__builtin_neon_vqrshrun_n_v:
1586    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, &Ty, 1),
1587                        Ops, "vqrshrun_n", false, 1, true);
1588  case ARM::BI__builtin_neon_vqshl_v:
1589  case ARM::BI__builtin_neon_vqshlq_v:
1590    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
1591    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshl");
1592  case ARM::BI__builtin_neon_vqshl_n_v:
1593  case ARM::BI__builtin_neon_vqshlq_n_v:
1594    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
1595    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshl_n", false,
1596                        1, false);
1597  case ARM::BI__builtin_neon_vqshlu_n_v:
1598  case ARM::BI__builtin_neon_vqshluq_n_v:
1599    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, &Ty, 1),
1600                        Ops, "vqshlu_n", false, 1, false);
1601  case ARM::BI__builtin_neon_vqshrn_n_v:
1602    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
1603    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshrn_n", false,
1604                        1, true);
1605  case ARM::BI__builtin_neon_vqshrun_n_v:
1606    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, &Ty, 1),
1607                        Ops, "vqshrun_n", false, 1, true);
1608  case ARM::BI__builtin_neon_vqsub_v:
1609  case ARM::BI__builtin_neon_vqsubq_v:
1610    Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
1611    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqsub");
1612  case ARM::BI__builtin_neon_vraddhn_v:
1613    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, &Ty, 1),
1614                        Ops, "vraddhn");
1615  case ARM::BI__builtin_neon_vrecpe_v:
1616  case ARM::BI__builtin_neon_vrecpeq_v:
1617    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, &Ty, 1),
1618                        Ops, "vrecpe");
1619  case ARM::BI__builtin_neon_vrecps_v:
1620  case ARM::BI__builtin_neon_vrecpsq_v:
1621    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, &Ty, 1),
1622                        Ops, "vrecps");
1623  case ARM::BI__builtin_neon_vrhadd_v:
1624  case ARM::BI__builtin_neon_vrhaddq_v:
1625    Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
1626    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrhadd");
1627  case ARM::BI__builtin_neon_vrshl_v:
1628  case ARM::BI__builtin_neon_vrshlq_v:
1629    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1630    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrshl");
1631  case ARM::BI__builtin_neon_vrshrn_n_v:
1632    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, &Ty, 1),
1633                        Ops, "vrshrn_n", false, 1, true);
1634  case ARM::BI__builtin_neon_vrshr_n_v:
1635  case ARM::BI__builtin_neon_vrshrq_n_v:
1636    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1637    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrshr_n", false,
1638                        1, true);
1639  case ARM::BI__builtin_neon_vrsqrte_v:
1640  case ARM::BI__builtin_neon_vrsqrteq_v:
1641    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, &Ty, 1),
1642                        Ops, "vrsqrte");
1643  case ARM::BI__builtin_neon_vrsqrts_v:
1644  case ARM::BI__builtin_neon_vrsqrtsq_v:
1645    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, &Ty, 1),
1646                        Ops, "vrsqrts");
1647  case ARM::BI__builtin_neon_vrsra_n_v:
1648  case ARM::BI__builtin_neon_vrsraq_n_v:
1649    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1650    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1651    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
1652    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
1653    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, &Ty, 1), Ops[1], Ops[2]);
1654    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
1655  case ARM::BI__builtin_neon_vrsubhn_v:
1656    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, &Ty, 1),
1657                        Ops, "vrsubhn");
1658  case ARM::BI__builtin_neon_vset_lane_i8:
1659  case ARM::BI__builtin_neon_vset_lane_i16:
1660  case ARM::BI__builtin_neon_vset_lane_i32:
1661  case ARM::BI__builtin_neon_vset_lane_i64:
1662  case ARM::BI__builtin_neon_vset_lane_f32:
1663  case ARM::BI__builtin_neon_vsetq_lane_i8:
1664  case ARM::BI__builtin_neon_vsetq_lane_i16:
1665  case ARM::BI__builtin_neon_vsetq_lane_i32:
1666  case ARM::BI__builtin_neon_vsetq_lane_i64:
1667  case ARM::BI__builtin_neon_vsetq_lane_f32:
1668    Ops.push_back(EmitScalarExpr(E->getArg(2)));
1669    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
1670  case ARM::BI__builtin_neon_vshl_v:
1671  case ARM::BI__builtin_neon_vshlq_v:
1672    Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
1673    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vshl");
1674  case ARM::BI__builtin_neon_vshll_n_v:
1675    Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
1676    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vshll", false, 1);
1677  case ARM::BI__builtin_neon_vshl_n_v:
1678  case ARM::BI__builtin_neon_vshlq_n_v:
1679    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1680    return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1], "vshl_n");
1681  case ARM::BI__builtin_neon_vshrn_n_v:
1682    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, &Ty, 1),
1683                        Ops, "vshrn_n", false, 1, true);
1684  case ARM::BI__builtin_neon_vshr_n_v:
1685  case ARM::BI__builtin_neon_vshrq_n_v:
1686    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1687    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
1688    if (usgn)
1689      return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
1690    else
1691      return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
1692  case ARM::BI__builtin_neon_vsri_n_v:
1693  case ARM::BI__builtin_neon_vsriq_n_v:
1694    poly = true;
1695  case ARM::BI__builtin_neon_vsli_n_v:
1696  case ARM::BI__builtin_neon_vsliq_n_v:
1697    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, poly);
1698    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, &Ty, 1),
1699                        Ops, "vsli_n");
1700  case ARM::BI__builtin_neon_vsra_n_v:
1701  case ARM::BI__builtin_neon_vsraq_n_v:
1702    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1703    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1704    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
1705    if (usgn)
1706      Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
1707    else
1708      Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
1709    return Builder.CreateAdd(Ops[0], Ops[1]);
1710  case ARM::BI__builtin_neon_vst1_v:
1711  case ARM::BI__builtin_neon_vst1q_v:
1712    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1713    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, &Ty, 1),
1714                        Ops, "");
1715  case ARM::BI__builtin_neon_vst1_lane_v:
1716  case ARM::BI__builtin_neon_vst1q_lane_v:
1717    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1718    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
1719    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
1720    return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
1721  case ARM::BI__builtin_neon_vst2_v:
1722  case ARM::BI__builtin_neon_vst2q_v:
1723    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1724    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, &Ty, 1),
1725                        Ops, "");
1726  case ARM::BI__builtin_neon_vst2_lane_v:
1727  case ARM::BI__builtin_neon_vst2q_lane_v:
1728    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1729    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, &Ty, 1),
1730                        Ops, "");
1731  case ARM::BI__builtin_neon_vst3_v:
1732  case ARM::BI__builtin_neon_vst3q_v:
1733    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1734    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, &Ty, 1),
1735                        Ops, "");
1736  case ARM::BI__builtin_neon_vst3_lane_v:
1737  case ARM::BI__builtin_neon_vst3q_lane_v:
1738    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1739    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, &Ty, 1),
1740                        Ops, "");
1741  case ARM::BI__builtin_neon_vst4_v:
1742  case ARM::BI__builtin_neon_vst4q_v:
1743    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1744    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, &Ty, 1),
1745                        Ops, "");
1746  case ARM::BI__builtin_neon_vst4_lane_v:
1747  case ARM::BI__builtin_neon_vst4q_lane_v:
1748    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
1749    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, &Ty, 1),
1750                        Ops, "");
1751  case ARM::BI__builtin_neon_vsubhn_v:
1752    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, &Ty, 1),
1753                        Ops, "vsubhn");
1754  case ARM::BI__builtin_neon_vsubl_v: {
1755    const llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
1756    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
1757    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
1758    if (usgn) {
1759      Ops[0] = Builder.CreateZExt(Ops[0], Ty);
1760      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1761    } else {
1762      Ops[0] = Builder.CreateSExt(Ops[0], Ty);
1763      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1764    }
1765    return Builder.CreateSub(Ops[0], Ops[1], "vsubl");
1766  }
1767  case ARM::BI__builtin_neon_vsubw_v: {
1768    const llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
1769    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1770    Ops[1] = Builder.CreateBitCast(Ops[1], DTy);
1771    if (usgn)
1772      Ops[1] = Builder.CreateZExt(Ops[1], Ty);
1773    else
1774      Ops[1] = Builder.CreateSExt(Ops[1], Ty);
1775    return Builder.CreateSub(Ops[0], Ops[1], "vsubw");
1776  }
1777  case ARM::BI__builtin_neon_vtbl1_v:
1778    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
1779                        Ops, "vtbl1");
1780  case ARM::BI__builtin_neon_vtbl2_v:
1781    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
1782                        Ops, "vtbl2");
1783  case ARM::BI__builtin_neon_vtbl3_v:
1784    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
1785                        Ops, "vtbl3");
1786  case ARM::BI__builtin_neon_vtbl4_v:
1787    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
1788                        Ops, "vtbl4");
1789  case ARM::BI__builtin_neon_vtbx1_v:
1790    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
1791                        Ops, "vtbx1");
1792  case ARM::BI__builtin_neon_vtbx2_v:
1793    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
1794                        Ops, "vtbx2");
1795  case ARM::BI__builtin_neon_vtbx3_v:
1796    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
1797                        Ops, "vtbx3");
1798  case ARM::BI__builtin_neon_vtbx4_v:
1799    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
1800                        Ops, "vtbx4");
1801  case ARM::BI__builtin_neon_vtst_v:
1802  case ARM::BI__builtin_neon_vtstq_v: {
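        // vtst sets a lane to all-ones when (a & b) is non-zero: AND the
        // inputs, compare against zero, and sign-extend the i1 results back to
        // the full lane width.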
1803    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
1804    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1805    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
1806    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
1807                                ConstantAggregateZero::get(Ty));
1808    return Builder.CreateSExt(Ops[0], Ty, "vtst");
1809  }
1810  case ARM::BI__builtin_neon_vtrn_v:
1811  case ARM::BI__builtin_neon_vtrnq_v: {
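        // vtrn writes two vectors through the result pointer (Ops[0]): one
        // interleaving the even-numbered lanes of the inputs, one the odd
        // lanes. Emit a shuffle for each half and store it at index vi.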
1812    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1813    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1814    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1815    Value *SV;
1816
1817    for (unsigned vi = 0; vi != 2; ++vi) {
1818      SmallVector<Constant*, 16> Indices;
1819      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1820        Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
1821        Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
1822      }
1823      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1824      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1825      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
1826      SV = Builder.CreateStore(SV, Addr);
1827    }
1828    return SV;
1829  }
1830  case ARM::BI__builtin_neon_vuzp_v:
1831  case ARM::BI__builtin_neon_vuzpq_v: {
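        // vuzp de-interleaves: the first result takes the even-numbered lanes
        // of the concatenated inputs and the second takes the odd lanes; both
        // are written through the result pointer (Ops[0]).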
1832    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1833    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1834    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1835    Value *SV;
1836
1837    for (unsigned vi = 0; vi != 2; ++vi) {
1838      SmallVector<Constant*, 16> Indices;
1839      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
1840        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
1841
1842      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1843      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1844      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
1845      SV = Builder.CreateStore(SV, Addr);
1846    }
1847    return SV;
1848  }
1849  case ARM::BI__builtin_neon_vzip_v:
1850  case ARM::BI__builtin_neon_vzipq_v: {
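        // vzip interleaves the low halves of the two inputs into the first
        // result and the high halves into the second, again written through
        // the result pointer (Ops[0]).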
1851    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
1852    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
1853    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
1854    Value *SV;
1855
1856    for (unsigned vi = 0; vi != 2; ++vi) {
1857      SmallVector<Constant*, 16> Indices;
1858      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
1859        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
1860        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
1861      }
1862      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
1863      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
1864      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
1865      SV = Builder.CreateStore(SV, Addr);
1866    }
1867    return SV;
1868  }
1869  }
1870}
1871
1872llvm::Value *
1873CodeGenFunction::BuildVector(const llvm::SmallVectorImpl<llvm::Value*> &Ops) {
1874  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
1875         "Not a power-of-two sized vector!");
1876  bool AllConstants = true;
1877  for (unsigned I = 0, E = Ops.size(); I != E && AllConstants; ++I)
1878    AllConstants &= isa<Constant>(Ops[I]);
1879
1880  // If this is a constant vector, create a ConstantVector.
1881  if (AllConstants) {
1882    std::vector<Constant*> CstOps;
1883    for (unsigned I = 0, E = Ops.size(); I != E; ++I)
1884      CstOps.push_back(cast<Constant>(Ops[I]));
1885    return ConstantVector::get(CstOps);
1886  }
1887
1888  // Otherwise, insertelement the values to build the vector.
1889  Value *Result =
1890    llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
1891
1892  for (unsigned I = 0, E = Ops.size(); I != E; ++I)
1893    Result = Builder.CreateInsertElement(Result, Ops[I],
1894     llvm::ConstantInt::get(llvm::Type::getInt32Ty(VMContext), I));
1895
1896  return Result;
1897}
1898
1899Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1900                                           const CallExpr *E) {
1901
1902  llvm::SmallVector<Value*, 4> Ops;
1903
1904  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
1905    Ops.push_back(EmitScalarExpr(E->getArg(i)));
1906
1907  switch (BuiltinID) {
1908  default: return 0;
1909  case X86::BI__builtin_ia32_pslldi128:
1910  case X86::BI__builtin_ia32_psllqi128:
1911  case X86::BI__builtin_ia32_psllwi128:
1912  case X86::BI__builtin_ia32_psradi128:
1913  case X86::BI__builtin_ia32_psrawi128:
1914  case X86::BI__builtin_ia32_psrldi128:
1915  case X86::BI__builtin_ia32_psrlqi128:
1916  case X86::BI__builtin_ia32_psrlwi128: {
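        // These intrinsics take the shift count as a full vector with the
        // count in the low 64 bits, so zero-extend the scalar count to i64,
        // insert it into lane 0 of an undef v2i64, and bitcast the result to
        // the type of the data operand.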
1917    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
1918    const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
1919    llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
1920    Ops[1] = Builder.CreateInsertElement(llvm::UndefValue::get(Ty),
1921                                         Ops[1], Zero, "insert");
1922    Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType(), "bitcast");
1923    const char *name = 0;
1924    Intrinsic::ID ID = Intrinsic::not_intrinsic;
1925
1926    switch (BuiltinID) {
1927    default: assert(0 && "Unsupported shift intrinsic!");
1928    case X86::BI__builtin_ia32_pslldi128:
1929      name = "pslldi";
1930      ID = Intrinsic::x86_sse2_psll_d;
1931      break;
1932    case X86::BI__builtin_ia32_psllqi128:
1933      name = "psllqi";
1934      ID = Intrinsic::x86_sse2_psll_q;
1935      break;
1936    case X86::BI__builtin_ia32_psllwi128:
1937      name = "psllwi";
1938      ID = Intrinsic::x86_sse2_psll_w;
1939      break;
1940    case X86::BI__builtin_ia32_psradi128:
1941      name = "psradi";
1942      ID = Intrinsic::x86_sse2_psra_d;
1943      break;
1944    case X86::BI__builtin_ia32_psrawi128:
1945      name = "psrawi";
1946      ID = Intrinsic::x86_sse2_psra_w;
1947      break;
1948    case X86::BI__builtin_ia32_psrldi128:
1949      name = "psrldi";
1950      ID = Intrinsic::x86_sse2_psrl_d;
1951      break;
1952    case X86::BI__builtin_ia32_psrlqi128:
1953      name = "psrlqi";
1954      ID = Intrinsic::x86_sse2_psrl_q;
1955      break;
1956    case X86::BI__builtin_ia32_psrlwi128:
1957      name = "psrlwi";
1958      ID = Intrinsic::x86_sse2_psrl_w;
1959      break;
1960    }
1961    llvm::Function *F = CGM.getIntrinsic(ID);
1962    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
1963  }
1964  case X86::BI__builtin_ia32_pslldi:
1965  case X86::BI__builtin_ia32_psllqi:
1966  case X86::BI__builtin_ia32_psllwi:
1967  case X86::BI__builtin_ia32_psradi:
1968  case X86::BI__builtin_ia32_psrawi:
1969  case X86::BI__builtin_ia32_psrldi:
1970  case X86::BI__builtin_ia32_psrlqi:
1971  case X86::BI__builtin_ia32_psrlwi: {
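        // Widen the scalar shift count to 64 bits and reinterpret it as a
        // one-element vector before handing it to the MMX shift intrinsic.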
1972    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
1973    const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 1);
1974    Ops[1] = Builder.CreateBitCast(Ops[1], Ty, "bitcast");
1975    const char *name = 0;
1976    Intrinsic::ID ID = Intrinsic::not_intrinsic;
1977
1978    switch (BuiltinID) {
1979    default: assert(0 && "Unsupported shift intrinsic!");
1980    case X86::BI__builtin_ia32_pslldi:
1981      name = "pslldi";
1982      ID = Intrinsic::x86_mmx_psll_d;
1983      break;
1984    case X86::BI__builtin_ia32_psllqi:
1985      name = "psllqi";
1986      ID = Intrinsic::x86_mmx_psll_q;
1987      break;
1988    case X86::BI__builtin_ia32_psllwi:
1989      name = "psllwi";
1990      ID = Intrinsic::x86_mmx_psll_w;
1991      break;
1992    case X86::BI__builtin_ia32_psradi:
1993      name = "psradi";
1994      ID = Intrinsic::x86_mmx_psra_d;
1995      break;
1996    case X86::BI__builtin_ia32_psrawi:
1997      name = "psrawi";
1998      ID = Intrinsic::x86_mmx_psra_w;
1999      break;
2000    case X86::BI__builtin_ia32_psrldi:
2001      name = "psrldi";
2002      ID = Intrinsic::x86_mmx_psrl_d;
2003      break;
2004    case X86::BI__builtin_ia32_psrlqi:
2005      name = "psrlqi";
2006      ID = Intrinsic::x86_mmx_psrl_q;
2007      break;
2008    case X86::BI__builtin_ia32_psrlwi:
2009      name = "psrlwi";
2010      ID = Intrinsic::x86_mmx_psrl_w;
2011      break;
2012    }
2013    llvm::Function *F = CGM.getIntrinsic(ID);
2014    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
2015  }
2016  case X86::BI__builtin_ia32_vec_init_v8qi:
2017  case X86::BI__builtin_ia32_vec_init_v4hi:
2018  case X86::BI__builtin_ia32_vec_init_v2si:
2019    return Builder.CreateBitCast(BuildVector(Ops),
2020                                 llvm::Type::getX86_MMXTy(VMContext));
2021  case X86::BI__builtin_ia32_cmpps: {
2022    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps);
2023    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpps");
2024  }
2025  case X86::BI__builtin_ia32_cmpss: {
2026    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ss);
2027    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpss");
2028  }
2029  case X86::BI__builtin_ia32_ldmxcsr: {
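        // ldmxcsr reads the new control word from memory, so spill the i32
        // argument to a stack temporary and pass its address to the intrinsic.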
2030    const llvm::Type *PtrTy = llvm::Type::getInt8PtrTy(VMContext);
2031    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
2032    Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp");
2033    Builder.CreateStore(Ops[0], Tmp);
2034    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
2035                              Builder.CreateBitCast(Tmp, PtrTy));
2036  }
2037  case X86::BI__builtin_ia32_stmxcsr: {
2038    const llvm::Type *PtrTy = llvm::Type::getInt8PtrTy(VMContext);
2039    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
2040    Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp");
2041    One = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
2042                             Builder.CreateBitCast(Tmp, PtrTy));
2043    return Builder.CreateLoad(Tmp, "stmxcsr");
2044  }
2045  case X86::BI__builtin_ia32_cmppd: {
2046    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_pd);
2047    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmppd");
2048  }
2049  case X86::BI__builtin_ia32_cmpsd: {
2050    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_sd);
2051    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpsd");
2052  }
2053  case X86::BI__builtin_ia32_storehps:
2054  case X86::BI__builtin_ia32_storelps: {
2055    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
2056    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2057
2058    // Cast the value to v2i64.
2059    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
2060
2061    // Extract element 0 (storelps) or element 1 (storehps).
2062    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
2063    llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
2064    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
2065
2066    // Cast the pointer to i64* and store the extracted element.
2067    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
2068    return Builder.CreateStore(Ops[1], Ops[0]);
2069  }
2070  case X86::BI__builtin_ia32_palignr128: {
2071    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
2072
2073    // If palignr is shifting the pair of input vectors less than 17 bytes,
2074    // emit a shuffle instruction.
2075    if (shiftVal <= 16) {
2076      llvm::SmallVector<llvm::Constant*, 16> Indices;
2077      for (unsigned i = 0; i != 16; ++i)
2078        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
2079
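          // Listing Ops[1] first means shuffle indices 0-15 select its bytes
          // and indices 16-31 select bytes of Ops[0], so index shiftVal+i
          // walks the concatenation exactly as palignr does.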
2080      Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
2081      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
2082    }
2083
2084    // If palignr is shifting the pair of input vectors more than 16 but less
2085    // than 32 bytes, emit a logical right shift of the destination.
2086    if (shiftVal < 32) {
2087      const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
2088
2089      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
2090      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
2091
2092      // psrl.dq takes its shift amount in bits, hence the multiply by 8 above.
2093      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
2094      return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
2095    }
2096
2097    // If palignr is shifting the pair of vectors by 32 bytes or more, emit zero.
2098    return llvm::Constant::getNullValue(ConvertType(E->getType()));
2099  }
2100  }
2101}
2102
2103Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
2104                                           const CallExpr *E) {
2105  llvm::SmallVector<Value*, 4> Ops;
2106
2107  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
2108    Ops.push_back(EmitScalarExpr(E->getArg(i)));
2109
2110  Intrinsic::ID ID = Intrinsic::not_intrinsic;
2111
2112  switch (BuiltinID) {
2113  default: return 0;
2114
2115  // vec_ld, vec_lvsl, vec_lvsr
2116  case PPC::BI__builtin_altivec_lvx:
2117  case PPC::BI__builtin_altivec_lvxl:
2118  case PPC::BI__builtin_altivec_lvebx:
2119  case PPC::BI__builtin_altivec_lvehx:
2120  case PPC::BI__builtin_altivec_lvewx:
2121  case PPC::BI__builtin_altivec_lvsl:
2122  case PPC::BI__builtin_altivec_lvsr:
2123  {
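        // The AltiVec load builtins take (offset, pointer); fold the offset
        // into the address with a GEP so that only the effective address is
        // passed to the intrinsic.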
2124    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::Type::getInt8PtrTy(VMContext));
2125
2126    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0], "tmp");
2127    Ops.pop_back();
2128
2129    switch (BuiltinID) {
2130    default: assert(0 && "Unsupported ld/lvsl/lvsr intrinsic!");
2131    case PPC::BI__builtin_altivec_lvx:
2132      ID = Intrinsic::ppc_altivec_lvx;
2133      break;
2134    case PPC::BI__builtin_altivec_lvxl:
2135      ID = Intrinsic::ppc_altivec_lvxl;
2136      break;
2137    case PPC::BI__builtin_altivec_lvebx:
2138      ID = Intrinsic::ppc_altivec_lvebx;
2139      break;
2140    case PPC::BI__builtin_altivec_lvehx:
2141      ID = Intrinsic::ppc_altivec_lvehx;
2142      break;
2143    case PPC::BI__builtin_altivec_lvewx:
2144      ID = Intrinsic::ppc_altivec_lvewx;
2145      break;
2146    case PPC::BI__builtin_altivec_lvsl:
2147      ID = Intrinsic::ppc_altivec_lvsl;
2148      break;
2149    case PPC::BI__builtin_altivec_lvsr:
2150      ID = Intrinsic::ppc_altivec_lvsr;
2151      break;
2152    }
2153    llvm::Function *F = CGM.getIntrinsic(ID);
2154    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "");
2155  }
2156
2157  // vec_st
2158  case PPC::BI__builtin_altivec_stvx:
2159  case PPC::BI__builtin_altivec_stvxl:
2160  case PPC::BI__builtin_altivec_stvebx:
2161  case PPC::BI__builtin_altivec_stvehx:
2162  case PPC::BI__builtin_altivec_stvewx:
2163  {
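        // Likewise for the stores: fold the offset into the pointer so the
        // intrinsic receives (value, effective address).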
2164    Ops[2] = Builder.CreateBitCast(Ops[2], llvm::Type::getInt8PtrTy(VMContext));
2165    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1], "tmp");
2166    Ops.pop_back();
2167
2168    switch (BuiltinID) {
2169    default: assert(0 && "Unsupported st intrinsic!");
2170    case PPC::BI__builtin_altivec_stvx:
2171      ID = Intrinsic::ppc_altivec_stvx;
2172      break;
2173    case PPC::BI__builtin_altivec_stvxl:
2174      ID = Intrinsic::ppc_altivec_stvxl;
2175      break;
2176    case PPC::BI__builtin_altivec_stvebx:
2177      ID = Intrinsic::ppc_altivec_stvebx;
2178      break;
2179    case PPC::BI__builtin_altivec_stvehx:
2180      ID = Intrinsic::ppc_altivec_stvehx;
2181      break;
2182    case PPC::BI__builtin_altivec_stvewx:
2183      ID = Intrinsic::ppc_altivec_stvewx;
2184      break;
2185    }
2186    llvm::Function *F = CGM.getIntrinsic(ID);
2187    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "");
2188  }
2189  }
2190  return 0;
2191}
2192