CGBuiltin.cpp revision db4325b098eff5e9e660db19f0148423fb21f27f
//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "TargetInfo.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "CGObjCRuntime.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/Intrinsics.h"
#include "llvm/Target/TargetData.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm;

static void EmitMemoryBarrier(CodeGenFunction &CGF,
                              bool LoadLoad, bool LoadStore,
                              bool StoreLoad, bool StoreStore,
                              bool Device) {
  Value *True = llvm::ConstantInt::getTrue(CGF.getLLVMContext());
  Value *False = llvm::ConstantInt::getFalse(CGF.getLLVMContext());
  Value *C[5] = { LoadLoad ? True : False,
                  LoadStore ? True : False,
                  StoreLoad ? True : False,
                  StoreStore ? True : False,
                  Device ? True : False };
  CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::memory_barrier),
                         C, C + 5);
}

static Value *EmitCastToInt(CodeGenFunction &CGF,
                            const llvm::Type *ToType, Value *Val) {
  if (Val->getType()->isPointerTy()) {
    return CGF.Builder.CreatePtrToInt(Val, ToType);
  }
  assert(Val->getType()->isIntegerTy() &&
         "Used a non-integer and non-pointer type with atomic builtin");
  assert(Val->getType()->getScalarSizeInBits() <=
         ToType->getScalarSizeInBits() && "Integer type too small");
  return CGF.Builder.CreateSExtOrBitCast(Val, ToType);
}

static Value *EmitCastFromInt(CodeGenFunction &CGF, QualType ToQualType,
                              Value *Val) {
  const llvm::Type *ToType = CGF.ConvertType(ToQualType);
  if (ToType->isPointerTy()) {
    return CGF.Builder.CreateIntToPtr(Val, ToType);
  }
  assert(Val->getType()->isIntegerTy() &&
         "Used a non-integer and non-pointer type with atomic builtin");
  assert(Val->getType()->getScalarSizeInBits() >=
         ToType->getScalarSizeInBits() && "Integer type too small");
  return CGF.Builder.CreateTruncOrBitCast(Val, ToType);
}

// The atomic builtins are also full memory barriers. This is a utility for
// wrapping a call to the builtins with memory barriers.
static Value *EmitCallWithBarrier(CodeGenFunction &CGF, Value *Fn,
                                  Value **ArgBegin, Value **ArgEnd) {
  // FIXME: We need a target hook for whether this applies to device memory or
  // not.
  bool Device = true;

  // Create barriers both before and after the call.
  EmitMemoryBarrier(CGF, true, true, true, true, Device);
  Value *Result = CGF.Builder.CreateCall(Fn, ArgBegin, ArgEnd);
  EmitMemoryBarrier(CGF, true, true, true, true, Device);
  return Result;
}
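
// For illustration only (assuming the pre-2.9 overloaded atomic intrinsics
// and the five-flag llvm.memory.barrier of this era), a barrier-wrapped
// atomic add comes out as IR of roughly this shape:
//
//   call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
//   %old = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %ptr, i32 %val)
//   call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)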

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               Intrinsic::ID Id, const CallExpr *E) {
  const llvm::Type *ValueType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(E->getType()));
  const llvm::Type *PtrType = ValueType->getPointerTo();
  const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
  Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes, 2);

  Value *Args[2] = { CGF.Builder.CreateBitCast(CGF.EmitScalarExpr(E->getArg(0)),
                                               PtrType),
                     EmitCastToInt(CGF, ValueType,
                                   CGF.EmitScalarExpr(E->getArg(1))) };
  return RValue::get(EmitCastFromInt(CGF, E->getType(),
                                     EmitCallWithBarrier(CGF, AtomF, Args,
                                                         Args + 2)));
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   Intrinsic::ID Id, const CallExpr *E,
                                   Instruction::BinaryOps Op) {
  const llvm::Type *ValueType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(E->getType()));
  const llvm::Type *PtrType = ValueType->getPointerTo();
  const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
  Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes, 2);

  Value *Args[2] = { CGF.Builder.CreateBitCast(CGF.EmitScalarExpr(E->getArg(0)),
                                               PtrType),
                     EmitCastToInt(CGF, ValueType,
                                   CGF.EmitScalarExpr(E->getArg(1))) };
  Value *Result = EmitCallWithBarrier(CGF, AtomF, Args, Args + 2);
  return RValue::get(EmitCastFromInt(CGF, E->getType(),
                                     CGF.Builder.CreateBinOp(Op, Result,
                                                             Args[1])));
}
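
// A minimal usage sketch (not from this file): the two helpers differ only
// in which value the builtin hands back to the caller.
//
//   int old = __sync_fetch_and_add(&counter, 1);  // EmitBinaryAtomic: old value
//   int now = __sync_add_and_fetch(&counter, 1);  // EmitBinaryAtomicPost:
//                                                 // old value + operand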

/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
/// which must be a scalar floating point type.
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
  const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
  assert(ValTyP && "isn't scalar fp type!");

  StringRef FnName;
  switch (ValTyP->getKind()) {
  default: assert(0 && "Isn't a scalar fp type!");
  case BuiltinType::Float:      FnName = "fabsf"; break;
  case BuiltinType::Double:     FnName = "fabs"; break;
  case BuiltinType::LongDouble: FnName = "fabsl"; break;
  }

  // The prototype is something that takes and returns whatever V's type is.
  std::vector<const llvm::Type*> Args;
  Args.push_back(V->getType());
  llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), Args, false);
  llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);

  return CGF.Builder.CreateCall(Fn, V, "abs");
}

RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                        unsigned BuiltinID, const CallExpr *E) {
  // See if we can constant fold this builtin.  If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->Evaluate(Result, CGM.getContext())) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(VMContext,
                                                Result.Val.getInt()));
    else if (Result.Val.isFloat())
      return RValue::get(ConstantFP::get(VMContext, Result.Val.getFloat()));
  }

  switch (BuiltinID) {
  default: break;  // Handle intrinsics and libm functions below.
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__builtin_va_end: {
    Value *ArgValue = EmitVAListRef(E->getArg(0));
    const llvm::Type *DestType = llvm::Type::getInt8PtrTy(VMContext);
    if (ArgValue->getType() != DestType)
      ArgValue = Builder.CreateBitCast(ArgValue, DestType,
                                       ArgValue->getName().data());

    Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
      Intrinsic::vaend : Intrinsic::vastart;
    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
  }
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0));
    Value *SrcPtr = EmitVAListRef(E->getArg(1));

    const llvm::Type *Type = llvm::Type::getInt8PtrTy(VMContext);

    DstPtr = Builder.CreateBitCast(DstPtr, Type);
    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
                                           DstPtr, SrcPtr));
  }
  case Builtin::BI__builtin_abs: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
      Builder.CreateICmpSGE(ArgValue,
                            llvm::Constant::getNullValue(ArgValue->getType()),
                            "abscond");
    Value *Result =
      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");

    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
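  // Illustrative IR for the two cases above (assuming the unary llvm.cttz /
  // llvm.ctlz of this era, which take no is-zero-undef flag):
  //
  //   %tmp = call i32 @llvm.cttz.i32(i32 %x)
  //
  // __builtin_ctz(0) is undefined at the source level, so lowering it
  // unconditionally is fine.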
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateAdd(Builder.CreateCall(F, ArgValue, "tmp"),
                                   llvm::ConstantInt::get(ArgType, 1), "tmp");
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue, "tmp");
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1),
                                      "tmp");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_expect:
    // FIXME: pass expect through to LLVM
    return RValue::get(EmitScalarExpr(E->getArg(0)));
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::bswap, &ArgType, 1);
    return RValue::get(Builder.CreateCall(F, ArgValue, "tmp"));
  }
  case Builtin::BI__builtin_object_size: {
    // We pass this builtin onto the optimizer so that it can
    // figure out the object size in more complex cases.
    const llvm::Type *ResType[] = {
      ConvertType(E->getType())
    };

    // LLVM only supports 0 and 2, make sure that we pass along that
    // as a boolean.
    Value *Ty = EmitScalarExpr(E->getArg(1));
    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
    assert(CI);
    uint64_t val = CI->getZExtValue();
    CI = ConstantInt::get(llvm::Type::getInt1Ty(VMContext), (val & 0x2) >> 1);

    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType, 1);
    return RValue::get(Builder.CreateCall2(F,
                                           EmitScalarExpr(E->getArg(0)),
                                           CI));
  }
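  // For instance (a sketch; the i1 flag matches the (val & 0x2) >> 1 mapping
  // above, so types 2 and 3 request the minimum size):
  //
  //   __builtin_object_size(p, 2)
  //     ==> %size = call i32 @llvm.objectsize.i32(i8* %p, i1 true)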
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch, 0, 0);
    return RValue::get(Builder.CreateCall3(F, Address, RW, Locality));
  }
  case Builtin::BI__builtin_trap: {
    Value *F = CGM.getIntrinsic(Intrinsic::trap, 0, 0);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_unreachable: {
    if (CatchUndefined && HaveInsertPoint())
      EmitBranch(getTrapBB());
    Value *V = Builder.CreateUnreachable();
    Builder.ClearInsertionPoint();
    return RValue::get(V);
  }

  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    const llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::powi, &ArgType, 1);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp"));
  }

  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: assert(0 && "Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()),
                                          "tmp"));
  }
  case Builtin::BI__builtin_isnan: {
    Value *V = EmitScalarExpr(E->getArg(0));
    V = Builder.CreateFCmpUNO(V, V, "cmp");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp"));
  }

  case Builtin::BI__builtin_isinf: {
    // isinf(x) --> fabs(x) == infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    V = EmitFAbs(*this, V, E->getArg(0)->getType());

    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp"));
  }
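
  // As a reference for the comparisons above: "uno" is true iff either
  // operand is a NaN, so __builtin_isnan(x) comes out as (sketch):
  //
  //   %cmp = fcmp uno double %x, %x
  //   %res = zext i1 %cmp to i32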

  // TODO: BI__builtin_isinf_sign
  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0

  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsLessThanInf =
      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isfinite: {
    // isfinite(x) --> x == x && fabs(x) != infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsNotInf =
      Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");

    V = Builder.CreateAnd(Eq, IsNotInf, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_fpclassify: {
    Value *V = EmitScalarExpr(E->getArg(5));
    const llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()),
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }
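  // For reference, the argument order above matches the GCC-style builtin
  // (the classified value comes last):
  //
  //   __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
  //                        FP_SUBNORMAL, FP_ZERO, x)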

  case Builtin::BIalloca:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    return RValue::get(Builder.CreateAlloca(llvm::Type::getInt8Ty(VMContext),
                                            Size, "tmp"));
  }
  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    Builder.CreateCall5(CGM.getMemSetFn(Address->getType(), SizeVal->getType()),
                        Address,
                        llvm::ConstantInt::get(llvm::Type::getInt8Ty(VMContext), 0),
                        SizeVal,
                        llvm::ConstantInt::get(Int32Ty, 1),
                        llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
    return RValue::get(Address);
  }
  case Builtin::BImemcpy:
  case Builtin::BI__builtin_memcpy: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateCall5(CGM.getMemCpyFn(Address->getType(), SrcAddr->getType(),
                                        SizeVal->getType()),
                        Address, SrcAddr, SizeVal,
                        llvm::ConstantInt::get(Int32Ty, 1),
                        llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
    return RValue::get(Address);
  }

  case Builtin::BI__builtin_objc_memmove_collectable: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
                                                  Address, SrcAddr, SizeVal);
    return RValue::get(Address);
  }

  case Builtin::BImemmove:
  case Builtin::BI__builtin_memmove: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateCall5(CGM.getMemMoveFn(Address->getType(), SrcAddr->getType(),
                                         SizeVal->getType()),
                        Address, SrcAddr, SizeVal,
                        llvm::ConstantInt::get(Int32Ty, 1),
                        llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
    return RValue::get(Address);
  }
  case Builtin::BImemset:
  case Builtin::BI__builtin_memset: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateCall5(CGM.getMemSetFn(Address->getType(), SizeVal->getType()),
                        Address,
                        Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                            llvm::Type::getInt8Ty(VMContext)),
                        SizeVal,
                        llvm::ConstantInt::get(Int32Ty, 1),
                        llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), 0));
    return RValue::get(Address);
  }
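  // The five operands above line up with the memory intrinsics of this LLVM
  // version, e.g. (illustrative):
  //
  //   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %n,
  //                                        i32 1,      ; alignment
  //                                        i1 false)   ; isvolatile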
  case Builtin::BI__builtin_dwarf_cfa: {
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend?  Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa, 0, 0);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
  case Builtin::BI__builtin_return_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp");
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress, 0, 0);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_frame_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp");
    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress, 0, 0);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_extract_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_frob_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dwarf_sp_column: {
    const llvm::IntegerType *Ty
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
    if (Column == -1) {
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
      return RValue::get(llvm::UndefValue::get(Ty));
    }
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  }
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_eh_return: {
    Value *Int = EmitScalarExpr(E->getArg(0));
    Value *Ptr = EmitScalarExpr(E->getArg(1));

    const llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                ? Intrinsic::eh_return_i32
                                : Intrinsic::eh_return_i64,
                                0, 0);
    Builder.CreateCall2(F, Int, Ptr);
    Value *V = Builder.CreateUnreachable();
    Builder.ClearInsertionPoint();
    return RValue::get(V);
  }
  case Builtin::BI__builtin_unwind_init: {
    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init, 0, 0);
    return RValue::get(Builder.CreateCall(F));
  }
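  // For the address builtins above, e.g. __builtin_return_address(0) simply
  // becomes (sketch):
  //
  //   %ra = call i8* @llvm.returnaddress(i32 0)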
  case Builtin::BI__builtin_extend_pointer: {
    // Extends a pointer to the size of an _Unwind_Word, which is
    // uint64_t on all platforms.  Generally this gets poked into a
    // register and eventually used as an address, so if the
    // addressing registers are wider than pointers and the platform
    // doesn't implicitly ignore high-order bits when doing
    // addressing, we need to make sure we zext / sext based on
    // the platform's expectations.
    //
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html

    LLVMContext &C = CGM.getLLVMContext();

    // Cast the pointer to intptr_t.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    const llvm::IntegerType *IntPtrTy = CGM.getTargetData().getIntPtrType(C);
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");

    // If that's 64 bits, we're done.
    if (IntPtrTy->getBitWidth() == 64)
      return RValue::get(Result);

    // Otherwise, ask the codegen data what to do.
    if (getTargetHooks().extendPointerWithSExt())
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
    else
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  }
  case Builtin::BI__builtin_setjmp: {
    // Buffer is a void**.
    Value *Buf = EmitScalarExpr(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    Value *StackAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Value *StackSaveSlot =
      Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, llvm::Type::getInt8PtrTy(VMContext));
    return RValue::get(Builder.CreateCall(F, Buf));
  }
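  // The resulting buffer layout, word by word (slot 1 is left for the
  // target-specific resume address that the llvm.eh.sjlj.setjmp lowering
  // fills in):
  //
  //   buf[0] = frame pointer (llvm.frameaddress)
  //   buf[1] = reserved for the SjLj resume IP
  //   buf[2] = stack pointer (llvm.stacksave)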
  case Builtin::BI__builtin_longjmp: {
    Value *Buf = EmitScalarExpr(E->getArg(0));
    Buf = Builder.CreateBitCast(Buf, llvm::Type::getInt8PtrTy(VMContext));

    // Call LLVM's EH longjmp, which is lightweight.
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);

    // longjmp doesn't return; mark this as unreachable.
    Value *V = Builder.CreateUnreachable();
    Builder.ClearInsertionPoint();
    return RValue::get(V);
  }
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
    assert(0 && "Shouldn't make it through sema");
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_and, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_xor, E);

  // Clang extensions: not overloaded yet.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umax, E);

  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_and, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_xor, E,
                                llvm::Instruction::Xor);

  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16: {
    const llvm::Type *ValueType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(E->getType()));
    const llvm::Type *PtrType = ValueType->getPointerTo();
    const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
    Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap,
                                    IntrinsicTypes, 2);

    Value *Args[3] = { Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
                                             PtrType),
                       EmitCastToInt(*this, ValueType,
                                     EmitScalarExpr(E->getArg(1))),
                       EmitCastToInt(*this, ValueType,
                                     EmitScalarExpr(E->getArg(2))) };
    return RValue::get(EmitCastFromInt(*this, E->getType(),
                                       EmitCallWithBarrier(*this, AtomF, Args,
                                                           Args + 3)));
  }
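  // Both compare-and-swap forms lower to the overloaded pre-2.9 intrinsic,
  // e.g. (sketch):
  //
  //   %old = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %ptr,
  //                                                   i32 %expected,
  //                                                   i32 %new)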

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16: {
    const llvm::Type *ValueType =
      llvm::IntegerType::get(
          getLLVMContext(),
          getContext().getTypeSize(E->getArg(1)->getType()));
    const llvm::Type *PtrType = ValueType->getPointerTo();
    const llvm::Type *IntrinsicTypes[2] = { ValueType, PtrType };
    Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap,
                                    IntrinsicTypes, 2);

    Value *Args[3] = { Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
                                             PtrType),
                       EmitCastToInt(*this, ValueType,
                                     EmitScalarExpr(E->getArg(1))),
                       EmitCastToInt(*this, ValueType,
                                     EmitScalarExpr(E->getArg(2))) };
    Value *OldVal = Args[1];
    Value *PrevVal = EmitCallWithBarrier(*this, AtomF, Args, Args + 3);
    Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
    // zext bool to int.
    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
  }

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_swap, E);

  case Builtin::BI__sync_lock_release_1:
  case Builtin::BI__sync_lock_release_2:
  case Builtin::BI__sync_lock_release_4:
  case Builtin::BI__sync_lock_release_8:
  case Builtin::BI__sync_lock_release_16: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    const llvm::Type *ElTy =
      cast<llvm::PointerType>(Ptr->getType())->getElementType();
    llvm::StoreInst *Store =
      Builder.CreateStore(llvm::Constant::getNullValue(ElTy), Ptr);
    Store->setVolatile(true);
    return RValue::get(0);
  }

  case Builtin::BI__sync_synchronize: {
    // We assume, like gcc appears to, that this only applies to cached memory.
    EmitMemoryBarrier(*this, true, true, true, true, false);
    return RValue::get(0);
  }

  case Builtin::BI__builtin_llvm_memory_barrier: {
    Value *C[5] = {
      EmitScalarExpr(E->getArg(0)),
      EmitScalarExpr(E->getArg(1)),
      EmitScalarExpr(E->getArg(2)),
      EmitScalarExpr(E->getArg(3)),
      EmitScalarExpr(E->getArg(4))
    };
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::memory_barrier), C, C + 5);
    return RValue::get(0);
  }

  // Library functions with special handling.
  case Builtin::BIsqrt:
  case Builtin::BIsqrtf:
  case Builtin::BIsqrtl: {
    // TODO: there is currently no set of optimizer flags
    // sufficient for us to rewrite sqrt to @llvm.sqrt.
    // -fmath-errno=0 is not good enough; we need finiteness.
    // We could probably precondition the call with an ult
    // against 0, but is that worth the complexity?
    break;
  }
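  // The pow* case below only fires when Sema marked the declaration 'const'
  // (i.e. no errno side effect), in which case it is safe to use @llvm.pow.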

  case Builtin::BIpow:
  case Builtin::BIpowf:
  case Builtin::BIpowl: {
    // Rewrite pow to an intrinsic if allowed.
    if (!FD->hasAttr<ConstAttr>())
      break;
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    const llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::pow, &ArgType, 1);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp"));
  }

  case Builtin::BI__builtin_signbit:
  case Builtin::BI__builtin_signbitf:
  case Builtin::BI__builtin_signbitl: {
    LLVMContext &C = CGM.getLLVMContext();

    Value *Arg = EmitScalarExpr(E->getArg(0));
    const llvm::Type *ArgTy = Arg->getType();
    if (ArgTy->isPPC_FP128Ty())
      break; // FIXME: I'm not sure what the right implementation is here.
    int ArgWidth = ArgTy->getPrimitiveSizeInBits();
    const llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
    Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
    Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
    Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
  }
  }

  // If this is an alias for a libm function (e.g. __builtin_sin) turn it into
  // that function.
  if (getContext().BuiltinInfo.isLibFunction(BuiltinID) ||
      getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
    return EmitCall(E->getCallee()->getType(),
                    CGM.getBuiltinLibFunction(FD, BuiltinID),
                    ReturnValueSlot(),
                    E->arg_begin(), E->arg_end());

  // See if we have a target specific intrinsic.
  const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
  if (const char *Prefix =
      llvm::Triple::getArchTypePrefix(Target.getTriple().getArch()))
    IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);

  if (IntrinsicID != Intrinsic::not_intrinsic) {
    SmallVector<Value*, 16> Args;

    Function *F = CGM.getIntrinsic(IntrinsicID);
    const llvm::FunctionType *FTy = F->getFunctionType();

    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
      Value *ArgValue = EmitScalarExpr(E->getArg(i));

      // If the intrinsic arg type is different from the builtin arg type
      // we need to do a bit cast.
      const llvm::Type *PTy = FTy->getParamType(i);
      if (PTy != ArgValue->getType()) {
        assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
               "Must be able to losslessly bit cast to param");
        ArgValue = Builder.CreateBitCast(ArgValue, PTy);
      }

      Args.push_back(ArgValue);
    }

    Value *V = Builder.CreateCall(F, Args.data(), Args.data() + Args.size());
    QualType BuiltinRetType = E->getType();

    const llvm::Type *RetTy = llvm::Type::getVoidTy(VMContext);
    if (!BuiltinRetType->isVoidType()) RetTy = ConvertType(BuiltinRetType);

    if (RetTy != V->getType()) {
      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
             "Must be able to losslessly bit cast result type");
      V = Builder.CreateBitCast(V, RetTy);
    }

    return RValue::get(V);
  }
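  // For instance (assuming the usual TableGen GCCBuiltin mappings), on x86
  // __builtin_ia32_pmulhuw128 resolves through
  // Intrinsic::getIntrinsicForGCCBuiltin("x86", Name) to
  // @llvm.x86.sse2.pmulhu.w.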

  // See if we have a target specific builtin that needs to be lowered.
  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
    return RValue::get(V);

  ErrorUnsupported(E, "builtin function");

  // Unknown builtin, for now just dump it out and return undef.
  if (hasAggregateLLVMType(E->getType()))
    return RValue::getAggregate(CreateMemTemp(E->getType()));
  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
}

Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  switch (Target.getTriple().getArch()) {
  case llvm::Triple::arm:
  case llvm::Triple::thumb:
    return EmitARMBuiltinExpr(BuiltinID, E);
  case llvm::Triple::x86:
  case llvm::Triple::x86_64:
    return EmitX86BuiltinExpr(BuiltinID, E);
  case llvm::Triple::ppc:
  case llvm::Triple::ppc64:
    return EmitPPCBuiltinExpr(BuiltinID, E);
  default:
    return 0;
  }
}

const llvm::VectorType *GetNeonType(LLVMContext &C, unsigned type, bool q) {
  switch (type) {
  default: break;
  case 0:
  case 5: return llvm::VectorType::get(llvm::Type::getInt8Ty(C), 8 << (int)q);
  case 6:
  case 7:
  case 1: return llvm::VectorType::get(llvm::Type::getInt16Ty(C), 4 << (int)q);
  case 2: return llvm::VectorType::get(llvm::Type::getInt32Ty(C), 2 << (int)q);
  case 3: return llvm::VectorType::get(llvm::Type::getInt64Ty(C), 1 << (int)q);
  case 4: return llvm::VectorType::get(llvm::Type::getFloatTy(C), 2 << (int)q);
  }
  return 0;
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
  SmallVector<Constant*, 16> Indices(nElts, C);
  Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}

Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
                                     const char *name, bool splat,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j)
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);

  if (splat) {
    Ops[j-1] = EmitNeonSplat(Ops[j-1], cast<Constant>(Ops[j]));
    Ops.resize(j);
  }
  return Builder.CreateCall(F, Ops.begin(), Ops.end(), name);
}

Value *CodeGenFunction::EmitNeonShiftVector(Value *V, const llvm::Type *Ty,
                                            bool neg) {
  ConstantInt *CI = cast<ConstantInt>(V);
  int SV = CI->getSExtValue();

  const llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
  llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
  SmallVector<llvm::Constant*, 16> CV(VTy->getNumElements(), C);
  return llvm::ConstantVector::get(CV.begin(), CV.size());
}
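// For example, splatting lane 1 of a <4 x i16> with EmitNeonSplat produces
// (sketch):
//
//   %lane = shufflevector <4 x i16> %v, <4 x i16> %v,
//                         <4 x i32> <i32 1, i32 1, i32 1, i32 1>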

Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  if (BuiltinID == ARM::BI__clear_cache) {
    const FunctionDecl *FD = E->getDirectCallee();
    Value *a = EmitScalarExpr(E->getArg(0));
    Value *b = EmitScalarExpr(E->getArg(1));
    const llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    const llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    llvm::StringRef Name = FD->getName();
    return Builder.CreateCall2(CGM.CreateRuntimeFunction(FTy, Name),
                               a, b);
  }

  // Determine the type of this overloaded NEON intrinsic.
  assert(BuiltinID > ARM::BI__builtin_thread_pointer);

  llvm::SmallVector<Value*, 4> Ops;
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  llvm::APSInt Result;
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
  if (!Arg->isIntegerConstantExpr(Result, getContext()))
    return 0;

  unsigned type = Result.getZExtValue();
  bool usgn = type & 0x08;
  bool quad = type & 0x10;
  bool poly = (type & 0x7) == 5 || (type & 0x7) == 6;
  bool splat = false;

  const llvm::VectorType *VTy = GetNeonType(VMContext, type & 0x7, quad);
  const llvm::Type *Ty = VTy;
  if (!Ty)
    return 0;

  unsigned Int;
  switch (BuiltinID) {
  default: return 0;
  case ARM::BI__builtin_neon_vaba_v:
  case ARM::BI__builtin_neon_vabaq_v:
    Int = usgn ? Intrinsic::arm_neon_vabau : Intrinsic::arm_neon_vabas;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vaba");
  case ARM::BI__builtin_neon_vabal_v:
    Int = usgn ? Intrinsic::arm_neon_vabalu : Intrinsic::arm_neon_vabals;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabal");
  case ARM::BI__builtin_neon_vabd_v:
  case ARM::BI__builtin_neon_vabdq_v:
    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabd");
  case ARM::BI__builtin_neon_vabdl_v:
    Int = usgn ? Intrinsic::arm_neon_vabdlu : Intrinsic::arm_neon_vabdls;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabdl");
  case ARM::BI__builtin_neon_vabs_v:
  case ARM::BI__builtin_neon_vabsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, &Ty, 1),
                        Ops, "vabs");
  case ARM::BI__builtin_neon_vaddhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, &Ty, 1),
                        Ops, "vaddhn");
  case ARM::BI__builtin_neon_vaddl_v:
    Int = usgn ? Intrinsic::arm_neon_vaddlu : Intrinsic::arm_neon_vaddls;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vaddl");
  case ARM::BI__builtin_neon_vaddw_v:
    Int = usgn ? Intrinsic::arm_neon_vaddwu : Intrinsic::arm_neon_vaddws;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vaddw");
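  // The vcale/vcalt cases below have no intrinsic of their own: a <= b is
  // computed as b >= a, so they swap operands and fall through to the
  // corresponding vcage/vcagt handling.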
  case ARM::BI__builtin_neon_vcale_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcage_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged, &Ty, 1);
    return EmitNeonCall(F, Ops, "vcage");
  }
  case ARM::BI__builtin_neon_vcaleq_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcageq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq, &Ty, 1);
    return EmitNeonCall(F, Ops, "vcage");
  }
  case ARM::BI__builtin_neon_vcalt_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcagt_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd, &Ty, 1);
    return EmitNeonCall(F, Ops, "vcagt");
  }
  case ARM::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcagtq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq, &Ty, 1);
    return EmitNeonCall(F, Ops, "vcagt");
  }
  case ARM::BI__builtin_neon_vcls_v:
  case ARM::BI__builtin_neon_vclsq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, &Ty, 1);
    return EmitNeonCall(F, Ops, "vcls");
  }
  case ARM::BI__builtin_neon_vclz_v:
  case ARM::BI__builtin_neon_vclzq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, &Ty, 1);
    return EmitNeonCall(F, Ops, "vclz");
  }
  case ARM::BI__builtin_neon_vcnt_v:
  case ARM::BI__builtin_neon_vcntq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, &Ty, 1);
    return EmitNeonCall(F, Ops, "vcnt");
  }
  // FIXME: intrinsics for f16<->f32 convert missing from ARM target.
  case ARM::BI__builtin_neon_vcvt_f32_v:
  case ARM::BI__builtin_neon_vcvtq_f32_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(VMContext, 4, quad);
    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  }
  case ARM::BI__builtin_neon_vcvt_s32_v:
  case ARM::BI__builtin_neon_vcvt_u32_v:
  case ARM::BI__builtin_neon_vcvtq_s32_v:
  case ARM::BI__builtin_neon_vcvtq_u32_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(VMContext, 4, quad));
    return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case ARM::BI__builtin_neon_vcvt_n_f32_v:
  case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
    const llvm::Type *Tys[2] = { GetNeonType(VMContext, 4, quad), Ty };
    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp : Intrinsic::arm_neon_vcvtfxs2fp;
    Function *F = CGM.getIntrinsic(Int, Tys, 2);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case ARM::BI__builtin_neon_vcvt_n_s32_v:
  case ARM::BI__builtin_neon_vcvt_n_u32_v:
  case ARM::BI__builtin_neon_vcvtq_n_s32_v:
  case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
    const llvm::Type *Tys[2] = { Ty, GetNeonType(VMContext, 4, quad) };
    Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu : Intrinsic::arm_neon_vcvtfp2fxs;
    Function *F = CGM.getIntrinsic(Int, Tys, 2);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
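  // vext concatenates the two inputs and extracts a window starting at the
  // immediate lane, which maps directly onto a shufflevector mask; e.g. for
  // a <4 x i16> with an offset of 2, the mask built below is <2, 3, 4, 5>.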
  case ARM::BI__builtin_neon_vext_v:
  case ARM::BI__builtin_neon_vextq_v: {
    ConstantInt *C = dyn_cast<ConstantInt>(Ops[2]);
    int CV = C->getSExtValue();
    SmallVector<Constant*, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
  }
  case ARM::BI__builtin_neon_vget_lane_i8:
  case ARM::BI__builtin_neon_vget_lane_i16:
  case ARM::BI__builtin_neon_vget_lane_i32:
  case ARM::BI__builtin_neon_vget_lane_i64:
  case ARM::BI__builtin_neon_vget_lane_f32:
  case ARM::BI__builtin_neon_vgetq_lane_i8:
  case ARM::BI__builtin_neon_vgetq_lane_i16:
  case ARM::BI__builtin_neon_vgetq_lane_i32:
  case ARM::BI__builtin_neon_vgetq_lane_i64:
  case ARM::BI__builtin_neon_vgetq_lane_f32:
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case ARM::BI__builtin_neon_vhadd_v:
  case ARM::BI__builtin_neon_vhaddq_v:
    Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhadd");
  case ARM::BI__builtin_neon_vhsub_v:
  case ARM::BI__builtin_neon_vhsubq_v:
    Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhsub");
  case ARM::BI__builtin_neon_vld1_v:
  case ARM::BI__builtin_neon_vld1q_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, &Ty, 1),
                        Ops, "vld1");
  case ARM::BI__builtin_neon_vld1_lane_v:
  case ARM::BI__builtin_neon_vld1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[0] = Builder.CreateLoad(Ops[0]);
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  case ARM::BI__builtin_neon_vld1_dup_v:
  case ARM::BI__builtin_neon_vld1q_dup_v: {
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[0] = Builder.CreateLoad(Ops[0]);
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case ARM::BI__builtin_neon_vld2_v:
  case ARM::BI__builtin_neon_vld2q_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, &Ty, 1);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld3_v:
  case ARM::BI__builtin_neon_vld3q_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, &Ty, 1);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
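  // The vldN intrinsics return a struct of N vectors; the builtin's first
  // operand is the address of the NEON result aggregate, so the struct is
  // simply stored back through it (same pattern for vld4 and the lane/dup
  // variants below).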
  case ARM::BI__builtin_neon_vld4_v:
  case ARM::BI__builtin_neon_vld4q_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, &Ty, 1);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld2_lane_v:
  case ARM::BI__builtin_neon_vld2q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, &Ty, 1);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld2_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld3_lane_v:
  case ARM::BI__builtin_neon_vld3q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, &Ty, 1);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld3_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld4_lane_v:
  case ARM::BI__builtin_neon_vld4q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, &Ty, 1);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld4_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld2_dup_v:
  case ARM::BI__builtin_neon_vld3_dup_v:
  case ARM::BI__builtin_neon_vld4_dup_v: {
    switch (BuiltinID) {
    case ARM::BI__builtin_neon_vld2_dup_v:
      Int = Intrinsic::arm_neon_vld2lane;
      break;
    case ARM::BI__builtin_neon_vld3_dup_v:
      Int = Intrinsic::arm_neon_vld3lane;
      break;
    case ARM::BI__builtin_neon_vld4_dup_v:
      Int = Intrinsic::arm_neon_vld4lane;
      break;
    default: assert(0 && "unknown vld_dup intrinsic?");
    }
    Function *F = CGM.getIntrinsic(Int, &Ty, 1);
    const llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());

    SmallVector<Value*, 6> Args;
    Args.push_back(Ops[1]);
    Args.append(STy->getNumElements(), UndefValue::get(Ty));

    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Args.push_back(CI);

    Ops[1] = Builder.CreateCall(F, Args.begin(), Args.end(), "vld_dup");
    // splat lane 0 to all elts in each vector of the result.
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      Value *Val = Builder.CreateExtractValue(Ops[1], i);
      Value *Elt = Builder.CreateBitCast(Val, Ty);
      Elt = EmitNeonSplat(Elt, CI);
      Elt = Builder.CreateBitCast(Elt, Val->getType());
      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
    }
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vmax_v:
  case ARM::BI__builtin_neon_vmaxq_v:
    Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmax");
  case ARM::BI__builtin_neon_vmin_v:
  case ARM::BI__builtin_neon_vminq_v:
    Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmin");
  case ARM::BI__builtin_neon_vmlal_lane_v:
    splat = true;
  case ARM::BI__builtin_neon_vmlal_v:
    Int = usgn ? Intrinsic::arm_neon_vmlalu : Intrinsic::arm_neon_vmlals;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlal", splat);
  case ARM::BI__builtin_neon_vmlsl_lane_v:
    splat = true;
  case ARM::BI__builtin_neon_vmlsl_v:
    Int = usgn ? Intrinsic::arm_neon_vmlslu : Intrinsic::arm_neon_vmlsls;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlsl", splat);
  case ARM::BI__builtin_neon_vmovl_v:
    Int = usgn ? Intrinsic::arm_neon_vmovlu : Intrinsic::arm_neon_vmovls;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmovl");
  case ARM::BI__builtin_neon_vmovn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmovn, &Ty, 1),
                        Ops, "vmovn");
  case ARM::BI__builtin_neon_vmull_lane_v:
    splat = true;
  case ARM::BI__builtin_neon_vmull_v:
    Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = poly ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmull", splat);
  case ARM::BI__builtin_neon_vpadal_v:
  case ARM::BI__builtin_neon_vpadalq_v:
    Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpadal");
  case ARM::BI__builtin_neon_vpadd_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, &Ty, 1),
                        Ops, "vpadd");
  case ARM::BI__builtin_neon_vpaddl_v:
  case ARM::BI__builtin_neon_vpaddlq_v:
    Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpaddl");
  case ARM::BI__builtin_neon_vpmax_v:
    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmax");
  case ARM::BI__builtin_neon_vpmin_v:
    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmin");
  case ARM::BI__builtin_neon_vqabs_v:
  case ARM::BI__builtin_neon_vqabsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, &Ty, 1),
                        Ops, "vqabs");
  case ARM::BI__builtin_neon_vqadd_v:
  case ARM::BI__builtin_neon_vqaddq_v:
    Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqadd");
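  // As above, the _lane_ forms set splat and fall through: EmitNeonCall
  // splats the selected lane across a full vector before issuing the
  // regular intrinsic call.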
  case ARM::BI__builtin_neon_vmlal_lane_v:
    splat = true;
  case ARM::BI__builtin_neon_vmlal_v:
    Int = usgn ? Intrinsic::arm_neon_vmlalu : Intrinsic::arm_neon_vmlals;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlal", splat);
  case ARM::BI__builtin_neon_vmlsl_lane_v:
    splat = true;
  case ARM::BI__builtin_neon_vmlsl_v:
    Int = usgn ? Intrinsic::arm_neon_vmlslu : Intrinsic::arm_neon_vmlsls;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlsl", splat);
  case ARM::BI__builtin_neon_vmovl_v:
    Int = usgn ? Intrinsic::arm_neon_vmovlu : Intrinsic::arm_neon_vmovls;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmovl");
  case ARM::BI__builtin_neon_vmovn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmovn, &Ty, 1),
                        Ops, "vmovn");
  case ARM::BI__builtin_neon_vmull_lane_v:
    splat = true;
  case ARM::BI__builtin_neon_vmull_v:
    Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = poly ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmull", splat);
  case ARM::BI__builtin_neon_vpadal_v:
  case ARM::BI__builtin_neon_vpadalq_v:
    Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpadal");
  case ARM::BI__builtin_neon_vpadd_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, &Ty, 1),
                        Ops, "vpadd");
  case ARM::BI__builtin_neon_vpaddl_v:
  case ARM::BI__builtin_neon_vpaddlq_v:
    Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpaddl");
  case ARM::BI__builtin_neon_vpmax_v:
    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmax");
  case ARM::BI__builtin_neon_vpmin_v:
    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmin");
  case ARM::BI__builtin_neon_vqabs_v:
  case ARM::BI__builtin_neon_vqabsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, &Ty, 1),
                        Ops, "vqabs");
  case ARM::BI__builtin_neon_vqadd_v:
  case ARM::BI__builtin_neon_vqaddq_v:
    Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqadd");
  case ARM::BI__builtin_neon_vqdmlal_lane_v:
    splat = true;
  case ARM::BI__builtin_neon_vqdmlal_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, &Ty, 1),
                        Ops, "vqdmlal", splat);
  case ARM::BI__builtin_neon_vqdmlsl_lane_v:
    splat = true;
  case ARM::BI__builtin_neon_vqdmlsl_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, &Ty, 1),
                        Ops, "vqdmlsl", splat);
  case ARM::BI__builtin_neon_vqdmulh_lane_v:
  case ARM::BI__builtin_neon_vqdmulhq_lane_v:
    splat = true;
  case ARM::BI__builtin_neon_vqdmulh_v:
  case ARM::BI__builtin_neon_vqdmulhq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, &Ty, 1),
                        Ops, "vqdmulh", splat);
  case ARM::BI__builtin_neon_vqdmull_lane_v:
    splat = true;
  case ARM::BI__builtin_neon_vqdmull_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, &Ty, 1),
                        Ops, "vqdmull", splat);
  case ARM::BI__builtin_neon_vqmovn_v:
    Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqmovn");
  case ARM::BI__builtin_neon_vqmovun_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, &Ty, 1),
                        Ops, "vqmovun");
  case ARM::BI__builtin_neon_vqneg_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, &Ty, 1),
                        Ops, "vqneg");
  case ARM::BI__builtin_neon_vqrdmulh_lane_v:
  case ARM::BI__builtin_neon_vqrdmulhq_lane_v:
    splat = true;
  case ARM::BI__builtin_neon_vqrdmulh_v:
  case ARM::BI__builtin_neon_vqrdmulhq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, &Ty, 1),
                        Ops, "vqrdmulh", splat);
  case ARM::BI__builtin_neon_vqrshl_v:
  case ARM::BI__builtin_neon_vqrshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqrshl");
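  // In the EmitNeonCall invocations below, the trailing arguments are
  // (splat, shift, rightshift): 'shift' names the operand index holding
  // the shift amount, which is expanded into a constant shift vector and
  // negated when 'rightshift' is set, since the NEON shift intrinsics
  // encode right shifts as negative counts.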
  case ARM::BI__builtin_neon_vqrshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqrshrn_n", false,
                        1, true);
  case ARM::BI__builtin_neon_vqrshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, &Ty, 1),
                        Ops, "vqrshrun_n", false, 1, true);
  case ARM::BI__builtin_neon_vqshl_v:
  case ARM::BI__builtin_neon_vqshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshl");
  case ARM::BI__builtin_neon_vqshl_n_v:
  case ARM::BI__builtin_neon_vqshlq_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshl_n", false,
                        1, false);
  case ARM::BI__builtin_neon_vqshlu_n_v:
  case ARM::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, &Ty, 1),
                        Ops, "vqshlu_n", false, 1, false);
  case ARM::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshrn_n", false,
                        1, true);
  case ARM::BI__builtin_neon_vqshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, &Ty, 1),
                        Ops, "vqshrun_n", false, 1, true);
  case ARM::BI__builtin_neon_vqsub_v:
  case ARM::BI__builtin_neon_vqsubq_v:
    Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqsub");
  case ARM::BI__builtin_neon_vraddhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, &Ty, 1),
                        Ops, "vraddhn");
  case ARM::BI__builtin_neon_vrecpe_v:
  case ARM::BI__builtin_neon_vrecpeq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, &Ty, 1),
                        Ops, "vrecpe");
  case ARM::BI__builtin_neon_vrecps_v:
  case ARM::BI__builtin_neon_vrecpsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, &Ty, 1),
                        Ops, "vrecps");
  case ARM::BI__builtin_neon_vrhadd_v:
  case ARM::BI__builtin_neon_vrhaddq_v:
    Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrhadd");
  case ARM::BI__builtin_neon_vrshl_v:
  case ARM::BI__builtin_neon_vrshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrshl");
  case ARM::BI__builtin_neon_vrshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, &Ty, 1),
                        Ops, "vrshrn_n", false, 1, true);
  case ARM::BI__builtin_neon_vrshr_n_v:
  case ARM::BI__builtin_neon_vrshrq_n_v:
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrshr_n", false,
                        1, true);
  case ARM::BI__builtin_neon_vrsqrte_v:
  case ARM::BI__builtin_neon_vrsqrteq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, &Ty, 1),
                        Ops, "vrsqrte");
  case ARM::BI__builtin_neon_vrsqrts_v:
  case ARM::BI__builtin_neon_vrsqrtsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, &Ty, 1),
                        Ops, "vrsqrts");
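  // vrsra_n (rounding shift right and accumulate) has no single intrinsic:
  // emit the rounding right shift via the vrshift intrinsic, with the shift
  // amount negated to encode a right shift, then add the accumulator with
  // an ordinary IR add.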
  case ARM::BI__builtin_neon_vrsra_n_v:
  case ARM::BI__builtin_neon_vrsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, &Ty, 1), Ops[1], Ops[2]);
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  case ARM::BI__builtin_neon_vrsubhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, &Ty, 1),
                        Ops, "vrsubhn");
  case ARM::BI__builtin_neon_vset_lane_i8:
  case ARM::BI__builtin_neon_vset_lane_i16:
  case ARM::BI__builtin_neon_vset_lane_i32:
  case ARM::BI__builtin_neon_vset_lane_i64:
  case ARM::BI__builtin_neon_vset_lane_f32:
  case ARM::BI__builtin_neon_vsetq_lane_i8:
  case ARM::BI__builtin_neon_vsetq_lane_i16:
  case ARM::BI__builtin_neon_vsetq_lane_i32:
  case ARM::BI__builtin_neon_vsetq_lane_i64:
  case ARM::BI__builtin_neon_vsetq_lane_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case ARM::BI__builtin_neon_vshl_v:
  case ARM::BI__builtin_neon_vshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vshl");
  case ARM::BI__builtin_neon_vshll_n_v:
    Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vshll", false, 1);
  case ARM::BI__builtin_neon_vshl_n_v:
  case ARM::BI__builtin_neon_vshlq_n_v:
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
                             "vshl_n");
  case ARM::BI__builtin_neon_vshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, &Ty, 1),
                        Ops, "vshrn_n", false, 1, true);
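  // Shifts right by a constant need no NEON intrinsic at all: build the
  // splatted shift-amount vector and use LLVM's own lshr/ashr directly
  // (lshr for unsigned element types, ashr for signed).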
  case ARM::BI__builtin_neon_vshr_n_v:
  case ARM::BI__builtin_neon_vshrq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    if (usgn)
      return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
    else
      return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
  case ARM::BI__builtin_neon_vsri_n_v:
  case ARM::BI__builtin_neon_vsriq_n_v:
    poly = true;
  case ARM::BI__builtin_neon_vsli_n_v:
  case ARM::BI__builtin_neon_vsliq_n_v:
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, poly);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, &Ty, 1),
                        Ops, "vsli_n");
  case ARM::BI__builtin_neon_vsra_n_v:
  case ARM::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
    if (usgn)
      Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
    else
      Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vst1_v:
  case ARM::BI__builtin_neon_vst1q_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, &Ty, 1),
                        Ops, "");
  case ARM::BI__builtin_neon_vst1_lane_v:
  case ARM::BI__builtin_neon_vst1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
  case ARM::BI__builtin_neon_vst2_v:
  case ARM::BI__builtin_neon_vst2q_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, &Ty, 1),
                        Ops, "");
  case ARM::BI__builtin_neon_vst2_lane_v:
  case ARM::BI__builtin_neon_vst2q_lane_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, &Ty, 1),
                        Ops, "");
  case ARM::BI__builtin_neon_vst3_v:
  case ARM::BI__builtin_neon_vst3q_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, &Ty, 1),
                        Ops, "");
  case ARM::BI__builtin_neon_vst3_lane_v:
  case ARM::BI__builtin_neon_vst3q_lane_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, &Ty, 1),
                        Ops, "");
  case ARM::BI__builtin_neon_vst4_v:
  case ARM::BI__builtin_neon_vst4q_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, &Ty, 1),
                        Ops, "");
  case ARM::BI__builtin_neon_vst4_lane_v:
  case ARM::BI__builtin_neon_vst4q_lane_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, &Ty, 1),
                        Ops, "");
  case ARM::BI__builtin_neon_vsubhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, &Ty, 1),
                        Ops, "vsubhn");
  case ARM::BI__builtin_neon_vsubl_v:
    Int = usgn ? Intrinsic::arm_neon_vsublu : Intrinsic::arm_neon_vsubls;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vsubl");
  case ARM::BI__builtin_neon_vsubw_v:
    Int = usgn ? Intrinsic::arm_neon_vsubwu : Intrinsic::arm_neon_vsubws;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vsubw");
  case ARM::BI__builtin_neon_vtbl1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
                        Ops, "vtbl1");
  case ARM::BI__builtin_neon_vtbl2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
                        Ops, "vtbl2");
  case ARM::BI__builtin_neon_vtbl3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
                        Ops, "vtbl3");
  case ARM::BI__builtin_neon_vtbl4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
                        Ops, "vtbl4");
  case ARM::BI__builtin_neon_vtbx1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
                        Ops, "vtbx1");
  case ARM::BI__builtin_neon_vtbx2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
                        Ops, "vtbx2");
  case ARM::BI__builtin_neon_vtbx3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
                        Ops, "vtbx3");
  case ARM::BI__builtin_neon_vtbx4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
                        Ops, "vtbx4");
  case ARM::BI__builtin_neon_vtst_v:
  case ARM::BI__builtin_neon_vtstq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
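  // vtrn, vuzp, and vzip each produce two result vectors, returned through
  // the pointer in Ops[0]; both halves are emitted as shufflevectors of the
  // two inputs. For a <4 x i32> vtrn, for instance, the two masks work out
  // to <0, 4, 2, 6> and <1, 5, 3, 7>.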
  case ARM::BI__builtin_neon_vtrn_v:
  case ARM::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
        Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  case ARM::BI__builtin_neon_vuzp_v:
  case ARM::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  case ARM::BI__builtin_neon_vzip_v:
  case ARM::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  }
}

Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  llvm::SmallVector<Value*, 4> Ops;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  switch (BuiltinID) {
  default: return 0;
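  // The SSE2 psll/psra/psrl intrinsics take the shift count as a vector
  // with the count in the low element, so the immediate-count builtins
  // below are lowered by widening the count to i64, inserting it into
  // element 0 of a <2 x i64>, and bitcasting to the operand's type before
  // calling the intrinsic.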
  case X86::BI__builtin_ia32_pslldi128:
  case X86::BI__builtin_ia32_psllqi128:
  case X86::BI__builtin_ia32_psllwi128:
  case X86::BI__builtin_ia32_psradi128:
  case X86::BI__builtin_ia32_psrawi128:
  case X86::BI__builtin_ia32_psrldi128:
  case X86::BI__builtin_ia32_psrlqi128:
  case X86::BI__builtin_ia32_psrlwi128: {
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
    const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
    llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
    Ops[1] = Builder.CreateInsertElement(llvm::UndefValue::get(Ty),
                                         Ops[1], Zero, "insert");
    Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType(), "bitcast");
    const char *name = 0;
    Intrinsic::ID ID = Intrinsic::not_intrinsic;

    switch (BuiltinID) {
    default: assert(0 && "Unsupported shift intrinsic!");
    case X86::BI__builtin_ia32_pslldi128:
      name = "pslldi";
      ID = Intrinsic::x86_sse2_psll_d;
      break;
    case X86::BI__builtin_ia32_psllqi128:
      name = "psllqi";
      ID = Intrinsic::x86_sse2_psll_q;
      break;
    case X86::BI__builtin_ia32_psllwi128:
      name = "psllwi";
      ID = Intrinsic::x86_sse2_psll_w;
      break;
    case X86::BI__builtin_ia32_psradi128:
      name = "psradi";
      ID = Intrinsic::x86_sse2_psra_d;
      break;
    case X86::BI__builtin_ia32_psrawi128:
      name = "psrawi";
      ID = Intrinsic::x86_sse2_psra_w;
      break;
    case X86::BI__builtin_ia32_psrldi128:
      name = "psrldi";
      ID = Intrinsic::x86_sse2_psrl_d;
      break;
    case X86::BI__builtin_ia32_psrlqi128:
      name = "psrlqi";
      ID = Intrinsic::x86_sse2_psrl_q;
      break;
    case X86::BI__builtin_ia32_psrlwi128:
      name = "psrlwi";
      ID = Intrinsic::x86_sse2_psrl_w;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
  }
  case X86::BI__builtin_ia32_pslldi:
  case X86::BI__builtin_ia32_psllqi:
  case X86::BI__builtin_ia32_psllwi:
  case X86::BI__builtin_ia32_psradi:
  case X86::BI__builtin_ia32_psrawi:
  case X86::BI__builtin_ia32_psrldi:
  case X86::BI__builtin_ia32_psrlqi:
  case X86::BI__builtin_ia32_psrlwi: {
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
    const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 1);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty, "bitcast");
    const char *name = 0;
    Intrinsic::ID ID = Intrinsic::not_intrinsic;

    switch (BuiltinID) {
    default: assert(0 && "Unsupported shift intrinsic!");
    case X86::BI__builtin_ia32_pslldi:
      name = "pslldi";
      ID = Intrinsic::x86_mmx_psll_d;
      break;
    case X86::BI__builtin_ia32_psllqi:
      name = "psllqi";
      ID = Intrinsic::x86_mmx_psll_q;
      break;
    case X86::BI__builtin_ia32_psllwi:
      name = "psllwi";
      ID = Intrinsic::x86_mmx_psll_w;
      break;
    case X86::BI__builtin_ia32_psradi:
      name = "psradi";
      ID = Intrinsic::x86_mmx_psra_d;
      break;
    case X86::BI__builtin_ia32_psrawi:
      name = "psrawi";
      ID = Intrinsic::x86_mmx_psra_w;
      break;
    case X86::BI__builtin_ia32_psrldi:
      name = "psrldi";
      ID = Intrinsic::x86_mmx_psrl_d;
      break;
    case X86::BI__builtin_ia32_psrlqi:
      name = "psrlqi";
      ID = Intrinsic::x86_mmx_psrl_q;
      break;
    case X86::BI__builtin_ia32_psrlwi:
      name = "psrlwi";
      ID = Intrinsic::x86_mmx_psrl_w;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
  }
  case X86::BI__builtin_ia32_cmpps: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpps");
  }
  case X86::BI__builtin_ia32_cmpss: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ss);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpss");
  }
  case X86::BI__builtin_ia32_ldmxcsr: {
    const llvm::Type *PtrTy = llvm::Type::getInt8PtrTy(VMContext);
    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
    Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp");
    Builder.CreateStore(Ops[0], Tmp);
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
                              Builder.CreateBitCast(Tmp, PtrTy));
  }
  case X86::BI__builtin_ia32_stmxcsr: {
    const llvm::Type *PtrTy = llvm::Type::getInt8PtrTy(VMContext);
    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
    Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp");
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
                       Builder.CreateBitCast(Tmp, PtrTy));
    return Builder.CreateLoad(Tmp, "stmxcsr");
  }
  case X86::BI__builtin_ia32_cmppd: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_pd);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmppd");
  }
  case X86::BI__builtin_ia32_cmpsd: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_sd);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpsd");
  }
  case X86::BI__builtin_ia32_storehps:
  case X86::BI__builtin_ia32_storelps: {
    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

    // Cast the value to <2 x i64>.
    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");

    // Extract element 0 (storelps) or element 1 (storehps).
    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
    llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");

    // Cast the pointer to i64* and store.
    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
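  // palignr selects a byte-aligned window out of the concatenation of its
  // two inputs. Three regimes follow: the window lies within the pair
  // (a shufflevector), the window hangs off the end of the pair (a logical
  // right shift of one input), or the window is entirely past the data
  // (a constant zero).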
  case X86::BI__builtin_ia32_palignr: {
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors less than 9 bytes,
    // emit a shuffle instruction.
    if (shiftVal <= 8) {
      llvm::SmallVector<llvm::Constant*, 8> Indices;
      for (unsigned i = 0; i != 8; ++i)
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));

      Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting the pair of input vectors more than 8 but less
    // than 16 bytes, emit a logical right shift of the destination.
    if (shiftVal < 16) {
      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
      const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);

      // Emit the shift with the psrl.q intrinsic.
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
      return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
    }

    // If palignr is shifting the pair of vectors 16 bytes or more, emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_palignr128: {
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors less than 17 bytes,
    // emit a shuffle instruction.
    if (shiftVal <= 16) {
      llvm::SmallVector<llvm::Constant*, 16> Indices;
      for (unsigned i = 0; i != 16; ++i)
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));

      Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting the pair of input vectors more than 16 but less
    // than 32 bytes, emit a logical right shift of the destination.
    if (shiftVal < 32) {
      const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);

      // Emit the shift with the psrl.dq intrinsic, which takes the bit
      // count as an i32.
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
      return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
    }

    // If palignr is shifting the pair of vectors 32 bytes or more, emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
  }
}

Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  llvm::SmallVector<Value*, 4> Ops;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

  switch (BuiltinID) {
  default: return 0;

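  // The AltiVec load/store builtins take a (byte offset, base pointer)
  // pair, while the llvm.ppc.altivec.* intrinsics take a single address:
  // fold the offset into the pointer with an i8 GEP, then drop it from
  // the operand list before emitting the call.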
  // vec_ld, vec_lvsl, vec_lvsr
  case PPC::BI__builtin_altivec_lvx:
  case PPC::BI__builtin_altivec_lvxl:
  case PPC::BI__builtin_altivec_lvebx:
  case PPC::BI__builtin_altivec_lvehx:
  case PPC::BI__builtin_altivec_lvewx:
  case PPC::BI__builtin_altivec_lvsl:
  case PPC::BI__builtin_altivec_lvsr:
  {
    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::Type::getInt8PtrTy(VMContext));

    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0], "tmp");
    Ops.pop_back();

    switch (BuiltinID) {
    default: assert(0 && "Unsupported ld/lvsl/lvsr intrinsic!");
    case PPC::BI__builtin_altivec_lvx:
      ID = Intrinsic::ppc_altivec_lvx;
      break;
    case PPC::BI__builtin_altivec_lvxl:
      ID = Intrinsic::ppc_altivec_lvxl;
      break;
    case PPC::BI__builtin_altivec_lvebx:
      ID = Intrinsic::ppc_altivec_lvebx;
      break;
    case PPC::BI__builtin_altivec_lvehx:
      ID = Intrinsic::ppc_altivec_lvehx;
      break;
    case PPC::BI__builtin_altivec_lvewx:
      ID = Intrinsic::ppc_altivec_lvewx;
      break;
    case PPC::BI__builtin_altivec_lvsl:
      ID = Intrinsic::ppc_altivec_lvsl;
      break;
    case PPC::BI__builtin_altivec_lvsr:
      ID = Intrinsic::ppc_altivec_lvsr;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "");
  }

  // vec_st
  case PPC::BI__builtin_altivec_stvx:
  case PPC::BI__builtin_altivec_stvxl:
  case PPC::BI__builtin_altivec_stvebx:
  case PPC::BI__builtin_altivec_stvehx:
  case PPC::BI__builtin_altivec_stvewx:
  {
    Ops[2] = Builder.CreateBitCast(Ops[2], llvm::Type::getInt8PtrTy(VMContext));
    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1], "tmp");
    Ops.pop_back();

    switch (BuiltinID) {
    default: assert(0 && "Unsupported st intrinsic!");
    case PPC::BI__builtin_altivec_stvx:
      ID = Intrinsic::ppc_altivec_stvx;
      break;
    case PPC::BI__builtin_altivec_stvxl:
      ID = Intrinsic::ppc_altivec_stvxl;
      break;
    case PPC::BI__builtin_altivec_stvebx:
      ID = Intrinsic::ppc_altivec_stvebx;
      break;
    case PPC::BI__builtin_altivec_stvehx:
      ID = Intrinsic::ppc_altivec_stvehx;
      break;
    case PPC::BI__builtin_altivec_stvewx:
      ID = Intrinsic::ppc_altivec_stvewx;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "");
  }
  }
  return 0;
}