CGBuiltin.cpp revision 9cbe4f0ba01ec304e1e3d071c071f7bca33631c0
1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This contains code to emit Builtin calls as LLVM code. 11// 12//===----------------------------------------------------------------------===// 13 14#include "TargetInfo.h" 15#include "CodeGenFunction.h" 16#include "CodeGenModule.h" 17#include "CGObjCRuntime.h" 18#include "clang/Basic/TargetInfo.h" 19#include "clang/AST/APValue.h" 20#include "clang/AST/ASTContext.h" 21#include "clang/AST/Decl.h" 22#include "clang/Basic/TargetBuiltins.h" 23#include "llvm/Intrinsics.h" 24#include "llvm/Target/TargetData.h" 25 26using namespace clang; 27using namespace CodeGen; 28using namespace llvm; 29 30static void EmitMemoryBarrier(CodeGenFunction &CGF, 31 bool LoadLoad, bool LoadStore, 32 bool StoreLoad, bool StoreStore, 33 bool Device) { 34 Value *True = CGF.Builder.getTrue(); 35 Value *False = CGF.Builder.getFalse(); 36 Value *C[5] = { LoadLoad ? True : False, 37 LoadStore ? True : False, 38 StoreLoad ? True : False, 39 StoreStore ? True : False, 40 Device ? True : False }; 41 CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::memory_barrier), 42 C, C + 5); 43} 44 45/// Emit the conversions required to turn the given value into an 46/// integer of the given size. 47static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 48 QualType T, const llvm::IntegerType *IntType) { 49 V = CGF.EmitToMemory(V, T); 50 51 if (V->getType()->isPointerTy()) 52 return CGF.Builder.CreatePtrToInt(V, IntType); 53 54 assert(V->getType() == IntType); 55 return V; 56} 57 58static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 59 QualType T, const llvm::Type *ResultType) { 60 V = CGF.EmitFromMemory(V, T); 61 62 if (ResultType->isPointerTy()) 63 return CGF.Builder.CreateIntToPtr(V, ResultType); 64 65 assert(V->getType() == ResultType); 66 return V; 67} 68 69// The atomic builtins are also full memory barriers. This is a utility for 70// wrapping a call to the builtins with memory barriers. 71static Value *EmitCallWithBarrier(CodeGenFunction &CGF, Value *Fn, 72 Value **ArgBegin, Value **ArgEnd) { 73 // FIXME: We need a target hook for whether this applies to device memory or 74 // not. 75 bool Device = true; 76 77 // Create barriers both before and after the call. 78 EmitMemoryBarrier(CGF, true, true, true, true, Device); 79 Value *Result = CGF.Builder.CreateCall(Fn, ArgBegin, ArgEnd); 80 EmitMemoryBarrier(CGF, true, true, true, true, Device); 81 return Result; 82} 83 84/// Utility to insert an atomic instruction based on Instrinsic::ID 85/// and the expression node. 86static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 87 Intrinsic::ID Id, const CallExpr *E) { 88 QualType T = E->getType(); 89 assert(E->getArg(0)->getType()->isPointerType()); 90 assert(CGF.getContext().hasSameUnqualifiedType(T, 91 E->getArg(0)->getType()->getPointeeType())); 92 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 93 94 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 95 unsigned AddrSpace = 96 cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace(); 97 98 llvm::IntegerType *IntType = 99 llvm::IntegerType::get(CGF.getLLVMContext(), 100 CGF.getContext().getTypeSize(T)); 101 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 102 103 llvm::Type *IntrinsicTypes[2] = { IntType, IntPtrType }; 104 llvm::Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes, 2); 105 106 llvm::Value *Args[2]; 107 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 108 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 109 const llvm::Type *ValueType = Args[1]->getType(); 110 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 111 112 llvm::Value *Result = EmitCallWithBarrier(CGF, AtomF, Args, Args + 2); 113 Result = EmitFromInt(CGF, Result, T, ValueType); 114 return RValue::get(Result); 115} 116 117/// Utility to insert an atomic instruction based Instrinsic::ID and 118/// the expression node, where the return value is the result of the 119/// operation. 120static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 121 Intrinsic::ID Id, const CallExpr *E, 122 Instruction::BinaryOps Op) { 123 QualType T = E->getType(); 124 assert(E->getArg(0)->getType()->isPointerType()); 125 assert(CGF.getContext().hasSameUnqualifiedType(T, 126 E->getArg(0)->getType()->getPointeeType())); 127 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 128 129 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 130 unsigned AddrSpace = 131 cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace(); 132 133 llvm::IntegerType *IntType = 134 llvm::IntegerType::get(CGF.getLLVMContext(), 135 CGF.getContext().getTypeSize(T)); 136 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 137 138 llvm::Type *IntrinsicTypes[2] = { IntType, IntPtrType }; 139 llvm::Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes, 2); 140 141 llvm::Value *Args[2]; 142 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 143 const llvm::Type *ValueType = Args[1]->getType(); 144 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 145 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 146 147 llvm::Value *Result = EmitCallWithBarrier(CGF, AtomF, Args, Args + 2); 148 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 149 Result = EmitFromInt(CGF, Result, T, ValueType); 150 return RValue::get(Result); 151} 152 153/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy, 154/// which must be a scalar floating point type. 155static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) { 156 const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>(); 157 assert(ValTyP && "isn't scalar fp type!"); 158 159 StringRef FnName; 160 switch (ValTyP->getKind()) { 161 default: assert(0 && "Isn't a scalar fp type!"); 162 case BuiltinType::Float: FnName = "fabsf"; break; 163 case BuiltinType::Double: FnName = "fabs"; break; 164 case BuiltinType::LongDouble: FnName = "fabsl"; break; 165 } 166 167 // The prototype is something that takes and returns whatever V's type is. 168 llvm::Type *ArgTys[] = { V->getType() }; 169 llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), ArgTys, 170 false); 171 llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName); 172 173 return CGF.Builder.CreateCall(Fn, V, "abs"); 174} 175 176RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 177 unsigned BuiltinID, const CallExpr *E) { 178 // See if we can constant fold this builtin. If so, don't emit it at all. 179 Expr::EvalResult Result; 180 if (E->Evaluate(Result, CGM.getContext()) && 181 !Result.hasSideEffects()) { 182 if (Result.Val.isInt()) 183 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 184 Result.Val.getInt())); 185 if (Result.Val.isFloat()) 186 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 187 Result.Val.getFloat())); 188 } 189 190 switch (BuiltinID) { 191 default: break; // Handle intrinsics and libm functions below. 192 case Builtin::BI__builtin___CFStringMakeConstantString: 193 case Builtin::BI__builtin___NSStringMakeConstantString: 194 return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0)); 195 case Builtin::BI__builtin_stdarg_start: 196 case Builtin::BI__builtin_va_start: 197 case Builtin::BI__builtin_va_end: { 198 Value *ArgValue = EmitVAListRef(E->getArg(0)); 199 const llvm::Type *DestType = Int8PtrTy; 200 if (ArgValue->getType() != DestType) 201 ArgValue = Builder.CreateBitCast(ArgValue, DestType, 202 ArgValue->getName().data()); 203 204 Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ? 205 Intrinsic::vaend : Intrinsic::vastart; 206 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue)); 207 } 208 case Builtin::BI__builtin_va_copy: { 209 Value *DstPtr = EmitVAListRef(E->getArg(0)); 210 Value *SrcPtr = EmitVAListRef(E->getArg(1)); 211 212 const llvm::Type *Type = Int8PtrTy; 213 214 DstPtr = Builder.CreateBitCast(DstPtr, Type); 215 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 216 return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy), 217 DstPtr, SrcPtr)); 218 } 219 case Builtin::BI__builtin_abs: { 220 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 221 222 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 223 Value *CmpResult = 224 Builder.CreateICmpSGE(ArgValue, 225 llvm::Constant::getNullValue(ArgValue->getType()), 226 "abscond"); 227 Value *Result = 228 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 229 230 return RValue::get(Result); 231 } 232 case Builtin::BI__builtin_ctz: 233 case Builtin::BI__builtin_ctzl: 234 case Builtin::BI__builtin_ctzll: { 235 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 236 237 llvm::Type *ArgType = ArgValue->getType(); 238 Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1); 239 240 const llvm::Type *ResultType = ConvertType(E->getType()); 241 Value *Result = Builder.CreateCall(F, ArgValue, "tmp"); 242 if (Result->getType() != ResultType) 243 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 244 "cast"); 245 return RValue::get(Result); 246 } 247 case Builtin::BI__builtin_clz: 248 case Builtin::BI__builtin_clzl: 249 case Builtin::BI__builtin_clzll: { 250 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 251 252 llvm::Type *ArgType = ArgValue->getType(); 253 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, &ArgType, 1); 254 255 const llvm::Type *ResultType = ConvertType(E->getType()); 256 Value *Result = Builder.CreateCall(F, ArgValue, "tmp"); 257 if (Result->getType() != ResultType) 258 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 259 "cast"); 260 return RValue::get(Result); 261 } 262 case Builtin::BI__builtin_ffs: 263 case Builtin::BI__builtin_ffsl: 264 case Builtin::BI__builtin_ffsll: { 265 // ffs(x) -> x ? cttz(x) + 1 : 0 266 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 267 268 llvm::Type *ArgType = ArgValue->getType(); 269 Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1); 270 271 const llvm::Type *ResultType = ConvertType(E->getType()); 272 Value *Tmp = Builder.CreateAdd(Builder.CreateCall(F, ArgValue, "tmp"), 273 llvm::ConstantInt::get(ArgType, 1), "tmp"); 274 Value *Zero = llvm::Constant::getNullValue(ArgType); 275 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 276 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 277 if (Result->getType() != ResultType) 278 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 279 "cast"); 280 return RValue::get(Result); 281 } 282 case Builtin::BI__builtin_parity: 283 case Builtin::BI__builtin_parityl: 284 case Builtin::BI__builtin_parityll: { 285 // parity(x) -> ctpop(x) & 1 286 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 287 288 llvm::Type *ArgType = ArgValue->getType(); 289 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, &ArgType, 1); 290 291 const llvm::Type *ResultType = ConvertType(E->getType()); 292 Value *Tmp = Builder.CreateCall(F, ArgValue, "tmp"); 293 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1), 294 "tmp"); 295 if (Result->getType() != ResultType) 296 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 297 "cast"); 298 return RValue::get(Result); 299 } 300 case Builtin::BI__builtin_popcount: 301 case Builtin::BI__builtin_popcountl: 302 case Builtin::BI__builtin_popcountll: { 303 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 304 305 llvm::Type *ArgType = ArgValue->getType(); 306 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, &ArgType, 1); 307 308 const llvm::Type *ResultType = ConvertType(E->getType()); 309 Value *Result = Builder.CreateCall(F, ArgValue, "tmp"); 310 if (Result->getType() != ResultType) 311 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 312 "cast"); 313 return RValue::get(Result); 314 } 315 case Builtin::BI__builtin_expect: { 316 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 317 llvm::Type *ArgType = ArgValue->getType(); 318 319 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, &ArgType, 1); 320 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 321 322 Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue, 323 "expval"); 324 return RValue::get(Result); 325 } 326 case Builtin::BI__builtin_bswap32: 327 case Builtin::BI__builtin_bswap64: { 328 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 329 llvm::Type *ArgType = ArgValue->getType(); 330 Value *F = CGM.getIntrinsic(Intrinsic::bswap, &ArgType, 1); 331 return RValue::get(Builder.CreateCall(F, ArgValue, "tmp")); 332 } 333 case Builtin::BI__builtin_object_size: { 334 // We pass this builtin onto the optimizer so that it can 335 // figure out the object size in more complex cases. 336 llvm::Type *ResType[] = { 337 ConvertType(E->getType()) 338 }; 339 340 // LLVM only supports 0 and 2, make sure that we pass along that 341 // as a boolean. 342 Value *Ty = EmitScalarExpr(E->getArg(1)); 343 ConstantInt *CI = dyn_cast<ConstantInt>(Ty); 344 assert(CI); 345 uint64_t val = CI->getZExtValue(); 346 CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1); 347 348 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType, 1); 349 return RValue::get(Builder.CreateCall2(F, 350 EmitScalarExpr(E->getArg(0)), 351 CI)); 352 } 353 case Builtin::BI__builtin_prefetch: { 354 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 355 // FIXME: Technically these constants should of type 'int', yes? 356 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 357 llvm::ConstantInt::get(Int32Ty, 0); 358 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : 359 llvm::ConstantInt::get(Int32Ty, 3); 360 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 361 Value *F = CGM.getIntrinsic(Intrinsic::prefetch, 0, 0); 362 return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data)); 363 } 364 case Builtin::BI__builtin_trap: { 365 Value *F = CGM.getIntrinsic(Intrinsic::trap, 0, 0); 366 return RValue::get(Builder.CreateCall(F)); 367 } 368 case Builtin::BI__builtin_unreachable: { 369 if (CatchUndefined) 370 EmitBranch(getTrapBB()); 371 else 372 Builder.CreateUnreachable(); 373 374 // We do need to preserve an insertion point. 375 EmitBlock(createBasicBlock("unreachable.cont")); 376 377 return RValue::get(0); 378 } 379 380 case Builtin::BI__builtin_powi: 381 case Builtin::BI__builtin_powif: 382 case Builtin::BI__builtin_powil: { 383 Value *Base = EmitScalarExpr(E->getArg(0)); 384 Value *Exponent = EmitScalarExpr(E->getArg(1)); 385 llvm::Type *ArgType = Base->getType(); 386 Value *F = CGM.getIntrinsic(Intrinsic::powi, &ArgType, 1); 387 return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp")); 388 } 389 390 case Builtin::BI__builtin_isgreater: 391 case Builtin::BI__builtin_isgreaterequal: 392 case Builtin::BI__builtin_isless: 393 case Builtin::BI__builtin_islessequal: 394 case Builtin::BI__builtin_islessgreater: 395 case Builtin::BI__builtin_isunordered: { 396 // Ordered comparisons: we know the arguments to these are matching scalar 397 // floating point values. 398 Value *LHS = EmitScalarExpr(E->getArg(0)); 399 Value *RHS = EmitScalarExpr(E->getArg(1)); 400 401 switch (BuiltinID) { 402 default: assert(0 && "Unknown ordered comparison"); 403 case Builtin::BI__builtin_isgreater: 404 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 405 break; 406 case Builtin::BI__builtin_isgreaterequal: 407 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 408 break; 409 case Builtin::BI__builtin_isless: 410 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 411 break; 412 case Builtin::BI__builtin_islessequal: 413 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 414 break; 415 case Builtin::BI__builtin_islessgreater: 416 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 417 break; 418 case Builtin::BI__builtin_isunordered: 419 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 420 break; 421 } 422 // ZExt bool to int type. 423 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()), 424 "tmp")); 425 } 426 case Builtin::BI__builtin_isnan: { 427 Value *V = EmitScalarExpr(E->getArg(0)); 428 V = Builder.CreateFCmpUNO(V, V, "cmp"); 429 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp")); 430 } 431 432 case Builtin::BI__builtin_isinf: { 433 // isinf(x) --> fabs(x) == infinity 434 Value *V = EmitScalarExpr(E->getArg(0)); 435 V = EmitFAbs(*this, V, E->getArg(0)->getType()); 436 437 V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf"); 438 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp")); 439 } 440 441 // TODO: BI__builtin_isinf_sign 442 // isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0 443 444 case Builtin::BI__builtin_isnormal: { 445 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 446 Value *V = EmitScalarExpr(E->getArg(0)); 447 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 448 449 Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType()); 450 Value *IsLessThanInf = 451 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 452 APFloat Smallest = APFloat::getSmallestNormalized( 453 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 454 Value *IsNormal = 455 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 456 "isnormal"); 457 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 458 V = Builder.CreateAnd(V, IsNormal, "and"); 459 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 460 } 461 462 case Builtin::BI__builtin_isfinite: { 463 // isfinite(x) --> x == x && fabs(x) != infinity; } 464 Value *V = EmitScalarExpr(E->getArg(0)); 465 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 466 467 Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType()); 468 Value *IsNotInf = 469 Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 470 471 V = Builder.CreateAnd(Eq, IsNotInf, "and"); 472 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 473 } 474 475 case Builtin::BI__builtin_fpclassify: { 476 Value *V = EmitScalarExpr(E->getArg(5)); 477 const llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 478 479 // Create Result 480 BasicBlock *Begin = Builder.GetInsertBlock(); 481 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 482 Builder.SetInsertPoint(End); 483 PHINode *Result = 484 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 485 "fpclassify_result"); 486 487 // if (V==0) return FP_ZERO 488 Builder.SetInsertPoint(Begin); 489 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 490 "iszero"); 491 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 492 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 493 Builder.CreateCondBr(IsZero, End, NotZero); 494 Result->addIncoming(ZeroLiteral, Begin); 495 496 // if (V != V) return FP_NAN 497 Builder.SetInsertPoint(NotZero); 498 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 499 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 500 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 501 Builder.CreateCondBr(IsNan, End, NotNan); 502 Result->addIncoming(NanLiteral, NotZero); 503 504 // if (fabs(V) == infinity) return FP_INFINITY 505 Builder.SetInsertPoint(NotNan); 506 Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType()); 507 Value *IsInf = 508 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 509 "isinf"); 510 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 511 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 512 Builder.CreateCondBr(IsInf, End, NotInf); 513 Result->addIncoming(InfLiteral, NotNan); 514 515 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 516 Builder.SetInsertPoint(NotInf); 517 APFloat Smallest = APFloat::getSmallestNormalized( 518 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 519 Value *IsNormal = 520 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 521 "isnormal"); 522 Value *NormalResult = 523 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 524 EmitScalarExpr(E->getArg(3))); 525 Builder.CreateBr(End); 526 Result->addIncoming(NormalResult, NotInf); 527 528 // return Result 529 Builder.SetInsertPoint(End); 530 return RValue::get(Result); 531 } 532 533 case Builtin::BIalloca: 534 case Builtin::BI__builtin_alloca: { 535 Value *Size = EmitScalarExpr(E->getArg(0)); 536 return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size, "tmp")); 537 } 538 case Builtin::BIbzero: 539 case Builtin::BI__builtin_bzero: { 540 Value *Address = EmitScalarExpr(E->getArg(0)); 541 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 542 Builder.CreateMemSet(Address, Builder.getInt8(0), SizeVal, 1, false); 543 return RValue::get(Address); 544 } 545 case Builtin::BImemcpy: 546 case Builtin::BI__builtin_memcpy: { 547 Value *Address = EmitScalarExpr(E->getArg(0)); 548 Value *SrcAddr = EmitScalarExpr(E->getArg(1)); 549 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 550 Builder.CreateMemCpy(Address, SrcAddr, SizeVal, 1, false); 551 return RValue::get(Address); 552 } 553 554 case Builtin::BI__builtin___memcpy_chk: { 555 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 556 if (!E->getArg(2)->isEvaluatable(CGM.getContext()) || 557 !E->getArg(3)->isEvaluatable(CGM.getContext())) 558 break; 559 llvm::APSInt Size = E->getArg(2)->EvaluateAsInt(CGM.getContext()); 560 llvm::APSInt DstSize = E->getArg(3)->EvaluateAsInt(CGM.getContext()); 561 if (Size.ugt(DstSize)) 562 break; 563 Value *Dest = EmitScalarExpr(E->getArg(0)); 564 Value *Src = EmitScalarExpr(E->getArg(1)); 565 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 566 Builder.CreateMemCpy(Dest, Src, SizeVal, 1, false); 567 return RValue::get(Dest); 568 } 569 570 case Builtin::BI__builtin_objc_memmove_collectable: { 571 Value *Address = EmitScalarExpr(E->getArg(0)); 572 Value *SrcAddr = EmitScalarExpr(E->getArg(1)); 573 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 574 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 575 Address, SrcAddr, SizeVal); 576 return RValue::get(Address); 577 } 578 579 case Builtin::BI__builtin___memmove_chk: { 580 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 581 if (!E->getArg(2)->isEvaluatable(CGM.getContext()) || 582 !E->getArg(3)->isEvaluatable(CGM.getContext())) 583 break; 584 llvm::APSInt Size = E->getArg(2)->EvaluateAsInt(CGM.getContext()); 585 llvm::APSInt DstSize = E->getArg(3)->EvaluateAsInt(CGM.getContext()); 586 if (Size.ugt(DstSize)) 587 break; 588 Value *Dest = EmitScalarExpr(E->getArg(0)); 589 Value *Src = EmitScalarExpr(E->getArg(1)); 590 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 591 Builder.CreateMemMove(Dest, Src, SizeVal, 1, false); 592 return RValue::get(Dest); 593 } 594 595 case Builtin::BImemmove: 596 case Builtin::BI__builtin_memmove: { 597 Value *Address = EmitScalarExpr(E->getArg(0)); 598 Value *SrcAddr = EmitScalarExpr(E->getArg(1)); 599 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 600 Builder.CreateMemMove(Address, SrcAddr, SizeVal, 1, false); 601 return RValue::get(Address); 602 } 603 case Builtin::BImemset: 604 case Builtin::BI__builtin_memset: { 605 Value *Address = EmitScalarExpr(E->getArg(0)); 606 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 607 Builder.getInt8Ty()); 608 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 609 Builder.CreateMemSet(Address, ByteVal, SizeVal, 1, false); 610 return RValue::get(Address); 611 } 612 case Builtin::BI__builtin___memset_chk: { 613 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 614 if (!E->getArg(2)->isEvaluatable(CGM.getContext()) || 615 !E->getArg(3)->isEvaluatable(CGM.getContext())) 616 break; 617 llvm::APSInt Size = E->getArg(2)->EvaluateAsInt(CGM.getContext()); 618 llvm::APSInt DstSize = E->getArg(3)->EvaluateAsInt(CGM.getContext()); 619 if (Size.ugt(DstSize)) 620 break; 621 Value *Address = EmitScalarExpr(E->getArg(0)); 622 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 623 Builder.getInt8Ty()); 624 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 625 Builder.CreateMemSet(Address, ByteVal, SizeVal, 1, false); 626 627 return RValue::get(Address); 628 } 629 case Builtin::BI__builtin_dwarf_cfa: { 630 // The offset in bytes from the first argument to the CFA. 631 // 632 // Why on earth is this in the frontend? Is there any reason at 633 // all that the backend can't reasonably determine this while 634 // lowering llvm.eh.dwarf.cfa()? 635 // 636 // TODO: If there's a satisfactory reason, add a target hook for 637 // this instead of hard-coding 0, which is correct for most targets. 638 int32_t Offset = 0; 639 640 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa, 0, 0); 641 return RValue::get(Builder.CreateCall(F, 642 llvm::ConstantInt::get(Int32Ty, Offset))); 643 } 644 case Builtin::BI__builtin_return_address: { 645 Value *Depth = EmitScalarExpr(E->getArg(0)); 646 Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp"); 647 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress, 0, 0); 648 return RValue::get(Builder.CreateCall(F, Depth)); 649 } 650 case Builtin::BI__builtin_frame_address: { 651 Value *Depth = EmitScalarExpr(E->getArg(0)); 652 Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp"); 653 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress, 0, 0); 654 return RValue::get(Builder.CreateCall(F, Depth)); 655 } 656 case Builtin::BI__builtin_extract_return_addr: { 657 Value *Address = EmitScalarExpr(E->getArg(0)); 658 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 659 return RValue::get(Result); 660 } 661 case Builtin::BI__builtin_frob_return_addr: { 662 Value *Address = EmitScalarExpr(E->getArg(0)); 663 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 664 return RValue::get(Result); 665 } 666 case Builtin::BI__builtin_dwarf_sp_column: { 667 const llvm::IntegerType *Ty 668 = cast<llvm::IntegerType>(ConvertType(E->getType())); 669 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 670 if (Column == -1) { 671 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 672 return RValue::get(llvm::UndefValue::get(Ty)); 673 } 674 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 675 } 676 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 677 Value *Address = EmitScalarExpr(E->getArg(0)); 678 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 679 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 680 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 681 } 682 case Builtin::BI__builtin_eh_return: { 683 Value *Int = EmitScalarExpr(E->getArg(0)); 684 Value *Ptr = EmitScalarExpr(E->getArg(1)); 685 686 const llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 687 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 688 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 689 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 690 ? Intrinsic::eh_return_i32 691 : Intrinsic::eh_return_i64, 692 0, 0); 693 Builder.CreateCall2(F, Int, Ptr); 694 Builder.CreateUnreachable(); 695 696 // We do need to preserve an insertion point. 697 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 698 699 return RValue::get(0); 700 } 701 case Builtin::BI__builtin_unwind_init: { 702 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init, 0, 0); 703 return RValue::get(Builder.CreateCall(F)); 704 } 705 case Builtin::BI__builtin_extend_pointer: { 706 // Extends a pointer to the size of an _Unwind_Word, which is 707 // uint64_t on all platforms. Generally this gets poked into a 708 // register and eventually used as an address, so if the 709 // addressing registers are wider than pointers and the platform 710 // doesn't implicitly ignore high-order bits when doing 711 // addressing, we need to make sure we zext / sext based on 712 // the platform's expectations. 713 // 714 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 715 716 // Cast the pointer to intptr_t. 717 Value *Ptr = EmitScalarExpr(E->getArg(0)); 718 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 719 720 // If that's 64 bits, we're done. 721 if (IntPtrTy->getBitWidth() == 64) 722 return RValue::get(Result); 723 724 // Otherwise, ask the codegen data what to do. 725 if (getTargetHooks().extendPointerWithSExt()) 726 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 727 else 728 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 729 } 730 case Builtin::BI__builtin_setjmp: { 731 // Buffer is a void**. 732 Value *Buf = EmitScalarExpr(E->getArg(0)); 733 734 // Store the frame pointer to the setjmp buffer. 735 Value *FrameAddr = 736 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 737 ConstantInt::get(Int32Ty, 0)); 738 Builder.CreateStore(FrameAddr, Buf); 739 740 // Store the stack pointer to the setjmp buffer. 741 Value *StackAddr = 742 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 743 Value *StackSaveSlot = 744 Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2)); 745 Builder.CreateStore(StackAddr, StackSaveSlot); 746 747 // Call LLVM's EH setjmp, which is lightweight. 748 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 749 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 750 return RValue::get(Builder.CreateCall(F, Buf)); 751 } 752 case Builtin::BI__builtin_longjmp: { 753 Value *Buf = EmitScalarExpr(E->getArg(0)); 754 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 755 756 // Call LLVM's EH longjmp, which is lightweight. 757 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 758 759 // longjmp doesn't return; mark this as unreachable. 760 Builder.CreateUnreachable(); 761 762 // We do need to preserve an insertion point. 763 EmitBlock(createBasicBlock("longjmp.cont")); 764 765 return RValue::get(0); 766 } 767 case Builtin::BI__sync_fetch_and_add: 768 case Builtin::BI__sync_fetch_and_sub: 769 case Builtin::BI__sync_fetch_and_or: 770 case Builtin::BI__sync_fetch_and_and: 771 case Builtin::BI__sync_fetch_and_xor: 772 case Builtin::BI__sync_add_and_fetch: 773 case Builtin::BI__sync_sub_and_fetch: 774 case Builtin::BI__sync_and_and_fetch: 775 case Builtin::BI__sync_or_and_fetch: 776 case Builtin::BI__sync_xor_and_fetch: 777 case Builtin::BI__sync_val_compare_and_swap: 778 case Builtin::BI__sync_bool_compare_and_swap: 779 case Builtin::BI__sync_lock_test_and_set: 780 case Builtin::BI__sync_lock_release: 781 case Builtin::BI__sync_swap: 782 assert(0 && "Shouldn't make it through sema"); 783 case Builtin::BI__sync_fetch_and_add_1: 784 case Builtin::BI__sync_fetch_and_add_2: 785 case Builtin::BI__sync_fetch_and_add_4: 786 case Builtin::BI__sync_fetch_and_add_8: 787 case Builtin::BI__sync_fetch_and_add_16: 788 return EmitBinaryAtomic(*this, Intrinsic::atomic_load_add, E); 789 case Builtin::BI__sync_fetch_and_sub_1: 790 case Builtin::BI__sync_fetch_and_sub_2: 791 case Builtin::BI__sync_fetch_and_sub_4: 792 case Builtin::BI__sync_fetch_and_sub_8: 793 case Builtin::BI__sync_fetch_and_sub_16: 794 return EmitBinaryAtomic(*this, Intrinsic::atomic_load_sub, E); 795 case Builtin::BI__sync_fetch_and_or_1: 796 case Builtin::BI__sync_fetch_and_or_2: 797 case Builtin::BI__sync_fetch_and_or_4: 798 case Builtin::BI__sync_fetch_and_or_8: 799 case Builtin::BI__sync_fetch_and_or_16: 800 return EmitBinaryAtomic(*this, Intrinsic::atomic_load_or, E); 801 case Builtin::BI__sync_fetch_and_and_1: 802 case Builtin::BI__sync_fetch_and_and_2: 803 case Builtin::BI__sync_fetch_and_and_4: 804 case Builtin::BI__sync_fetch_and_and_8: 805 case Builtin::BI__sync_fetch_and_and_16: 806 return EmitBinaryAtomic(*this, Intrinsic::atomic_load_and, E); 807 case Builtin::BI__sync_fetch_and_xor_1: 808 case Builtin::BI__sync_fetch_and_xor_2: 809 case Builtin::BI__sync_fetch_and_xor_4: 810 case Builtin::BI__sync_fetch_and_xor_8: 811 case Builtin::BI__sync_fetch_and_xor_16: 812 return EmitBinaryAtomic(*this, Intrinsic::atomic_load_xor, E); 813 814 // Clang extensions: not overloaded yet. 815 case Builtin::BI__sync_fetch_and_min: 816 return EmitBinaryAtomic(*this, Intrinsic::atomic_load_min, E); 817 case Builtin::BI__sync_fetch_and_max: 818 return EmitBinaryAtomic(*this, Intrinsic::atomic_load_max, E); 819 case Builtin::BI__sync_fetch_and_umin: 820 return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umin, E); 821 case Builtin::BI__sync_fetch_and_umax: 822 return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umax, E); 823 824 case Builtin::BI__sync_add_and_fetch_1: 825 case Builtin::BI__sync_add_and_fetch_2: 826 case Builtin::BI__sync_add_and_fetch_4: 827 case Builtin::BI__sync_add_and_fetch_8: 828 case Builtin::BI__sync_add_and_fetch_16: 829 return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_add, E, 830 llvm::Instruction::Add); 831 case Builtin::BI__sync_sub_and_fetch_1: 832 case Builtin::BI__sync_sub_and_fetch_2: 833 case Builtin::BI__sync_sub_and_fetch_4: 834 case Builtin::BI__sync_sub_and_fetch_8: 835 case Builtin::BI__sync_sub_and_fetch_16: 836 return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_sub, E, 837 llvm::Instruction::Sub); 838 case Builtin::BI__sync_and_and_fetch_1: 839 case Builtin::BI__sync_and_and_fetch_2: 840 case Builtin::BI__sync_and_and_fetch_4: 841 case Builtin::BI__sync_and_and_fetch_8: 842 case Builtin::BI__sync_and_and_fetch_16: 843 return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_and, E, 844 llvm::Instruction::And); 845 case Builtin::BI__sync_or_and_fetch_1: 846 case Builtin::BI__sync_or_and_fetch_2: 847 case Builtin::BI__sync_or_and_fetch_4: 848 case Builtin::BI__sync_or_and_fetch_8: 849 case Builtin::BI__sync_or_and_fetch_16: 850 return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_or, E, 851 llvm::Instruction::Or); 852 case Builtin::BI__sync_xor_and_fetch_1: 853 case Builtin::BI__sync_xor_and_fetch_2: 854 case Builtin::BI__sync_xor_and_fetch_4: 855 case Builtin::BI__sync_xor_and_fetch_8: 856 case Builtin::BI__sync_xor_and_fetch_16: 857 return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_xor, E, 858 llvm::Instruction::Xor); 859 860 case Builtin::BI__sync_val_compare_and_swap_1: 861 case Builtin::BI__sync_val_compare_and_swap_2: 862 case Builtin::BI__sync_val_compare_and_swap_4: 863 case Builtin::BI__sync_val_compare_and_swap_8: 864 case Builtin::BI__sync_val_compare_and_swap_16: { 865 QualType T = E->getType(); 866 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0)); 867 unsigned AddrSpace = 868 cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace(); 869 870 llvm::IntegerType *IntType = 871 llvm::IntegerType::get(getLLVMContext(), 872 getContext().getTypeSize(T)); 873 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 874 llvm::Type *IntrinsicTypes[2] = { IntType, IntPtrType }; 875 Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap, 876 IntrinsicTypes, 2); 877 878 Value *Args[3]; 879 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType); 880 Args[1] = EmitScalarExpr(E->getArg(1)); 881 const llvm::Type *ValueType = Args[1]->getType(); 882 Args[1] = EmitToInt(*this, Args[1], T, IntType); 883 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType); 884 885 Value *Result = EmitCallWithBarrier(*this, AtomF, Args, Args + 3); 886 Result = EmitFromInt(*this, Result, T, ValueType); 887 return RValue::get(Result); 888 } 889 890 case Builtin::BI__sync_bool_compare_and_swap_1: 891 case Builtin::BI__sync_bool_compare_and_swap_2: 892 case Builtin::BI__sync_bool_compare_and_swap_4: 893 case Builtin::BI__sync_bool_compare_and_swap_8: 894 case Builtin::BI__sync_bool_compare_and_swap_16: { 895 QualType T = E->getArg(1)->getType(); 896 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0)); 897 unsigned AddrSpace = 898 cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace(); 899 900 llvm::IntegerType *IntType = 901 llvm::IntegerType::get(getLLVMContext(), 902 getContext().getTypeSize(T)); 903 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 904 llvm::Type *IntrinsicTypes[2] = { IntType, IntPtrType }; 905 Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap, 906 IntrinsicTypes, 2); 907 908 Value *Args[3]; 909 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType); 910 Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType); 911 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType); 912 913 Value *OldVal = Args[1]; 914 Value *PrevVal = EmitCallWithBarrier(*this, AtomF, Args, Args + 3); 915 Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal); 916 // zext bool to int. 917 Result = Builder.CreateZExt(Result, ConvertType(E->getType())); 918 return RValue::get(Result); 919 } 920 921 case Builtin::BI__sync_swap_1: 922 case Builtin::BI__sync_swap_2: 923 case Builtin::BI__sync_swap_4: 924 case Builtin::BI__sync_swap_8: 925 case Builtin::BI__sync_swap_16: 926 return EmitBinaryAtomic(*this, Intrinsic::atomic_swap, E); 927 928 case Builtin::BI__sync_lock_test_and_set_1: 929 case Builtin::BI__sync_lock_test_and_set_2: 930 case Builtin::BI__sync_lock_test_and_set_4: 931 case Builtin::BI__sync_lock_test_and_set_8: 932 case Builtin::BI__sync_lock_test_and_set_16: 933 return EmitBinaryAtomic(*this, Intrinsic::atomic_swap, E); 934 935 case Builtin::BI__sync_lock_release_1: 936 case Builtin::BI__sync_lock_release_2: 937 case Builtin::BI__sync_lock_release_4: 938 case Builtin::BI__sync_lock_release_8: 939 case Builtin::BI__sync_lock_release_16: { 940 Value *Ptr = EmitScalarExpr(E->getArg(0)); 941 const llvm::Type *ElTy = 942 cast<llvm::PointerType>(Ptr->getType())->getElementType(); 943 llvm::StoreInst *Store = 944 Builder.CreateStore(llvm::Constant::getNullValue(ElTy), Ptr); 945 Store->setVolatile(true); 946 return RValue::get(0); 947 } 948 949 case Builtin::BI__sync_synchronize: { 950 // We assume like gcc appears to, that this only applies to cached memory. 951 EmitMemoryBarrier(*this, true, true, true, true, false); 952 return RValue::get(0); 953 } 954 955 case Builtin::BI__builtin_llvm_memory_barrier: { 956 Value *C[5] = { 957 EmitScalarExpr(E->getArg(0)), 958 EmitScalarExpr(E->getArg(1)), 959 EmitScalarExpr(E->getArg(2)), 960 EmitScalarExpr(E->getArg(3)), 961 EmitScalarExpr(E->getArg(4)) 962 }; 963 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::memory_barrier), C, C + 5); 964 return RValue::get(0); 965 } 966 967 // Library functions with special handling. 968 case Builtin::BIsqrt: 969 case Builtin::BIsqrtf: 970 case Builtin::BIsqrtl: { 971 // TODO: there is currently no set of optimizer flags 972 // sufficient for us to rewrite sqrt to @llvm.sqrt. 973 // -fmath-errno=0 is not good enough; we need finiteness. 974 // We could probably precondition the call with an ult 975 // against 0, but is that worth the complexity? 976 break; 977 } 978 979 case Builtin::BIpow: 980 case Builtin::BIpowf: 981 case Builtin::BIpowl: { 982 // Rewrite sqrt to intrinsic if allowed. 983 if (!FD->hasAttr<ConstAttr>()) 984 break; 985 Value *Base = EmitScalarExpr(E->getArg(0)); 986 Value *Exponent = EmitScalarExpr(E->getArg(1)); 987 llvm::Type *ArgType = Base->getType(); 988 Value *F = CGM.getIntrinsic(Intrinsic::pow, &ArgType, 1); 989 return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp")); 990 } 991 992 case Builtin::BIfma: 993 case Builtin::BIfmaf: 994 case Builtin::BIfmal: 995 case Builtin::BI__builtin_fma: 996 case Builtin::BI__builtin_fmaf: 997 case Builtin::BI__builtin_fmal: { 998 // Rewrite fma to intrinsic. 999 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 1000 llvm::Type *ArgType = FirstArg->getType(); 1001 Value *F = CGM.getIntrinsic(Intrinsic::fma, &ArgType, 1); 1002 return RValue::get(Builder.CreateCall3(F, FirstArg, 1003 EmitScalarExpr(E->getArg(1)), 1004 EmitScalarExpr(E->getArg(2)), 1005 "tmp")); 1006 } 1007 1008 case Builtin::BI__builtin_signbit: 1009 case Builtin::BI__builtin_signbitf: 1010 case Builtin::BI__builtin_signbitl: { 1011 LLVMContext &C = CGM.getLLVMContext(); 1012 1013 Value *Arg = EmitScalarExpr(E->getArg(0)); 1014 const llvm::Type *ArgTy = Arg->getType(); 1015 if (ArgTy->isPPC_FP128Ty()) 1016 break; // FIXME: I'm not sure what the right implementation is here. 1017 int ArgWidth = ArgTy->getPrimitiveSizeInBits(); 1018 const llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth); 1019 Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy); 1020 Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy); 1021 Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp); 1022 return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType()))); 1023 } 1024 } 1025 1026 // If this is an alias for a libm function (e.g. __builtin_sin) turn it into 1027 // that function. 1028 if (getContext().BuiltinInfo.isLibFunction(BuiltinID) || 1029 getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 1030 return EmitCall(E->getCallee()->getType(), 1031 CGM.getBuiltinLibFunction(FD, BuiltinID), 1032 ReturnValueSlot(), E->arg_begin(), E->arg_end(), FD); 1033 1034 // See if we have a target specific intrinsic. 1035 const char *Name = getContext().BuiltinInfo.GetName(BuiltinID); 1036 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 1037 if (const char *Prefix = 1038 llvm::Triple::getArchTypePrefix(Target.getTriple().getArch())) 1039 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name); 1040 1041 if (IntrinsicID != Intrinsic::not_intrinsic) { 1042 SmallVector<Value*, 16> Args; 1043 1044 // Find out if any arguments are required to be integer constant 1045 // expressions. 1046 unsigned ICEArguments = 0; 1047 ASTContext::GetBuiltinTypeError Error; 1048 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 1049 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 1050 1051 Function *F = CGM.getIntrinsic(IntrinsicID); 1052 const llvm::FunctionType *FTy = F->getFunctionType(); 1053 1054 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 1055 Value *ArgValue; 1056 // If this is a normal argument, just emit it as a scalar. 1057 if ((ICEArguments & (1 << i)) == 0) { 1058 ArgValue = EmitScalarExpr(E->getArg(i)); 1059 } else { 1060 // If this is required to be a constant, constant fold it so that we 1061 // know that the generated intrinsic gets a ConstantInt. 1062 llvm::APSInt Result; 1063 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 1064 assert(IsConst && "Constant arg isn't actually constant?"); 1065 (void)IsConst; 1066 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 1067 } 1068 1069 // If the intrinsic arg type is different from the builtin arg type 1070 // we need to do a bit cast. 1071 const llvm::Type *PTy = FTy->getParamType(i); 1072 if (PTy != ArgValue->getType()) { 1073 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 1074 "Must be able to losslessly bit cast to param"); 1075 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 1076 } 1077 1078 Args.push_back(ArgValue); 1079 } 1080 1081 Value *V = Builder.CreateCall(F, Args.data(), Args.data() + Args.size()); 1082 QualType BuiltinRetType = E->getType(); 1083 1084 const llvm::Type *RetTy = llvm::Type::getVoidTy(getLLVMContext()); 1085 if (!BuiltinRetType->isVoidType()) RetTy = ConvertType(BuiltinRetType); 1086 1087 if (RetTy != V->getType()) { 1088 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 1089 "Must be able to losslessly bit cast result type"); 1090 V = Builder.CreateBitCast(V, RetTy); 1091 } 1092 1093 return RValue::get(V); 1094 } 1095 1096 // See if we have a target specific builtin that needs to be lowered. 1097 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 1098 return RValue::get(V); 1099 1100 ErrorUnsupported(E, "builtin function"); 1101 1102 // Unknown builtin, for now just dump it out and return undef. 1103 if (hasAggregateLLVMType(E->getType())) 1104 return RValue::getAggregate(CreateMemTemp(E->getType())); 1105 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 1106} 1107 1108Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 1109 const CallExpr *E) { 1110 switch (Target.getTriple().getArch()) { 1111 case llvm::Triple::arm: 1112 case llvm::Triple::thumb: 1113 return EmitARMBuiltinExpr(BuiltinID, E); 1114 case llvm::Triple::x86: 1115 case llvm::Triple::x86_64: 1116 return EmitX86BuiltinExpr(BuiltinID, E); 1117 case llvm::Triple::ppc: 1118 case llvm::Triple::ppc64: 1119 return EmitPPCBuiltinExpr(BuiltinID, E); 1120 default: 1121 return 0; 1122 } 1123} 1124 1125static llvm::VectorType *GetNeonType(LLVMContext &C, unsigned type, bool q) { 1126 switch (type) { 1127 default: break; 1128 case 0: 1129 case 5: return llvm::VectorType::get(llvm::Type::getInt8Ty(C), 8 << (int)q); 1130 case 6: 1131 case 7: 1132 case 1: return llvm::VectorType::get(llvm::Type::getInt16Ty(C),4 << (int)q); 1133 case 2: return llvm::VectorType::get(llvm::Type::getInt32Ty(C),2 << (int)q); 1134 case 3: return llvm::VectorType::get(llvm::Type::getInt64Ty(C),1 << (int)q); 1135 case 4: return llvm::VectorType::get(llvm::Type::getFloatTy(C),2 << (int)q); 1136 }; 1137 return 0; 1138} 1139 1140Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 1141 unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements(); 1142 SmallVector<Constant*, 16> Indices(nElts, C); 1143 Value* SV = llvm::ConstantVector::get(Indices); 1144 return Builder.CreateShuffleVector(V, V, SV, "lane"); 1145} 1146 1147Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 1148 const char *name, 1149 unsigned shift, bool rightshift) { 1150 unsigned j = 0; 1151 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 1152 ai != ae; ++ai, ++j) 1153 if (shift > 0 && shift == j) 1154 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 1155 else 1156 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 1157 1158 return Builder.CreateCall(F, Ops.begin(), Ops.end(), name); 1159} 1160 1161Value *CodeGenFunction::EmitNeonShiftVector(Value *V, const llvm::Type *Ty, 1162 bool neg) { 1163 ConstantInt *CI = cast<ConstantInt>(V); 1164 int SV = CI->getSExtValue(); 1165 1166 const llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 1167 llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV); 1168 SmallVector<llvm::Constant*, 16> CV(VTy->getNumElements(), C); 1169 return llvm::ConstantVector::get(CV); 1170} 1171 1172/// GetPointeeAlignment - Given an expression with a pointer type, find the 1173/// alignment of the type referenced by the pointer. Skip over implicit 1174/// casts. 1175static Value *GetPointeeAlignment(CodeGenFunction &CGF, const Expr *Addr) { 1176 unsigned Align = 1; 1177 // Check if the type is a pointer. The implicit cast operand might not be. 1178 while (Addr->getType()->isPointerType()) { 1179 QualType PtTy = Addr->getType()->getPointeeType(); 1180 unsigned NewA = CGF.getContext().getTypeAlignInChars(PtTy).getQuantity(); 1181 if (NewA > Align) 1182 Align = NewA; 1183 1184 // If the address is an implicit cast, repeat with the cast operand. 1185 if (const ImplicitCastExpr *CastAddr = dyn_cast<ImplicitCastExpr>(Addr)) { 1186 Addr = CastAddr->getSubExpr(); 1187 continue; 1188 } 1189 break; 1190 } 1191 return llvm::ConstantInt::get(CGF.Int32Ty, Align); 1192} 1193 1194Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 1195 const CallExpr *E) { 1196 if (BuiltinID == ARM::BI__clear_cache) { 1197 const FunctionDecl *FD = E->getDirectCallee(); 1198 // Oddly people write this call without args on occasion and gcc accepts 1199 // it - it's also marked as varargs in the description file. 1200 llvm::SmallVector<Value*, 2> Ops; 1201 for (unsigned i = 0; i < E->getNumArgs(); i++) 1202 Ops.push_back(EmitScalarExpr(E->getArg(i))); 1203 const llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 1204 const llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 1205 llvm::StringRef Name = FD->getName(); 1206 return Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 1207 Ops.begin(), Ops.end()); 1208 } 1209 1210 if (BuiltinID == ARM::BI__builtin_arm_ldrexd) { 1211 Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 1212 1213 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 1214 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd"); 1215 1216 Value *Val0 = Builder.CreateExtractValue(Val, 1); 1217 Value *Val1 = Builder.CreateExtractValue(Val, 0); 1218 Val0 = Builder.CreateZExt(Val0, Int64Ty); 1219 Val1 = Builder.CreateZExt(Val1, Int64Ty); 1220 1221 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 1222 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 1223 return Builder.CreateOr(Val, Val1); 1224 } 1225 1226 if (BuiltinID == ARM::BI__builtin_arm_strexd) { 1227 Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd); 1228 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL); 1229 1230 Value *One = llvm::ConstantInt::get(Int32Ty, 1); 1231 Value *Tmp = Builder.CreateAlloca(Int64Ty, One, "tmp"); 1232 Value *Val = EmitScalarExpr(E->getArg(0)); 1233 Builder.CreateStore(Val, Tmp); 1234 1235 Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 1236 Val = Builder.CreateLoad(LdPtr); 1237 1238 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 1239 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 1240 Value *StPtr = EmitScalarExpr(E->getArg(1)); 1241 return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd"); 1242 } 1243 1244 llvm::SmallVector<Value*, 4> Ops; 1245 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) 1246 Ops.push_back(EmitScalarExpr(E->getArg(i))); 1247 1248 llvm::APSInt Result; 1249 const Expr *Arg = E->getArg(E->getNumArgs()-1); 1250 if (!Arg->isIntegerConstantExpr(Result, getContext())) 1251 return 0; 1252 1253 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 1254 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 1255 // Determine the overloaded type of this builtin. 1256 llvm::Type *Ty; 1257 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 1258 Ty = llvm::Type::getFloatTy(getLLVMContext()); 1259 else 1260 Ty = llvm::Type::getDoubleTy(getLLVMContext()); 1261 1262 // Determine whether this is an unsigned conversion or not. 1263 bool usgn = Result.getZExtValue() == 1; 1264 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 1265 1266 // Call the appropriate intrinsic. 1267 Function *F = CGM.getIntrinsic(Int, &Ty, 1); 1268 return Builder.CreateCall(F, Ops.begin(), Ops.end(), "vcvtr"); 1269 } 1270 1271 // Determine the type of this overloaded NEON intrinsic. 1272 unsigned type = Result.getZExtValue(); 1273 bool usgn = type & 0x08; 1274 bool quad = type & 0x10; 1275 bool poly = (type & 0x7) == 5 || (type & 0x7) == 6; 1276 (void)poly; // Only used in assert()s. 1277 bool rightShift = false; 1278 1279 llvm::VectorType *VTy = GetNeonType(getLLVMContext(), type & 0x7, quad); 1280 llvm::Type *Ty = VTy; 1281 if (!Ty) 1282 return 0; 1283 1284 unsigned Int; 1285 switch (BuiltinID) { 1286 default: return 0; 1287 case ARM::BI__builtin_neon_vabd_v: 1288 case ARM::BI__builtin_neon_vabdq_v: 1289 Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds; 1290 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabd"); 1291 case ARM::BI__builtin_neon_vabs_v: 1292 case ARM::BI__builtin_neon_vabsq_v: 1293 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, &Ty, 1), 1294 Ops, "vabs"); 1295 case ARM::BI__builtin_neon_vaddhn_v: 1296 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, &Ty, 1), 1297 Ops, "vaddhn"); 1298 case ARM::BI__builtin_neon_vcale_v: 1299 std::swap(Ops[0], Ops[1]); 1300 case ARM::BI__builtin_neon_vcage_v: { 1301 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged); 1302 return EmitNeonCall(F, Ops, "vcage"); 1303 } 1304 case ARM::BI__builtin_neon_vcaleq_v: 1305 std::swap(Ops[0], Ops[1]); 1306 case ARM::BI__builtin_neon_vcageq_v: { 1307 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq); 1308 return EmitNeonCall(F, Ops, "vcage"); 1309 } 1310 case ARM::BI__builtin_neon_vcalt_v: 1311 std::swap(Ops[0], Ops[1]); 1312 case ARM::BI__builtin_neon_vcagt_v: { 1313 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd); 1314 return EmitNeonCall(F, Ops, "vcagt"); 1315 } 1316 case ARM::BI__builtin_neon_vcaltq_v: 1317 std::swap(Ops[0], Ops[1]); 1318 case ARM::BI__builtin_neon_vcagtq_v: { 1319 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq); 1320 return EmitNeonCall(F, Ops, "vcagt"); 1321 } 1322 case ARM::BI__builtin_neon_vcls_v: 1323 case ARM::BI__builtin_neon_vclsq_v: { 1324 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, &Ty, 1); 1325 return EmitNeonCall(F, Ops, "vcls"); 1326 } 1327 case ARM::BI__builtin_neon_vclz_v: 1328 case ARM::BI__builtin_neon_vclzq_v: { 1329 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, &Ty, 1); 1330 return EmitNeonCall(F, Ops, "vclz"); 1331 } 1332 case ARM::BI__builtin_neon_vcnt_v: 1333 case ARM::BI__builtin_neon_vcntq_v: { 1334 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, &Ty, 1); 1335 return EmitNeonCall(F, Ops, "vcnt"); 1336 } 1337 case ARM::BI__builtin_neon_vcvt_f16_v: { 1338 assert((type & 0x7) == 7 && !quad && "unexpected vcvt_f16_v builtin"); 1339 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf); 1340 return EmitNeonCall(F, Ops, "vcvt"); 1341 } 1342 case ARM::BI__builtin_neon_vcvt_f32_f16: { 1343 assert((type & 0x7) == 7 && !quad && "unexpected vcvt_f32_f16 builtin"); 1344 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp); 1345 return EmitNeonCall(F, Ops, "vcvt"); 1346 } 1347 case ARM::BI__builtin_neon_vcvt_f32_v: 1348 case ARM::BI__builtin_neon_vcvtq_f32_v: { 1349 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1350 Ty = GetNeonType(getLLVMContext(), 4, quad); 1351 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 1352 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 1353 } 1354 case ARM::BI__builtin_neon_vcvt_s32_v: 1355 case ARM::BI__builtin_neon_vcvt_u32_v: 1356 case ARM::BI__builtin_neon_vcvtq_s32_v: 1357 case ARM::BI__builtin_neon_vcvtq_u32_v: { 1358 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(getLLVMContext(), 4, quad)); 1359 return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") 1360 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); 1361 } 1362 case ARM::BI__builtin_neon_vcvt_n_f32_v: 1363 case ARM::BI__builtin_neon_vcvtq_n_f32_v: { 1364 llvm::Type *Tys[2] = { GetNeonType(getLLVMContext(), 4, quad), Ty }; 1365 Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp : Intrinsic::arm_neon_vcvtfxs2fp; 1366 Function *F = CGM.getIntrinsic(Int, Tys, 2); 1367 return EmitNeonCall(F, Ops, "vcvt_n"); 1368 } 1369 case ARM::BI__builtin_neon_vcvt_n_s32_v: 1370 case ARM::BI__builtin_neon_vcvt_n_u32_v: 1371 case ARM::BI__builtin_neon_vcvtq_n_s32_v: 1372 case ARM::BI__builtin_neon_vcvtq_n_u32_v: { 1373 llvm::Type *Tys[2] = { Ty, GetNeonType(getLLVMContext(), 4, quad) }; 1374 Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu : Intrinsic::arm_neon_vcvtfp2fxs; 1375 Function *F = CGM.getIntrinsic(Int, Tys, 2); 1376 return EmitNeonCall(F, Ops, "vcvt_n"); 1377 } 1378 case ARM::BI__builtin_neon_vext_v: 1379 case ARM::BI__builtin_neon_vextq_v: { 1380 int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); 1381 SmallVector<Constant*, 16> Indices; 1382 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 1383 Indices.push_back(ConstantInt::get(Int32Ty, i+CV)); 1384 1385 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1386 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 1387 Value *SV = llvm::ConstantVector::get(Indices); 1388 return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext"); 1389 } 1390 case ARM::BI__builtin_neon_vget_lane_i8: 1391 case ARM::BI__builtin_neon_vget_lane_i16: 1392 case ARM::BI__builtin_neon_vget_lane_i32: 1393 case ARM::BI__builtin_neon_vget_lane_i64: 1394 case ARM::BI__builtin_neon_vget_lane_f32: 1395 case ARM::BI__builtin_neon_vgetq_lane_i8: 1396 case ARM::BI__builtin_neon_vgetq_lane_i16: 1397 case ARM::BI__builtin_neon_vgetq_lane_i32: 1398 case ARM::BI__builtin_neon_vgetq_lane_i64: 1399 case ARM::BI__builtin_neon_vgetq_lane_f32: 1400 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 1401 "vget_lane"); 1402 case ARM::BI__builtin_neon_vhadd_v: 1403 case ARM::BI__builtin_neon_vhaddq_v: 1404 Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds; 1405 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhadd"); 1406 case ARM::BI__builtin_neon_vhsub_v: 1407 case ARM::BI__builtin_neon_vhsubq_v: 1408 Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs; 1409 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhsub"); 1410 case ARM::BI__builtin_neon_vld1_v: 1411 case ARM::BI__builtin_neon_vld1q_v: 1412 Ops.push_back(GetPointeeAlignment(*this, E->getArg(0))); 1413 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, &Ty, 1), 1414 Ops, "vld1"); 1415 case ARM::BI__builtin_neon_vld1_lane_v: 1416 case ARM::BI__builtin_neon_vld1q_lane_v: 1417 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 1418 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 1419 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1420 Ops[0] = Builder.CreateLoad(Ops[0]); 1421 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 1422 case ARM::BI__builtin_neon_vld1_dup_v: 1423 case ARM::BI__builtin_neon_vld1q_dup_v: { 1424 Value *V = UndefValue::get(Ty); 1425 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 1426 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1427 Ops[0] = Builder.CreateLoad(Ops[0]); 1428 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 1429 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 1430 return EmitNeonSplat(Ops[0], CI); 1431 } 1432 case ARM::BI__builtin_neon_vld2_v: 1433 case ARM::BI__builtin_neon_vld2q_v: { 1434 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, &Ty, 1); 1435 Value *Align = GetPointeeAlignment(*this, E->getArg(1)); 1436 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2"); 1437 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 1438 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1439 return Builder.CreateStore(Ops[1], Ops[0]); 1440 } 1441 case ARM::BI__builtin_neon_vld3_v: 1442 case ARM::BI__builtin_neon_vld3q_v: { 1443 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, &Ty, 1); 1444 Value *Align = GetPointeeAlignment(*this, E->getArg(1)); 1445 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3"); 1446 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 1447 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1448 return Builder.CreateStore(Ops[1], Ops[0]); 1449 } 1450 case ARM::BI__builtin_neon_vld4_v: 1451 case ARM::BI__builtin_neon_vld4q_v: { 1452 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, &Ty, 1); 1453 Value *Align = GetPointeeAlignment(*this, E->getArg(1)); 1454 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4"); 1455 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 1456 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1457 return Builder.CreateStore(Ops[1], Ops[0]); 1458 } 1459 case ARM::BI__builtin_neon_vld2_lane_v: 1460 case ARM::BI__builtin_neon_vld2q_lane_v: { 1461 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, &Ty, 1); 1462 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 1463 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 1464 Ops.push_back(GetPointeeAlignment(*this, E->getArg(1))); 1465 Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld2_lane"); 1466 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 1467 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1468 return Builder.CreateStore(Ops[1], Ops[0]); 1469 } 1470 case ARM::BI__builtin_neon_vld3_lane_v: 1471 case ARM::BI__builtin_neon_vld3q_lane_v: { 1472 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, &Ty, 1); 1473 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 1474 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 1475 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 1476 Ops.push_back(GetPointeeAlignment(*this, E->getArg(1))); 1477 Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld3_lane"); 1478 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 1479 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1480 return Builder.CreateStore(Ops[1], Ops[0]); 1481 } 1482 case ARM::BI__builtin_neon_vld4_lane_v: 1483 case ARM::BI__builtin_neon_vld4q_lane_v: { 1484 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, &Ty, 1); 1485 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 1486 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 1487 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 1488 Ops[5] = Builder.CreateBitCast(Ops[5], Ty); 1489 Ops.push_back(GetPointeeAlignment(*this, E->getArg(1))); 1490 Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld3_lane"); 1491 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 1492 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1493 return Builder.CreateStore(Ops[1], Ops[0]); 1494 } 1495 case ARM::BI__builtin_neon_vld2_dup_v: 1496 case ARM::BI__builtin_neon_vld3_dup_v: 1497 case ARM::BI__builtin_neon_vld4_dup_v: { 1498 // Handle 64-bit elements as a special-case. There is no "dup" needed. 1499 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 1500 switch (BuiltinID) { 1501 case ARM::BI__builtin_neon_vld2_dup_v: 1502 Int = Intrinsic::arm_neon_vld2; 1503 break; 1504 case ARM::BI__builtin_neon_vld3_dup_v: 1505 Int = Intrinsic::arm_neon_vld2; 1506 break; 1507 case ARM::BI__builtin_neon_vld4_dup_v: 1508 Int = Intrinsic::arm_neon_vld2; 1509 break; 1510 default: assert(0 && "unknown vld_dup intrinsic?"); 1511 } 1512 Function *F = CGM.getIntrinsic(Int, &Ty, 1); 1513 Value *Align = GetPointeeAlignment(*this, E->getArg(1)); 1514 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup"); 1515 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 1516 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1517 return Builder.CreateStore(Ops[1], Ops[0]); 1518 } 1519 switch (BuiltinID) { 1520 case ARM::BI__builtin_neon_vld2_dup_v: 1521 Int = Intrinsic::arm_neon_vld2lane; 1522 break; 1523 case ARM::BI__builtin_neon_vld3_dup_v: 1524 Int = Intrinsic::arm_neon_vld2lane; 1525 break; 1526 case ARM::BI__builtin_neon_vld4_dup_v: 1527 Int = Intrinsic::arm_neon_vld2lane; 1528 break; 1529 default: assert(0 && "unknown vld_dup intrinsic?"); 1530 } 1531 Function *F = CGM.getIntrinsic(Int, &Ty, 1); 1532 const llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 1533 1534 SmallVector<Value*, 6> Args; 1535 Args.push_back(Ops[1]); 1536 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 1537 1538 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 1539 Args.push_back(CI); 1540 Args.push_back(GetPointeeAlignment(*this, E->getArg(1))); 1541 1542 Ops[1] = Builder.CreateCall(F, Args.begin(), Args.end(), "vld_dup"); 1543 // splat lane 0 to all elts in each vector of the result. 1544 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 1545 Value *Val = Builder.CreateExtractValue(Ops[1], i); 1546 Value *Elt = Builder.CreateBitCast(Val, Ty); 1547 Elt = EmitNeonSplat(Elt, CI); 1548 Elt = Builder.CreateBitCast(Elt, Val->getType()); 1549 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 1550 } 1551 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 1552 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1553 return Builder.CreateStore(Ops[1], Ops[0]); 1554 } 1555 case ARM::BI__builtin_neon_vmax_v: 1556 case ARM::BI__builtin_neon_vmaxq_v: 1557 Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs; 1558 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmax"); 1559 case ARM::BI__builtin_neon_vmin_v: 1560 case ARM::BI__builtin_neon_vminq_v: 1561 Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins; 1562 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmin"); 1563 case ARM::BI__builtin_neon_vmovl_v: { 1564 const llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy); 1565 Ops[0] = Builder.CreateBitCast(Ops[0], DTy); 1566 if (usgn) 1567 return Builder.CreateZExt(Ops[0], Ty, "vmovl"); 1568 return Builder.CreateSExt(Ops[0], Ty, "vmovl"); 1569 } 1570 case ARM::BI__builtin_neon_vmovn_v: { 1571 const llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy); 1572 Ops[0] = Builder.CreateBitCast(Ops[0], QTy); 1573 return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); 1574 } 1575 case ARM::BI__builtin_neon_vmul_v: 1576 case ARM::BI__builtin_neon_vmulq_v: 1577 assert(poly && "vmul builtin only supported for polynomial types"); 1578 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, &Ty, 1), 1579 Ops, "vmul"); 1580 case ARM::BI__builtin_neon_vmull_v: 1581 Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls; 1582 Int = poly ? (unsigned)Intrinsic::arm_neon_vmullp : Int; 1583 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmull"); 1584 case ARM::BI__builtin_neon_vpadal_v: 1585 case ARM::BI__builtin_neon_vpadalq_v: { 1586 Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals; 1587 // The source operand type has twice as many elements of half the size. 1588 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 1589 const llvm::Type *EltTy = 1590 llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 1591 llvm::Type *NarrowTy = 1592 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 1593 llvm::Type *Tys[2] = { Ty, NarrowTy }; 1594 return EmitNeonCall(CGM.getIntrinsic(Int, Tys, 2), Ops, "vpadal"); 1595 } 1596 case ARM::BI__builtin_neon_vpadd_v: 1597 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, &Ty, 1), 1598 Ops, "vpadd"); 1599 case ARM::BI__builtin_neon_vpaddl_v: 1600 case ARM::BI__builtin_neon_vpaddlq_v: { 1601 Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls; 1602 // The source operand type has twice as many elements of half the size. 1603 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 1604 const llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 1605 llvm::Type *NarrowTy = 1606 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 1607 llvm::Type *Tys[2] = { Ty, NarrowTy }; 1608 return EmitNeonCall(CGM.getIntrinsic(Int, Tys, 2), Ops, "vpaddl"); 1609 } 1610 case ARM::BI__builtin_neon_vpmax_v: 1611 Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs; 1612 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmax"); 1613 case ARM::BI__builtin_neon_vpmin_v: 1614 Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins; 1615 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmin"); 1616 case ARM::BI__builtin_neon_vqabs_v: 1617 case ARM::BI__builtin_neon_vqabsq_v: 1618 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, &Ty, 1), 1619 Ops, "vqabs"); 1620 case ARM::BI__builtin_neon_vqadd_v: 1621 case ARM::BI__builtin_neon_vqaddq_v: 1622 Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds; 1623 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqadd"); 1624 case ARM::BI__builtin_neon_vqdmlal_v: 1625 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, &Ty, 1), 1626 Ops, "vqdmlal"); 1627 case ARM::BI__builtin_neon_vqdmlsl_v: 1628 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, &Ty, 1), 1629 Ops, "vqdmlsl"); 1630 case ARM::BI__builtin_neon_vqdmulh_v: 1631 case ARM::BI__builtin_neon_vqdmulhq_v: 1632 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, &Ty, 1), 1633 Ops, "vqdmulh"); 1634 case ARM::BI__builtin_neon_vqdmull_v: 1635 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, &Ty, 1), 1636 Ops, "vqdmull"); 1637 case ARM::BI__builtin_neon_vqmovn_v: 1638 Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns; 1639 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqmovn"); 1640 case ARM::BI__builtin_neon_vqmovun_v: 1641 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, &Ty, 1), 1642 Ops, "vqdmull"); 1643 case ARM::BI__builtin_neon_vqneg_v: 1644 case ARM::BI__builtin_neon_vqnegq_v: 1645 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, &Ty, 1), 1646 Ops, "vqneg"); 1647 case ARM::BI__builtin_neon_vqrdmulh_v: 1648 case ARM::BI__builtin_neon_vqrdmulhq_v: 1649 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, &Ty, 1), 1650 Ops, "vqrdmulh"); 1651 case ARM::BI__builtin_neon_vqrshl_v: 1652 case ARM::BI__builtin_neon_vqrshlq_v: 1653 Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts; 1654 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqrshl"); 1655 case ARM::BI__builtin_neon_vqrshrn_n_v: 1656 Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 1657 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqrshrn_n", 1658 1, true); 1659 case ARM::BI__builtin_neon_vqrshrun_n_v: 1660 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, &Ty, 1), 1661 Ops, "vqrshrun_n", 1, true); 1662 case ARM::BI__builtin_neon_vqshl_v: 1663 case ARM::BI__builtin_neon_vqshlq_v: 1664 Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts; 1665 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshl"); 1666 case ARM::BI__builtin_neon_vqshl_n_v: 1667 case ARM::BI__builtin_neon_vqshlq_n_v: 1668 Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts; 1669 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshl_n", 1670 1, false); 1671 case ARM::BI__builtin_neon_vqshlu_n_v: 1672 case ARM::BI__builtin_neon_vqshluq_n_v: 1673 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, &Ty, 1), 1674 Ops, "vqshlu", 1, false); 1675 case ARM::BI__builtin_neon_vqshrn_n_v: 1676 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 1677 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshrn_n", 1678 1, true); 1679 case ARM::BI__builtin_neon_vqshrun_n_v: 1680 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, &Ty, 1), 1681 Ops, "vqshrun_n", 1, true); 1682 case ARM::BI__builtin_neon_vqsub_v: 1683 case ARM::BI__builtin_neon_vqsubq_v: 1684 Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs; 1685 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqsub"); 1686 case ARM::BI__builtin_neon_vraddhn_v: 1687 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, &Ty, 1), 1688 Ops, "vraddhn"); 1689 case ARM::BI__builtin_neon_vrecpe_v: 1690 case ARM::BI__builtin_neon_vrecpeq_v: 1691 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, &Ty, 1), 1692 Ops, "vrecpe"); 1693 case ARM::BI__builtin_neon_vrecps_v: 1694 case ARM::BI__builtin_neon_vrecpsq_v: 1695 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, &Ty, 1), 1696 Ops, "vrecps"); 1697 case ARM::BI__builtin_neon_vrhadd_v: 1698 case ARM::BI__builtin_neon_vrhaddq_v: 1699 Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds; 1700 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrhadd"); 1701 case ARM::BI__builtin_neon_vrshl_v: 1702 case ARM::BI__builtin_neon_vrshlq_v: 1703 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 1704 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrshl"); 1705 case ARM::BI__builtin_neon_vrshrn_n_v: 1706 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, &Ty, 1), 1707 Ops, "vrshrn_n", 1, true); 1708 case ARM::BI__builtin_neon_vrshr_n_v: 1709 case ARM::BI__builtin_neon_vrshrq_n_v: 1710 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 1711 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrshr_n", 1, true); 1712 case ARM::BI__builtin_neon_vrsqrte_v: 1713 case ARM::BI__builtin_neon_vrsqrteq_v: 1714 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, &Ty, 1), 1715 Ops, "vrsqrte"); 1716 case ARM::BI__builtin_neon_vrsqrts_v: 1717 case ARM::BI__builtin_neon_vrsqrtsq_v: 1718 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, &Ty, 1), 1719 Ops, "vrsqrts"); 1720 case ARM::BI__builtin_neon_vrsra_n_v: 1721 case ARM::BI__builtin_neon_vrsraq_n_v: 1722 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1723 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 1724 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 1725 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 1726 Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, &Ty, 1), Ops[1], Ops[2]); 1727 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 1728 case ARM::BI__builtin_neon_vrsubhn_v: 1729 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, &Ty, 1), 1730 Ops, "vrsubhn"); 1731 case ARM::BI__builtin_neon_vset_lane_i8: 1732 case ARM::BI__builtin_neon_vset_lane_i16: 1733 case ARM::BI__builtin_neon_vset_lane_i32: 1734 case ARM::BI__builtin_neon_vset_lane_i64: 1735 case ARM::BI__builtin_neon_vset_lane_f32: 1736 case ARM::BI__builtin_neon_vsetq_lane_i8: 1737 case ARM::BI__builtin_neon_vsetq_lane_i16: 1738 case ARM::BI__builtin_neon_vsetq_lane_i32: 1739 case ARM::BI__builtin_neon_vsetq_lane_i64: 1740 case ARM::BI__builtin_neon_vsetq_lane_f32: 1741 Ops.push_back(EmitScalarExpr(E->getArg(2))); 1742 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 1743 case ARM::BI__builtin_neon_vshl_v: 1744 case ARM::BI__builtin_neon_vshlq_v: 1745 Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts; 1746 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vshl"); 1747 case ARM::BI__builtin_neon_vshll_n_v: 1748 Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls; 1749 return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vshll", 1); 1750 case ARM::BI__builtin_neon_vshl_n_v: 1751 case ARM::BI__builtin_neon_vshlq_n_v: 1752 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); 1753 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], "vshl_n"); 1754 case ARM::BI__builtin_neon_vshrn_n_v: 1755 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, &Ty, 1), 1756 Ops, "vshrn_n", 1, true); 1757 case ARM::BI__builtin_neon_vshr_n_v: 1758 case ARM::BI__builtin_neon_vshrq_n_v: 1759 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1760 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); 1761 if (usgn) 1762 return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n"); 1763 else 1764 return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n"); 1765 case ARM::BI__builtin_neon_vsri_n_v: 1766 case ARM::BI__builtin_neon_vsriq_n_v: 1767 rightShift = true; 1768 case ARM::BI__builtin_neon_vsli_n_v: 1769 case ARM::BI__builtin_neon_vsliq_n_v: 1770 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 1771 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, &Ty, 1), 1772 Ops, "vsli_n"); 1773 case ARM::BI__builtin_neon_vsra_n_v: 1774 case ARM::BI__builtin_neon_vsraq_n_v: 1775 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1776 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 1777 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false); 1778 if (usgn) 1779 Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n"); 1780 else 1781 Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n"); 1782 return Builder.CreateAdd(Ops[0], Ops[1]); 1783 case ARM::BI__builtin_neon_vst1_v: 1784 case ARM::BI__builtin_neon_vst1q_v: 1785 Ops.push_back(GetPointeeAlignment(*this, E->getArg(0))); 1786 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, &Ty, 1), 1787 Ops, ""); 1788 case ARM::BI__builtin_neon_vst1_lane_v: 1789 case ARM::BI__builtin_neon_vst1q_lane_v: 1790 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 1791 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 1792 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 1793 return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty)); 1794 case ARM::BI__builtin_neon_vst2_v: 1795 case ARM::BI__builtin_neon_vst2q_v: 1796 Ops.push_back(GetPointeeAlignment(*this, E->getArg(0))); 1797 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, &Ty, 1), 1798 Ops, ""); 1799 case ARM::BI__builtin_neon_vst2_lane_v: 1800 case ARM::BI__builtin_neon_vst2q_lane_v: 1801 Ops.push_back(GetPointeeAlignment(*this, E->getArg(0))); 1802 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, &Ty, 1), 1803 Ops, ""); 1804 case ARM::BI__builtin_neon_vst3_v: 1805 case ARM::BI__builtin_neon_vst3q_v: 1806 Ops.push_back(GetPointeeAlignment(*this, E->getArg(0))); 1807 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, &Ty, 1), 1808 Ops, ""); 1809 case ARM::BI__builtin_neon_vst3_lane_v: 1810 case ARM::BI__builtin_neon_vst3q_lane_v: 1811 Ops.push_back(GetPointeeAlignment(*this, E->getArg(0))); 1812 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, &Ty, 1), 1813 Ops, ""); 1814 case ARM::BI__builtin_neon_vst4_v: 1815 case ARM::BI__builtin_neon_vst4q_v: 1816 Ops.push_back(GetPointeeAlignment(*this, E->getArg(0))); 1817 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, &Ty, 1), 1818 Ops, ""); 1819 case ARM::BI__builtin_neon_vst4_lane_v: 1820 case ARM::BI__builtin_neon_vst4q_lane_v: 1821 Ops.push_back(GetPointeeAlignment(*this, E->getArg(0))); 1822 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, &Ty, 1), 1823 Ops, ""); 1824 case ARM::BI__builtin_neon_vsubhn_v: 1825 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, &Ty, 1), 1826 Ops, "vsubhn"); 1827 case ARM::BI__builtin_neon_vtbl1_v: 1828 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 1829 Ops, "vtbl1"); 1830 case ARM::BI__builtin_neon_vtbl2_v: 1831 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 1832 Ops, "vtbl2"); 1833 case ARM::BI__builtin_neon_vtbl3_v: 1834 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 1835 Ops, "vtbl3"); 1836 case ARM::BI__builtin_neon_vtbl4_v: 1837 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 1838 Ops, "vtbl4"); 1839 case ARM::BI__builtin_neon_vtbx1_v: 1840 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 1841 Ops, "vtbx1"); 1842 case ARM::BI__builtin_neon_vtbx2_v: 1843 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 1844 Ops, "vtbx2"); 1845 case ARM::BI__builtin_neon_vtbx3_v: 1846 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 1847 Ops, "vtbx3"); 1848 case ARM::BI__builtin_neon_vtbx4_v: 1849 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 1850 Ops, "vtbx4"); 1851 case ARM::BI__builtin_neon_vtst_v: 1852 case ARM::BI__builtin_neon_vtstq_v: { 1853 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 1854 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 1855 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 1856 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 1857 ConstantAggregateZero::get(Ty)); 1858 return Builder.CreateSExt(Ops[0], Ty, "vtst"); 1859 } 1860 case ARM::BI__builtin_neon_vtrn_v: 1861 case ARM::BI__builtin_neon_vtrnq_v: { 1862 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 1863 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 1864 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 1865 Value *SV = 0; 1866 1867 for (unsigned vi = 0; vi != 2; ++vi) { 1868 SmallVector<Constant*, 16> Indices; 1869 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 1870 Indices.push_back(ConstantInt::get(Int32Ty, i+vi)); 1871 Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi)); 1872 } 1873 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 1874 SV = llvm::ConstantVector::get(Indices); 1875 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); 1876 SV = Builder.CreateStore(SV, Addr); 1877 } 1878 return SV; 1879 } 1880 case ARM::BI__builtin_neon_vuzp_v: 1881 case ARM::BI__builtin_neon_vuzpq_v: { 1882 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 1883 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 1884 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 1885 Value *SV = 0; 1886 1887 for (unsigned vi = 0; vi != 2; ++vi) { 1888 SmallVector<Constant*, 16> Indices; 1889 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 1890 Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); 1891 1892 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 1893 SV = llvm::ConstantVector::get(Indices); 1894 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); 1895 SV = Builder.CreateStore(SV, Addr); 1896 } 1897 return SV; 1898 } 1899 case ARM::BI__builtin_neon_vzip_v: 1900 case ARM::BI__builtin_neon_vzipq_v: { 1901 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 1902 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 1903 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 1904 Value *SV = 0; 1905 1906 for (unsigned vi = 0; vi != 2; ++vi) { 1907 SmallVector<Constant*, 16> Indices; 1908 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 1909 Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); 1910 Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); 1911 } 1912 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 1913 SV = llvm::ConstantVector::get(Indices); 1914 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); 1915 SV = Builder.CreateStore(SV, Addr); 1916 } 1917 return SV; 1918 } 1919 } 1920} 1921 1922llvm::Value *CodeGenFunction:: 1923BuildVector(const llvm::SmallVectorImpl<llvm::Value*> &Ops) { 1924 assert((Ops.size() & (Ops.size() - 1)) == 0 && 1925 "Not a power-of-two sized vector!"); 1926 bool AllConstants = true; 1927 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 1928 AllConstants &= isa<Constant>(Ops[i]); 1929 1930 // If this is a constant vector, create a ConstantVector. 1931 if (AllConstants) { 1932 std::vector<llvm::Constant*> CstOps; 1933 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 1934 CstOps.push_back(cast<Constant>(Ops[i])); 1935 return llvm::ConstantVector::get(CstOps); 1936 } 1937 1938 // Otherwise, insertelement the values to build the vector. 1939 Value *Result = 1940 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 1941 1942 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 1943 Result = Builder.CreateInsertElement(Result, Ops[i], 1944 llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), i)); 1945 1946 return Result; 1947} 1948 1949Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, 1950 const CallExpr *E) { 1951 llvm::SmallVector<Value*, 4> Ops; 1952 1953 // Find out if any arguments are required to be integer constant expressions. 1954 unsigned ICEArguments = 0; 1955 ASTContext::GetBuiltinTypeError Error; 1956 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 1957 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 1958 1959 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 1960 // If this is a normal argument, just emit it as a scalar. 1961 if ((ICEArguments & (1 << i)) == 0) { 1962 Ops.push_back(EmitScalarExpr(E->getArg(i))); 1963 continue; 1964 } 1965 1966 // If this is required to be a constant, constant fold it so that we know 1967 // that the generated intrinsic gets a ConstantInt. 1968 llvm::APSInt Result; 1969 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 1970 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 1971 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 1972 } 1973 1974 switch (BuiltinID) { 1975 default: return 0; 1976 case X86::BI__builtin_ia32_pslldi128: 1977 case X86::BI__builtin_ia32_psllqi128: 1978 case X86::BI__builtin_ia32_psllwi128: 1979 case X86::BI__builtin_ia32_psradi128: 1980 case X86::BI__builtin_ia32_psrawi128: 1981 case X86::BI__builtin_ia32_psrldi128: 1982 case X86::BI__builtin_ia32_psrlqi128: 1983 case X86::BI__builtin_ia32_psrlwi128: { 1984 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext"); 1985 const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); 1986 llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0); 1987 Ops[1] = Builder.CreateInsertElement(llvm::UndefValue::get(Ty), 1988 Ops[1], Zero, "insert"); 1989 Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType(), "bitcast"); 1990 const char *name = 0; 1991 Intrinsic::ID ID = Intrinsic::not_intrinsic; 1992 1993 switch (BuiltinID) { 1994 default: assert(0 && "Unsupported shift intrinsic!"); 1995 case X86::BI__builtin_ia32_pslldi128: 1996 name = "pslldi"; 1997 ID = Intrinsic::x86_sse2_psll_d; 1998 break; 1999 case X86::BI__builtin_ia32_psllqi128: 2000 name = "psllqi"; 2001 ID = Intrinsic::x86_sse2_psll_q; 2002 break; 2003 case X86::BI__builtin_ia32_psllwi128: 2004 name = "psllwi"; 2005 ID = Intrinsic::x86_sse2_psll_w; 2006 break; 2007 case X86::BI__builtin_ia32_psradi128: 2008 name = "psradi"; 2009 ID = Intrinsic::x86_sse2_psra_d; 2010 break; 2011 case X86::BI__builtin_ia32_psrawi128: 2012 name = "psrawi"; 2013 ID = Intrinsic::x86_sse2_psra_w; 2014 break; 2015 case X86::BI__builtin_ia32_psrldi128: 2016 name = "psrldi"; 2017 ID = Intrinsic::x86_sse2_psrl_d; 2018 break; 2019 case X86::BI__builtin_ia32_psrlqi128: 2020 name = "psrlqi"; 2021 ID = Intrinsic::x86_sse2_psrl_q; 2022 break; 2023 case X86::BI__builtin_ia32_psrlwi128: 2024 name = "psrlwi"; 2025 ID = Intrinsic::x86_sse2_psrl_w; 2026 break; 2027 } 2028 llvm::Function *F = CGM.getIntrinsic(ID); 2029 return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name); 2030 } 2031 case X86::BI__builtin_ia32_vec_init_v8qi: 2032 case X86::BI__builtin_ia32_vec_init_v4hi: 2033 case X86::BI__builtin_ia32_vec_init_v2si: 2034 return Builder.CreateBitCast(BuildVector(Ops), 2035 llvm::Type::getX86_MMXTy(getLLVMContext())); 2036 case X86::BI__builtin_ia32_vec_ext_v2si: 2037 return Builder.CreateExtractElement(Ops[0], 2038 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 2039 case X86::BI__builtin_ia32_pslldi: 2040 case X86::BI__builtin_ia32_psllqi: 2041 case X86::BI__builtin_ia32_psllwi: 2042 case X86::BI__builtin_ia32_psradi: 2043 case X86::BI__builtin_ia32_psrawi: 2044 case X86::BI__builtin_ia32_psrldi: 2045 case X86::BI__builtin_ia32_psrlqi: 2046 case X86::BI__builtin_ia32_psrlwi: { 2047 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext"); 2048 const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 1); 2049 Ops[1] = Builder.CreateBitCast(Ops[1], Ty, "bitcast"); 2050 const char *name = 0; 2051 Intrinsic::ID ID = Intrinsic::not_intrinsic; 2052 2053 switch (BuiltinID) { 2054 default: assert(0 && "Unsupported shift intrinsic!"); 2055 case X86::BI__builtin_ia32_pslldi: 2056 name = "pslldi"; 2057 ID = Intrinsic::x86_mmx_psll_d; 2058 break; 2059 case X86::BI__builtin_ia32_psllqi: 2060 name = "psllqi"; 2061 ID = Intrinsic::x86_mmx_psll_q; 2062 break; 2063 case X86::BI__builtin_ia32_psllwi: 2064 name = "psllwi"; 2065 ID = Intrinsic::x86_mmx_psll_w; 2066 break; 2067 case X86::BI__builtin_ia32_psradi: 2068 name = "psradi"; 2069 ID = Intrinsic::x86_mmx_psra_d; 2070 break; 2071 case X86::BI__builtin_ia32_psrawi: 2072 name = "psrawi"; 2073 ID = Intrinsic::x86_mmx_psra_w; 2074 break; 2075 case X86::BI__builtin_ia32_psrldi: 2076 name = "psrldi"; 2077 ID = Intrinsic::x86_mmx_psrl_d; 2078 break; 2079 case X86::BI__builtin_ia32_psrlqi: 2080 name = "psrlqi"; 2081 ID = Intrinsic::x86_mmx_psrl_q; 2082 break; 2083 case X86::BI__builtin_ia32_psrlwi: 2084 name = "psrlwi"; 2085 ID = Intrinsic::x86_mmx_psrl_w; 2086 break; 2087 } 2088 llvm::Function *F = CGM.getIntrinsic(ID); 2089 return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name); 2090 } 2091 case X86::BI__builtin_ia32_cmpps: { 2092 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps); 2093 return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpps"); 2094 } 2095 case X86::BI__builtin_ia32_cmpss: { 2096 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ss); 2097 return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpss"); 2098 } 2099 case X86::BI__builtin_ia32_ldmxcsr: { 2100 const llvm::Type *PtrTy = Int8PtrTy; 2101 Value *One = llvm::ConstantInt::get(Int32Ty, 1); 2102 Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp"); 2103 Builder.CreateStore(Ops[0], Tmp); 2104 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 2105 Builder.CreateBitCast(Tmp, PtrTy)); 2106 } 2107 case X86::BI__builtin_ia32_stmxcsr: { 2108 const llvm::Type *PtrTy = Int8PtrTy; 2109 Value *One = llvm::ConstantInt::get(Int32Ty, 1); 2110 Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp"); 2111 One = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 2112 Builder.CreateBitCast(Tmp, PtrTy)); 2113 return Builder.CreateLoad(Tmp, "stmxcsr"); 2114 } 2115 case X86::BI__builtin_ia32_cmppd: { 2116 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_pd); 2117 return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmppd"); 2118 } 2119 case X86::BI__builtin_ia32_cmpsd: { 2120 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_sd); 2121 return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpsd"); 2122 } 2123 case X86::BI__builtin_ia32_storehps: 2124 case X86::BI__builtin_ia32_storelps: { 2125 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 2126 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 2127 2128 // cast val v2i64 2129 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 2130 2131 // extract (0, 1) 2132 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; 2133 llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index); 2134 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 2135 2136 // cast pointer to i64 & store 2137 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 2138 return Builder.CreateStore(Ops[1], Ops[0]); 2139 } 2140 case X86::BI__builtin_ia32_palignr: { 2141 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 2142 2143 // If palignr is shifting the pair of input vectors less than 9 bytes, 2144 // emit a shuffle instruction. 2145 if (shiftVal <= 8) { 2146 llvm::SmallVector<llvm::Constant*, 8> Indices; 2147 for (unsigned i = 0; i != 8; ++i) 2148 Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i)); 2149 2150 Value* SV = llvm::ConstantVector::get(Indices); 2151 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 2152 } 2153 2154 // If palignr is shifting the pair of input vectors more than 8 but less 2155 // than 16 bytes, emit a logical right shift of the destination. 2156 if (shiftVal < 16) { 2157 // MMX has these as 1 x i64 vectors for some odd optimization reasons. 2158 const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1); 2159 2160 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 2161 Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8); 2162 2163 // create i32 constant 2164 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q); 2165 return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr"); 2166 } 2167 2168 // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. 2169 return llvm::Constant::getNullValue(ConvertType(E->getType())); 2170 } 2171 case X86::BI__builtin_ia32_palignr128: { 2172 unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 2173 2174 // If palignr is shifting the pair of input vectors less than 17 bytes, 2175 // emit a shuffle instruction. 2176 if (shiftVal <= 16) { 2177 llvm::SmallVector<llvm::Constant*, 16> Indices; 2178 for (unsigned i = 0; i != 16; ++i) 2179 Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i)); 2180 2181 Value* SV = llvm::ConstantVector::get(Indices); 2182 return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); 2183 } 2184 2185 // If palignr is shifting the pair of input vectors more than 16 but less 2186 // than 32 bytes, emit a logical right shift of the destination. 2187 if (shiftVal < 32) { 2188 const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 2189 2190 Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); 2191 Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8); 2192 2193 // create i32 constant 2194 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq); 2195 return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr"); 2196 } 2197 2198 // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. 2199 return llvm::Constant::getNullValue(ConvertType(E->getType())); 2200 } 2201 case X86::BI__builtin_ia32_movntps: 2202 case X86::BI__builtin_ia32_movntpd: 2203 case X86::BI__builtin_ia32_movntdq: 2204 case X86::BI__builtin_ia32_movnti: { 2205 llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(), 2206 Builder.getInt32(1)); 2207 2208 // Convert the type of the pointer to a pointer to the stored type. 2209 Value *BC = Builder.CreateBitCast(Ops[0], 2210 llvm::PointerType::getUnqual(Ops[1]->getType()), 2211 "cast"); 2212 StoreInst *SI = Builder.CreateStore(Ops[1], BC); 2213 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 2214 SI->setAlignment(16); 2215 return SI; 2216 } 2217 // 3DNow! 2218 case X86::BI__builtin_ia32_pavgusb: 2219 case X86::BI__builtin_ia32_pf2id: 2220 case X86::BI__builtin_ia32_pfacc: 2221 case X86::BI__builtin_ia32_pfadd: 2222 case X86::BI__builtin_ia32_pfcmpeq: 2223 case X86::BI__builtin_ia32_pfcmpge: 2224 case X86::BI__builtin_ia32_pfcmpgt: 2225 case X86::BI__builtin_ia32_pfmax: 2226 case X86::BI__builtin_ia32_pfmin: 2227 case X86::BI__builtin_ia32_pfmul: 2228 case X86::BI__builtin_ia32_pfrcp: 2229 case X86::BI__builtin_ia32_pfrcpit1: 2230 case X86::BI__builtin_ia32_pfrcpit2: 2231 case X86::BI__builtin_ia32_pfrsqrt: 2232 case X86::BI__builtin_ia32_pfrsqit1: 2233 case X86::BI__builtin_ia32_pfrsqrtit1: 2234 case X86::BI__builtin_ia32_pfsub: 2235 case X86::BI__builtin_ia32_pfsubr: 2236 case X86::BI__builtin_ia32_pi2fd: 2237 case X86::BI__builtin_ia32_pmulhrw: 2238 case X86::BI__builtin_ia32_pf2iw: 2239 case X86::BI__builtin_ia32_pfnacc: 2240 case X86::BI__builtin_ia32_pfpnacc: 2241 case X86::BI__builtin_ia32_pi2fw: 2242 case X86::BI__builtin_ia32_pswapdsf: 2243 case X86::BI__builtin_ia32_pswapdsi: { 2244 const char *name = 0; 2245 Intrinsic::ID ID = Intrinsic::not_intrinsic; 2246 switch(BuiltinID) { 2247 case X86::BI__builtin_ia32_pavgusb: 2248 name = "pavgusb"; 2249 ID = Intrinsic::x86_3dnow_pavgusb; 2250 break; 2251 case X86::BI__builtin_ia32_pf2id: 2252 name = "pf2id"; 2253 ID = Intrinsic::x86_3dnow_pf2id; 2254 break; 2255 case X86::BI__builtin_ia32_pfacc: 2256 name = "pfacc"; 2257 ID = Intrinsic::x86_3dnow_pfacc; 2258 break; 2259 case X86::BI__builtin_ia32_pfadd: 2260 name = "pfadd"; 2261 ID = Intrinsic::x86_3dnow_pfadd; 2262 break; 2263 case X86::BI__builtin_ia32_pfcmpeq: 2264 name = "pfcmpeq"; 2265 ID = Intrinsic::x86_3dnow_pfcmpeq; 2266 break; 2267 case X86::BI__builtin_ia32_pfcmpge: 2268 name = "pfcmpge"; 2269 ID = Intrinsic::x86_3dnow_pfcmpge; 2270 break; 2271 case X86::BI__builtin_ia32_pfcmpgt: 2272 name = "pfcmpgt"; 2273 ID = Intrinsic::x86_3dnow_pfcmpgt; 2274 break; 2275 case X86::BI__builtin_ia32_pfmax: 2276 name = "pfmax"; 2277 ID = Intrinsic::x86_3dnow_pfmax; 2278 break; 2279 case X86::BI__builtin_ia32_pfmin: 2280 name = "pfmin"; 2281 ID = Intrinsic::x86_3dnow_pfmin; 2282 break; 2283 case X86::BI__builtin_ia32_pfmul: 2284 name = "pfmul"; 2285 ID = Intrinsic::x86_3dnow_pfmul; 2286 break; 2287 case X86::BI__builtin_ia32_pfrcp: 2288 name = "pfrcp"; 2289 ID = Intrinsic::x86_3dnow_pfrcp; 2290 break; 2291 case X86::BI__builtin_ia32_pfrcpit1: 2292 name = "pfrcpit1"; 2293 ID = Intrinsic::x86_3dnow_pfrcpit1; 2294 break; 2295 case X86::BI__builtin_ia32_pfrcpit2: 2296 name = "pfrcpit2"; 2297 ID = Intrinsic::x86_3dnow_pfrcpit2; 2298 break; 2299 case X86::BI__builtin_ia32_pfrsqrt: 2300 name = "pfrsqrt"; 2301 ID = Intrinsic::x86_3dnow_pfrsqrt; 2302 break; 2303 case X86::BI__builtin_ia32_pfrsqit1: 2304 case X86::BI__builtin_ia32_pfrsqrtit1: 2305 name = "pfrsqit1"; 2306 ID = Intrinsic::x86_3dnow_pfrsqit1; 2307 break; 2308 case X86::BI__builtin_ia32_pfsub: 2309 name = "pfsub"; 2310 ID = Intrinsic::x86_3dnow_pfsub; 2311 break; 2312 case X86::BI__builtin_ia32_pfsubr: 2313 name = "pfsubr"; 2314 ID = Intrinsic::x86_3dnow_pfsubr; 2315 break; 2316 case X86::BI__builtin_ia32_pi2fd: 2317 name = "pi2fd"; 2318 ID = Intrinsic::x86_3dnow_pi2fd; 2319 break; 2320 case X86::BI__builtin_ia32_pmulhrw: 2321 name = "pmulhrw"; 2322 ID = Intrinsic::x86_3dnow_pmulhrw; 2323 break; 2324 case X86::BI__builtin_ia32_pf2iw: 2325 name = "pf2iw"; 2326 ID = Intrinsic::x86_3dnowa_pf2iw; 2327 break; 2328 case X86::BI__builtin_ia32_pfnacc: 2329 name = "pfnacc"; 2330 ID = Intrinsic::x86_3dnowa_pfnacc; 2331 break; 2332 case X86::BI__builtin_ia32_pfpnacc: 2333 name = "pfpnacc"; 2334 ID = Intrinsic::x86_3dnowa_pfpnacc; 2335 break; 2336 case X86::BI__builtin_ia32_pi2fw: 2337 name = "pi2fw"; 2338 ID = Intrinsic::x86_3dnowa_pi2fw; 2339 break; 2340 case X86::BI__builtin_ia32_pswapdsf: 2341 case X86::BI__builtin_ia32_pswapdsi: 2342 name = "pswapd"; 2343 ID = Intrinsic::x86_3dnowa_pswapd; 2344 break; 2345 } 2346 llvm::Function *F = CGM.getIntrinsic(ID); 2347 return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name); 2348 } 2349 } 2350} 2351 2352Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 2353 const CallExpr *E) { 2354 llvm::SmallVector<Value*, 4> Ops; 2355 2356 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 2357 Ops.push_back(EmitScalarExpr(E->getArg(i))); 2358 2359 Intrinsic::ID ID = Intrinsic::not_intrinsic; 2360 2361 switch (BuiltinID) { 2362 default: return 0; 2363 2364 // vec_ld, vec_lvsl, vec_lvsr 2365 case PPC::BI__builtin_altivec_lvx: 2366 case PPC::BI__builtin_altivec_lvxl: 2367 case PPC::BI__builtin_altivec_lvebx: 2368 case PPC::BI__builtin_altivec_lvehx: 2369 case PPC::BI__builtin_altivec_lvewx: 2370 case PPC::BI__builtin_altivec_lvsl: 2371 case PPC::BI__builtin_altivec_lvsr: 2372 { 2373 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 2374 2375 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0], "tmp"); 2376 Ops.pop_back(); 2377 2378 switch (BuiltinID) { 2379 default: assert(0 && "Unsupported ld/lvsl/lvsr intrinsic!"); 2380 case PPC::BI__builtin_altivec_lvx: 2381 ID = Intrinsic::ppc_altivec_lvx; 2382 break; 2383 case PPC::BI__builtin_altivec_lvxl: 2384 ID = Intrinsic::ppc_altivec_lvxl; 2385 break; 2386 case PPC::BI__builtin_altivec_lvebx: 2387 ID = Intrinsic::ppc_altivec_lvebx; 2388 break; 2389 case PPC::BI__builtin_altivec_lvehx: 2390 ID = Intrinsic::ppc_altivec_lvehx; 2391 break; 2392 case PPC::BI__builtin_altivec_lvewx: 2393 ID = Intrinsic::ppc_altivec_lvewx; 2394 break; 2395 case PPC::BI__builtin_altivec_lvsl: 2396 ID = Intrinsic::ppc_altivec_lvsl; 2397 break; 2398 case PPC::BI__builtin_altivec_lvsr: 2399 ID = Intrinsic::ppc_altivec_lvsr; 2400 break; 2401 } 2402 llvm::Function *F = CGM.getIntrinsic(ID); 2403 return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), ""); 2404 } 2405 2406 // vec_st 2407 case PPC::BI__builtin_altivec_stvx: 2408 case PPC::BI__builtin_altivec_stvxl: 2409 case PPC::BI__builtin_altivec_stvebx: 2410 case PPC::BI__builtin_altivec_stvehx: 2411 case PPC::BI__builtin_altivec_stvewx: 2412 { 2413 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 2414 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1], "tmp"); 2415 Ops.pop_back(); 2416 2417 switch (BuiltinID) { 2418 default: assert(0 && "Unsupported st intrinsic!"); 2419 case PPC::BI__builtin_altivec_stvx: 2420 ID = Intrinsic::ppc_altivec_stvx; 2421 break; 2422 case PPC::BI__builtin_altivec_stvxl: 2423 ID = Intrinsic::ppc_altivec_stvxl; 2424 break; 2425 case PPC::BI__builtin_altivec_stvebx: 2426 ID = Intrinsic::ppc_altivec_stvebx; 2427 break; 2428 case PPC::BI__builtin_altivec_stvehx: 2429 ID = Intrinsic::ppc_altivec_stvehx; 2430 break; 2431 case PPC::BI__builtin_altivec_stvewx: 2432 ID = Intrinsic::ppc_altivec_stvewx; 2433 break; 2434 } 2435 llvm::Function *F = CGM.getIntrinsic(ID); 2436 return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), ""); 2437 } 2438 } 2439 return 0; 2440} 2441