CGBuiltin.cpp revision ec9919549095390b5737c561544f8bd49fc894de
//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "TargetInfo.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "CGObjCRuntime.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/Intrinsics.h"
#include "llvm/Target/TargetData.h"

using namespace clang;
using namespace CodeGen;
using namespace llvm;

static void EmitMemoryBarrier(CodeGenFunction &CGF,
                              bool LoadLoad, bool LoadStore,
                              bool StoreLoad, bool StoreStore,
                              bool Device) {
  Value *True = CGF.Builder.getTrue();
  Value *False = CGF.Builder.getFalse();
  Value *C[5] = { LoadLoad ? True : False,
                  LoadStore ? True : False,
                  StoreLoad ? True : False,
                  StoreStore ? True : False,
                  Device ? True : False };
  CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::memory_barrier),
                         C, C + 5);
}

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, const llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, const llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}

// The atomic builtins are also full memory barriers. This is a utility for
// wrapping a call to the builtins with memory barriers.
static Value *EmitCallWithBarrier(CodeGenFunction &CGF, Value *Fn,
                                  Value **ArgBegin, Value **ArgEnd) {
  // FIXME: We need a target hook for whether this applies to device memory or
  // not.
  bool Device = true;

  // Create barriers both before and after the call.
  EmitMemoryBarrier(CGF, true, true, true, true, Device);
  Value *Result = CGF.Builder.CreateCall(Fn, ArgBegin, ArgEnd);
  EmitMemoryBarrier(CGF, true, true, true, true, Device);
  return Result;
}

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               Intrinsic::ID Id, const CallExpr *E) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace =
    cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

  const llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  const llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  const llvm::Type *IntrinsicTypes[2] = { IntType, IntPtrType };
  llvm::Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes, 2);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  const llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result = EmitCallWithBarrier(CGF, AtomF, Args, Args + 2);
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   Intrinsic::ID Id, const CallExpr *E,
                                   Instruction::BinaryOps Op) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace =
    cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

  const llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  const llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  const llvm::Type *IntrinsicTypes[2] = { IntType, IntPtrType };
  llvm::Value *AtomF = CGF.CGM.getIntrinsic(Id, IntrinsicTypes, 2);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  const llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result = EmitCallWithBarrier(CGF, AtomF, Args, Args + 2);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}
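// As a rough sketch (assuming the pre-"atomicrmw" intrinsics of this era),
// a call like __sync_fetch_and_add(&x, 1) on an i32 should come out of
// EmitBinaryAtomic as approximately:
//
//   call void @llvm.memory_barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
//   %old = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %x, i32 1)
//   call void @llvm.memory_barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
//
// EmitBinaryAtomic returns %old (the fetch-and-op form); EmitBinaryAtomicPost
// additionally re-applies Op to %old and the operand, yielding the
// op-and-fetch result.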
/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
/// which must be a scalar floating point type.
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
  const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
  assert(ValTyP && "isn't scalar fp type!");

  StringRef FnName;
  switch (ValTyP->getKind()) {
  default: assert(0 && "Isn't a scalar fp type!");
  case BuiltinType::Float:      FnName = "fabsf"; break;
  case BuiltinType::Double:     FnName = "fabs"; break;
  case BuiltinType::LongDouble: FnName = "fabsl"; break;
  }

  // The prototype is something that takes and returns whatever V's type is.
  std::vector<const llvm::Type*> Args;
  Args.push_back(V->getType());
  llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), Args, false);
  llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);

  return CGF.Builder.CreateCall(Fn, V, "abs");
}
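// For example, EmitFAbs on a double-typed value should produce a call along
// the lines of
//
//   %abs = call double @fabs(double %x)
//
// using a runtime function declared on the fly rather than an intrinsic.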
RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                        unsigned BuiltinID, const CallExpr *E) {
  // See if we can constant fold this builtin.  If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->Evaluate(Result, CGM.getContext())) {
    // Short circuiting the __builtin_expect on its 1st argument
    // must still IR-gen the 1st and 2nd argument's side-effect.
    if (BuiltinID == Builtin::BI__builtin_expect) {
      if (E->getArg(0)->HasSideEffects(getContext()))
        (void)EmitScalarExpr(E->getArg(0));
      if (E->getArg(1)->HasSideEffects(getContext()))
        (void)EmitScalarExpr(E->getArg(1));
    }

    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
                                               Result.Val.getFloat()));
  }

  switch (BuiltinID) {
  default: break;  // Handle intrinsics and libm functions below.
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__builtin_va_end: {
    Value *ArgValue = EmitVAListRef(E->getArg(0));
    const llvm::Type *DestType = Int8PtrTy;
    if (ArgValue->getType() != DestType)
      ArgValue = Builder.CreateBitCast(ArgValue, DestType,
                                       ArgValue->getName().data());

    Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
      Intrinsic::vaend : Intrinsic::vastart;
    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
  }
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0));
    Value *SrcPtr = EmitVAListRef(E->getArg(1));

    const llvm::Type *Type = Int8PtrTy;

    DstPtr = Builder.CreateBitCast(DstPtr, Type);
    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
                                           DstPtr, SrcPtr));
  }
  case Builtin::BI__builtin_abs: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
      Builder.CreateICmpSGE(ArgValue,
                            llvm::Constant::getNullValue(ArgValue->getType()),
                            "abscond");
    Value *Result =
      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");

    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateAdd(Builder.CreateCall(F, ArgValue, "tmp"),
                                   llvm::ConstantInt::get(ArgType, 1), "tmp");
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
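  // A sketch of the expected lowering for a 32-bit argument:
  //
  //   %tmp = call i32 @llvm.cttz.i32(i32 %x)
  //   %tmp1 = add i32 %tmp, 1
  //   %iszero = icmp eq i32 %x, 0
  //   %ffs = select i1 %iszero, i32 0, i32 %tmp1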
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue, "tmp");
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1),
                                      "tmp");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, &ArgType, 1);

    const llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue, "tmp");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_expect: {
    // FIXME: pass expect through to LLVM
    if (E->getArg(1)->HasSideEffects(getContext()))
      (void)EmitScalarExpr(E->getArg(1));
    return RValue::get(EmitScalarExpr(E->getArg(0)));
  }
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    const llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::bswap, &ArgType, 1);
    return RValue::get(Builder.CreateCall(F, ArgValue, "tmp"));
  }
  case Builtin::BI__builtin_object_size: {
    // We pass this builtin onto the optimizer so that it can
    // figure out the object size in more complex cases.
    const llvm::Type *ResType[] = {
      ConvertType(E->getType())
    };

    // LLVM only supports 0 and 2, make sure that we pass along that
    // as a boolean.
    Value *Ty = EmitScalarExpr(E->getArg(1));
    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
    assert(CI);
    uint64_t val = CI->getZExtValue();
    CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);

    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType, 1);
    return RValue::get(Builder.CreateCall2(F,
                                           EmitScalarExpr(E->getArg(0)),
                                           CI));
  }
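  // A sketch of the expected result: __builtin_object_size(p, 0), with an
  // int result type, should become something like
  //
  //   %size = call i32 @llvm.objectsize.i32(i8* %p, i1 false)
  //
  // where the i1 flag is true only for type 2 or 3 (minimum rather than
  // maximum size), which is what the (val & 0x2) >> 1 extraction above
  // computes.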
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch, 0, 0);
    return RValue::get(Builder.CreateCall3(F, Address, RW, Locality));
  }
  case Builtin::BI__builtin_trap: {
    Value *F = CGM.getIntrinsic(Intrinsic::trap, 0, 0);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_unreachable: {
    if (CatchUndefined)
      EmitBranch(getTrapBB());
    else
      Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("unreachable.cont"));

    return RValue::get(0);
  }

  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    const llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::powi, &ArgType, 1);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp"));
  }

  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: assert(0 && "Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()),
                                          "tmp"));
  }
  case Builtin::BI__builtin_isnan: {
    Value *V = EmitScalarExpr(E->getArg(0));
    V = Builder.CreateFCmpUNO(V, V, "cmp");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp"));
  }

  case Builtin::BI__builtin_isinf: {
    // isinf(x) --> fabs(x) == infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    V = EmitFAbs(*this, V, E->getArg(0)->getType());

    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()), "tmp"));
  }

  // TODO: BI__builtin_isinf_sign
  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0
  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsLessThanInf =
      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isfinite: {
    // isfinite(x) --> x == x && fabs(x) != infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsNotInf =
      Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");

    V = Builder.CreateAnd(Eq, IsNotInf, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_fpclassify: {
    Value *V = EmitScalarExpr(E->getArg(5));
    const llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITE
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
                   getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }
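  // The control flow built above for __builtin_fpclassify amounts to the
  // following chain, sketched in C-like pseudocode:
  //
  //   if (V == 0)         return FP_ZERO;      // falls to fpclassify_not_zero
  //   if (V != V)         return FP_NAN;       // falls to fpclassify_not_nan
  //   if (fabs(V) == inf) return FP_INFINITE;  // falls to fpclassify_not_inf
  //   return fabs(V) >= MIN_NORMAL ? FP_NORMAL : FP_SUBNORMAL;
  //
  // with each outcome feeding the PHI node in fpclassify_end.  Note the
  // argument order: args 0-4 carry the classification values and arg 5 is
  // the operand being classified.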
  case Builtin::BIalloca:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size, "tmp"));
  }
  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    Builder.CreateMemSet(Address, Builder.getInt8(0), SizeVal, 1, false);
    return RValue::get(Address);
  }
  case Builtin::BImemcpy:
  case Builtin::BI__builtin_memcpy: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateMemCpy(Address, SrcAddr, SizeVal, 1, false);
    return RValue::get(Address);
  }

  case Builtin::BI__builtin___memcpy_chk: {
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
    if (!E->getArg(2)->isEvaluatable(CGM.getContext()) ||
        !E->getArg(3)->isEvaluatable(CGM.getContext()))
      break;
    llvm::APSInt Size = E->getArg(2)->EvaluateAsInt(CGM.getContext());
    llvm::APSInt DstSize = E->getArg(3)->EvaluateAsInt(CGM.getContext());
    if (Size.ugt(DstSize))
      break;
    Value *Dest = EmitScalarExpr(E->getArg(0));
    Value *Src = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemCpy(Dest, Src, SizeVal, 1, false);
    return RValue::get(Dest);
  }
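  // For instance, __builtin___memcpy_chk(d, s, 5, 10) should fold to a plain
  // @llvm.memcpy of length 5 here, since the constant copy size (5) is known
  // not to exceed the constant destination size (10); otherwise the break
  // falls through to the library-call emission at the bottom of this
  // function, leaving the checked __memcpy_chk call in place.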
  case Builtin::BI__builtin_objc_memmove_collectable: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
                                                  Address, SrcAddr, SizeVal);
    return RValue::get(Address);
  }

  case Builtin::BI__builtin___memmove_chk: {
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
    if (!E->getArg(2)->isEvaluatable(CGM.getContext()) ||
        !E->getArg(3)->isEvaluatable(CGM.getContext()))
      break;
    llvm::APSInt Size = E->getArg(2)->EvaluateAsInt(CGM.getContext());
    llvm::APSInt DstSize = E->getArg(3)->EvaluateAsInt(CGM.getContext());
    if (Size.ugt(DstSize))
      break;
    Value *Dest = EmitScalarExpr(E->getArg(0));
    Value *Src = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemMove(Dest, Src, SizeVal, 1, false);
    return RValue::get(Dest);
  }

  case Builtin::BImemmove:
  case Builtin::BI__builtin_memmove: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateMemMove(Address, SrcAddr, SizeVal, 1, false);
    return RValue::get(Address);
  }
  case Builtin::BImemset:
  case Builtin::BI__builtin_memset: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateMemSet(Address, ByteVal, SizeVal, 1, false);
    return RValue::get(Address);
  }
  case Builtin::BI__builtin___memset_chk: {
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
    if (!E->getArg(2)->isEvaluatable(CGM.getContext()) ||
        !E->getArg(3)->isEvaluatable(CGM.getContext()))
      break;
    llvm::APSInt Size = E->getArg(2)->EvaluateAsInt(CGM.getContext());
    llvm::APSInt DstSize = E->getArg(3)->EvaluateAsInt(CGM.getContext());
    if (Size.ugt(DstSize))
      break;
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemSet(Address, ByteVal, SizeVal, 1, false);

    return RValue::get(Address);
  }
  case Builtin::BI__builtin_dwarf_cfa: {
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend?  Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa, 0, 0);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
  case Builtin::BI__builtin_return_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp");
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress, 0, 0);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_frame_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false, "tmp");
    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress, 0, 0);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_extract_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_frob_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dwarf_sp_column: {
    const llvm::IntegerType *Ty
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
    if (Column == -1) {
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
      return RValue::get(llvm::UndefValue::get(Ty));
    }
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  }
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_eh_return: {
    Value *Int = EmitScalarExpr(E->getArg(0));
    Value *Ptr = EmitScalarExpr(E->getArg(1));

    const llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                  ? Intrinsic::eh_return_i32
                                  : Intrinsic::eh_return_i64,
                                0, 0);
    Builder.CreateCall2(F, Int, Ptr);
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));

    return RValue::get(0);
  }
  case Builtin::BI__builtin_unwind_init: {
    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init, 0, 0);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_extend_pointer: {
    // Extends a pointer to the size of an _Unwind_Word, which is
    // uint64_t on all platforms.  Generally this gets poked into a
    // register and eventually used as an address, so if the
    // addressing registers are wider than pointers and the platform
    // doesn't implicitly ignore high-order bits when doing
    // addressing, we need to make sure we zext / sext based on
    // the platform's expectations.
    //
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html

    // Cast the pointer to intptr_t.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");

    // If that's 64 bits, we're done.
    if (IntPtrTy->getBitWidth() == 64)
      return RValue::get(Result);

    // Otherwise, ask the codegen data what to do.
    if (getTargetHooks().extendPointerWithSExt())
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
    else
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  }
  case Builtin::BI__builtin_setjmp: {
    // Buffer is a void**.
    Value *Buf = EmitScalarExpr(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    Value *StackAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Value *StackSaveSlot =
      Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
    return RValue::get(Builder.CreateCall(F, Buf));
  }
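  // A note on the buffer layout implied above: the frame pointer goes into
  // slot 0 of the void** buffer and the stack pointer into slot 2, which is
  // (as far as this lowering assumes) where @llvm.eh.sjlj.setjmp and
  // @llvm.eh.sjlj.longjmp expect to find them; the remaining slots are left
  // to the intrinsics themselves.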
  case Builtin::BI__builtin_longjmp: {
    Value *Buf = EmitScalarExpr(E->getArg(0));
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);

    // Call LLVM's EH longjmp, which is lightweight.
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);

    // longjmp doesn't return; mark this as unreachable.
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("longjmp.cont"));

    return RValue::get(0);
  }
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    assert(0 && "Shouldn't make it through sema");
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_and, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_xor, E);

  // Clang extensions: not overloaded yet.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_load_umax, E);

  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_and, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, Intrinsic::atomic_load_xor, E,
                                llvm::Instruction::Xor);

  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16: {
    QualType T = E->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace =
      cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

    const llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    const llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
    const llvm::Type *IntrinsicTypes[2] = { IntType, IntPtrType };
    Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap,
                                    IntrinsicTypes, 2);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitScalarExpr(E->getArg(1));
    const llvm::Type *ValueType = Args[1]->getType();
    Args[1] = EmitToInt(*this, Args[1], T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *Result = EmitCallWithBarrier(*this, AtomF, Args, Args + 3);
    Result = EmitFromInt(*this, Result, T, ValueType);
    return RValue::get(Result);
  }
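  // A rough sketch of the lowering above: for a 32-bit object,
  // __sync_val_compare_and_swap(p, old, new) should produce
  //
  //   %prev = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %p,
  //                                                    i32 %old, i32 %new)
  //
  // wrapped in the usual pair of memory barriers, returning the previous
  // value; the bool variant below compares %prev against %old instead.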
  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16: {
    QualType T = E->getArg(1)->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace =
      cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

    const llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    const llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
    const llvm::Type *IntrinsicTypes[2] = { IntType, IntPtrType };
    Value *AtomF = CGM.getIntrinsic(Intrinsic::atomic_cmp_swap,
                                    IntrinsicTypes, 2);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *OldVal = Args[1];
    Value *PrevVal = EmitCallWithBarrier(*this, AtomF, Args, Args + 3);
    Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
    // zext bool to int.
    Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
    return RValue::get(Result);
  }

  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_swap, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, Intrinsic::atomic_swap, E);

  case Builtin::BI__sync_lock_release_1:
  case Builtin::BI__sync_lock_release_2:
  case Builtin::BI__sync_lock_release_4:
  case Builtin::BI__sync_lock_release_8:
  case Builtin::BI__sync_lock_release_16: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    const llvm::Type *ElTy =
      cast<llvm::PointerType>(Ptr->getType())->getElementType();
    llvm::StoreInst *Store =
      Builder.CreateStore(llvm::Constant::getNullValue(ElTy), Ptr);
    Store->setVolatile(true);
    return RValue::get(0);
  }

  case Builtin::BI__sync_synchronize: {
    // We assume, like gcc appears to, that this only applies to cached memory.
    EmitMemoryBarrier(*this, true, true, true, true, false);
    return RValue::get(0);
  }

  case Builtin::BI__builtin_llvm_memory_barrier: {
    Value *C[5] = {
      EmitScalarExpr(E->getArg(0)),
      EmitScalarExpr(E->getArg(1)),
      EmitScalarExpr(E->getArg(2)),
      EmitScalarExpr(E->getArg(3)),
      EmitScalarExpr(E->getArg(4))
    };
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::memory_barrier), C, C + 5);
    return RValue::get(0);
  }

  // Library functions with special handling.
  case Builtin::BIsqrt:
  case Builtin::BIsqrtf:
  case Builtin::BIsqrtl: {
    // TODO: there is currently no set of optimizer flags
    // sufficient for us to rewrite sqrt to @llvm.sqrt.
    // -fmath-errno=0 is not good enough; we need finiteness.
    // We could probably precondition the call with an ult
    // against 0, but is that worth the complexity?
    break;
  }
  case Builtin::BIpow:
  case Builtin::BIpowf:
  case Builtin::BIpowl: {
    // Rewrite pow to an intrinsic call if allowed.
    if (!FD->hasAttr<ConstAttr>())
      break;
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    const llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::pow, &ArgType, 1);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent, "tmp"));
  }

  case Builtin::BI__builtin_signbit:
  case Builtin::BI__builtin_signbitf:
  case Builtin::BI__builtin_signbitl: {
    LLVMContext &C = CGM.getLLVMContext();

    Value *Arg = EmitScalarExpr(E->getArg(0));
    const llvm::Type *ArgTy = Arg->getType();
    if (ArgTy->isPPC_FP128Ty())
      break; // FIXME: I'm not sure what the right implementation is here.
    int ArgWidth = ArgTy->getPrimitiveSizeInBits();
    const llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
    Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
    Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
    Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
  }
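  // As a sketch, __builtin_signbit on a double should lower to a signed
  // comparison on the raw bits, e.g.:
  //
  //   %bits = bitcast double %x to i64
  //   %signbit = icmp slt i64 %bits, 0
  //   %res = zext i1 %signbit to i32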
  }

  // If this is an alias for a libm function (e.g. __builtin_sin) turn it into
  // that function.
  if (getContext().BuiltinInfo.isLibFunction(BuiltinID) ||
      getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
    return EmitCall(E->getCallee()->getType(),
                    CGM.getBuiltinLibFunction(FD, BuiltinID),
                    ReturnValueSlot(), E->arg_begin(), E->arg_end(), FD);

  // See if we have a target specific intrinsic.
  const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
  if (const char *Prefix =
      llvm::Triple::getArchTypePrefix(Target.getTriple().getArch()))
    IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);

  if (IntrinsicID != Intrinsic::not_intrinsic) {
    SmallVector<Value*, 16> Args;

    // Find out if any arguments are required to be integer constant
    // expressions.
    unsigned ICEArguments = 0;
    ASTContext::GetBuiltinTypeError Error;
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
    assert(Error == ASTContext::GE_None && "Should not codegen an error");

    Function *F = CGM.getIntrinsic(IntrinsicID);
    const llvm::FunctionType *FTy = F->getFunctionType();

    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
      Value *ArgValue;
      // If this is a normal argument, just emit it as a scalar.
      if ((ICEArguments & (1 << i)) == 0) {
        ArgValue = EmitScalarExpr(E->getArg(i));
      } else {
        // If this is required to be a constant, constant fold it so that we
        // know that the generated intrinsic gets a ConstantInt.
        llvm::APSInt Result;
        bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
        assert(IsConst && "Constant arg isn't actually constant?");
        (void)IsConst;
        ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
      }

      // If the intrinsic arg type is different from the builtin arg type
      // we need to do a bit cast.
      const llvm::Type *PTy = FTy->getParamType(i);
      if (PTy != ArgValue->getType()) {
        assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
               "Must be able to losslessly bit cast to param");
        ArgValue = Builder.CreateBitCast(ArgValue, PTy);
      }

      Args.push_back(ArgValue);
    }

    Value *V = Builder.CreateCall(F, Args.data(), Args.data() + Args.size());
    QualType BuiltinRetType = E->getType();

    const llvm::Type *RetTy = llvm::Type::getVoidTy(getLLVMContext());
    if (!BuiltinRetType->isVoidType()) RetTy = ConvertType(BuiltinRetType);

    if (RetTy != V->getType()) {
      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
             "Must be able to losslessly bit cast result type");
      V = Builder.CreateBitCast(V, RetTy);
    }

    return RValue::get(V);
  }

  // See if we have a target specific builtin that needs to be lowered.
  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
    return RValue::get(V);

  ErrorUnsupported(E, "builtin function");

  // Unknown builtin, for now just dump it out and return undef.
  if (hasAggregateLLVMType(E->getType()))
    return RValue::getAggregate(CreateMemTemp(E->getType()));
  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
}

Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  switch (Target.getTriple().getArch()) {
  case llvm::Triple::arm:
  case llvm::Triple::thumb:
    return EmitARMBuiltinExpr(BuiltinID, E);
  case llvm::Triple::x86:
  case llvm::Triple::x86_64:
    return EmitX86BuiltinExpr(BuiltinID, E);
  case llvm::Triple::ppc:
  case llvm::Triple::ppc64:
    return EmitPPCBuiltinExpr(BuiltinID, E);
  default:
    return 0;
  }
}

static const llvm::VectorType *GetNeonType(LLVMContext &C, unsigned type,
                                           bool q) {
  switch (type) {
    default: break;
    case 0:
    case 5: return llvm::VectorType::get(llvm::Type::getInt8Ty(C), 8 << (int)q);
    case 6:
    case 7:
    case 1: return llvm::VectorType::get(llvm::Type::getInt16Ty(C),4 << (int)q);
    case 2: return llvm::VectorType::get(llvm::Type::getInt32Ty(C),2 << (int)q);
    case 3: return llvm::VectorType::get(llvm::Type::getInt64Ty(C),1 << (int)q);
    case 4: return llvm::VectorType::get(llvm::Type::getFloatTy(C),2 << (int)q);
  }
  return 0;
}
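// The "type" codes decoded above encode the NEON element type: 0 and 5 map
// to 8-bit elements (5 being the polynomial variant), 1, 6 and 7 to 16-bit
// elements (poly16 and float16 share the i16 representation), 2 to i32,
// 3 to i64, and 4 to float.  The q flag selects the 128-bit "quad" register
// width by doubling the element count.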
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
  SmallVector<Constant*, 16> Indices(nElts, C);
  Value* SV = llvm::ConstantVector::get(Indices);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}
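// For example, splatting lane 0 of a <4 x i32> vector with EmitNeonSplat
// should come out as a shuffle with an all-zero mask:
//
//   %lane = shufflevector <4 x i32> %v, <4 x i32> %v,
//                         <4 x i32> zeroinitializer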
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
                                     const char *name,
                                     unsigned shift, bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j)
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);

  return Builder.CreateCall(F, Ops.begin(), Ops.end(), name);
}

Value *CodeGenFunction::EmitNeonShiftVector(Value *V, const llvm::Type *Ty,
                                            bool neg) {
  ConstantInt *CI = cast<ConstantInt>(V);
  int SV = CI->getSExtValue();

  const llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
  llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
  SmallVector<llvm::Constant*, 16> CV(VTy->getNumElements(), C);
  return llvm::ConstantVector::get(CV);
}

/// GetPointeeAlignment - Given an expression with a pointer type, find the
/// alignment of the type referenced by the pointer.  Skip over implicit
/// casts.
static Value *GetPointeeAlignment(CodeGenFunction &CGF, const Expr *Addr) {
  unsigned Align = 1;
  // Check if the type is a pointer.  The implicit cast operand might not be.
  while (Addr->getType()->isPointerType()) {
    QualType PtTy = Addr->getType()->getPointeeType();
    unsigned NewA = CGF.getContext().getTypeAlignInChars(PtTy).getQuantity();
    if (NewA > Align)
      Align = NewA;

    // If the address is an implicit cast, repeat with the cast operand.
    if (const ImplicitCastExpr *CastAddr = dyn_cast<ImplicitCastExpr>(Addr)) {
      Addr = CastAddr->getSubExpr();
      continue;
    }
    break;
  }
  return llvm::ConstantInt::get(CGF.Int32Ty, Align);
}

Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  if (BuiltinID == ARM::BI__clear_cache) {
    const FunctionDecl *FD = E->getDirectCallee();
    // Oddly people write this call without args on occasion and gcc accepts
    // it - it's also marked as varargs in the description file.
    llvm::SmallVector<Value*, 2> Ops;
    for (unsigned i = 0; i < E->getNumArgs(); i++)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    const llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    const llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    llvm::StringRef Name = FD->getName();
    return Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
                              Ops.begin(), Ops.end());
  }

  llvm::SmallVector<Value*, 4> Ops;
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  llvm::APSInt Result;
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
  if (!Arg->isIntegerConstantExpr(Result, getContext()))
    return 0;

  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
    // Determine the overloaded type of this builtin.
    const llvm::Type *Ty;
    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
      Ty = llvm::Type::getFloatTy(getLLVMContext());
    else
      Ty = llvm::Type::getDoubleTy(getLLVMContext());

    // Determine whether this is an unsigned conversion or not.
    bool usgn = Result.getZExtValue() == 1;
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;

    // Call the appropriate intrinsic.
    Function *F = CGM.getIntrinsic(Int, &Ty, 1);
    return Builder.CreateCall(F, Ops.begin(), Ops.end(), "vcvtr");
  }

  // Determine the type of this overloaded NEON intrinsic.
  unsigned type = Result.getZExtValue();
  bool usgn = type & 0x08;
  bool quad = type & 0x10;
  bool poly = (type & 0x7) == 5 || (type & 0x7) == 6;
  (void)poly;  // Only used in assert()s.
  bool rightShift = false;

  const llvm::VectorType *VTy = GetNeonType(getLLVMContext(), type & 0x7, quad);
  const llvm::Type *Ty = VTy;
  if (!Ty)
    return 0;

  unsigned Int;
  switch (BuiltinID) {
  default: return 0;
  case ARM::BI__builtin_neon_vabd_v:
  case ARM::BI__builtin_neon_vabdq_v:
    Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabd");
  case ARM::BI__builtin_neon_vabs_v:
  case ARM::BI__builtin_neon_vabsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, &Ty, 1),
                        Ops, "vabs");
  case ARM::BI__builtin_neon_vaddhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, &Ty, 1),
                        Ops, "vaddhn");
  case ARM::BI__builtin_neon_vcale_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcage_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged);
    return EmitNeonCall(F, Ops, "vcage");
  }
  case ARM::BI__builtin_neon_vcaleq_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcageq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
    return EmitNeonCall(F, Ops, "vcage");
  }
  case ARM::BI__builtin_neon_vcalt_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcagt_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd);
    return EmitNeonCall(F, Ops, "vcagt");
  }
  case ARM::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vcagtq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
    return EmitNeonCall(F, Ops, "vcagt");
  }
  case ARM::BI__builtin_neon_vcls_v:
  case ARM::BI__builtin_neon_vclsq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, &Ty, 1);
    return EmitNeonCall(F, Ops, "vcls");
  }
  case ARM::BI__builtin_neon_vclz_v:
  case ARM::BI__builtin_neon_vclzq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, &Ty, 1);
    return EmitNeonCall(F, Ops, "vclz");
  }
  case ARM::BI__builtin_neon_vcnt_v:
  case ARM::BI__builtin_neon_vcntq_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, &Ty, 1);
    return EmitNeonCall(F, Ops, "vcnt");
  }
  case ARM::BI__builtin_neon_vcvt_f16_v: {
    assert((type & 0x7) == 7 && !quad && "unexpected vcvt_f16_v builtin");
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
    return EmitNeonCall(F, Ops, "vcvt");
  }
  case ARM::BI__builtin_neon_vcvt_f32_f16: {
    assert((type & 0x7) == 7 && !quad && "unexpected vcvt_f32_f16 builtin");
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
    return EmitNeonCall(F, Ops, "vcvt");
  }
  case ARM::BI__builtin_neon_vcvt_f32_v:
  case ARM::BI__builtin_neon_vcvtq_f32_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(getLLVMContext(), 4, quad);
    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  }
  case ARM::BI__builtin_neon_vcvt_s32_v:
  case ARM::BI__builtin_neon_vcvt_u32_v:
  case ARM::BI__builtin_neon_vcvtq_s32_v:
  case ARM::BI__builtin_neon_vcvtq_u32_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0],
                                   GetNeonType(getLLVMContext(), 4, quad));
    return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case ARM::BI__builtin_neon_vcvt_n_f32_v:
  case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
    const llvm::Type *Tys[2] = { GetNeonType(getLLVMContext(), 4, quad), Ty };
    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
               : Intrinsic::arm_neon_vcvtfxs2fp;
    Function *F = CGM.getIntrinsic(Int, Tys, 2);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case ARM::BI__builtin_neon_vcvt_n_s32_v:
  case ARM::BI__builtin_neon_vcvt_n_u32_v:
  case ARM::BI__builtin_neon_vcvtq_n_s32_v:
  case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
    const llvm::Type *Tys[2] = { Ty, GetNeonType(getLLVMContext(), 4, quad) };
    Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
               : Intrinsic::arm_neon_vcvtfp2fxs;
    Function *F = CGM.getIntrinsic(Int, Tys, 2);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case ARM::BI__builtin_neon_vext_v:
  case ARM::BI__builtin_neon_vextq_v: {
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<Constant*, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Value *SV = llvm::ConstantVector::get(Indices);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
  }
  case ARM::BI__builtin_neon_vget_lane_i8:
  case ARM::BI__builtin_neon_vget_lane_i16:
  case ARM::BI__builtin_neon_vget_lane_i32:
  case ARM::BI__builtin_neon_vget_lane_i64:
  case ARM::BI__builtin_neon_vget_lane_f32:
  case ARM::BI__builtin_neon_vgetq_lane_i8:
  case ARM::BI__builtin_neon_vgetq_lane_i16:
  case ARM::BI__builtin_neon_vgetq_lane_i32:
  case ARM::BI__builtin_neon_vgetq_lane_i64:
  case ARM::BI__builtin_neon_vgetq_lane_f32:
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case ARM::BI__builtin_neon_vhadd_v:
  case ARM::BI__builtin_neon_vhaddq_v:
    Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhadd");
  case ARM::BI__builtin_neon_vhsub_v:
  case ARM::BI__builtin_neon_vhsubq_v:
    Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhsub");
  case ARM::BI__builtin_neon_vld1_v:
  case ARM::BI__builtin_neon_vld1q_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, &Ty, 1),
                        Ops, "vld1");
  case ARM::BI__builtin_neon_vld1_lane_v:
  case ARM::BI__builtin_neon_vld1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[0] = Builder.CreateLoad(Ops[0]);
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  case ARM::BI__builtin_neon_vld1_dup_v:
  case ARM::BI__builtin_neon_vld1q_dup_v: {
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[0] = Builder.CreateLoad(Ops[0]);
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case ARM::BI__builtin_neon_vld2_v:
  case ARM::BI__builtin_neon_vld2q_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, &Ty, 1);
    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld3_v:
  case ARM::BI__builtin_neon_vld3q_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, &Ty, 1);
    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld4_v:
  case ARM::BI__builtin_neon_vld4q_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, &Ty, 1);
    Value *Align = GetPointeeAlignment(*this, E->getArg(1));
    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld2_lane_v:
  case ARM::BI__builtin_neon_vld2q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, &Ty, 1);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld2_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld3_lane_v:
  case ARM::BI__builtin_neon_vld3q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, &Ty, 1);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld3_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld4_lane_v:
  case ARM::BI__builtin_neon_vld4q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, &Ty, 1);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld4_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld2_lane_v:
  case ARM::BI__builtin_neon_vld2q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, &Ty, 1);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld2_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld3_lane_v:
  case ARM::BI__builtin_neon_vld3q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, &Ty, 1);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld3_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld4_lane_v:
  case ARM::BI__builtin_neon_vld4q_lane_v: {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, &Ty, 1);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
    Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld4_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case ARM::BI__builtin_neon_vld2_dup_v:
  case ARM::BI__builtin_neon_vld3_dup_v:
  case ARM::BI__builtin_neon_vld4_dup_v: {
    // Handle 64-bit elements as a special case.  There is no "dup" needed;
    // a plain vldN of the whole structure gives the same result.
    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
      switch (BuiltinID) {
      case ARM::BI__builtin_neon_vld2_dup_v:
        Int = Intrinsic::arm_neon_vld2;
        break;
      case ARM::BI__builtin_neon_vld3_dup_v:
        Int = Intrinsic::arm_neon_vld3;
        break;
      case ARM::BI__builtin_neon_vld4_dup_v:
        Int = Intrinsic::arm_neon_vld4;
        break;
      default: assert(0 && "unknown vld_dup intrinsic?");
      }
      Function *F = CGM.getIntrinsic(Int, &Ty, 1);
      Value *Align = GetPointeeAlignment(*this, E->getArg(1));
      Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
      return Builder.CreateStore(Ops[1], Ops[0]);
    }
    switch (BuiltinID) {
    case ARM::BI__builtin_neon_vld2_dup_v:
      Int = Intrinsic::arm_neon_vld2lane;
      break;
    case ARM::BI__builtin_neon_vld3_dup_v:
      Int = Intrinsic::arm_neon_vld3lane;
      break;
    case ARM::BI__builtin_neon_vld4_dup_v:
      Int = Intrinsic::arm_neon_vld4lane;
      break;
    default: assert(0 && "unknown vld_dup intrinsic?");
    }
    Function *F = CGM.getIntrinsic(Int, &Ty, 1);
    const llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());

    SmallVector<Value*, 6> Args;
    Args.push_back(Ops[1]);
    Args.append(STy->getNumElements(), UndefValue::get(Ty));

    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Args.push_back(CI);
    Args.push_back(GetPointeeAlignment(*this, E->getArg(1)));

    Ops[1] = Builder.CreateCall(F, Args.begin(), Args.end(), "vld_dup");
    // Splat lane 0 to all elements in each vector of the result.
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      Value *Val = Builder.CreateExtractValue(Ops[1], i);
      Value *Elt = Builder.CreateBitCast(Val, Ty);
      Elt = EmitNeonSplat(Elt, CI);
      Elt = Builder.CreateBitCast(Elt, Val->getType());
      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
    }
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
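  // In the lane-based vldN_dup path above, the vldNlane intrinsic is called
  // with undef vector operands and lane 0, so only the addressed element is
  // actually loaded; EmitNeonSplat then copies lane 0 across each vector of
  // the returned struct.  Illustrative shape for vld2_dup of <4 x i16>:
  //   %v = call { <4 x i16>, <4 x i16> }
  //        @llvm.arm.neon.vld2lane.v4i16(i8* %p, <4 x i16> undef,
  //                                      <4 x i16> undef, i32 0, i32 %align)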
  case ARM::BI__builtin_neon_vmax_v:
  case ARM::BI__builtin_neon_vmaxq_v:
    Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmax");
  case ARM::BI__builtin_neon_vmin_v:
  case ARM::BI__builtin_neon_vminq_v:
    Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmin");
  case ARM::BI__builtin_neon_vmovl_v: {
    const llvm::Type *DTy =
      llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case ARM::BI__builtin_neon_vmovn_v: {
    const llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case ARM::BI__builtin_neon_vmul_v:
  case ARM::BI__builtin_neon_vmulq_v:
    assert(poly && "vmul builtin only supported for polynomial types");
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, &Ty, 1),
                        Ops, "vmul");
  case ARM::BI__builtin_neon_vmull_v:
    Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = poly ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmull");
  case ARM::BI__builtin_neon_vpadal_v:
  case ARM::BI__builtin_neon_vpadalq_v: {
    Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    const llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    const llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    const llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys, 2), Ops, "vpadal");
  }
  case ARM::BI__builtin_neon_vpadd_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, &Ty, 1),
                        Ops, "vpadd");
  case ARM::BI__builtin_neon_vpaddl_v:
  case ARM::BI__builtin_neon_vpaddlq_v: {
    Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    const llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    const llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    const llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys, 2), Ops, "vpaddl");
  }
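  // Example of the type relationship for the pairwise cases above
  // (illustrative): for vpaddl.s8 on an <8 x i8> source, Ty is the result
  // type <4 x i16> and NarrowTy is the source type <8 x i8>, so the call is
  // roughly:
  //   %vpaddl = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a)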
  case ARM::BI__builtin_neon_vpmax_v:
    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmax");
  case ARM::BI__builtin_neon_vpmin_v:
    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vpmin");
  case ARM::BI__builtin_neon_vqabs_v:
  case ARM::BI__builtin_neon_vqabsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, &Ty, 1),
                        Ops, "vqabs");
  case ARM::BI__builtin_neon_vqadd_v:
  case ARM::BI__builtin_neon_vqaddq_v:
    Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqadd");
  case ARM::BI__builtin_neon_vqdmlal_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, &Ty, 1),
                        Ops, "vqdmlal");
  case ARM::BI__builtin_neon_vqdmlsl_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, &Ty, 1),
                        Ops, "vqdmlsl");
  case ARM::BI__builtin_neon_vqdmulh_v:
  case ARM::BI__builtin_neon_vqdmulhq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, &Ty, 1),
                        Ops, "vqdmulh");
  case ARM::BI__builtin_neon_vqdmull_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, &Ty, 1),
                        Ops, "vqdmull");
  case ARM::BI__builtin_neon_vqmovn_v:
    Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqmovn");
  case ARM::BI__builtin_neon_vqmovun_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, &Ty, 1),
                        Ops, "vqmovun");
  case ARM::BI__builtin_neon_vqneg_v:
  case ARM::BI__builtin_neon_vqnegq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, &Ty, 1),
                        Ops, "vqneg");
  case ARM::BI__builtin_neon_vqrdmulh_v:
  case ARM::BI__builtin_neon_vqrdmulhq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, &Ty, 1),
                        Ops, "vqrdmulh");
  case ARM::BI__builtin_neon_vqrshl_v:
  case ARM::BI__builtin_neon_vqrshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqrshl");
  case ARM::BI__builtin_neon_vqrshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqrshrn_n",
                        1, true);
  case ARM::BI__builtin_neon_vqrshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, &Ty, 1),
                        Ops, "vqrshrun_n", 1, true);
  case ARM::BI__builtin_neon_vqshl_v:
  case ARM::BI__builtin_neon_vqshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshl");
  case ARM::BI__builtin_neon_vqshl_n_v:
  case ARM::BI__builtin_neon_vqshlq_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshl_n",
                        1, false);
  case ARM::BI__builtin_neon_vqshlu_n_v:
  case ARM::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, &Ty, 1),
                        Ops, "vqshlu", 1, false);
  case ARM::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqshrn_n",
                        1, true);
  case ARM::BI__builtin_neon_vqshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, &Ty, 1),
                        Ops, "vqshrun_n", 1, true);
  case ARM::BI__builtin_neon_vqsub_v:
  case ARM::BI__builtin_neon_vqsubq_v:
    Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vqsub");
  case ARM::BI__builtin_neon_vraddhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, &Ty, 1),
                        Ops, "vraddhn");
  case ARM::BI__builtin_neon_vrecpe_v:
  case ARM::BI__builtin_neon_vrecpeq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, &Ty, 1),
                        Ops, "vrecpe");
  case ARM::BI__builtin_neon_vrecps_v:
  case ARM::BI__builtin_neon_vrecpsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, &Ty, 1),
                        Ops, "vrecps");
  case ARM::BI__builtin_neon_vrhadd_v:
  case ARM::BI__builtin_neon_vrhaddq_v:
    Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrhadd");
  case ARM::BI__builtin_neon_vrshl_v:
  case ARM::BI__builtin_neon_vrshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrshl");
  case ARM::BI__builtin_neon_vrshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, &Ty, 1),
                        Ops, "vrshrn_n", 1, true);
  case ARM::BI__builtin_neon_vrshr_n_v:
  case ARM::BI__builtin_neon_vrshrq_n_v:
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vrshr_n", 1, true);
  case ARM::BI__builtin_neon_vrsqrte_v:
  case ARM::BI__builtin_neon_vrsqrteq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, &Ty, 1),
                        Ops, "vrsqrte");
  case ARM::BI__builtin_neon_vrsqrts_v:
  case ARM::BI__builtin_neon_vrsqrtsq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, &Ty, 1),
                        Ops, "vrsqrts");
  case ARM::BI__builtin_neon_vrsra_n_v:
  case ARM::BI__builtin_neon_vrsraq_n_v:
    // vrsra_n(acc, x, n) is acc + vrshr_n(x, n).
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, &Ty, 1), Ops[1], Ops[2]);
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  case ARM::BI__builtin_neon_vrsubhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, &Ty, 1),
                        Ops, "vrsubhn");
  case ARM::BI__builtin_neon_vset_lane_i8:
  case ARM::BI__builtin_neon_vset_lane_i16:
  case ARM::BI__builtin_neon_vset_lane_i32:
  case ARM::BI__builtin_neon_vset_lane_i64:
  case ARM::BI__builtin_neon_vset_lane_f32:
  case ARM::BI__builtin_neon_vsetq_lane_i8:
  case ARM::BI__builtin_neon_vsetq_lane_i16:
  case ARM::BI__builtin_neon_vsetq_lane_i32:
  case ARM::BI__builtin_neon_vsetq_lane_i64:
  case ARM::BI__builtin_neon_vsetq_lane_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  case ARM::BI__builtin_neon_vshl_v:
  case ARM::BI__builtin_neon_vshlq_v:
    Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vshl");
  case ARM::BI__builtin_neon_vshll_n_v:
    Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vshll", 1);
  case ARM::BI__builtin_neon_vshl_n_v:
  case ARM::BI__builtin_neon_vshlq_n_v:
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
                             "vshl_n");
  case ARM::BI__builtin_neon_vshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, &Ty, 1),
                        Ops, "vshrn_n", 1, true);
  case ARM::BI__builtin_neon_vshr_n_v:
  case ARM::BI__builtin_neon_vshrq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    if (usgn)
      return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
    else
      return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
  case ARM::BI__builtin_neon_vsri_n_v:
  case ARM::BI__builtin_neon_vsriq_n_v:
    rightShift = true;
    // Fall through: vsri and vsli share the shift-insert intrinsic.
  case ARM::BI__builtin_neon_vsli_n_v:
  case ARM::BI__builtin_neon_vsliq_n_v:
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, &Ty, 1),
                        Ops, "vsli_n");
  case ARM::BI__builtin_neon_vsra_n_v:
  case ARM::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
    if (usgn)
      Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
    else
      Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case ARM::BI__builtin_neon_vst1_v:
  case ARM::BI__builtin_neon_vst1q_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, &Ty, 1),
                        Ops, "");
  case ARM::BI__builtin_neon_vst1_lane_v:
  case ARM::BI__builtin_neon_vst1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
  case ARM::BI__builtin_neon_vst2_v:
  case ARM::BI__builtin_neon_vst2q_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, &Ty, 1),
                        Ops, "");
  case ARM::BI__builtin_neon_vst2_lane_v:
  case ARM::BI__builtin_neon_vst2q_lane_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, &Ty, 1),
                        Ops, "");
  case ARM::BI__builtin_neon_vst3_v:
  case ARM::BI__builtin_neon_vst3q_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, &Ty, 1),
                        Ops, "");
  case ARM::BI__builtin_neon_vst3_lane_v:
  case ARM::BI__builtin_neon_vst3q_lane_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, &Ty, 1),
                        Ops, "");
  case ARM::BI__builtin_neon_vst4_v:
  case ARM::BI__builtin_neon_vst4q_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, &Ty, 1),
                        Ops, "");
  case ARM::BI__builtin_neon_vst4_lane_v:
  case ARM::BI__builtin_neon_vst4q_lane_v:
    Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, &Ty, 1),
                        Ops, "");
  case ARM::BI__builtin_neon_vsubhn_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, &Ty, 1),
                        Ops, "vsubhn");
  case ARM::BI__builtin_neon_vtbl1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
                        Ops, "vtbl1");
  case ARM::BI__builtin_neon_vtbl2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
                        Ops, "vtbl2");
  case ARM::BI__builtin_neon_vtbl3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
                        Ops, "vtbl3");
  case ARM::BI__builtin_neon_vtbl4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
                        Ops, "vtbl4");
  case ARM::BI__builtin_neon_vtbx1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
                        Ops, "vtbx1");
  case ARM::BI__builtin_neon_vtbx2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
                        Ops, "vtbx2");
  case ARM::BI__builtin_neon_vtbx3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
                        Ops, "vtbx3");
  case ARM::BI__builtin_neon_vtbx4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
                        Ops, "vtbx4");
  case ARM::BI__builtin_neon_vtst_v:
  case ARM::BI__builtin_neon_vtstq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case ARM::BI__builtin_neon_vtrn_v:
  case ARM::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = 0;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
        Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  case ARM::BI__builtin_neon_vuzp_v:
  case ARM::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = 0;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
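  // The vtrn/vuzp cases above and the vzip case below each emit two
  // shufflevectors and store both results through the pointer in Ops[0].
  // For example (illustrative), with <4 x i32> operands the two masks are:
  //   vtrn: <0,4,2,6> and <1,5,3,7>   (transpose pairs)
  //   vuzp: <0,2,4,6> and <1,3,5,7>   (de-interleave)
  //   vzip: <0,4,1,5> and <2,6,3,7>   (interleave)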
  case ARM::BI__builtin_neon_vzip_v:
  case ARM::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = 0;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  }
}

llvm::Value *CodeGenFunction::
BuildVector(const llvm::SmallVectorImpl<llvm::Value*> &Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    std::vector<llvm::Constant*> CstOps;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      CstOps.push_back(cast<Constant>(Ops[i]));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result =
    llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));

  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i],
               llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), i));

  return Result;
}

Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  llvm::SmallVector<Value*, 4> Ops;

  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // If this is a normal argument, just emit it as a scalar.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
      continue;
    }

    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    llvm::APSInt Result;
    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
  }

  switch (BuiltinID) {
  default: return 0;
  case X86::BI__builtin_ia32_pslldi128:
  case X86::BI__builtin_ia32_psllqi128:
  case X86::BI__builtin_ia32_psllwi128:
  case X86::BI__builtin_ia32_psradi128:
  case X86::BI__builtin_ia32_psrawi128:
  case X86::BI__builtin_ia32_psrldi128:
  case X86::BI__builtin_ia32_psrlqi128:
  case X86::BI__builtin_ia32_psrlwi128: {
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
    const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
    llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
    Ops[1] = Builder.CreateInsertElement(llvm::UndefValue::get(Ty),
                                         Ops[1], Zero, "insert");
    Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType(), "bitcast");
    const char *name = 0;
    Intrinsic::ID ID = Intrinsic::not_intrinsic;

    switch (BuiltinID) {
    default: assert(0 && "Unsupported shift intrinsic!");
    case X86::BI__builtin_ia32_pslldi128:
      name = "pslldi";
      ID = Intrinsic::x86_sse2_psll_d;
      break;
    case X86::BI__builtin_ia32_psllqi128:
      name = "psllqi";
      ID = Intrinsic::x86_sse2_psll_q;
      break;
    case X86::BI__builtin_ia32_psllwi128:
      name = "psllwi";
      ID = Intrinsic::x86_sse2_psll_w;
      break;
    case X86::BI__builtin_ia32_psradi128:
      name = "psradi";
      ID = Intrinsic::x86_sse2_psra_d;
      break;
    case X86::BI__builtin_ia32_psrawi128:
      name = "psrawi";
      ID = Intrinsic::x86_sse2_psra_w;
      break;
    case X86::BI__builtin_ia32_psrldi128:
      name = "psrldi";
      ID = Intrinsic::x86_sse2_psrl_d;
      break;
    case X86::BI__builtin_ia32_psrlqi128:
      name = "psrlqi";
      ID = Intrinsic::x86_sse2_psrl_q;
      break;
    case X86::BI__builtin_ia32_psrlwi128:
      name = "psrlwi";
      ID = Intrinsic::x86_sse2_psrl_w;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
  }
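  // The case above maps the shift-by-immediate builtins onto the SSE2
  // shift-by-vector intrinsics by materializing the count in the low half
  // of an XMM value.  Illustrative IR for __builtin_ia32_psllwi128(v, 3):
  //   %zext    = zext i32 3 to i64
  //   %insert  = insertelement <2 x i64> undef, i64 %zext, i32 0
  //   %bitcast = bitcast <2 x i64> %insert to <8 x i16>
  //   %psllwi  = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v,
  //                                                   <8 x i16> %bitcast)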
  case X86::BI__builtin_ia32_vec_init_v8qi:
  case X86::BI__builtin_ia32_vec_init_v4hi:
  case X86::BI__builtin_ia32_vec_init_v2si:
    return Builder.CreateBitCast(BuildVector(Ops),
                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
  case X86::BI__builtin_ia32_vec_ext_v2si:
    return Builder.CreateExtractElement(Ops[0],
                                  llvm::ConstantInt::get(Ops[1]->getType(), 0));
  case X86::BI__builtin_ia32_pslldi:
  case X86::BI__builtin_ia32_psllqi:
  case X86::BI__builtin_ia32_psllwi:
  case X86::BI__builtin_ia32_psradi:
  case X86::BI__builtin_ia32_psrawi:
  case X86::BI__builtin_ia32_psrldi:
  case X86::BI__builtin_ia32_psrlqi:
  case X86::BI__builtin_ia32_psrlwi: {
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
    const llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 1);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty, "bitcast");
    const char *name = 0;
    Intrinsic::ID ID = Intrinsic::not_intrinsic;

    switch (BuiltinID) {
    default: assert(0 && "Unsupported shift intrinsic!");
    case X86::BI__builtin_ia32_pslldi:
      name = "pslldi";
      ID = Intrinsic::x86_mmx_psll_d;
      break;
    case X86::BI__builtin_ia32_psllqi:
      name = "psllqi";
      ID = Intrinsic::x86_mmx_psll_q;
      break;
    case X86::BI__builtin_ia32_psllwi:
      name = "psllwi";
      ID = Intrinsic::x86_mmx_psll_w;
      break;
    case X86::BI__builtin_ia32_psradi:
      name = "psradi";
      ID = Intrinsic::x86_mmx_psra_d;
      break;
    case X86::BI__builtin_ia32_psrawi:
      name = "psrawi";
      ID = Intrinsic::x86_mmx_psra_w;
      break;
    case X86::BI__builtin_ia32_psrldi:
      name = "psrldi";
      ID = Intrinsic::x86_mmx_psrl_d;
      break;
    case X86::BI__builtin_ia32_psrlqi:
      name = "psrlqi";
      ID = Intrinsic::x86_mmx_psrl_q;
      break;
    case X86::BI__builtin_ia32_psrlwi:
      name = "psrlwi";
      ID = Intrinsic::x86_mmx_psrl_w;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
  }
  case X86::BI__builtin_ia32_cmpps: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpps");
  }
  case X86::BI__builtin_ia32_cmpss: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ss);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpss");
  }
  case X86::BI__builtin_ia32_ldmxcsr: {
    const llvm::Type *PtrTy = Int8PtrTy;
    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
    Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp");
    Builder.CreateStore(Ops[0], Tmp);
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
                              Builder.CreateBitCast(Tmp, PtrTy));
  }
  case X86::BI__builtin_ia32_stmxcsr: {
    const llvm::Type *PtrTy = Int8PtrTy;
    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
    Value *Tmp = Builder.CreateAlloca(Int32Ty, One, "tmp");
    One = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
                             Builder.CreateBitCast(Tmp, PtrTy));
    return Builder.CreateLoad(Tmp, "stmxcsr");
  }
  case X86::BI__builtin_ia32_cmppd: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_pd);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmppd");
  }
  case X86::BI__builtin_ia32_cmpsd: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_sd);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpsd");
  }
  case X86::BI__builtin_ia32_storehps:
  case X86::BI__builtin_ia32_storelps: {
    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

    // Cast the value to v2i64.
    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");

    // Extract element 0 (storelps) or 1 (storehps).
    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
    llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");

    // Cast the pointer to i64* and store.
    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
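  // palignr(a, b, n) conceptually concatenates its two inputs (the first
  // operand in the high half) and shifts the pair right by n bytes.  While
  // the shift stays within the concatenation this is a shuffle; e.g.
  // (illustrative) the MMX __builtin_ia32_palignr(a, b, 4) below becomes
  //   shufflevector <8 x i8> %b, <8 x i8> %a,
  //                 <8 x i32> <i32 4, i32 5, i32 6, i32 7,
  //                            i32 8, i32 9, i32 10, i32 11>
  // Larger shifts degenerate to a plain right shift of the first operand,
  // or to zero.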
  case X86::BI__builtin_ia32_palignr: {
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors less than 9 bytes,
    // emit a shuffle instruction.
    if (shiftVal <= 8) {
      llvm::SmallVector<llvm::Constant*, 8> Indices;
      for (unsigned i = 0; i != 8; ++i)
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));

      Value* SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting the pair of input vectors more than 8 but less
    // than 16 bytes, emit a logical right shift of the destination.
    if (shiftVal < 16) {
      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
      const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);

      // Shift the value right by (shiftVal - 8) bytes.
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
      return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
    }

    // If palignr is shifting the pair of vectors 16 bytes or more, emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_palignr128: {
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors less than 17 bytes,
    // emit a shuffle instruction.
    if (shiftVal <= 16) {
      llvm::SmallVector<llvm::Constant*, 16> Indices;
      for (unsigned i = 0; i != 16; ++i)
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));

      Value* SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting the pair of input vectors more than 16 but less
    // than 32 bytes, emit a logical right shift of the destination.
    if (shiftVal < 32) {
      const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);

      // The shift amount for psrl.dq is an i32 constant given in bits.
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
      return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
    }

    // If palignr is shifting the pair of vectors 32 bytes or more, emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_loaddqu: {
    const llvm::Type *VecTy = ConvertType(E->getType());
    const llvm::Type *IntTy = llvm::IntegerType::get(getLLVMContext(), 128);

    Value *BC = Builder.CreateBitCast(Ops[0],
                                      llvm::PointerType::getUnqual(IntTy),
                                      "cast");
    LoadInst *LI = Builder.CreateLoad(BC);
    LI->setAlignment(1); // Unaligned load.
    return Builder.CreateBitCast(LI, VecTy, "loadu.cast");
  }
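  // The loaddqu case above forces an align-1 integer load and then bitcasts
  // to the vector type.  Illustrative IR (SSA names are placeholders):
  //   %cast       = bitcast i8* %p to i128*
  //   %val        = load i128* %cast, align 1
  //   %loadu.cast = bitcast i128 %val to <16 x i8>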
  // 3DNow!
  case X86::BI__builtin_ia32_pavgusb:
  case X86::BI__builtin_ia32_pf2id:
  case X86::BI__builtin_ia32_pfacc:
  case X86::BI__builtin_ia32_pfadd:
  case X86::BI__builtin_ia32_pfcmpeq:
  case X86::BI__builtin_ia32_pfcmpge:
  case X86::BI__builtin_ia32_pfcmpgt:
  case X86::BI__builtin_ia32_pfmax:
  case X86::BI__builtin_ia32_pfmin:
  case X86::BI__builtin_ia32_pfmul:
  case X86::BI__builtin_ia32_pfrcp:
  case X86::BI__builtin_ia32_pfrcpit1:
  case X86::BI__builtin_ia32_pfrcpit2:
  case X86::BI__builtin_ia32_pfrsqrt:
  case X86::BI__builtin_ia32_pfrsqit1:
  case X86::BI__builtin_ia32_pfrsqrtit1:
  case X86::BI__builtin_ia32_pfsub:
  case X86::BI__builtin_ia32_pfsubr:
  case X86::BI__builtin_ia32_pi2fd:
  case X86::BI__builtin_ia32_pmulhrw:
  case X86::BI__builtin_ia32_pf2iw:
  case X86::BI__builtin_ia32_pfnacc:
  case X86::BI__builtin_ia32_pfpnacc:
  case X86::BI__builtin_ia32_pi2fw:
  case X86::BI__builtin_ia32_pswapdsf:
  case X86::BI__builtin_ia32_pswapdsi: {
    const char *name = 0;
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    switch(BuiltinID) {
    case X86::BI__builtin_ia32_pavgusb:
      name = "pavgusb";
      ID = Intrinsic::x86_3dnow_pavgusb;
      break;
    case X86::BI__builtin_ia32_pf2id:
      name = "pf2id";
      ID = Intrinsic::x86_3dnow_pf2id;
      break;
    case X86::BI__builtin_ia32_pfacc:
      name = "pfacc";
      ID = Intrinsic::x86_3dnow_pfacc;
      break;
    case X86::BI__builtin_ia32_pfadd:
      name = "pfadd";
      ID = Intrinsic::x86_3dnow_pfadd;
      break;
    case X86::BI__builtin_ia32_pfcmpeq:
      name = "pfcmpeq";
      ID = Intrinsic::x86_3dnow_pfcmpeq;
      break;
    case X86::BI__builtin_ia32_pfcmpge:
      name = "pfcmpge";
      ID = Intrinsic::x86_3dnow_pfcmpge;
      break;
    case X86::BI__builtin_ia32_pfcmpgt:
      name = "pfcmpgt";
      ID = Intrinsic::x86_3dnow_pfcmpgt;
      break;
    case X86::BI__builtin_ia32_pfmax:
      name = "pfmax";
      ID = Intrinsic::x86_3dnow_pfmax;
      break;
    case X86::BI__builtin_ia32_pfmin:
      name = "pfmin";
      ID = Intrinsic::x86_3dnow_pfmin;
      break;
    case X86::BI__builtin_ia32_pfmul:
      name = "pfmul";
      ID = Intrinsic::x86_3dnow_pfmul;
      break;
    case X86::BI__builtin_ia32_pfrcp:
      name = "pfrcp";
      ID = Intrinsic::x86_3dnow_pfrcp;
      break;
    case X86::BI__builtin_ia32_pfrcpit1:
      name = "pfrcpit1";
      ID = Intrinsic::x86_3dnow_pfrcpit1;
      break;
    case X86::BI__builtin_ia32_pfrcpit2:
      name = "pfrcpit2";
      ID = Intrinsic::x86_3dnow_pfrcpit2;
      break;
    case X86::BI__builtin_ia32_pfrsqrt:
      name = "pfrsqrt";
      ID = Intrinsic::x86_3dnow_pfrsqrt;
      break;
    case X86::BI__builtin_ia32_pfrsqit1:
    case X86::BI__builtin_ia32_pfrsqrtit1:
      name = "pfrsqit1";
      ID = Intrinsic::x86_3dnow_pfrsqit1;
      break;
    case X86::BI__builtin_ia32_pfsub:
      name = "pfsub";
      ID = Intrinsic::x86_3dnow_pfsub;
      break;
    case X86::BI__builtin_ia32_pfsubr:
      name = "pfsubr";
      ID = Intrinsic::x86_3dnow_pfsubr;
      break;
    case X86::BI__builtin_ia32_pi2fd:
      name = "pi2fd";
      ID = Intrinsic::x86_3dnow_pi2fd;
      break;
    case X86::BI__builtin_ia32_pmulhrw:
      name = "pmulhrw";
      ID = Intrinsic::x86_3dnow_pmulhrw;
      break;
    case X86::BI__builtin_ia32_pf2iw:
      name = "pf2iw";
      ID = Intrinsic::x86_3dnowa_pf2iw;
      break;
    case X86::BI__builtin_ia32_pfnacc:
      name = "pfnacc";
      ID = Intrinsic::x86_3dnowa_pfnacc;
      break;
    case X86::BI__builtin_ia32_pfpnacc:
      name = "pfpnacc";
      ID = Intrinsic::x86_3dnowa_pfpnacc;
      break;
    case X86::BI__builtin_ia32_pi2fw:
      name = "pi2fw";
      ID = Intrinsic::x86_3dnowa_pi2fw;
      break;
    case X86::BI__builtin_ia32_pswapdsf:
    case X86::BI__builtin_ia32_pswapdsi:
      name = "pswapd";
      ID = Intrinsic::x86_3dnowa_pswapd;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
  }
  }
}

Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  llvm::SmallVector<Value*, 4> Ops;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

  switch (BuiltinID) {
  default: return 0;

  // vec_ld, vec_lvsl, vec_lvsr
  case PPC::BI__builtin_altivec_lvx:
  case PPC::BI__builtin_altivec_lvxl:
  case PPC::BI__builtin_altivec_lvebx:
  case PPC::BI__builtin_altivec_lvehx:
  case PPC::BI__builtin_altivec_lvewx:
  case PPC::BI__builtin_altivec_lvsl:
  case PPC::BI__builtin_altivec_lvsr:
  {
    // The builtins take (offset, pointer); form the effective address with a
    // byte GEP, leaving a single pointer operand for the intrinsic.
    Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);

    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0], "tmp");
    Ops.pop_back();

    switch (BuiltinID) {
    default: assert(0 && "Unsupported ld/lvsl/lvsr intrinsic!");
    case PPC::BI__builtin_altivec_lvx:
      ID = Intrinsic::ppc_altivec_lvx;
      break;
    case PPC::BI__builtin_altivec_lvxl:
      ID = Intrinsic::ppc_altivec_lvxl;
      break;
    case PPC::BI__builtin_altivec_lvebx:
      ID = Intrinsic::ppc_altivec_lvebx;
      break;
    case PPC::BI__builtin_altivec_lvehx:
      ID = Intrinsic::ppc_altivec_lvehx;
      break;
    case PPC::BI__builtin_altivec_lvewx:
      ID = Intrinsic::ppc_altivec_lvewx;
      break;
    case PPC::BI__builtin_altivec_lvsl:
      ID = Intrinsic::ppc_altivec_lvsl;
      break;
    case PPC::BI__builtin_altivec_lvsr:
      ID = Intrinsic::ppc_altivec_lvsr;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "");
  }

  // vec_st
  case PPC::BI__builtin_altivec_stvx:
  case PPC::BI__builtin_altivec_stvxl:
  case PPC::BI__builtin_altivec_stvebx:
  case PPC::BI__builtin_altivec_stvehx:
  case PPC::BI__builtin_altivec_stvewx:
  {
    // Same address computation as above, but the operands are
    // (value, offset, pointer).
    Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1], "tmp");
    Ops.pop_back();

    switch (BuiltinID) {
    default: assert(0 && "Unsupported st intrinsic!");
    case PPC::BI__builtin_altivec_stvx:
      ID = Intrinsic::ppc_altivec_stvx;
      break;
    case PPC::BI__builtin_altivec_stvxl:
      ID = Intrinsic::ppc_altivec_stvxl;
      break;
    case PPC::BI__builtin_altivec_stvebx:
      ID = Intrinsic::ppc_altivec_stvebx;
      break;
    case PPC::BI__builtin_altivec_stvehx:
      ID = Intrinsic::ppc_altivec_stvehx;
      break;
    case PPC::BI__builtin_altivec_stvewx:
      ID = Intrinsic::ppc_altivec_stvewx;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "");
  }
  }
  return 0;
}