// CGBuiltin.cpp revision 22229d6822324a42913d25f256045dbf348a53e9
//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CodeGenFunction.h"
#include "CGObjCRuntime.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/IR/DataLayout.h"
2445779228f8c9e40851cfd23f727e2bd8ffdd4714Alex Vakulenko#include "llvm/IR/Intrinsics.h" 25b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 26b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratusing namespace clang; 27b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratusing namespace CodeGen; 28b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratusing namespace llvm; 29b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 30b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat/// getBuiltinLibFunction - Given a builtin id for a function like 31b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat/// "__builtin_fabsf", return a Function* for "fabsf". 32b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratllvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, 33b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat unsigned BuiltinID) { 34b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); 35b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 36b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // Get the name, skip over the __builtin_ prefix (if necessary). 37b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat StringRef Name; 38b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat GlobalDecl D(FD); 39b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 40b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // If the builtin has been declared explicitly with an assembler label, 41b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // use the mangled name. This differs from the plain label on platforms 42b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // that prefix labels. 
43b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat if (FD->hasAttr<AsmLabelAttr>()) 44b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat Name = getMangledName(D); 45b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat else 46b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat Name = Context.BuiltinInfo.GetName(BuiltinID) + 10; 47b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 48b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat llvm::FunctionType *Ty = 49b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); 50b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 51b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); 52b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 53b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 54b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat/// Emit the conversions required to turn the given value into an 55b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat/// integer of the given size. 
56b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratstatic Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, 57b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat QualType T, llvm::IntegerType *IntType) { 58b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat V = CGF.EmitToMemory(V, T); 59b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 60b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat if (V->getType()->isPointerTy()) 61b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return CGF.Builder.CreatePtrToInt(V, IntType); 6294ffa55491333f3dcc701befd0d2652922916d99Luis Hector Chavez 63b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat assert(V->getType() == IntType); 64b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return V; 65b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 66b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 67b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratstatic Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, 68b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat QualType T, llvm::Type *ResultType) { 69b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat V = CGF.EmitFromMemory(V, T); 70b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 71b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat if (ResultType->isPointerTy()) 72b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return CGF.Builder.CreateIntToPtr(V, ResultType); 73b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 74 assert(V->getType() == ResultType); 75 return V; 76} 77 78/// Utility to insert an atomic instruction based on Instrinsic::ID 79/// and the expression node. 
80static RValue EmitBinaryAtomic(CodeGenFunction &CGF, 81 llvm::AtomicRMWInst::BinOp Kind, 82 const CallExpr *E) { 83 QualType T = E->getType(); 84 assert(E->getArg(0)->getType()->isPointerType()); 85 assert(CGF.getContext().hasSameUnqualifiedType(T, 86 E->getArg(0)->getType()->getPointeeType())); 87 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 88 89 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 90 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 91 92 llvm::IntegerType *IntType = 93 llvm::IntegerType::get(CGF.getLLVMContext(), 94 CGF.getContext().getTypeSize(T)); 95 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 96 97 llvm::Value *Args[2]; 98 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 99 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 100 llvm::Type *ValueType = Args[1]->getType(); 101 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 102 103 llvm::Value *Result = 104 CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], 105 llvm::SequentiallyConsistent); 106 Result = EmitFromInt(CGF, Result, T, ValueType); 107 return RValue::get(Result); 108} 109 110/// Utility to insert an atomic instruction based Instrinsic::ID and 111/// the expression node, where the return value is the result of the 112/// operation. 
113static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, 114 llvm::AtomicRMWInst::BinOp Kind, 115 const CallExpr *E, 116 Instruction::BinaryOps Op) { 117 QualType T = E->getType(); 118 assert(E->getArg(0)->getType()->isPointerType()); 119 assert(CGF.getContext().hasSameUnqualifiedType(T, 120 E->getArg(0)->getType()->getPointeeType())); 121 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); 122 123 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); 124 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 125 126 llvm::IntegerType *IntType = 127 llvm::IntegerType::get(CGF.getLLVMContext(), 128 CGF.getContext().getTypeSize(T)); 129 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 130 131 llvm::Value *Args[2]; 132 Args[1] = CGF.EmitScalarExpr(E->getArg(1)); 133 llvm::Type *ValueType = Args[1]->getType(); 134 Args[1] = EmitToInt(CGF, Args[1], T, IntType); 135 Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); 136 137 llvm::Value *Result = 138 CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], 139 llvm::SequentiallyConsistent); 140 Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); 141 Result = EmitFromInt(CGF, Result, T, ValueType); 142 return RValue::get(Result); 143} 144 145/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy, 146/// which must be a scalar floating point type. 147static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) { 148 const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>(); 149 assert(ValTyP && "isn't scalar fp type!"); 150 151 StringRef FnName; 152 switch (ValTyP->getKind()) { 153 default: llvm_unreachable("Isn't a scalar fp type!"); 154 case BuiltinType::Float: FnName = "fabsf"; break; 155 case BuiltinType::Double: FnName = "fabs"; break; 156 case BuiltinType::LongDouble: FnName = "fabsl"; break; 157 } 158 159 // The prototype is something that takes and returns whatever V's type is. 
160 llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), V->getType(), 161 false); 162 llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName); 163 164 return CGF.EmitNounwindRuntimeCall(Fn, V, "abs"); 165} 166 167static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn, 168 const CallExpr *E, llvm::Value *calleeValue) { 169 return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E->getLocStart(), 170 ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn); 171} 172 173/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* 174/// depending on IntrinsicID. 175/// 176/// \arg CGF The current codegen function. 177/// \arg IntrinsicID The ID for the Intrinsic we wish to generate. 178/// \arg X The first argument to the llvm.*.with.overflow.*. 179/// \arg Y The second argument to the llvm.*.with.overflow.*. 180/// \arg Carry The carry returned by the llvm.*.with.overflow.*. 181/// \returns The result (i.e. sum/product) returned by the intrinsic. 182static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, 183 const llvm::Intrinsic::ID IntrinsicID, 184 llvm::Value *X, llvm::Value *Y, 185 llvm::Value *&Carry) { 186 // Make sure we have integers of the same width. 187 assert(X->getType() == Y->getType() && 188 "Arguments must be the same type. (Did you forget to make sure both " 189 "arguments have the same integer width?)"); 190 191 llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); 192 llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y); 193 Carry = CGF.Builder.CreateExtractValue(Tmp, 1); 194 return CGF.Builder.CreateExtractValue(Tmp, 0); 195} 196 197RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, 198 unsigned BuiltinID, const CallExpr *E) { 199 // See if we can constant fold this builtin. If so, don't emit it at all. 
200 Expr::EvalResult Result; 201 if (E->EvaluateAsRValue(Result, CGM.getContext()) && 202 !Result.hasSideEffects()) { 203 if (Result.Val.isInt()) 204 return RValue::get(llvm::ConstantInt::get(getLLVMContext(), 205 Result.Val.getInt())); 206 if (Result.Val.isFloat()) 207 return RValue::get(llvm::ConstantFP::get(getLLVMContext(), 208 Result.Val.getFloat())); 209 } 210 211 switch (BuiltinID) { 212 default: break; // Handle intrinsics and libm functions below. 213 case Builtin::BI__builtin___CFStringMakeConstantString: 214 case Builtin::BI__builtin___NSStringMakeConstantString: 215 return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0)); 216 case Builtin::BI__builtin_stdarg_start: 217 case Builtin::BI__builtin_va_start: 218 case Builtin::BI__builtin_va_end: { 219 Value *ArgValue = EmitVAListRef(E->getArg(0)); 220 llvm::Type *DestType = Int8PtrTy; 221 if (ArgValue->getType() != DestType) 222 ArgValue = Builder.CreateBitCast(ArgValue, DestType, 223 ArgValue->getName().data()); 224 225 Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ? 
226 Intrinsic::vaend : Intrinsic::vastart; 227 return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue)); 228 } 229 case Builtin::BI__builtin_va_copy: { 230 Value *DstPtr = EmitVAListRef(E->getArg(0)); 231 Value *SrcPtr = EmitVAListRef(E->getArg(1)); 232 233 llvm::Type *Type = Int8PtrTy; 234 235 DstPtr = Builder.CreateBitCast(DstPtr, Type); 236 SrcPtr = Builder.CreateBitCast(SrcPtr, Type); 237 return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy), 238 DstPtr, SrcPtr)); 239 } 240 case Builtin::BI__builtin_abs: 241 case Builtin::BI__builtin_labs: 242 case Builtin::BI__builtin_llabs: { 243 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 244 245 Value *NegOp = Builder.CreateNeg(ArgValue, "neg"); 246 Value *CmpResult = 247 Builder.CreateICmpSGE(ArgValue, 248 llvm::Constant::getNullValue(ArgValue->getType()), 249 "abscond"); 250 Value *Result = 251 Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs"); 252 253 return RValue::get(Result); 254 } 255 256 case Builtin::BI__builtin_conj: 257 case Builtin::BI__builtin_conjf: 258 case Builtin::BI__builtin_conjl: { 259 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 260 Value *Real = ComplexVal.first; 261 Value *Imag = ComplexVal.second; 262 Value *Zero = 263 Imag->getType()->isFPOrFPVectorTy() 264 ? 
llvm::ConstantFP::getZeroValueForNegation(Imag->getType()) 265 : llvm::Constant::getNullValue(Imag->getType()); 266 267 Imag = Builder.CreateFSub(Zero, Imag, "sub"); 268 return RValue::getComplex(std::make_pair(Real, Imag)); 269 } 270 case Builtin::BI__builtin_creal: 271 case Builtin::BI__builtin_crealf: 272 case Builtin::BI__builtin_creall: 273 case Builtin::BIcreal: 274 case Builtin::BIcrealf: 275 case Builtin::BIcreall: { 276 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 277 return RValue::get(ComplexVal.first); 278 } 279 280 case Builtin::BI__builtin_cimag: 281 case Builtin::BI__builtin_cimagf: 282 case Builtin::BI__builtin_cimagl: 283 case Builtin::BIcimag: 284 case Builtin::BIcimagf: 285 case Builtin::BIcimagl: { 286 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); 287 return RValue::get(ComplexVal.second); 288 } 289 290 case Builtin::BI__builtin_ctzs: 291 case Builtin::BI__builtin_ctz: 292 case Builtin::BI__builtin_ctzl: 293 case Builtin::BI__builtin_ctzll: { 294 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 295 296 llvm::Type *ArgType = ArgValue->getType(); 297 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 298 299 llvm::Type *ResultType = ConvertType(E->getType()); 300 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 301 Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef); 302 if (Result->getType() != ResultType) 303 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 304 "cast"); 305 return RValue::get(Result); 306 } 307 case Builtin::BI__builtin_clzs: 308 case Builtin::BI__builtin_clz: 309 case Builtin::BI__builtin_clzl: 310 case Builtin::BI__builtin_clzll: { 311 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 312 313 llvm::Type *ArgType = ArgValue->getType(); 314 Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); 315 316 llvm::Type *ResultType = ConvertType(E->getType()); 317 Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); 318 Value *Result = 
Builder.CreateCall2(F, ArgValue, ZeroUndef); 319 if (Result->getType() != ResultType) 320 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 321 "cast"); 322 return RValue::get(Result); 323 } 324 case Builtin::BI__builtin_ffs: 325 case Builtin::BI__builtin_ffsl: 326 case Builtin::BI__builtin_ffsll: { 327 // ffs(x) -> x ? cttz(x) + 1 : 0 328 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 329 330 llvm::Type *ArgType = ArgValue->getType(); 331 Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); 332 333 llvm::Type *ResultType = ConvertType(E->getType()); 334 Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue, 335 Builder.getTrue()), 336 llvm::ConstantInt::get(ArgType, 1)); 337 Value *Zero = llvm::Constant::getNullValue(ArgType); 338 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); 339 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); 340 if (Result->getType() != ResultType) 341 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 342 "cast"); 343 return RValue::get(Result); 344 } 345 case Builtin::BI__builtin_parity: 346 case Builtin::BI__builtin_parityl: 347 case Builtin::BI__builtin_parityll: { 348 // parity(x) -> ctpop(x) & 1 349 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 350 351 llvm::Type *ArgType = ArgValue->getType(); 352 Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 353 354 llvm::Type *ResultType = ConvertType(E->getType()); 355 Value *Tmp = Builder.CreateCall(F, ArgValue); 356 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); 357 if (Result->getType() != ResultType) 358 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 359 "cast"); 360 return RValue::get(Result); 361 } 362 case Builtin::BI__builtin_popcount: 363 case Builtin::BI__builtin_popcountl: 364 case Builtin::BI__builtin_popcountll: { 365 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 366 367 llvm::Type *ArgType = ArgValue->getType(); 368 Value *F = 
CGM.getIntrinsic(Intrinsic::ctpop, ArgType); 369 370 llvm::Type *ResultType = ConvertType(E->getType()); 371 Value *Result = Builder.CreateCall(F, ArgValue); 372 if (Result->getType() != ResultType) 373 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, 374 "cast"); 375 return RValue::get(Result); 376 } 377 case Builtin::BI__builtin_expect: { 378 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 379 llvm::Type *ArgType = ArgValue->getType(); 380 381 Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); 382 Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); 383 384 Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue, 385 "expval"); 386 return RValue::get(Result); 387 } 388 case Builtin::BI__builtin_bswap16: 389 case Builtin::BI__builtin_bswap32: 390 case Builtin::BI__builtin_bswap64: { 391 Value *ArgValue = EmitScalarExpr(E->getArg(0)); 392 llvm::Type *ArgType = ArgValue->getType(); 393 Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType); 394 return RValue::get(Builder.CreateCall(F, ArgValue)); 395 } 396 case Builtin::BI__builtin_object_size: { 397 // We rely on constant folding to deal with expressions with side effects. 398 assert(!E->getArg(0)->HasSideEffects(getContext()) && 399 "should have been constant folded"); 400 401 // We pass this builtin onto the optimizer so that it can 402 // figure out the object size in more complex cases. 403 llvm::Type *ResType = ConvertType(E->getType()); 404 405 // LLVM only supports 0 and 2, make sure that we pass along that 406 // as a boolean. 407 Value *Ty = EmitScalarExpr(E->getArg(1)); 408 ConstantInt *CI = dyn_cast<ConstantInt>(Ty); 409 assert(CI); 410 uint64_t val = CI->getZExtValue(); 411 CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1); 412 // FIXME: Get right address space. 
413 llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) }; 414 Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys); 415 return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI)); 416 } 417 case Builtin::BI__builtin_prefetch: { 418 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); 419 // FIXME: Technically these constants should of type 'int', yes? 420 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : 421 llvm::ConstantInt::get(Int32Ty, 0); 422 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : 423 llvm::ConstantInt::get(Int32Ty, 3); 424 Value *Data = llvm::ConstantInt::get(Int32Ty, 1); 425 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 426 return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data)); 427 } 428 case Builtin::BI__builtin_readcyclecounter: { 429 Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); 430 return RValue::get(Builder.CreateCall(F)); 431 } 432 case Builtin::BI__builtin_trap: { 433 Value *F = CGM.getIntrinsic(Intrinsic::trap); 434 return RValue::get(Builder.CreateCall(F)); 435 } 436 case Builtin::BI__debugbreak: { 437 Value *F = CGM.getIntrinsic(Intrinsic::debugtrap); 438 return RValue::get(Builder.CreateCall(F)); 439 } 440 case Builtin::BI__builtin_unreachable: { 441 if (SanOpts->Unreachable) 442 EmitCheck(Builder.getFalse(), "builtin_unreachable", 443 EmitCheckSourceLocation(E->getExprLoc()), 444 ArrayRef<llvm::Value *>(), CRK_Unrecoverable); 445 else 446 Builder.CreateUnreachable(); 447 448 // We do need to preserve an insertion point. 
449 EmitBlock(createBasicBlock("unreachable.cont")); 450 451 return RValue::get(0); 452 } 453 454 case Builtin::BI__builtin_powi: 455 case Builtin::BI__builtin_powif: 456 case Builtin::BI__builtin_powil: { 457 Value *Base = EmitScalarExpr(E->getArg(0)); 458 Value *Exponent = EmitScalarExpr(E->getArg(1)); 459 llvm::Type *ArgType = Base->getType(); 460 Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); 461 return RValue::get(Builder.CreateCall2(F, Base, Exponent)); 462 } 463 464 case Builtin::BI__builtin_isgreater: 465 case Builtin::BI__builtin_isgreaterequal: 466 case Builtin::BI__builtin_isless: 467 case Builtin::BI__builtin_islessequal: 468 case Builtin::BI__builtin_islessgreater: 469 case Builtin::BI__builtin_isunordered: { 470 // Ordered comparisons: we know the arguments to these are matching scalar 471 // floating point values. 472 Value *LHS = EmitScalarExpr(E->getArg(0)); 473 Value *RHS = EmitScalarExpr(E->getArg(1)); 474 475 switch (BuiltinID) { 476 default: llvm_unreachable("Unknown ordered comparison"); 477 case Builtin::BI__builtin_isgreater: 478 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); 479 break; 480 case Builtin::BI__builtin_isgreaterequal: 481 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); 482 break; 483 case Builtin::BI__builtin_isless: 484 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); 485 break; 486 case Builtin::BI__builtin_islessequal: 487 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); 488 break; 489 case Builtin::BI__builtin_islessgreater: 490 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); 491 break; 492 case Builtin::BI__builtin_isunordered: 493 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); 494 break; 495 } 496 // ZExt bool to int type. 
497 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); 498 } 499 case Builtin::BI__builtin_isnan: { 500 Value *V = EmitScalarExpr(E->getArg(0)); 501 V = Builder.CreateFCmpUNO(V, V, "cmp"); 502 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 503 } 504 505 case Builtin::BI__builtin_isinf: { 506 // isinf(x) --> fabs(x) == infinity 507 Value *V = EmitScalarExpr(E->getArg(0)); 508 V = EmitFAbs(*this, V, E->getArg(0)->getType()); 509 510 V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf"); 511 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 512 } 513 514 // TODO: BI__builtin_isinf_sign 515 // isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0 516 517 case Builtin::BI__builtin_isnormal: { 518 // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min 519 Value *V = EmitScalarExpr(E->getArg(0)); 520 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 521 522 Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType()); 523 Value *IsLessThanInf = 524 Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 525 APFloat Smallest = APFloat::getSmallestNormalized( 526 getContext().getFloatTypeSemantics(E->getArg(0)->getType())); 527 Value *IsNormal = 528 Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest), 529 "isnormal"); 530 V = Builder.CreateAnd(Eq, IsLessThanInf, "and"); 531 V = Builder.CreateAnd(V, IsNormal, "and"); 532 return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 533 } 534 535 case Builtin::BI__builtin_isfinite: { 536 // isfinite(x) --> x == x && fabs(x) != infinity; 537 Value *V = EmitScalarExpr(E->getArg(0)); 538 Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); 539 540 Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType()); 541 Value *IsNotInf = 542 Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); 543 544 V = Builder.CreateAnd(Eq, IsNotInf, "and"); 545 return 
RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); 546 } 547 548 case Builtin::BI__builtin_fpclassify: { 549 Value *V = EmitScalarExpr(E->getArg(5)); 550 llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); 551 552 // Create Result 553 BasicBlock *Begin = Builder.GetInsertBlock(); 554 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); 555 Builder.SetInsertPoint(End); 556 PHINode *Result = 557 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, 558 "fpclassify_result"); 559 560 // if (V==0) return FP_ZERO 561 Builder.SetInsertPoint(Begin); 562 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), 563 "iszero"); 564 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); 565 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); 566 Builder.CreateCondBr(IsZero, End, NotZero); 567 Result->addIncoming(ZeroLiteral, Begin); 568 569 // if (V != V) return FP_NAN 570 Builder.SetInsertPoint(NotZero); 571 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); 572 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 573 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 574 Builder.CreateCondBr(IsNan, End, NotNan); 575 Result->addIncoming(NanLiteral, NotZero); 576 577 // if (fabs(V) == infinity) return FP_INFINITY 578 Builder.SetInsertPoint(NotNan); 579 Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType()); 580 Value *IsInf = 581 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 582 "isinf"); 583 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 584 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 585 Builder.CreateCondBr(IsInf, End, NotInf); 586 Result->addIncoming(InfLiteral, NotNan); 587 588 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 589 Builder.SetInsertPoint(NotInf); 590 APFloat Smallest = APFloat::getSmallestNormalized( 591 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 592 Value *IsNormal = 593 
Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 594 "isnormal"); 595 Value *NormalResult = 596 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 597 EmitScalarExpr(E->getArg(3))); 598 Builder.CreateBr(End); 599 Result->addIncoming(NormalResult, NotInf); 600 601 // return Result 602 Builder.SetInsertPoint(End); 603 return RValue::get(Result); 604 } 605 606 case Builtin::BIalloca: 607 case Builtin::BI__builtin_alloca: { 608 Value *Size = EmitScalarExpr(E->getArg(0)); 609 return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size)); 610 } 611 case Builtin::BIbzero: 612 case Builtin::BI__builtin_bzero: { 613 std::pair<llvm::Value*, unsigned> Dest = 614 EmitPointerWithAlignment(E->getArg(0)); 615 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 616 Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal, 617 Dest.second, false); 618 return RValue::get(Dest.first); 619 } 620 case Builtin::BImemcpy: 621 case Builtin::BI__builtin_memcpy: { 622 std::pair<llvm::Value*, unsigned> Dest = 623 EmitPointerWithAlignment(E->getArg(0)); 624 std::pair<llvm::Value*, unsigned> Src = 625 EmitPointerWithAlignment(E->getArg(1)); 626 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 627 unsigned Align = std::min(Dest.second, Src.second); 628 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); 629 return RValue::get(Dest.first); 630 } 631 632 case Builtin::BI__builtin___memcpy_chk: { 633 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 
634 llvm::APSInt Size, DstSize; 635 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 636 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 637 break; 638 if (Size.ugt(DstSize)) 639 break; 640 std::pair<llvm::Value*, unsigned> Dest = 641 EmitPointerWithAlignment(E->getArg(0)); 642 std::pair<llvm::Value*, unsigned> Src = 643 EmitPointerWithAlignment(E->getArg(1)); 644 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 645 unsigned Align = std::min(Dest.second, Src.second); 646 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); 647 return RValue::get(Dest.first); 648 } 649 650 case Builtin::BI__builtin_objc_memmove_collectable: { 651 Value *Address = EmitScalarExpr(E->getArg(0)); 652 Value *SrcAddr = EmitScalarExpr(E->getArg(1)); 653 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 654 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, 655 Address, SrcAddr, SizeVal); 656 return RValue::get(Address); 657 } 658 659 case Builtin::BI__builtin___memmove_chk: { 660 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
661 llvm::APSInt Size, DstSize; 662 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 663 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 664 break; 665 if (Size.ugt(DstSize)) 666 break; 667 std::pair<llvm::Value*, unsigned> Dest = 668 EmitPointerWithAlignment(E->getArg(0)); 669 std::pair<llvm::Value*, unsigned> Src = 670 EmitPointerWithAlignment(E->getArg(1)); 671 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 672 unsigned Align = std::min(Dest.second, Src.second); 673 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false); 674 return RValue::get(Dest.first); 675 } 676 677 case Builtin::BImemmove: 678 case Builtin::BI__builtin_memmove: { 679 std::pair<llvm::Value*, unsigned> Dest = 680 EmitPointerWithAlignment(E->getArg(0)); 681 std::pair<llvm::Value*, unsigned> Src = 682 EmitPointerWithAlignment(E->getArg(1)); 683 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 684 unsigned Align = std::min(Dest.second, Src.second); 685 Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false); 686 return RValue::get(Dest.first); 687 } 688 case Builtin::BImemset: 689 case Builtin::BI__builtin_memset: { 690 std::pair<llvm::Value*, unsigned> Dest = 691 EmitPointerWithAlignment(E->getArg(0)); 692 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 693 Builder.getInt8Ty()); 694 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 695 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false); 696 return RValue::get(Dest.first); 697 } 698 case Builtin::BI__builtin___memset_chk: { 699 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
700 llvm::APSInt Size, DstSize; 701 if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || 702 !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) 703 break; 704 if (Size.ugt(DstSize)) 705 break; 706 std::pair<llvm::Value*, unsigned> Dest = 707 EmitPointerWithAlignment(E->getArg(0)); 708 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), 709 Builder.getInt8Ty()); 710 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); 711 Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false); 712 return RValue::get(Dest.first); 713 } 714 case Builtin::BI__builtin_dwarf_cfa: { 715 // The offset in bytes from the first argument to the CFA. 716 // 717 // Why on earth is this in the frontend? Is there any reason at 718 // all that the backend can't reasonably determine this while 719 // lowering llvm.eh.dwarf.cfa()? 720 // 721 // TODO: If there's a satisfactory reason, add a target hook for 722 // this instead of hard-coding 0, which is correct for most targets. 
723 int32_t Offset = 0; 724 725 Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); 726 return RValue::get(Builder.CreateCall(F, 727 llvm::ConstantInt::get(Int32Ty, Offset))); 728 } 729 case Builtin::BI__builtin_return_address: { 730 Value *Depth = EmitScalarExpr(E->getArg(0)); 731 Depth = Builder.CreateIntCast(Depth, Int32Ty, false); 732 Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); 733 return RValue::get(Builder.CreateCall(F, Depth)); 734 } 735 case Builtin::BI__builtin_frame_address: { 736 Value *Depth = EmitScalarExpr(E->getArg(0)); 737 Depth = Builder.CreateIntCast(Depth, Int32Ty, false); 738 Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); 739 return RValue::get(Builder.CreateCall(F, Depth)); 740 } 741 case Builtin::BI__builtin_extract_return_addr: { 742 Value *Address = EmitScalarExpr(E->getArg(0)); 743 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); 744 return RValue::get(Result); 745 } 746 case Builtin::BI__builtin_frob_return_addr: { 747 Value *Address = EmitScalarExpr(E->getArg(0)); 748 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); 749 return RValue::get(Result); 750 } 751 case Builtin::BI__builtin_dwarf_sp_column: { 752 llvm::IntegerType *Ty 753 = cast<llvm::IntegerType>(ConvertType(E->getType())); 754 int Column = getTargetHooks().getDwarfEHStackPointer(CGM); 755 if (Column == -1) { 756 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); 757 return RValue::get(llvm::UndefValue::get(Ty)); 758 } 759 return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); 760 } 761 case Builtin::BI__builtin_init_dwarf_reg_size_table: { 762 Value *Address = EmitScalarExpr(E->getArg(0)); 763 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) 764 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); 765 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); 766 } 767 case Builtin::BI__builtin_eh_return: { 768 Value *Int = EmitScalarExpr(E->getArg(0)); 769 Value *Ptr = 
EmitScalarExpr(E->getArg(1)); 770 771 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); 772 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && 773 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); 774 Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 775 ? Intrinsic::eh_return_i32 776 : Intrinsic::eh_return_i64); 777 Builder.CreateCall2(F, Int, Ptr); 778 Builder.CreateUnreachable(); 779 780 // We do need to preserve an insertion point. 781 EmitBlock(createBasicBlock("builtin_eh_return.cont")); 782 783 return RValue::get(0); 784 } 785 case Builtin::BI__builtin_unwind_init: { 786 Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); 787 return RValue::get(Builder.CreateCall(F)); 788 } 789 case Builtin::BI__builtin_extend_pointer: { 790 // Extends a pointer to the size of an _Unwind_Word, which is 791 // uint64_t on all platforms. Generally this gets poked into a 792 // register and eventually used as an address, so if the 793 // addressing registers are wider than pointers and the platform 794 // doesn't implicitly ignore high-order bits when doing 795 // addressing, we need to make sure we zext / sext based on 796 // the platform's expectations. 797 // 798 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html 799 800 // Cast the pointer to intptr_t. 801 Value *Ptr = EmitScalarExpr(E->getArg(0)); 802 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); 803 804 // If that's 64 bits, we're done. 805 if (IntPtrTy->getBitWidth() == 64) 806 return RValue::get(Result); 807 808 // Otherwise, ask the codegen data what to do. 809 if (getTargetHooks().extendPointerWithSExt()) 810 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); 811 else 812 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); 813 } 814 case Builtin::BI__builtin_setjmp: { 815 // Buffer is a void**. 
816 Value *Buf = EmitScalarExpr(E->getArg(0)); 817 818 // Store the frame pointer to the setjmp buffer. 819 Value *FrameAddr = 820 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 821 ConstantInt::get(Int32Ty, 0)); 822 Builder.CreateStore(FrameAddr, Buf); 823 824 // Store the stack pointer to the setjmp buffer. 825 Value *StackAddr = 826 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); 827 Value *StackSaveSlot = 828 Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2)); 829 Builder.CreateStore(StackAddr, StackSaveSlot); 830 831 // Call LLVM's EH setjmp, which is lightweight. 832 Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); 833 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 834 return RValue::get(Builder.CreateCall(F, Buf)); 835 } 836 case Builtin::BI__builtin_longjmp: { 837 Value *Buf = EmitScalarExpr(E->getArg(0)); 838 Buf = Builder.CreateBitCast(Buf, Int8PtrTy); 839 840 // Call LLVM's EH longjmp, which is lightweight. 841 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); 842 843 // longjmp doesn't return; mark this as unreachable. 844 Builder.CreateUnreachable(); 845 846 // We do need to preserve an insertion point. 
847 EmitBlock(createBasicBlock("longjmp.cont")); 848 849 return RValue::get(0); 850 } 851 case Builtin::BI__sync_fetch_and_add: 852 case Builtin::BI__sync_fetch_and_sub: 853 case Builtin::BI__sync_fetch_and_or: 854 case Builtin::BI__sync_fetch_and_and: 855 case Builtin::BI__sync_fetch_and_xor: 856 case Builtin::BI__sync_add_and_fetch: 857 case Builtin::BI__sync_sub_and_fetch: 858 case Builtin::BI__sync_and_and_fetch: 859 case Builtin::BI__sync_or_and_fetch: 860 case Builtin::BI__sync_xor_and_fetch: 861 case Builtin::BI__sync_val_compare_and_swap: 862 case Builtin::BI__sync_bool_compare_and_swap: 863 case Builtin::BI__sync_lock_test_and_set: 864 case Builtin::BI__sync_lock_release: 865 case Builtin::BI__sync_swap: 866 llvm_unreachable("Shouldn't make it through sema"); 867 case Builtin::BI__sync_fetch_and_add_1: 868 case Builtin::BI__sync_fetch_and_add_2: 869 case Builtin::BI__sync_fetch_and_add_4: 870 case Builtin::BI__sync_fetch_and_add_8: 871 case Builtin::BI__sync_fetch_and_add_16: 872 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); 873 case Builtin::BI__sync_fetch_and_sub_1: 874 case Builtin::BI__sync_fetch_and_sub_2: 875 case Builtin::BI__sync_fetch_and_sub_4: 876 case Builtin::BI__sync_fetch_and_sub_8: 877 case Builtin::BI__sync_fetch_and_sub_16: 878 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); 879 case Builtin::BI__sync_fetch_and_or_1: 880 case Builtin::BI__sync_fetch_and_or_2: 881 case Builtin::BI__sync_fetch_and_or_4: 882 case Builtin::BI__sync_fetch_and_or_8: 883 case Builtin::BI__sync_fetch_and_or_16: 884 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); 885 case Builtin::BI__sync_fetch_and_and_1: 886 case Builtin::BI__sync_fetch_and_and_2: 887 case Builtin::BI__sync_fetch_and_and_4: 888 case Builtin::BI__sync_fetch_and_and_8: 889 case Builtin::BI__sync_fetch_and_and_16: 890 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); 891 case Builtin::BI__sync_fetch_and_xor_1: 892 case 
Builtin::BI__sync_fetch_and_xor_2: 893 case Builtin::BI__sync_fetch_and_xor_4: 894 case Builtin::BI__sync_fetch_and_xor_8: 895 case Builtin::BI__sync_fetch_and_xor_16: 896 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); 897 898 // Clang extensions: not overloaded yet. 899 case Builtin::BI__sync_fetch_and_min: 900 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); 901 case Builtin::BI__sync_fetch_and_max: 902 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); 903 case Builtin::BI__sync_fetch_and_umin: 904 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); 905 case Builtin::BI__sync_fetch_and_umax: 906 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); 907 908 case Builtin::BI__sync_add_and_fetch_1: 909 case Builtin::BI__sync_add_and_fetch_2: 910 case Builtin::BI__sync_add_and_fetch_4: 911 case Builtin::BI__sync_add_and_fetch_8: 912 case Builtin::BI__sync_add_and_fetch_16: 913 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, 914 llvm::Instruction::Add); 915 case Builtin::BI__sync_sub_and_fetch_1: 916 case Builtin::BI__sync_sub_and_fetch_2: 917 case Builtin::BI__sync_sub_and_fetch_4: 918 case Builtin::BI__sync_sub_and_fetch_8: 919 case Builtin::BI__sync_sub_and_fetch_16: 920 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, 921 llvm::Instruction::Sub); 922 case Builtin::BI__sync_and_and_fetch_1: 923 case Builtin::BI__sync_and_and_fetch_2: 924 case Builtin::BI__sync_and_and_fetch_4: 925 case Builtin::BI__sync_and_and_fetch_8: 926 case Builtin::BI__sync_and_and_fetch_16: 927 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, 928 llvm::Instruction::And); 929 case Builtin::BI__sync_or_and_fetch_1: 930 case Builtin::BI__sync_or_and_fetch_2: 931 case Builtin::BI__sync_or_and_fetch_4: 932 case Builtin::BI__sync_or_and_fetch_8: 933 case Builtin::BI__sync_or_and_fetch_16: 934 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, 935 llvm::Instruction::Or); 936 case 
Builtin::BI__sync_xor_and_fetch_1: 937 case Builtin::BI__sync_xor_and_fetch_2: 938 case Builtin::BI__sync_xor_and_fetch_4: 939 case Builtin::BI__sync_xor_and_fetch_8: 940 case Builtin::BI__sync_xor_and_fetch_16: 941 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, 942 llvm::Instruction::Xor); 943 944 case Builtin::BI__sync_val_compare_and_swap_1: 945 case Builtin::BI__sync_val_compare_and_swap_2: 946 case Builtin::BI__sync_val_compare_and_swap_4: 947 case Builtin::BI__sync_val_compare_and_swap_8: 948 case Builtin::BI__sync_val_compare_and_swap_16: { 949 QualType T = E->getType(); 950 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0)); 951 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 952 953 llvm::IntegerType *IntType = 954 llvm::IntegerType::get(getLLVMContext(), 955 getContext().getTypeSize(T)); 956 llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace); 957 958 Value *Args[3]; 959 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType); 960 Args[1] = EmitScalarExpr(E->getArg(1)); 961 llvm::Type *ValueType = Args[1]->getType(); 962 Args[1] = EmitToInt(*this, Args[1], T, IntType); 963 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType); 964 965 Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], 966 llvm::SequentiallyConsistent); 967 Result = EmitFromInt(*this, Result, T, ValueType); 968 return RValue::get(Result); 969 } 970 971 case Builtin::BI__sync_bool_compare_and_swap_1: 972 case Builtin::BI__sync_bool_compare_and_swap_2: 973 case Builtin::BI__sync_bool_compare_and_swap_4: 974 case Builtin::BI__sync_bool_compare_and_swap_8: 975 case Builtin::BI__sync_bool_compare_and_swap_16: { 976 QualType T = E->getArg(1)->getType(); 977 llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0)); 978 unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); 979 980 llvm::IntegerType *IntType = 981 llvm::IntegerType::get(getLLVMContext(), 982 getContext().getTypeSize(T)); 983 llvm::Type *IntPtrType = 
IntType->getPointerTo(AddrSpace); 984 985 Value *Args[3]; 986 Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType); 987 Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType); 988 Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType); 989 990 Value *OldVal = Args[1]; 991 Value *PrevVal = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], 992 llvm::SequentiallyConsistent); 993 Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal); 994 // zext bool to int. 995 Result = Builder.CreateZExt(Result, ConvertType(E->getType())); 996 return RValue::get(Result); 997 } 998 999 case Builtin::BI__sync_swap_1: 1000 case Builtin::BI__sync_swap_2: 1001 case Builtin::BI__sync_swap_4: 1002 case Builtin::BI__sync_swap_8: 1003 case Builtin::BI__sync_swap_16: 1004 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1005 1006 case Builtin::BI__sync_lock_test_and_set_1: 1007 case Builtin::BI__sync_lock_test_and_set_2: 1008 case Builtin::BI__sync_lock_test_and_set_4: 1009 case Builtin::BI__sync_lock_test_and_set_8: 1010 case Builtin::BI__sync_lock_test_and_set_16: 1011 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1012 1013 case Builtin::BI__sync_lock_release_1: 1014 case Builtin::BI__sync_lock_release_2: 1015 case Builtin::BI__sync_lock_release_4: 1016 case Builtin::BI__sync_lock_release_8: 1017 case Builtin::BI__sync_lock_release_16: { 1018 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1019 QualType ElTy = E->getArg(0)->getType()->getPointeeType(); 1020 CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); 1021 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), 1022 StoreSize.getQuantity() * 8); 1023 Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); 1024 llvm::StoreInst *Store = 1025 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr); 1026 Store->setAlignment(StoreSize.getQuantity()); 1027 Store->setAtomic(llvm::Release); 1028 return RValue::get(0); 1029 } 1030 1031 case Builtin::BI__sync_synchronize: { 
1032 // We assume this is supposed to correspond to a C++0x-style 1033 // sequentially-consistent fence (i.e. this is only usable for 1034 // synchonization, not device I/O or anything like that). This intrinsic 1035 // is really badly designed in the sense that in theory, there isn't 1036 // any way to safely use it... but in practice, it mostly works 1037 // to use it with non-atomic loads and stores to get acquire/release 1038 // semantics. 1039 Builder.CreateFence(llvm::SequentiallyConsistent); 1040 return RValue::get(0); 1041 } 1042 1043 case Builtin::BI__c11_atomic_is_lock_free: 1044 case Builtin::BI__atomic_is_lock_free: { 1045 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the 1046 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since 1047 // _Atomic(T) is always properly-aligned. 1048 const char *LibCallName = "__atomic_is_lock_free"; 1049 CallArgList Args; 1050 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), 1051 getContext().getSizeType()); 1052 if (BuiltinID == Builtin::BI__atomic_is_lock_free) 1053 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), 1054 getContext().VoidPtrTy); 1055 else 1056 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), 1057 getContext().VoidPtrTy); 1058 const CGFunctionInfo &FuncInfo = 1059 CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args, 1060 FunctionType::ExtInfo(), 1061 RequiredArgs::All); 1062 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); 1063 llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); 1064 return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); 1065 } 1066 1067 case Builtin::BI__atomic_test_and_set: { 1068 // Look at the argument type to determine whether this is a volatile 1069 // operation. The parameter type is always volatile. 
1070 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1071 bool Volatile = 1072 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1073 1074 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1075 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1076 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1077 Value *NewVal = Builder.getInt8(1); 1078 Value *Order = EmitScalarExpr(E->getArg(1)); 1079 if (isa<llvm::ConstantInt>(Order)) { 1080 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1081 AtomicRMWInst *Result = 0; 1082 switch (ord) { 1083 case 0: // memory_order_relaxed 1084 default: // invalid order 1085 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1086 Ptr, NewVal, 1087 llvm::Monotonic); 1088 break; 1089 case 1: // memory_order_consume 1090 case 2: // memory_order_acquire 1091 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1092 Ptr, NewVal, 1093 llvm::Acquire); 1094 break; 1095 case 3: // memory_order_release 1096 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1097 Ptr, NewVal, 1098 llvm::Release); 1099 break; 1100 case 4: // memory_order_acq_rel 1101 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1102 Ptr, NewVal, 1103 llvm::AcquireRelease); 1104 break; 1105 case 5: // memory_order_seq_cst 1106 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1107 Ptr, NewVal, 1108 llvm::SequentiallyConsistent); 1109 break; 1110 } 1111 Result->setVolatile(Volatile); 1112 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1113 } 1114 1115 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1116 1117 llvm::BasicBlock *BBs[5] = { 1118 createBasicBlock("monotonic", CurFn), 1119 createBasicBlock("acquire", CurFn), 1120 createBasicBlock("release", CurFn), 1121 createBasicBlock("acqrel", CurFn), 1122 createBasicBlock("seqcst", CurFn) 1123 }; 1124 llvm::AtomicOrdering Orders[5] = { 1125 llvm::Monotonic, llvm::Acquire, 
llvm::Release, 1126 llvm::AcquireRelease, llvm::SequentiallyConsistent 1127 }; 1128 1129 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1130 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1131 1132 Builder.SetInsertPoint(ContBB); 1133 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 1134 1135 for (unsigned i = 0; i < 5; ++i) { 1136 Builder.SetInsertPoint(BBs[i]); 1137 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1138 Ptr, NewVal, Orders[i]); 1139 RMW->setVolatile(Volatile); 1140 Result->addIncoming(RMW, BBs[i]); 1141 Builder.CreateBr(ContBB); 1142 } 1143 1144 SI->addCase(Builder.getInt32(0), BBs[0]); 1145 SI->addCase(Builder.getInt32(1), BBs[1]); 1146 SI->addCase(Builder.getInt32(2), BBs[1]); 1147 SI->addCase(Builder.getInt32(3), BBs[2]); 1148 SI->addCase(Builder.getInt32(4), BBs[3]); 1149 SI->addCase(Builder.getInt32(5), BBs[4]); 1150 1151 Builder.SetInsertPoint(ContBB); 1152 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1153 } 1154 1155 case Builtin::BI__atomic_clear: { 1156 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1157 bool Volatile = 1158 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1159 1160 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1161 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1162 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1163 Value *NewVal = Builder.getInt8(0); 1164 Value *Order = EmitScalarExpr(E->getArg(1)); 1165 if (isa<llvm::ConstantInt>(Order)) { 1166 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1167 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1168 Store->setAlignment(1); 1169 switch (ord) { 1170 case 0: // memory_order_relaxed 1171 default: // invalid order 1172 Store->setOrdering(llvm::Monotonic); 1173 break; 1174 case 3: // memory_order_release 1175 Store->setOrdering(llvm::Release); 1176 break; 1177 case 5: // memory_order_seq_cst 1178 
Store->setOrdering(llvm::SequentiallyConsistent); 1179 break; 1180 } 1181 return RValue::get(0); 1182 } 1183 1184 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1185 1186 llvm::BasicBlock *BBs[3] = { 1187 createBasicBlock("monotonic", CurFn), 1188 createBasicBlock("release", CurFn), 1189 createBasicBlock("seqcst", CurFn) 1190 }; 1191 llvm::AtomicOrdering Orders[3] = { 1192 llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent 1193 }; 1194 1195 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1196 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1197 1198 for (unsigned i = 0; i < 3; ++i) { 1199 Builder.SetInsertPoint(BBs[i]); 1200 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1201 Store->setAlignment(1); 1202 Store->setOrdering(Orders[i]); 1203 Builder.CreateBr(ContBB); 1204 } 1205 1206 SI->addCase(Builder.getInt32(0), BBs[0]); 1207 SI->addCase(Builder.getInt32(3), BBs[1]); 1208 SI->addCase(Builder.getInt32(5), BBs[2]); 1209 1210 Builder.SetInsertPoint(ContBB); 1211 return RValue::get(0); 1212 } 1213 1214 case Builtin::BI__atomic_thread_fence: 1215 case Builtin::BI__atomic_signal_fence: 1216 case Builtin::BI__c11_atomic_thread_fence: 1217 case Builtin::BI__c11_atomic_signal_fence: { 1218 llvm::SynchronizationScope Scope; 1219 if (BuiltinID == Builtin::BI__atomic_signal_fence || 1220 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 1221 Scope = llvm::SingleThread; 1222 else 1223 Scope = llvm::CrossThread; 1224 Value *Order = EmitScalarExpr(E->getArg(0)); 1225 if (isa<llvm::ConstantInt>(Order)) { 1226 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1227 switch (ord) { 1228 case 0: // memory_order_relaxed 1229 default: // invalid order 1230 break; 1231 case 1: // memory_order_consume 1232 case 2: // memory_order_acquire 1233 Builder.CreateFence(llvm::Acquire, Scope); 1234 break; 1235 case 3: // memory_order_release 1236 Builder.CreateFence(llvm::Release, Scope); 1237 break; 1238 
case 4: // memory_order_acq_rel 1239 Builder.CreateFence(llvm::AcquireRelease, Scope); 1240 break; 1241 case 5: // memory_order_seq_cst 1242 Builder.CreateFence(llvm::SequentiallyConsistent, Scope); 1243 break; 1244 } 1245 return RValue::get(0); 1246 } 1247 1248 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 1249 AcquireBB = createBasicBlock("acquire", CurFn); 1250 ReleaseBB = createBasicBlock("release", CurFn); 1251 AcqRelBB = createBasicBlock("acqrel", CurFn); 1252 SeqCstBB = createBasicBlock("seqcst", CurFn); 1253 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1254 1255 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1256 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 1257 1258 Builder.SetInsertPoint(AcquireBB); 1259 Builder.CreateFence(llvm::Acquire, Scope); 1260 Builder.CreateBr(ContBB); 1261 SI->addCase(Builder.getInt32(1), AcquireBB); 1262 SI->addCase(Builder.getInt32(2), AcquireBB); 1263 1264 Builder.SetInsertPoint(ReleaseBB); 1265 Builder.CreateFence(llvm::Release, Scope); 1266 Builder.CreateBr(ContBB); 1267 SI->addCase(Builder.getInt32(3), ReleaseBB); 1268 1269 Builder.SetInsertPoint(AcqRelBB); 1270 Builder.CreateFence(llvm::AcquireRelease, Scope); 1271 Builder.CreateBr(ContBB); 1272 SI->addCase(Builder.getInt32(4), AcqRelBB); 1273 1274 Builder.SetInsertPoint(SeqCstBB); 1275 Builder.CreateFence(llvm::SequentiallyConsistent, Scope); 1276 Builder.CreateBr(ContBB); 1277 SI->addCase(Builder.getInt32(5), SeqCstBB); 1278 1279 Builder.SetInsertPoint(ContBB); 1280 return RValue::get(0); 1281 } 1282 1283 // Library functions with special handling. 
1284 case Builtin::BIsqrt: 1285 case Builtin::BIsqrtf: 1286 case Builtin::BIsqrtl: { 1287 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 1288 // in finite- or unsafe-math mode (the intrinsic has different semantics 1289 // for handling negative numbers compared to the library function, so 1290 // -fmath-errno=0 is not enough). 1291 if (!FD->hasAttr<ConstAttr>()) 1292 break; 1293 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 1294 CGM.getCodeGenOpts().NoNaNsFPMath)) 1295 break; 1296 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 1297 llvm::Type *ArgType = Arg0->getType(); 1298 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 1299 return RValue::get(Builder.CreateCall(F, Arg0)); 1300 } 1301 1302 case Builtin::BIpow: 1303 case Builtin::BIpowf: 1304 case Builtin::BIpowl: { 1305 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 1306 if (!FD->hasAttr<ConstAttr>()) 1307 break; 1308 Value *Base = EmitScalarExpr(E->getArg(0)); 1309 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1310 llvm::Type *ArgType = Base->getType(); 1311 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); 1312 return RValue::get(Builder.CreateCall2(F, Base, Exponent)); 1313 break; 1314 } 1315 1316 case Builtin::BIfma: 1317 case Builtin::BIfmaf: 1318 case Builtin::BIfmal: 1319 case Builtin::BI__builtin_fma: 1320 case Builtin::BI__builtin_fmaf: 1321 case Builtin::BI__builtin_fmal: { 1322 // Rewrite fma to intrinsic. 
1323 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 1324 llvm::Type *ArgType = FirstArg->getType(); 1325 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); 1326 return RValue::get(Builder.CreateCall3(F, FirstArg, 1327 EmitScalarExpr(E->getArg(1)), 1328 EmitScalarExpr(E->getArg(2)))); 1329 } 1330 1331 case Builtin::BI__builtin_signbit: 1332 case Builtin::BI__builtin_signbitf: 1333 case Builtin::BI__builtin_signbitl: { 1334 LLVMContext &C = CGM.getLLVMContext(); 1335 1336 Value *Arg = EmitScalarExpr(E->getArg(0)); 1337 llvm::Type *ArgTy = Arg->getType(); 1338 if (ArgTy->isPPC_FP128Ty()) 1339 break; // FIXME: I'm not sure what the right implementation is here. 1340 int ArgWidth = ArgTy->getPrimitiveSizeInBits(); 1341 llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth); 1342 Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy); 1343 Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy); 1344 Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp); 1345 return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType()))); 1346 } 1347 case Builtin::BI__builtin_annotation: { 1348 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 1349 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 1350 AnnVal->getType()); 1351 1352 // Get the annotation string, go through casts. Sema requires this to be a 1353 // non-wide string literal, potentially casted, so the cast<> is safe. 
1354 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 1355 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 1356 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 1357 } 1358 case Builtin::BI__builtin_addcb: 1359 case Builtin::BI__builtin_addcs: 1360 case Builtin::BI__builtin_addc: 1361 case Builtin::BI__builtin_addcl: 1362 case Builtin::BI__builtin_addcll: 1363 case Builtin::BI__builtin_subcb: 1364 case Builtin::BI__builtin_subcs: 1365 case Builtin::BI__builtin_subc: 1366 case Builtin::BI__builtin_subcl: 1367 case Builtin::BI__builtin_subcll: { 1368 1369 // We translate all of these builtins from expressions of the form: 1370 // int x = ..., y = ..., carryin = ..., carryout, result; 1371 // result = __builtin_addc(x, y, carryin, &carryout); 1372 // 1373 // to LLVM IR of the form: 1374 // 1375 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 1376 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 1377 // %carry1 = extractvalue {i32, i1} %tmp1, 1 1378 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 1379 // i32 %carryin) 1380 // %result = extractvalue {i32, i1} %tmp2, 0 1381 // %carry2 = extractvalue {i32, i1} %tmp2, 1 1382 // %tmp3 = or i1 %carry1, %carry2 1383 // %tmp4 = zext i1 %tmp3 to i32 1384 // store i32 %tmp4, i32* %carryout 1385 1386 // Scalarize our inputs. 1387 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1388 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1389 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 1390 std::pair<llvm::Value*, unsigned> CarryOutPtr = 1391 EmitPointerWithAlignment(E->getArg(3)); 1392 1393 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 
1394 llvm::Intrinsic::ID IntrinsicId; 1395 switch (BuiltinID) { 1396 default: llvm_unreachable("Unknown multiprecision builtin id."); 1397 case Builtin::BI__builtin_addcb: 1398 case Builtin::BI__builtin_addcs: 1399 case Builtin::BI__builtin_addc: 1400 case Builtin::BI__builtin_addcl: 1401 case Builtin::BI__builtin_addcll: 1402 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1403 break; 1404 case Builtin::BI__builtin_subcb: 1405 case Builtin::BI__builtin_subcs: 1406 case Builtin::BI__builtin_subc: 1407 case Builtin::BI__builtin_subcl: 1408 case Builtin::BI__builtin_subcll: 1409 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1410 break; 1411 } 1412 1413 // Construct our resulting LLVM IR expression. 1414 llvm::Value *Carry1; 1415 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 1416 X, Y, Carry1); 1417 llvm::Value *Carry2; 1418 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 1419 Sum1, Carryin, Carry2); 1420 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 1421 X->getType()); 1422 llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut, 1423 CarryOutPtr.first); 1424 CarryOutStore->setAlignment(CarryOutPtr.second); 1425 return RValue::get(Sum2); 1426 } 1427 case Builtin::BI__builtin_uadd_overflow: 1428 case Builtin::BI__builtin_uaddl_overflow: 1429 case Builtin::BI__builtin_uaddll_overflow: 1430 case Builtin::BI__builtin_usub_overflow: 1431 case Builtin::BI__builtin_usubl_overflow: 1432 case Builtin::BI__builtin_usubll_overflow: 1433 case Builtin::BI__builtin_umul_overflow: 1434 case Builtin::BI__builtin_umull_overflow: 1435 case Builtin::BI__builtin_umulll_overflow: 1436 case Builtin::BI__builtin_sadd_overflow: 1437 case Builtin::BI__builtin_saddl_overflow: 1438 case Builtin::BI__builtin_saddll_overflow: 1439 case Builtin::BI__builtin_ssub_overflow: 1440 case Builtin::BI__builtin_ssubl_overflow: 1441 case Builtin::BI__builtin_ssubll_overflow: 1442 case Builtin::BI__builtin_smul_overflow: 1443 case 
Builtin::BI__builtin_smull_overflow: 1444 case Builtin::BI__builtin_smulll_overflow: { 1445 1446 // We translate all of these builtins directly to the relevant llvm IR node. 1447 1448 // Scalarize our inputs. 1449 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1450 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1451 std::pair<llvm::Value *, unsigned> SumOutPtr = 1452 EmitPointerWithAlignment(E->getArg(2)); 1453 1454 // Decide which of the overflow intrinsics we are lowering to: 1455 llvm::Intrinsic::ID IntrinsicId; 1456 switch (BuiltinID) { 1457 default: llvm_unreachable("Unknown security overflow builtin id."); 1458 case Builtin::BI__builtin_uadd_overflow: 1459 case Builtin::BI__builtin_uaddl_overflow: 1460 case Builtin::BI__builtin_uaddll_overflow: 1461 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1462 break; 1463 case Builtin::BI__builtin_usub_overflow: 1464 case Builtin::BI__builtin_usubl_overflow: 1465 case Builtin::BI__builtin_usubll_overflow: 1466 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1467 break; 1468 case Builtin::BI__builtin_umul_overflow: 1469 case Builtin::BI__builtin_umull_overflow: 1470 case Builtin::BI__builtin_umulll_overflow: 1471 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 1472 break; 1473 case Builtin::BI__builtin_sadd_overflow: 1474 case Builtin::BI__builtin_saddl_overflow: 1475 case Builtin::BI__builtin_saddll_overflow: 1476 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 1477 break; 1478 case Builtin::BI__builtin_ssub_overflow: 1479 case Builtin::BI__builtin_ssubl_overflow: 1480 case Builtin::BI__builtin_ssubll_overflow: 1481 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 1482 break; 1483 case Builtin::BI__builtin_smul_overflow: 1484 case Builtin::BI__builtin_smull_overflow: 1485 case Builtin::BI__builtin_smulll_overflow: 1486 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 1487 break; 1488 } 1489 1490 1491 llvm::Value *Carry; 1492 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 
1493 llvm::StoreInst *SumOutStore = Builder.CreateStore(Sum, SumOutPtr.first); 1494 SumOutStore->setAlignment(SumOutPtr.second); 1495 1496 return RValue::get(Carry); 1497 } 1498 case Builtin::BI__builtin_addressof: 1499 return RValue::get(EmitLValue(E->getArg(0)).getAddress()); 1500 case Builtin::BI__noop: 1501 return RValue::get(0); 1502 } 1503 1504 // If this is an alias for a lib function (e.g. __builtin_sin), emit 1505 // the call using the normal call path, but using the unmangled 1506 // version of the function name. 1507 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 1508 return emitLibraryCall(*this, FD, E, 1509 CGM.getBuiltinLibFunction(FD, BuiltinID)); 1510 1511 // If this is a predefined lib function (e.g. malloc), emit the call 1512 // using exactly the normal call path. 1513 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 1514 return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee())); 1515 1516 // See if we have a target specific intrinsic. 1517 const char *Name = getContext().BuiltinInfo.GetName(BuiltinID); 1518 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 1519 if (const char *Prefix = 1520 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) 1521 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name); 1522 1523 if (IntrinsicID != Intrinsic::not_intrinsic) { 1524 SmallVector<Value*, 16> Args; 1525 1526 // Find out if any arguments are required to be integer constant 1527 // expressions. 1528 unsigned ICEArguments = 0; 1529 ASTContext::GetBuiltinTypeError Error; 1530 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 1531 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 1532 1533 Function *F = CGM.getIntrinsic(IntrinsicID); 1534 llvm::FunctionType *FTy = F->getFunctionType(); 1535 1536 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 1537 Value *ArgValue; 1538 // If this is a normal argument, just emit it as a scalar. 
1539 if ((ICEArguments & (1 << i)) == 0) { 1540 ArgValue = EmitScalarExpr(E->getArg(i)); 1541 } else { 1542 // If this is required to be a constant, constant fold it so that we 1543 // know that the generated intrinsic gets a ConstantInt. 1544 llvm::APSInt Result; 1545 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); 1546 assert(IsConst && "Constant arg isn't actually constant?"); 1547 (void)IsConst; 1548 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 1549 } 1550 1551 // If the intrinsic arg type is different from the builtin arg type 1552 // we need to do a bit cast. 1553 llvm::Type *PTy = FTy->getParamType(i); 1554 if (PTy != ArgValue->getType()) { 1555 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 1556 "Must be able to losslessly bit cast to param"); 1557 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 1558 } 1559 1560 Args.push_back(ArgValue); 1561 } 1562 1563 Value *V = Builder.CreateCall(F, Args); 1564 QualType BuiltinRetType = E->getType(); 1565 1566 llvm::Type *RetTy = VoidTy; 1567 if (!BuiltinRetType->isVoidType()) 1568 RetTy = ConvertType(BuiltinRetType); 1569 1570 if (RetTy != V->getType()) { 1571 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 1572 "Must be able to losslessly bit cast result type"); 1573 V = Builder.CreateBitCast(V, RetTy); 1574 } 1575 1576 return RValue::get(V); 1577 } 1578 1579 // See if we have a target specific builtin that needs to be lowered. 1580 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 1581 return RValue::get(V); 1582 1583 ErrorUnsupported(E, "builtin function"); 1584 1585 // Unknown builtin, for now just dump it out and return undef. 
1586 return GetUndefRValue(E->getType()); 1587} 1588 1589Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 1590 const CallExpr *E) { 1591 switch (getTarget().getTriple().getArch()) { 1592 case llvm::Triple::aarch64: 1593 return EmitAArch64BuiltinExpr(BuiltinID, E); 1594 case llvm::Triple::arm: 1595 case llvm::Triple::thumb: 1596 return EmitARMBuiltinExpr(BuiltinID, E); 1597 case llvm::Triple::x86: 1598 case llvm::Triple::x86_64: 1599 return EmitX86BuiltinExpr(BuiltinID, E); 1600 case llvm::Triple::ppc: 1601 case llvm::Triple::ppc64: 1602 case llvm::Triple::ppc64le: 1603 return EmitPPCBuiltinExpr(BuiltinID, E); 1604 default: 1605 return 0; 1606 } 1607} 1608 1609static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 1610 NeonTypeFlags TypeFlags, 1611 bool V1Ty=false) { 1612 int IsQuad = TypeFlags.isQuad(); 1613 switch (TypeFlags.getEltType()) { 1614 case NeonTypeFlags::Int8: 1615 case NeonTypeFlags::Poly8: 1616 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 1617 case NeonTypeFlags::Int16: 1618 case NeonTypeFlags::Poly16: 1619 case NeonTypeFlags::Float16: 1620 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 1621 case NeonTypeFlags::Int32: 1622 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); 1623 case NeonTypeFlags::Int64: 1624 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 1625 case NeonTypeFlags::Float32: 1626 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); 1627 case NeonTypeFlags::Float64: 1628 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 
1 : (1 << IsQuad)); 1629 } 1630 llvm_unreachable("Unknown vector element type!"); 1631} 1632 1633Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 1634 unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements(); 1635 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 1636 return Builder.CreateShuffleVector(V, V, SV, "lane"); 1637} 1638 1639Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 1640 const char *name, 1641 unsigned shift, bool rightshift) { 1642 unsigned j = 0; 1643 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 1644 ai != ae; ++ai, ++j) 1645 if (shift > 0 && shift == j) 1646 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 1647 else 1648 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 1649 1650 return Builder.CreateCall(F, Ops, name); 1651} 1652 1653Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 1654 bool neg) { 1655 int SV = cast<ConstantInt>(V)->getSExtValue(); 1656 1657 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 1658 llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV); 1659 return llvm::ConstantVector::getSplat(VTy->getNumElements(), C); 1660} 1661 1662// \brief Right-shift a vector by a constant. 1663Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 1664 llvm::Type *Ty, bool usgn, 1665 const char *name) { 1666 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 1667 1668 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 1669 int EltSize = VTy->getScalarSizeInBits(); 1670 1671 Vec = Builder.CreateBitCast(Vec, Ty); 1672 1673 // lshr/ashr are undefined when the shift amount is equal to the vector 1674 // element size. 1675 if (ShiftAmt == EltSize) { 1676 if (usgn) { 1677 // Right-shifting an unsigned value by its size yields 0. 
1678 llvm::Constant *Zero = ConstantInt::get(VTy->getElementType(), 0); 1679 return llvm::ConstantVector::getSplat(VTy->getNumElements(), Zero); 1680 } else { 1681 // Right-shifting a signed value by its size is equivalent 1682 // to a shift of size-1. 1683 --ShiftAmt; 1684 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 1685 } 1686 } 1687 1688 Shift = EmitNeonShiftVector(Shift, Ty, false); 1689 if (usgn) 1690 return Builder.CreateLShr(Vec, Shift, name); 1691 else 1692 return Builder.CreateAShr(Vec, Shift, name); 1693} 1694 1695/// GetPointeeAlignment - Given an expression with a pointer type, find the 1696/// alignment of the type referenced by the pointer. Skip over implicit 1697/// casts. 1698std::pair<llvm::Value*, unsigned> 1699CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) { 1700 assert(Addr->getType()->isPointerType()); 1701 Addr = Addr->IgnoreParens(); 1702 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Addr)) { 1703 if ((ICE->getCastKind() == CK_BitCast || ICE->getCastKind() == CK_NoOp) && 1704 ICE->getSubExpr()->getType()->isPointerType()) { 1705 std::pair<llvm::Value*, unsigned> Ptr = 1706 EmitPointerWithAlignment(ICE->getSubExpr()); 1707 Ptr.first = Builder.CreateBitCast(Ptr.first, 1708 ConvertType(Addr->getType())); 1709 return Ptr; 1710 } else if (ICE->getCastKind() == CK_ArrayToPointerDecay) { 1711 LValue LV = EmitLValue(ICE->getSubExpr()); 1712 unsigned Align = LV.getAlignment().getQuantity(); 1713 if (!Align) { 1714 // FIXME: Once LValues are fixed to always set alignment, 1715 // zap this code. 
1716 QualType PtTy = ICE->getSubExpr()->getType(); 1717 if (!PtTy->isIncompleteType()) 1718 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 1719 else 1720 Align = 1; 1721 } 1722 return std::make_pair(LV.getAddress(), Align); 1723 } 1724 } 1725 if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(Addr)) { 1726 if (UO->getOpcode() == UO_AddrOf) { 1727 LValue LV = EmitLValue(UO->getSubExpr()); 1728 unsigned Align = LV.getAlignment().getQuantity(); 1729 if (!Align) { 1730 // FIXME: Once LValues are fixed to always set alignment, 1731 // zap this code. 1732 QualType PtTy = UO->getSubExpr()->getType(); 1733 if (!PtTy->isIncompleteType()) 1734 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 1735 else 1736 Align = 1; 1737 } 1738 return std::make_pair(LV.getAddress(), Align); 1739 } 1740 } 1741 1742 unsigned Align = 1; 1743 QualType PtTy = Addr->getType()->getPointeeType(); 1744 if (!PtTy->isIncompleteType()) 1745 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 1746 1747 return std::make_pair(EmitScalarExpr(Addr), Align); 1748} 1749 1750static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, 1751 unsigned BuiltinID, 1752 const CallExpr *E) { 1753 unsigned int Int = 0; 1754 // Scalar result generated across vectors 1755 bool AcrossVec = false; 1756 // Extend element of one-element vector 1757 bool ExtendEle = false; 1758 bool OverloadInt = false; 1759 bool OverloadCmpInt = false; 1760 bool OverloadWideInt = false; 1761 bool OverloadNarrowInt = false; 1762 const char *s = NULL; 1763 1764 SmallVector<Value *, 4> Ops; 1765 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { 1766 Ops.push_back(CGF.EmitScalarExpr(E->getArg(i))); 1767 } 1768 1769 // AArch64 scalar builtins are not overloaded, they do not have an extra 1770 // argument that specifies the vector type, need to handle each case. 
1771 switch (BuiltinID) { 1772 default: break; 1773 // Scalar Add 1774 case AArch64::BI__builtin_neon_vaddd_s64: 1775 Int = Intrinsic::aarch64_neon_vaddds; 1776 s = "vaddds"; break; 1777 case AArch64::BI__builtin_neon_vaddd_u64: 1778 Int = Intrinsic::aarch64_neon_vadddu; 1779 s = "vadddu"; break; 1780 // Scalar Sub 1781 case AArch64::BI__builtin_neon_vsubd_s64: 1782 Int = Intrinsic::aarch64_neon_vsubds; 1783 s = "vsubds"; break; 1784 case AArch64::BI__builtin_neon_vsubd_u64: 1785 Int = Intrinsic::aarch64_neon_vsubdu; 1786 s = "vsubdu"; break; 1787 // Scalar Saturating Add 1788 case AArch64::BI__builtin_neon_vqaddb_s8: 1789 case AArch64::BI__builtin_neon_vqaddh_s16: 1790 case AArch64::BI__builtin_neon_vqadds_s32: 1791 case AArch64::BI__builtin_neon_vqaddd_s64: 1792 Int = Intrinsic::aarch64_neon_vqadds; 1793 s = "vqadds"; OverloadInt = true; break; 1794 case AArch64::BI__builtin_neon_vqaddb_u8: 1795 case AArch64::BI__builtin_neon_vqaddh_u16: 1796 case AArch64::BI__builtin_neon_vqadds_u32: 1797 case AArch64::BI__builtin_neon_vqaddd_u64: 1798 Int = Intrinsic::aarch64_neon_vqaddu; 1799 s = "vqaddu"; OverloadInt = true; break; 1800 // Scalar Saturating Sub 1801 case AArch64::BI__builtin_neon_vqsubb_s8: 1802 case AArch64::BI__builtin_neon_vqsubh_s16: 1803 case AArch64::BI__builtin_neon_vqsubs_s32: 1804 case AArch64::BI__builtin_neon_vqsubd_s64: 1805 Int = Intrinsic::aarch64_neon_vqsubs; 1806 s = "vqsubs"; OverloadInt = true; break; 1807 case AArch64::BI__builtin_neon_vqsubb_u8: 1808 case AArch64::BI__builtin_neon_vqsubh_u16: 1809 case AArch64::BI__builtin_neon_vqsubs_u32: 1810 case AArch64::BI__builtin_neon_vqsubd_u64: 1811 Int = Intrinsic::aarch64_neon_vqsubu; 1812 s = "vqsubu"; OverloadInt = true; break; 1813 // Scalar Shift Left 1814 case AArch64::BI__builtin_neon_vshld_s64: 1815 Int = Intrinsic::aarch64_neon_vshlds; 1816 s = "vshlds"; break; 1817 case AArch64::BI__builtin_neon_vshld_u64: 1818 Int = Intrinsic::aarch64_neon_vshldu; 1819 s = "vshldu"; break; 1820 // 
Scalar Saturating Shift Left 1821 case AArch64::BI__builtin_neon_vqshlb_s8: 1822 case AArch64::BI__builtin_neon_vqshlh_s16: 1823 case AArch64::BI__builtin_neon_vqshls_s32: 1824 case AArch64::BI__builtin_neon_vqshld_s64: 1825 Int = Intrinsic::aarch64_neon_vqshls; 1826 s = "vqshls"; OverloadInt = true; break; 1827 case AArch64::BI__builtin_neon_vqshlb_u8: 1828 case AArch64::BI__builtin_neon_vqshlh_u16: 1829 case AArch64::BI__builtin_neon_vqshls_u32: 1830 case AArch64::BI__builtin_neon_vqshld_u64: 1831 Int = Intrinsic::aarch64_neon_vqshlu; 1832 s = "vqshlu"; OverloadInt = true; break; 1833 // Scalar Rouding Shift Left 1834 case AArch64::BI__builtin_neon_vrshld_s64: 1835 Int = Intrinsic::aarch64_neon_vrshlds; 1836 s = "vrshlds"; break; 1837 case AArch64::BI__builtin_neon_vrshld_u64: 1838 Int = Intrinsic::aarch64_neon_vrshldu; 1839 s = "vrshldu"; break; 1840 // Scalar Saturating Rouding Shift Left 1841 case AArch64::BI__builtin_neon_vqrshlb_s8: 1842 case AArch64::BI__builtin_neon_vqrshlh_s16: 1843 case AArch64::BI__builtin_neon_vqrshls_s32: 1844 case AArch64::BI__builtin_neon_vqrshld_s64: 1845 Int = Intrinsic::aarch64_neon_vqrshls; 1846 s = "vqrshls"; OverloadInt = true; break; 1847 case AArch64::BI__builtin_neon_vqrshlb_u8: 1848 case AArch64::BI__builtin_neon_vqrshlh_u16: 1849 case AArch64::BI__builtin_neon_vqrshls_u32: 1850 case AArch64::BI__builtin_neon_vqrshld_u64: 1851 Int = Intrinsic::aarch64_neon_vqrshlu; 1852 s = "vqrshlu"; OverloadInt = true; break; 1853 // Scalar Reduce Pairwise Add 1854 case AArch64::BI__builtin_neon_vpaddd_s64: 1855 Int = Intrinsic::aarch64_neon_vpadd; s = "vpadd"; 1856 break; 1857 case AArch64::BI__builtin_neon_vpadds_f32: 1858 Int = Intrinsic::aarch64_neon_vpfadd; s = "vpfadd"; 1859 break; 1860 case AArch64::BI__builtin_neon_vpaddd_f64: 1861 Int = Intrinsic::aarch64_neon_vpfaddq; s = "vpfaddq"; 1862 break; 1863 // Scalar Reduce Pairwise Floating Point Max 1864 case AArch64::BI__builtin_neon_vpmaxs_f32: 1865 Int = 
Intrinsic::aarch64_neon_vpmax; s = "vpmax"; 1866 break; 1867 case AArch64::BI__builtin_neon_vpmaxqd_f64: 1868 Int = Intrinsic::aarch64_neon_vpmaxq; s = "vpmaxq"; 1869 break; 1870 // Scalar Reduce Pairwise Floating Point Min 1871 case AArch64::BI__builtin_neon_vpmins_f32: 1872 Int = Intrinsic::aarch64_neon_vpmin; s = "vpmin"; 1873 break; 1874 case AArch64::BI__builtin_neon_vpminqd_f64: 1875 Int = Intrinsic::aarch64_neon_vpminq; s = "vpminq"; 1876 break; 1877 // Scalar Reduce Pairwise Floating Point Maxnm 1878 case AArch64::BI__builtin_neon_vpmaxnms_f32: 1879 Int = Intrinsic::aarch64_neon_vpfmaxnm; s = "vpfmaxnm"; 1880 break; 1881 case AArch64::BI__builtin_neon_vpmaxnmqd_f64: 1882 Int = Intrinsic::aarch64_neon_vpfmaxnmq; s = "vpfmaxnmq"; 1883 break; 1884 // Scalar Reduce Pairwise Floating Point Minnm 1885 case AArch64::BI__builtin_neon_vpminnms_f32: 1886 Int = Intrinsic::aarch64_neon_vpfminnm; s = "vpfminnm"; 1887 break; 1888 case AArch64::BI__builtin_neon_vpminnmqd_f64: 1889 Int = Intrinsic::aarch64_neon_vpfminnmq; s = "vpfminnmq"; 1890 break; 1891 // The followings are intrinsics with scalar results generated AcrossVec vectors 1892 case AArch64::BI__builtin_neon_vaddlv_s8: 1893 case AArch64::BI__builtin_neon_vaddlv_s16: 1894 case AArch64::BI__builtin_neon_vaddlvq_s8: 1895 case AArch64::BI__builtin_neon_vaddlvq_s16: 1896 case AArch64::BI__builtin_neon_vaddlvq_s32: 1897 Int = Intrinsic::aarch64_neon_saddlv; 1898 AcrossVec = true; ExtendEle = true; s = "saddlv"; break; 1899 case AArch64::BI__builtin_neon_vaddlv_u8: 1900 case AArch64::BI__builtin_neon_vaddlv_u16: 1901 case AArch64::BI__builtin_neon_vaddlvq_u8: 1902 case AArch64::BI__builtin_neon_vaddlvq_u16: 1903 case AArch64::BI__builtin_neon_vaddlvq_u32: 1904 Int = Intrinsic::aarch64_neon_uaddlv; 1905 AcrossVec = true; ExtendEle = true; s = "uaddlv"; break; 1906 case AArch64::BI__builtin_neon_vmaxv_s8: 1907 case AArch64::BI__builtin_neon_vmaxv_s16: 1908 case AArch64::BI__builtin_neon_vmaxvq_s8: 1909 case 
AArch64::BI__builtin_neon_vmaxvq_s16: 1910 case AArch64::BI__builtin_neon_vmaxvq_s32: 1911 Int = Intrinsic::aarch64_neon_smaxv; 1912 AcrossVec = true; ExtendEle = false; s = "smaxv"; break; 1913 case AArch64::BI__builtin_neon_vmaxv_u8: 1914 case AArch64::BI__builtin_neon_vmaxv_u16: 1915 case AArch64::BI__builtin_neon_vmaxvq_u8: 1916 case AArch64::BI__builtin_neon_vmaxvq_u16: 1917 case AArch64::BI__builtin_neon_vmaxvq_u32: 1918 Int = Intrinsic::aarch64_neon_umaxv; 1919 AcrossVec = true; ExtendEle = false; s = "umaxv"; break; 1920 case AArch64::BI__builtin_neon_vminv_s8: 1921 case AArch64::BI__builtin_neon_vminv_s16: 1922 case AArch64::BI__builtin_neon_vminvq_s8: 1923 case AArch64::BI__builtin_neon_vminvq_s16: 1924 case AArch64::BI__builtin_neon_vminvq_s32: 1925 Int = Intrinsic::aarch64_neon_sminv; 1926 AcrossVec = true; ExtendEle = false; s = "sminv"; break; 1927 case AArch64::BI__builtin_neon_vminv_u8: 1928 case AArch64::BI__builtin_neon_vminv_u16: 1929 case AArch64::BI__builtin_neon_vminvq_u8: 1930 case AArch64::BI__builtin_neon_vminvq_u16: 1931 case AArch64::BI__builtin_neon_vminvq_u32: 1932 Int = Intrinsic::aarch64_neon_uminv; 1933 AcrossVec = true; ExtendEle = false; s = "uminv"; break; 1934 case AArch64::BI__builtin_neon_vaddv_s8: 1935 case AArch64::BI__builtin_neon_vaddv_s16: 1936 case AArch64::BI__builtin_neon_vaddvq_s8: 1937 case AArch64::BI__builtin_neon_vaddvq_s16: 1938 case AArch64::BI__builtin_neon_vaddvq_s32: 1939 case AArch64::BI__builtin_neon_vaddv_u8: 1940 case AArch64::BI__builtin_neon_vaddv_u16: 1941 case AArch64::BI__builtin_neon_vaddvq_u8: 1942 case AArch64::BI__builtin_neon_vaddvq_u16: 1943 case AArch64::BI__builtin_neon_vaddvq_u32: 1944 Int = Intrinsic::aarch64_neon_vaddv; 1945 AcrossVec = true; ExtendEle = false; s = "vaddv"; break; 1946 case AArch64::BI__builtin_neon_vmaxvq_f32: 1947 Int = Intrinsic::aarch64_neon_vmaxv; 1948 AcrossVec = true; ExtendEle = false; s = "vmaxv"; break; 1949 case AArch64::BI__builtin_neon_vminvq_f32: 1950 Int = 
Intrinsic::aarch64_neon_vminv; 1951 AcrossVec = true; ExtendEle = false; s = "vminv"; break; 1952 case AArch64::BI__builtin_neon_vmaxnmvq_f32: 1953 Int = Intrinsic::aarch64_neon_vmaxnmv; 1954 AcrossVec = true; ExtendEle = false; s = "vmaxnmv"; break; 1955 case AArch64::BI__builtin_neon_vminnmvq_f32: 1956 Int = Intrinsic::aarch64_neon_vminnmv; 1957 AcrossVec = true; ExtendEle = false; s = "vminnmv"; break; 1958 // Scalar Integer Saturating Doubling Multiply Half High 1959 case AArch64::BI__builtin_neon_vqdmulhh_s16: 1960 case AArch64::BI__builtin_neon_vqdmulhs_s32: 1961 Int = Intrinsic::arm_neon_vqdmulh; 1962 s = "vqdmulh"; OverloadInt = true; break; 1963 // Scalar Integer Saturating Rounding Doubling Multiply Half High 1964 case AArch64::BI__builtin_neon_vqrdmulhh_s16: 1965 case AArch64::BI__builtin_neon_vqrdmulhs_s32: 1966 Int = Intrinsic::arm_neon_vqrdmulh; 1967 s = "vqrdmulh"; OverloadInt = true; break; 1968 // Scalar Floating-point Multiply Extended 1969 case AArch64::BI__builtin_neon_vmulxs_f32: 1970 case AArch64::BI__builtin_neon_vmulxd_f64: 1971 Int = Intrinsic::aarch64_neon_vmulx; 1972 s = "vmulx"; OverloadInt = true; break; 1973 // Scalar Floating-point Reciprocal Step and 1974 case AArch64::BI__builtin_neon_vrecpss_f32: 1975 case AArch64::BI__builtin_neon_vrecpsd_f64: 1976 Int = Intrinsic::arm_neon_vrecps; 1977 s = "vrecps"; OverloadInt = true; break; 1978 // Scalar Floating-point Reciprocal Square Root Step 1979 case AArch64::BI__builtin_neon_vrsqrtss_f32: 1980 case AArch64::BI__builtin_neon_vrsqrtsd_f64: 1981 Int = Intrinsic::arm_neon_vrsqrts; 1982 s = "vrsqrts"; OverloadInt = true; break; 1983 // Scalar Signed Integer Convert To Floating-point 1984 case AArch64::BI__builtin_neon_vcvts_f32_s32: 1985 Int = Intrinsic::aarch64_neon_vcvtf32_s32, 1986 s = "vcvtf"; OverloadInt = false; break; 1987 case AArch64::BI__builtin_neon_vcvtd_f64_s64: 1988 Int = Intrinsic::aarch64_neon_vcvtf64_s64, 1989 s = "vcvtf"; OverloadInt = false; break; 1990 // Scalar Unsigned 
Integer Convert To Floating-point 1991 case AArch64::BI__builtin_neon_vcvts_f32_u32: 1992 Int = Intrinsic::aarch64_neon_vcvtf32_u32, 1993 s = "vcvtf"; OverloadInt = false; break; 1994 case AArch64::BI__builtin_neon_vcvtd_f64_u64: 1995 Int = Intrinsic::aarch64_neon_vcvtf64_u64, 1996 s = "vcvtf"; OverloadInt = false; break; 1997 // Scalar Floating-point Reciprocal Estimate 1998 case AArch64::BI__builtin_neon_vrecpes_f32: 1999 case AArch64::BI__builtin_neon_vrecped_f64: 2000 Int = Intrinsic::arm_neon_vrecpe; 2001 s = "vrecpe"; OverloadInt = true; break; 2002 // Scalar Floating-point Reciprocal Exponent 2003 case AArch64::BI__builtin_neon_vrecpxs_f32: 2004 case AArch64::BI__builtin_neon_vrecpxd_f64: 2005 Int = Intrinsic::aarch64_neon_vrecpx; 2006 s = "vrecpx"; OverloadInt = true; break; 2007 // Scalar Floating-point Reciprocal Square Root Estimate 2008 case AArch64::BI__builtin_neon_vrsqrtes_f32: 2009 case AArch64::BI__builtin_neon_vrsqrted_f64: 2010 Int = Intrinsic::arm_neon_vrsqrte; 2011 s = "vrsqrte"; OverloadInt = true; break; 2012 // Scalar Compare Equal 2013 case AArch64::BI__builtin_neon_vceqd_s64: 2014 case AArch64::BI__builtin_neon_vceqd_u64: 2015 Int = Intrinsic::aarch64_neon_vceq; s = "vceq"; 2016 OverloadCmpInt = true; break; 2017 // Scalar Compare Equal To Zero 2018 case AArch64::BI__builtin_neon_vceqzd_s64: 2019 case AArch64::BI__builtin_neon_vceqzd_u64: 2020 Int = Intrinsic::aarch64_neon_vceq; s = "vceq"; 2021 // Add implicit zero operand. 
2022 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2023 OverloadCmpInt = true; break; 2024 // Scalar Compare Greater Than or Equal 2025 case AArch64::BI__builtin_neon_vcged_s64: 2026 Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; 2027 OverloadCmpInt = true; break; 2028 case AArch64::BI__builtin_neon_vcged_u64: 2029 Int = Intrinsic::aarch64_neon_vchs; s = "vcge"; 2030 OverloadCmpInt = true; break; 2031 // Scalar Compare Greater Than or Equal To Zero 2032 case AArch64::BI__builtin_neon_vcgezd_s64: 2033 Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; 2034 // Add implicit zero operand. 2035 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2036 OverloadCmpInt = true; break; 2037 // Scalar Compare Greater Than 2038 case AArch64::BI__builtin_neon_vcgtd_s64: 2039 Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; 2040 OverloadCmpInt = true; break; 2041 case AArch64::BI__builtin_neon_vcgtd_u64: 2042 Int = Intrinsic::aarch64_neon_vchi; s = "vcgt"; 2043 OverloadCmpInt = true; break; 2044 // Scalar Compare Greater Than Zero 2045 case AArch64::BI__builtin_neon_vcgtzd_s64: 2046 Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; 2047 // Add implicit zero operand. 2048 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2049 OverloadCmpInt = true; break; 2050 // Scalar Compare Less Than or Equal 2051 case AArch64::BI__builtin_neon_vcled_s64: 2052 Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; 2053 OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; 2054 case AArch64::BI__builtin_neon_vcled_u64: 2055 Int = Intrinsic::aarch64_neon_vchs; s = "vchs"; 2056 OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; 2057 // Scalar Compare Less Than or Equal To Zero 2058 case AArch64::BI__builtin_neon_vclezd_s64: 2059 Int = Intrinsic::aarch64_neon_vclez; s = "vcle"; 2060 // Add implicit zero operand. 
2061 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2062 OverloadCmpInt = true; break; 2063 // Scalar Compare Less Than 2064 case AArch64::BI__builtin_neon_vcltd_s64: 2065 Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; 2066 OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; 2067 case AArch64::BI__builtin_neon_vcltd_u64: 2068 Int = Intrinsic::aarch64_neon_vchi; s = "vchi"; 2069 OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; 2070 // Scalar Compare Less Than Zero 2071 case AArch64::BI__builtin_neon_vcltzd_s64: 2072 Int = Intrinsic::aarch64_neon_vcltz; s = "vclt"; 2073 // Add implicit zero operand. 2074 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2075 OverloadCmpInt = true; break; 2076 // Scalar Floating-point Compare Equal 2077 case AArch64::BI__builtin_neon_vceqs_f32: 2078 case AArch64::BI__builtin_neon_vceqd_f64: 2079 Int = Intrinsic::aarch64_neon_vceq; s = "vceq"; 2080 OverloadCmpInt = true; break; 2081 // Scalar Floating-point Compare Equal To Zero 2082 case AArch64::BI__builtin_neon_vceqzs_f32: 2083 case AArch64::BI__builtin_neon_vceqzd_f64: 2084 Int = Intrinsic::aarch64_neon_vceq; s = "vceq"; 2085 // Add implicit zero operand. 2086 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2087 OverloadCmpInt = true; break; 2088 // Scalar Floating-point Compare Greater Than Or Equal 2089 case AArch64::BI__builtin_neon_vcges_f32: 2090 case AArch64::BI__builtin_neon_vcged_f64: 2091 Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; 2092 OverloadCmpInt = true; break; 2093 // Scalar Floating-point Compare Greater Than Or Equal To Zero 2094 case AArch64::BI__builtin_neon_vcgezs_f32: 2095 case AArch64::BI__builtin_neon_vcgezd_f64: 2096 Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; 2097 // Add implicit zero operand. 
2098 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2099 OverloadCmpInt = true; break; 2100 // Scalar Floating-point Compare Greather Than 2101 case AArch64::BI__builtin_neon_vcgts_f32: 2102 case AArch64::BI__builtin_neon_vcgtd_f64: 2103 Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; 2104 OverloadCmpInt = true; break; 2105 // Scalar Floating-point Compare Greather Than Zero 2106 case AArch64::BI__builtin_neon_vcgtzs_f32: 2107 case AArch64::BI__builtin_neon_vcgtzd_f64: 2108 Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; 2109 // Add implicit zero operand. 2110 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2111 OverloadCmpInt = true; break; 2112 // Scalar Floating-point Compare Less Than or Equal 2113 case AArch64::BI__builtin_neon_vcles_f32: 2114 case AArch64::BI__builtin_neon_vcled_f64: 2115 Int = Intrinsic::aarch64_neon_vcge; s = "vcge"; 2116 OverloadCmpInt = true; break; 2117 // Scalar Floating-point Compare Less Than Or Equal To Zero 2118 case AArch64::BI__builtin_neon_vclezs_f32: 2119 case AArch64::BI__builtin_neon_vclezd_f64: 2120 Int = Intrinsic::aarch64_neon_vclez; s = "vcle"; 2121 // Add implicit zero operand. 2122 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2123 OverloadCmpInt = true; break; 2124 // Scalar Floating-point Compare Less Than Zero 2125 case AArch64::BI__builtin_neon_vclts_f32: 2126 case AArch64::BI__builtin_neon_vcltd_f64: 2127 Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt"; 2128 OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; 2129 // Scalar Floating-point Compare Less Than Zero 2130 case AArch64::BI__builtin_neon_vcltzs_f32: 2131 case AArch64::BI__builtin_neon_vcltzd_f64: 2132 Int = Intrinsic::aarch64_neon_vcltz; s = "vclt"; 2133 // Add implicit zero operand. 
2134 Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); 2135 OverloadCmpInt = true; break; 2136 // Scalar Floating-point Absolute Compare Greater Than Or Equal 2137 case AArch64::BI__builtin_neon_vcages_f32: 2138 case AArch64::BI__builtin_neon_vcaged_f64: 2139 Int = Intrinsic::aarch64_neon_vcage; s = "vcage"; 2140 OverloadCmpInt = true; break; 2141 // Scalar Floating-point Absolute Compare Greater Than 2142 case AArch64::BI__builtin_neon_vcagts_f32: 2143 case AArch64::BI__builtin_neon_vcagtd_f64: 2144 Int = Intrinsic::aarch64_neon_vcagt; s = "vcagt"; 2145 OverloadCmpInt = true; break; 2146 // Scalar Floating-point Absolute Compare Less Than Or Equal 2147 case AArch64::BI__builtin_neon_vcales_f32: 2148 case AArch64::BI__builtin_neon_vcaled_f64: 2149 Int = Intrinsic::aarch64_neon_vcage; s = "vcage"; 2150 OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; 2151 // Scalar Floating-point Absolute Compare Less Than 2152 case AArch64::BI__builtin_neon_vcalts_f32: 2153 case AArch64::BI__builtin_neon_vcaltd_f64: 2154 Int = Intrinsic::aarch64_neon_vcagt; s = "vcalt"; 2155 OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break; 2156 // Scalar Compare Bitwise Test Bits 2157 case AArch64::BI__builtin_neon_vtstd_s64: 2158 case AArch64::BI__builtin_neon_vtstd_u64: 2159 Int = Intrinsic::aarch64_neon_vtstd; s = "vtst"; 2160 OverloadCmpInt = true; break; 2161 // Scalar Absolute Value 2162 case AArch64::BI__builtin_neon_vabsd_s64: 2163 Int = Intrinsic::aarch64_neon_vabs; 2164 s = "vabs"; OverloadInt = false; break; 2165 // Scalar Signed Saturating Absolute Value 2166 case AArch64::BI__builtin_neon_vqabsb_s8: 2167 case AArch64::BI__builtin_neon_vqabsh_s16: 2168 case AArch64::BI__builtin_neon_vqabss_s32: 2169 case AArch64::BI__builtin_neon_vqabsd_s64: 2170 Int = Intrinsic::arm_neon_vqabs; 2171 s = "vqabs"; OverloadInt = true; break; 2172 // Scalar Negate 2173 case AArch64::BI__builtin_neon_vnegd_s64: 2174 Int = Intrinsic::aarch64_neon_vneg; 2175 s = "vneg"; 
OverloadInt = false; break; 2176 // Scalar Signed Saturating Negate 2177 case AArch64::BI__builtin_neon_vqnegb_s8: 2178 case AArch64::BI__builtin_neon_vqnegh_s16: 2179 case AArch64::BI__builtin_neon_vqnegs_s32: 2180 case AArch64::BI__builtin_neon_vqnegd_s64: 2181 Int = Intrinsic::arm_neon_vqneg; 2182 s = "vqneg"; OverloadInt = true; break; 2183 // Scalar Signed Saturating Accumulated of Unsigned Value 2184 case AArch64::BI__builtin_neon_vuqaddb_s8: 2185 case AArch64::BI__builtin_neon_vuqaddh_s16: 2186 case AArch64::BI__builtin_neon_vuqadds_s32: 2187 case AArch64::BI__builtin_neon_vuqaddd_s64: 2188 Int = Intrinsic::aarch64_neon_vuqadd; 2189 s = "vuqadd"; OverloadInt = true; break; 2190 // Scalar Unsigned Saturating Accumulated of Signed Value 2191 case AArch64::BI__builtin_neon_vsqaddb_u8: 2192 case AArch64::BI__builtin_neon_vsqaddh_u16: 2193 case AArch64::BI__builtin_neon_vsqadds_u32: 2194 case AArch64::BI__builtin_neon_vsqaddd_u64: 2195 Int = Intrinsic::aarch64_neon_vsqadd; 2196 s = "vsqadd"; OverloadInt = true; break; 2197 // Signed Saturating Doubling Multiply-Add Long 2198 case AArch64::BI__builtin_neon_vqdmlalh_s16: 2199 case AArch64::BI__builtin_neon_vqdmlals_s32: 2200 Int = Intrinsic::aarch64_neon_vqdmlal; 2201 s = "vqdmlal"; OverloadWideInt = true; break; 2202 // Signed Saturating Doubling Multiply-Subtract Long 2203 case AArch64::BI__builtin_neon_vqdmlslh_s16: 2204 case AArch64::BI__builtin_neon_vqdmlsls_s32: 2205 Int = Intrinsic::aarch64_neon_vqdmlsl; 2206 s = "vqdmlsl"; OverloadWideInt = true; break; 2207 // Signed Saturating Doubling Multiply Long 2208 case AArch64::BI__builtin_neon_vqdmullh_s16: 2209 case AArch64::BI__builtin_neon_vqdmulls_s32: 2210 Int = Intrinsic::aarch64_neon_vqdmull; 2211 s = "vqdmull"; OverloadWideInt = true; break; 2212 // Scalar Signed Saturating Extract Unsigned Narrow 2213 case AArch64::BI__builtin_neon_vqmovunh_s16: 2214 case AArch64::BI__builtin_neon_vqmovuns_s32: 2215 case AArch64::BI__builtin_neon_vqmovund_s64: 2216 Int = 
Intrinsic::arm_neon_vqmovnsu; 2217 s = "vqmovun"; OverloadNarrowInt = true; break; 2218 // Scalar Signed Saturating Extract Narrow 2219 case AArch64::BI__builtin_neon_vqmovnh_s16: 2220 case AArch64::BI__builtin_neon_vqmovns_s32: 2221 case AArch64::BI__builtin_neon_vqmovnd_s64: 2222 Int = Intrinsic::arm_neon_vqmovns; 2223 s = "vqmovn"; OverloadNarrowInt = true; break; 2224 // Scalar Unsigned Saturating Extract Narrow 2225 case AArch64::BI__builtin_neon_vqmovnh_u16: 2226 case AArch64::BI__builtin_neon_vqmovns_u32: 2227 case AArch64::BI__builtin_neon_vqmovnd_u64: 2228 Int = Intrinsic::arm_neon_vqmovnu; 2229 s = "vqmovn"; OverloadNarrowInt = true; break; 2230 // Scalar Signed Shift Right (Immediate) 2231 case AArch64::BI__builtin_neon_vshrd_n_s64: 2232 Int = Intrinsic::aarch64_neon_vshrds_n; 2233 s = "vsshr"; OverloadInt = false; break; 2234 // Scalar Unsigned Shift Right (Immediate) 2235 case AArch64::BI__builtin_neon_vshrd_n_u64: 2236 Int = Intrinsic::aarch64_neon_vshrdu_n; 2237 s = "vushr"; OverloadInt = false; break; 2238 // Scalar Signed Rounding Shift Right (Immediate) 2239 case AArch64::BI__builtin_neon_vrshrd_n_s64: 2240 Int = Intrinsic::aarch64_neon_vrshrds_n; 2241 s = "vsrshr"; OverloadInt = false; break; 2242 // Scalar Unsigned Rounding Shift Right (Immediate) 2243 case AArch64::BI__builtin_neon_vrshrd_n_u64: 2244 Int = Intrinsic::aarch64_neon_vrshrdu_n; 2245 s = "vurshr"; OverloadInt = false; break; 2246 // Scalar Signed Shift Right and Accumulate (Immediate) 2247 case AArch64::BI__builtin_neon_vsrad_n_s64: 2248 Int = Intrinsic::aarch64_neon_vsrads_n; 2249 s = "vssra"; OverloadInt = false; break; 2250 // Scalar Unsigned Shift Right and Accumulate (Immediate) 2251 case AArch64::BI__builtin_neon_vsrad_n_u64: 2252 Int = Intrinsic::aarch64_neon_vsradu_n; 2253 s = "vusra"; OverloadInt = false; break; 2254 // Scalar Signed Rounding Shift Right and Accumulate (Immediate) 2255 case AArch64::BI__builtin_neon_vrsrad_n_s64: 2256 Int = 
Intrinsic::aarch64_neon_vrsrads_n; 2257 s = "vsrsra"; OverloadInt = false; break; 2258 // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate) 2259 case AArch64::BI__builtin_neon_vrsrad_n_u64: 2260 Int = Intrinsic::aarch64_neon_vrsradu_n; 2261 s = "vursra"; OverloadInt = false; break; 2262 // Scalar Signed/Unsigned Shift Left (Immediate) 2263 case AArch64::BI__builtin_neon_vshld_n_s64: 2264 case AArch64::BI__builtin_neon_vshld_n_u64: 2265 Int = Intrinsic::aarch64_neon_vshld_n; 2266 s = "vshl"; OverloadInt = false; break; 2267 // Signed Saturating Shift Left (Immediate) 2268 case AArch64::BI__builtin_neon_vqshlb_n_s8: 2269 case AArch64::BI__builtin_neon_vqshlh_n_s16: 2270 case AArch64::BI__builtin_neon_vqshls_n_s32: 2271 case AArch64::BI__builtin_neon_vqshld_n_s64: 2272 Int = Intrinsic::aarch64_neon_vqshls_n; 2273 s = "vsqshl"; OverloadInt = true; break; 2274 // Unsigned Saturating Shift Left (Immediate) 2275 case AArch64::BI__builtin_neon_vqshlb_n_u8: 2276 case AArch64::BI__builtin_neon_vqshlh_n_u16: 2277 case AArch64::BI__builtin_neon_vqshls_n_u32: 2278 case AArch64::BI__builtin_neon_vqshld_n_u64: 2279 Int = Intrinsic::aarch64_neon_vqshlu_n; 2280 s = "vuqshl"; OverloadInt = true; break; 2281 // Signed Saturating Shift Left Unsigned (Immediate) 2282 case AArch64::BI__builtin_neon_vqshlub_n_s8: 2283 case AArch64::BI__builtin_neon_vqshluh_n_s16: 2284 case AArch64::BI__builtin_neon_vqshlus_n_s32: 2285 case AArch64::BI__builtin_neon_vqshlud_n_s64: 2286 Int = Intrinsic::aarch64_neon_vqshlus_n; 2287 s = "vsqshlu"; OverloadInt = true; break; 2288 // Shift Right And Insert (Immediate) 2289 case AArch64::BI__builtin_neon_vsrid_n_s64: 2290 case AArch64::BI__builtin_neon_vsrid_n_u64: 2291 Int = Intrinsic::aarch64_neon_vsrid_n; 2292 s = "vsri"; OverloadInt = false; break; 2293 // Shift Left And Insert (Immediate) 2294 case AArch64::BI__builtin_neon_vslid_n_s64: 2295 case AArch64::BI__builtin_neon_vslid_n_u64: 2296 Int = Intrinsic::aarch64_neon_vslid_n; 2297 s = 
"vsli"; OverloadInt = false; break; 2298 // Signed Saturating Shift Right Narrow (Immediate) 2299 case AArch64::BI__builtin_neon_vqshrnh_n_s16: 2300 case AArch64::BI__builtin_neon_vqshrns_n_s32: 2301 case AArch64::BI__builtin_neon_vqshrnd_n_s64: 2302 Int = Intrinsic::aarch64_neon_vsqshrn; 2303 s = "vsqshrn"; OverloadInt = true; break; 2304 // Unsigned Saturating Shift Right Narrow (Immediate) 2305 case AArch64::BI__builtin_neon_vqshrnh_n_u16: 2306 case AArch64::BI__builtin_neon_vqshrns_n_u32: 2307 case AArch64::BI__builtin_neon_vqshrnd_n_u64: 2308 Int = Intrinsic::aarch64_neon_vuqshrn; 2309 s = "vuqshrn"; OverloadInt = true; break; 2310 // Signed Saturating Rounded Shift Right Narrow (Immediate) 2311 case AArch64::BI__builtin_neon_vqrshrnh_n_s16: 2312 case AArch64::BI__builtin_neon_vqrshrns_n_s32: 2313 case AArch64::BI__builtin_neon_vqrshrnd_n_s64: 2314 Int = Intrinsic::aarch64_neon_vsqrshrn; 2315 s = "vsqrshrn"; OverloadInt = true; break; 2316 // Unsigned Saturating Rounded Shift Right Narrow (Immediate) 2317 case AArch64::BI__builtin_neon_vqrshrnh_n_u16: 2318 case AArch64::BI__builtin_neon_vqrshrns_n_u32: 2319 case AArch64::BI__builtin_neon_vqrshrnd_n_u64: 2320 Int = Intrinsic::aarch64_neon_vuqrshrn; 2321 s = "vuqrshrn"; OverloadInt = true; break; 2322 // Signed Saturating Shift Right Unsigned Narrow (Immediate) 2323 case AArch64::BI__builtin_neon_vqshrunh_n_s16: 2324 case AArch64::BI__builtin_neon_vqshruns_n_s32: 2325 case AArch64::BI__builtin_neon_vqshrund_n_s64: 2326 Int = Intrinsic::aarch64_neon_vsqshrun; 2327 s = "vsqshrun"; OverloadInt = true; break; 2328 // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) 2329 case AArch64::BI__builtin_neon_vqrshrunh_n_s16: 2330 case AArch64::BI__builtin_neon_vqrshruns_n_s32: 2331 case AArch64::BI__builtin_neon_vqrshrund_n_s64: 2332 Int = Intrinsic::aarch64_neon_vsqrshrun; 2333 s = "vsqrshrun"; OverloadInt = true; break; 2334 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate) 2335 case 
AArch64::BI__builtin_neon_vcvts_n_f32_s32: 2336 Int = Intrinsic::aarch64_neon_vcvtf32_n_s32; 2337 s = "vcvtf"; OverloadInt = false; break; 2338 case AArch64::BI__builtin_neon_vcvtd_n_f64_s64: 2339 Int = Intrinsic::aarch64_neon_vcvtf64_n_s64; 2340 s = "vcvtf"; OverloadInt = false; break; 2341 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate) 2342 case AArch64::BI__builtin_neon_vcvts_n_f32_u32: 2343 Int = Intrinsic::aarch64_neon_vcvtf32_n_u32; 2344 s = "vcvtf"; OverloadInt = false; break; 2345 case AArch64::BI__builtin_neon_vcvtd_n_f64_u64: 2346 Int = Intrinsic::aarch64_neon_vcvtf64_n_u64; 2347 s = "vcvtf"; OverloadInt = false; break; 2348 } 2349 2350 if (!Int) 2351 return 0; 2352 2353 // AArch64 scalar builtin that returns scalar type 2354 // and should be mapped to AArch64 intrinsic that returns 2355 // one-element vector type. 2356 Function *F = 0; 2357 if (AcrossVec) { 2358 // Gen arg type 2359 const Expr *Arg = E->getArg(E->getNumArgs()-1); 2360 llvm::Type *Ty = CGF.ConvertType(Arg->getType()); 2361 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 2362 llvm::Type *ETy = VTy->getElementType(); 2363 llvm::VectorType *RTy = llvm::VectorType::get(ETy, 1); 2364 2365 if (ExtendEle) { 2366 assert(!ETy->isFloatingPointTy()); 2367 RTy = llvm::VectorType::getExtendedElementVectorType(RTy); 2368 } 2369 2370 llvm::Type *Tys[2] = {RTy, VTy}; 2371 F = CGF.CGM.getIntrinsic(Int, Tys); 2372 assert(E->getNumArgs() == 1); 2373 } else if (OverloadInt) { 2374 // Determine the type of this overloaded AArch64 intrinsic 2375 llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); 2376 llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1); 2377 assert(VTy); 2378 2379 F = CGF.CGM.getIntrinsic(Int, VTy); 2380 } else if (OverloadWideInt || OverloadNarrowInt) { 2381 // Determine the type of this overloaded AArch64 intrinsic 2382 const Expr *Arg = E->getArg(E->getNumArgs()-1); 2383 llvm::Type *Ty = CGF.ConvertType(Arg->getType()); 2384 llvm::VectorType *VTy = 
llvm::VectorType::get(Ty, 1);
    llvm::VectorType *RTy = OverloadWideInt ?
      llvm::VectorType::getExtendedElementVectorType(VTy) :
      llvm::VectorType::getTruncatedElementVectorType(VTy);
    F = CGF.CGM.getIntrinsic(Int, RTy);
  } else if (OverloadCmpInt) {
    // Determine the types of this overloaded AArch64 intrinsic
    SmallVector<llvm::Type *, 3> Tys;
    const Expr *Arg = E->getArg(E->getNumArgs()-1);
    llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
    llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
    Tys.push_back(VTy);
    Ty = CGF.ConvertType(Arg->getType());
    VTy = llvm::VectorType::get(Ty, 1);
    Tys.push_back(VTy);
    Tys.push_back(VTy);

    F = CGF.CGM.getIntrinsic(Int, Tys);
  } else
    F = CGF.CGM.getIntrinsic(Int);

  Value *Result = CGF.EmitNeonCall(F, Ops, s);
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  // AArch64 intrinsic one-element vector type cast to
  // scalar type expected by the builtin
  return CGF.Builder.CreateBitCast(Result, ResultType, s);
}

/// EmitAArch64BuiltinExpr - Emit LLVM IR for an AArch64 builtin call.
///
/// Lowering strategy, in order:
///  1. Scalar NEON builtins are handled by EmitAArch64ScalarBuiltinExpr.
///  2. __clear_cache becomes a nounwind call to the runtime routine.
///  3. vget_lane/vset_lane builtins lower to plain extract/insertelement
///     and are forwarded through the shared ARM lowering.
///  4. Overloaded vector builtins (last argument is a NeonTypeFlags
///     constant, not a real operand) are either forwarded to the legacy
///     ARM v7 lowering or mapped directly to AArch64/ARM NEON intrinsics.
///
/// Returns 0 (null) when the builtin is not recognized here, letting the
/// caller fall back to other emission paths.
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {

  // Process AArch64 scalar builtins
  if (Value *Result = EmitAArch64ScalarBuiltinExpr(*this, BuiltinID, E))
    return Result;

  if (BuiltinID == AArch64::BI__clear_cache) {
    assert(E->getNumArgs() == 2 &&
           "Variadic __clear_cache slipped through on AArch64");

    // Lower __clear_cache to a call to the library routine of the same
    // name, passing both pointer arguments through unchanged.
    const FunctionDecl *FD = E->getDirectCallee();
    SmallVector<Value *, 2> Ops;
    for (unsigned i = 0; i < E->getNumArgs(); i++)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }

  // Emit all arguments except the last one; for the overloaded NEON
  // builtins handled below, the last argument is the type-discriminator
  // constant rather than a real operand.
  SmallVector<Value *, 4> Ops;
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
    Ops.push_back(EmitScalarExpr(E->getArg(i)));
  }
  // Some intrinsics are not overloaded: the lane get/set builtins carry no
  // type-discriminator argument, so dispatch them before reading it below.
  // All element-type variants share a single ARM lowering (extract/insert).
  switch (BuiltinID) {
  default: break;
  case AArch64::BI__builtin_neon_vget_lane_i8:
  case AArch64::BI__builtin_neon_vget_lane_i16:
  case AArch64::BI__builtin_neon_vget_lane_i32:
  case AArch64::BI__builtin_neon_vget_lane_i64:
  case AArch64::BI__builtin_neon_vget_lane_f32:
  case AArch64::BI__builtin_neon_vget_lane_f64:
  case AArch64::BI__builtin_neon_vgetq_lane_i8:
  case AArch64::BI__builtin_neon_vgetq_lane_i16:
  case AArch64::BI__builtin_neon_vgetq_lane_i32:
  case AArch64::BI__builtin_neon_vgetq_lane_i64:
  case AArch64::BI__builtin_neon_vgetq_lane_f32:
  case AArch64::BI__builtin_neon_vgetq_lane_f64:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E);
  case AArch64::BI__builtin_neon_vset_lane_i8:
  case AArch64::BI__builtin_neon_vset_lane_i16:
  case AArch64::BI__builtin_neon_vset_lane_i32:
  case AArch64::BI__builtin_neon_vset_lane_i64:
  case AArch64::BI__builtin_neon_vset_lane_f32:
  case AArch64::BI__builtin_neon_vset_lane_f64:
  case AArch64::BI__builtin_neon_vsetq_lane_i8:
  case AArch64::BI__builtin_neon_vsetq_lane_i16:
  case AArch64::BI__builtin_neon_vsetq_lane_i32:
  case AArch64::BI__builtin_neon_vsetq_lane_i64:
  case AArch64::BI__builtin_neon_vsetq_lane_f32:
  case AArch64::BI__builtin_neon_vsetq_lane_f64:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E);
  }

  // Get the last argument, which specifies the vector type.
  llvm::APSInt Result;
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  if (!Arg->isIntegerConstantExpr(Result, getContext()))
    return 0;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(Result.getZExtValue());
  bool usgn = Type.isUnsigned();

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return 0;

  unsigned Int;
  switch (BuiltinID) {
  default:
    return 0;

  // AArch64 builtins mapping to legacy ARM v7 builtins.
  // FIXME: the mapped builtins listed correspond to what has been tested
  // in aarch64-neon-intrinsics.c so far.
  case AArch64::BI__builtin_neon_vmul_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmul_v, E);
  case AArch64::BI__builtin_neon_vmulq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmulq_v, E);
  case AArch64::BI__builtin_neon_vabd_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabd_v, E);
  case AArch64::BI__builtin_neon_vabdq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabdq_v, E);
  case AArch64::BI__builtin_neon_vfma_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfma_v, E);
  case AArch64::BI__builtin_neon_vfmaq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfmaq_v, E);
  case AArch64::BI__builtin_neon_vbsl_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbsl_v, E);
  case AArch64::BI__builtin_neon_vbslq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbslq_v, E);
  case AArch64::BI__builtin_neon_vrsqrts_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrts_v, E);
  case AArch64::BI__builtin_neon_vrsqrtsq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrtsq_v, E);
  case AArch64::BI__builtin_neon_vrecps_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecps_v, E);
  case AArch64::BI__builtin_neon_vrecpsq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpsq_v, E);
  case AArch64::BI__builtin_neon_vcage_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcage_v, E);
  case AArch64::BI__builtin_neon_vcale_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcale_v, E);
  case AArch64::BI__builtin_neon_vcaleq_v:
    // vcale(a, b) == vcage(b, a): swap the operands, then fall through to
    // the vcage lowering (intentional fallthrough, no break).
    std::swap(Ops[0], Ops[1]);
  case AArch64::BI__builtin_neon_vcageq_v: {
    Function *F;
    // vcage produces an integer mask; a 64-bit element mask (the f64
    // source form) is only covered by the AArch64-specific intrinsic.
    if (VTy->getElementType()->isIntegerTy(64))
      F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgeq);
    else
      F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
    return EmitNeonCall(F, Ops, "vcage");
  }
  case AArch64::BI__builtin_neon_vcalt_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcalt_v, E);
  case AArch64::BI__builtin_neon_vcagt_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcagt_v, E);
  case AArch64::BI__builtin_neon_vcaltq_v:
    // vcalt(a, b) == vcagt(b, a): swap, then fall through (intentional).
    std::swap(Ops[0], Ops[1]);
  case AArch64::BI__builtin_neon_vcagtq_v: {
    Function *F;
    // Same 64-bit-element split as vcageq above.
    if (VTy->getElementType()->isIntegerTy(64))
      F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgtq);
    else
      F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
    return EmitNeonCall(F, Ops, "vcagt");
  }
  case AArch64::BI__builtin_neon_vtst_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtst_v, E);
  case AArch64::BI__builtin_neon_vtstq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtstq_v, E);
  case AArch64::BI__builtin_neon_vhadd_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhadd_v, E);
  case AArch64::BI__builtin_neon_vhaddq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhaddq_v, E);
  case AArch64::BI__builtin_neon_vhsub_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsub_v, E);
  case AArch64::BI__builtin_neon_vhsubq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsubq_v, E);
  case AArch64::BI__builtin_neon_vrhadd_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhadd_v, E);
  case AArch64::BI__builtin_neon_vrhaddq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhaddq_v, E);
  case AArch64::BI__builtin_neon_vqadd_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqadd_v, E);
  case AArch64::BI__builtin_neon_vqaddq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqaddq_v, E);
  case AArch64::BI__builtin_neon_vqsub_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsub_v, E);
  case AArch64::BI__builtin_neon_vqsubq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsubq_v, E);
  case AArch64::BI__builtin_neon_vshl_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_v, E);
  case AArch64::BI__builtin_neon_vshlq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_v, E);
  case AArch64::BI__builtin_neon_vqshl_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_v, E);
  case AArch64::BI__builtin_neon_vqshlq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_v, E);
  case AArch64::BI__builtin_neon_vrshl_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshl_v, E);
  case AArch64::BI__builtin_neon_vrshlq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshlq_v, E);
  case AArch64::BI__builtin_neon_vqrshl_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshl_v, E);
  case AArch64::BI__builtin_neon_vqrshlq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshlq_v, E);
  case AArch64::BI__builtin_neon_vaddhn_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vaddhn_v, E);
  case AArch64::BI__builtin_neon_vraddhn_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vraddhn_v, E);
  case AArch64::BI__builtin_neon_vsubhn_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsubhn_v, E);
  case AArch64::BI__builtin_neon_vrsubhn_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsubhn_v, E);
  case AArch64::BI__builtin_neon_vmull_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmull_v, E);
  case AArch64::BI__builtin_neon_vqdmull_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmull_v, E);
  case AArch64::BI__builtin_neon_vqdmlal_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlal_v, E);
  case AArch64::BI__builtin_neon_vqdmlsl_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlsl_v, E);
  case AArch64::BI__builtin_neon_vmax_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmax_v, E);
  case AArch64::BI__builtin_neon_vmaxq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmaxq_v, E);
  case AArch64::BI__builtin_neon_vmin_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmin_v, E);
  case AArch64::BI__builtin_neon_vminq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vminq_v, E);
  case AArch64::BI__builtin_neon_vpmax_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmax_v, E);
  case AArch64::BI__builtin_neon_vpmin_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmin_v, E);
  case AArch64::BI__builtin_neon_vpadd_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpadd_v, E);
  case AArch64::BI__builtin_neon_vqdmulh_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulh_v, E);
  case AArch64::BI__builtin_neon_vqdmulhq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulhq_v, E);
  case AArch64::BI__builtin_neon_vqrdmulh_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulh_v, E);
  case AArch64::BI__builtin_neon_vqrdmulhq_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulhq_v, E);

  // Shift by immediate
  case AArch64::BI__builtin_neon_vshr_n_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshr_n_v, E);
  case AArch64::BI__builtin_neon_vshrq_n_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshrq_n_v, E);
  case AArch64::BI__builtin_neon_vrshr_n_v:
  case AArch64::BI__builtin_neon_vrshrq_n_v:
    // Rounding shift right: signed vs unsigned intrinsic chosen by the
    // type-flag's sign bit.
    Int = usgn ? Intrinsic::aarch64_neon_vurshr
               : Intrinsic::aarch64_neon_vsrshr;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n");
  case AArch64::BI__builtin_neon_vsra_n_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsra_n_v, E);
  case AArch64::BI__builtin_neon_vsraq_n_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsraq_n_v, E);
  case AArch64::BI__builtin_neon_vrsra_n_v:
  case AArch64::BI__builtin_neon_vrsraq_n_v: {
    // Rounding shift-right-accumulate: do the rounding shift via the
    // intrinsic, then add the accumulator (Ops[0]) with a plain add.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Int = usgn ? Intrinsic::aarch64_neon_vurshr
               : Intrinsic::aarch64_neon_vsrshr;
    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  }
  case AArch64::BI__builtin_neon_vshl_n_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_n_v, E);
  case AArch64::BI__builtin_neon_vshlq_n_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_n_v, E);
  case AArch64::BI__builtin_neon_vqshl_n_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_n_v, E);
  case AArch64::BI__builtin_neon_vqshlq_n_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_n_v, E);
  case AArch64::BI__builtin_neon_vqshlu_n_v:
  case AArch64::BI__builtin_neon_vqshluq_n_v:
    Int = Intrinsic::aarch64_neon_vsqshlu;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n");
  case AArch64::BI__builtin_neon_vsri_n_v:
  case AArch64::BI__builtin_neon_vsriq_n_v:
    Int = Intrinsic::aarch64_neon_vsri;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsri_n");
  case AArch64::BI__builtin_neon_vsli_n_v:
  case AArch64::BI__builtin_neon_vsliq_n_v:
    Int = Intrinsic::aarch64_neon_vsli;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsli_n");
  case AArch64::BI__builtin_neon_vshll_n_v: {
    // Widening shift left: extend the narrow source to the wide result
    // type (zext/sext by signedness), then shift left in plain IR.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case AArch64::BI__builtin_neon_vshrn_n_v: {
    // Narrowing shift right: shift in the wide type (logical vs arithmetic
    // by signedness), then truncate to the narrow result type.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case AArch64::BI__builtin_neon_vqshrun_n_v:
    Int = Intrinsic::aarch64_neon_vsqshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
  case AArch64::BI__builtin_neon_vrshrn_n_v:
    Int = Intrinsic::aarch64_neon_vrshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
  case AArch64::BI__builtin_neon_vqrshrun_n_v:
    Int = Intrinsic::aarch64_neon_vsqrshrun;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
  case AArch64::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_vuqshrn
               : Intrinsic::aarch64_neon_vsqshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
  case AArch64::BI__builtin_neon_vqrshrn_n_v:
    Int = usgn ? Intrinsic::aarch64_neon_vuqrshrn
               : Intrinsic::aarch64_neon_vsqrshrn;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");

  // Convert
  case AArch64::BI__builtin_neon_vmovl_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmovl_v, E);
  case AArch64::BI__builtin_neon_vcvt_n_f32_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_f32_v, E);
  case AArch64::BI__builtin_neon_vcvtq_n_f32_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_f32_v, E);
  case AArch64::BI__builtin_neon_vcvtq_n_f64_v: {
    // Fixed-point -> f64 conversion; the intrinsic is overloaded on
    // (result float type, source integer type).
    llvm::Type *FloatTy =
      GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
    llvm::Type *Tys[2] = { FloatTy, Ty };
    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
               : Intrinsic::arm_neon_vcvtfxs2fp;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case AArch64::BI__builtin_neon_vcvt_n_s32_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_s32_v, E);
  case AArch64::BI__builtin_neon_vcvtq_n_s32_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_s32_v, E);
  case AArch64::BI__builtin_neon_vcvt_n_u32_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_u32_v, E);
  case AArch64::BI__builtin_neon_vcvtq_n_u32_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_u32_v, E);
  case AArch64::BI__builtin_neon_vcvtq_n_s64_v:
  case AArch64::BI__builtin_neon_vcvtq_n_u64_v: {
    // f64 -> fixed-point conversion; overload order is reversed relative
    // to the f64 case above: (result integer type, source float type).
    llvm::Type *FloatTy =
      GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
    llvm::Type *Tys[2] = { Ty, FloatTy };
    Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
               : Intrinsic::arm_neon_vcvtfp2fxs;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }

  // Load/Store
  case AArch64::BI__builtin_neon_vld1_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1_v, E);
  case AArch64::BI__builtin_neon_vld1q_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1q_v, E);
  case AArch64::BI__builtin_neon_vld2_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2_v, E);
  case AArch64::BI__builtin_neon_vld2q_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_v, E);
  case AArch64::BI__builtin_neon_vld3_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3_v, E);
  case AArch64::BI__builtin_neon_vld3q_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3q_v, E);
  case AArch64::BI__builtin_neon_vld4_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4_v, E);
  case AArch64::BI__builtin_neon_vld4q_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4q_v, E);
  case AArch64::BI__builtin_neon_vst1_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst1_v, E);
  case AArch64::BI__builtin_neon_vst1q_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst1q_v, E);
  case AArch64::BI__builtin_neon_vst2_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2_v, E);
  case AArch64::BI__builtin_neon_vst2q_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2q_v, E);
  case AArch64::BI__builtin_neon_vst3_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3_v, E);
  case AArch64::BI__builtin_neon_vst3q_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3q_v, E);
  case AArch64::BI__builtin_neon_vst4_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_v, E);
  case AArch64::BI__builtin_neon_vst4q_v:
    return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_v, E);

  // AArch64-only builtins
  case AArch64::BI__builtin_neon_vfma_lane_v:
  case AArch64::BI__builtin_neon_vfmaq_laneq_v: {
    // Lane multiplicand and accumulator have the same width as the result:
    // splat the selected lane (Ops[3]) across the full vector and emit fma.
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
    // llvm.fma takes the accumulator last; the builtin has it first.
    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
  }
  case AArch64::BI__builtin_neon_vfmaq_lane_v: {
    // The lane source is the 64-bit (half-width) vector: bitcast Ops[2] to
    // half the result's element count before splatting the lane.
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
    llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
                                            VTy->getNumElements() / 2);
    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
    Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
                                               cast<ConstantInt>(Ops[3]));
    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");

    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
  }
  case AArch64::BI__builtin_neon_vfma_laneq_v: {
    // Mirror of the previous case: the lane source is the 128-bit
    // (double-width) vector, so bitcast Ops[2] to twice the element count.
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);

    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
    llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
                                            VTy->getNumElements() * 2);
    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
    Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
                                               cast<ConstantInt>(Ops[3]));
    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");

    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
  }
  case AArch64::BI__builtin_neon_vfms_v:
  case AArch64::BI__builtin_neon_vfmsq_v: {
    // Fused multiply-subtract: negate one multiplicand and reuse llvm.fma.
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateFNeg(Ops[1]);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // LLVM's fma intrinsic puts the accumulator in the last position, but the
    // AArch64 intrinsic has it first.
    return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
  }
  case AArch64::BI__builtin_neon_vmaxnm_v:
  case AArch64::BI__builtin_neon_vmaxnmq_v: {
    Int = Intrinsic::aarch64_neon_vmaxnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
  }
  case AArch64::BI__builtin_neon_vminnm_v:
  case AArch64::BI__builtin_neon_vminnmq_v: {
    Int = Intrinsic::aarch64_neon_vminnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
  }
  case AArch64::BI__builtin_neon_vpmaxnm_v:
  case AArch64::BI__builtin_neon_vpmaxnmq_v: {
    Int = Intrinsic::aarch64_neon_vpmaxnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
  }
  case AArch64::BI__builtin_neon_vpminnm_v:
  case AArch64::BI__builtin_neon_vpminnmq_v: {
    Int = Intrinsic::aarch64_neon_vpminnm;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
  }
  case AArch64::BI__builtin_neon_vpmaxq_v: {
    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
  }
  case AArch64::BI__builtin_neon_vpminq_v: {
    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
  }
  case AArch64::BI__builtin_neon_vpaddq_v: {
    Int = Intrinsic::arm_neon_vpadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpadd");
  }
  case AArch64::BI__builtin_neon_vmulx_v:
  case AArch64::BI__builtin_neon_vmulxq_v: {
    Int = Intrinsic::aarch64_neon_vmulx;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
  }
  }
}

/// EmitARMBuiltinExpr - Emit LLVM IR for an ARM builtin call.
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  if (BuiltinID == ARM::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    // Lower to a call to the runtime routine of the same name.
    const FunctionDecl *FD = E->getDirectCallee();
    SmallVector<Value*, 2> Ops;
    for (unsigned i = 0; i < 2; i++)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }

  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
      (BuiltinID == ARM::BI__builtin_arm_ldrex &&
       getContext().getTypeSize(E->getType()) == 64)) {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
                                    "ldrexd");

    // Reassemble the 64-bit value from the {lo, hi} pair returned by
    // ldrexd: extract index 1 as the high half, index 0 as the low half.
    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    Val0 = Builder.CreateZExt(Val0, Int64Ty);
    Val1 = Builder.CreateZExt(Val1, Int64Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  }

  if
(BuiltinID == ARM::BI__builtin_arm_ldrex) { 2900 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 2901 2902 QualType Ty = E->getType(); 2903 llvm::Type *RealResTy = ConvertType(Ty); 2904 llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), 2905 getContext().getTypeSize(Ty)); 2906 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); 2907 2908 Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrex, LoadAddr->getType()); 2909 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 2910 2911 if (RealResTy->isPointerTy()) 2912 return Builder.CreateIntToPtr(Val, RealResTy); 2913 else { 2914 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 2915 return Builder.CreateBitCast(Val, RealResTy); 2916 } 2917 } 2918 2919 if (BuiltinID == ARM::BI__builtin_arm_strexd || 2920 (BuiltinID == ARM::BI__builtin_arm_strex && 2921 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 2922 Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd); 2923 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL); 2924 2925 Value *Tmp = CreateMemTemp(E->getArg(0)->getType()); 2926 Value *Val = EmitScalarExpr(E->getArg(0)); 2927 Builder.CreateStore(Val, Tmp); 2928 2929 Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 2930 Val = Builder.CreateLoad(LdPtr); 2931 2932 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 2933 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 2934 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy); 2935 return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd"); 2936 } 2937 2938 if (BuiltinID == ARM::BI__builtin_arm_strex) { 2939 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 2940 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 2941 2942 QualType Ty = E->getArg(0)->getType(); 2943 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 2944 getContext().getTypeSize(Ty)); 2945 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 2946 2947 if 
(StoreVal->getType()->isPointerTy()) 2948 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); 2949 else { 2950 StoreVal = Builder.CreateBitCast(StoreVal, StoreTy); 2951 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); 2952 } 2953 2954 Function *F = CGM.getIntrinsic(Intrinsic::arm_strex, StoreAddr->getType()); 2955 return Builder.CreateCall2(F, StoreVal, StoreAddr, "strex"); 2956 } 2957 2958 if (BuiltinID == ARM::BI__builtin_arm_clrex) { 2959 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); 2960 return Builder.CreateCall(F); 2961 } 2962 2963 if (BuiltinID == ARM::BI__builtin_arm_sevl) { 2964 Function *F = CGM.getIntrinsic(Intrinsic::arm_sevl); 2965 return Builder.CreateCall(F); 2966 } 2967 2968 // CRC32 2969 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 2970 switch (BuiltinID) { 2971 case ARM::BI__builtin_arm_crc32b: 2972 CRCIntrinsicID = Intrinsic::arm_crc32b; break; 2973 case ARM::BI__builtin_arm_crc32cb: 2974 CRCIntrinsicID = Intrinsic::arm_crc32cb; break; 2975 case ARM::BI__builtin_arm_crc32h: 2976 CRCIntrinsicID = Intrinsic::arm_crc32h; break; 2977 case ARM::BI__builtin_arm_crc32ch: 2978 CRCIntrinsicID = Intrinsic::arm_crc32ch; break; 2979 case ARM::BI__builtin_arm_crc32w: 2980 case ARM::BI__builtin_arm_crc32d: 2981 CRCIntrinsicID = Intrinsic::arm_crc32w; break; 2982 case ARM::BI__builtin_arm_crc32cw: 2983 case ARM::BI__builtin_arm_crc32cd: 2984 CRCIntrinsicID = Intrinsic::arm_crc32cw; break; 2985 } 2986 2987 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 2988 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 2989 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 2990 2991 // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w 2992 // intrinsics, hence we need different codegen for these cases. 
2993 if (BuiltinID == ARM::BI__builtin_arm_crc32d || 2994 BuiltinID == ARM::BI__builtin_arm_crc32cd) { 2995 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 2996 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 2997 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 2998 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 2999 3000 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 3001 Value *Res = Builder.CreateCall2(F, Arg0, Arg1a); 3002 return Builder.CreateCall2(F, Res, Arg1b); 3003 } else { 3004 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 3005 3006 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 3007 return Builder.CreateCall2(F, Arg0, Arg1); 3008 } 3009 } 3010 3011 SmallVector<Value*, 4> Ops; 3012 llvm::Value *Align = 0; 3013 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 3014 if (i == 0) { 3015 switch (BuiltinID) { 3016 case ARM::BI__builtin_neon_vld1_v: 3017 case ARM::BI__builtin_neon_vld1q_v: 3018 case ARM::BI__builtin_neon_vld1q_lane_v: 3019 case ARM::BI__builtin_neon_vld1_lane_v: 3020 case ARM::BI__builtin_neon_vld1_dup_v: 3021 case ARM::BI__builtin_neon_vld1q_dup_v: 3022 case ARM::BI__builtin_neon_vst1_v: 3023 case ARM::BI__builtin_neon_vst1q_v: 3024 case ARM::BI__builtin_neon_vst1q_lane_v: 3025 case ARM::BI__builtin_neon_vst1_lane_v: 3026 case ARM::BI__builtin_neon_vst2_v: 3027 case ARM::BI__builtin_neon_vst2q_v: 3028 case ARM::BI__builtin_neon_vst2_lane_v: 3029 case ARM::BI__builtin_neon_vst2q_lane_v: 3030 case ARM::BI__builtin_neon_vst3_v: 3031 case ARM::BI__builtin_neon_vst3q_v: 3032 case ARM::BI__builtin_neon_vst3_lane_v: 3033 case ARM::BI__builtin_neon_vst3q_lane_v: 3034 case ARM::BI__builtin_neon_vst4_v: 3035 case ARM::BI__builtin_neon_vst4q_v: 3036 case ARM::BI__builtin_neon_vst4_lane_v: 3037 case ARM::BI__builtin_neon_vst4q_lane_v: 3038 // Get the alignment for the argument in addition to the value; 3039 // we'll use it later. 
3040 std::pair<llvm::Value*, unsigned> Src = 3041 EmitPointerWithAlignment(E->getArg(0)); 3042 Ops.push_back(Src.first); 3043 Align = Builder.getInt32(Src.second); 3044 continue; 3045 } 3046 } 3047 if (i == 1) { 3048 switch (BuiltinID) { 3049 case ARM::BI__builtin_neon_vld2_v: 3050 case ARM::BI__builtin_neon_vld2q_v: 3051 case ARM::BI__builtin_neon_vld3_v: 3052 case ARM::BI__builtin_neon_vld3q_v: 3053 case ARM::BI__builtin_neon_vld4_v: 3054 case ARM::BI__builtin_neon_vld4q_v: 3055 case ARM::BI__builtin_neon_vld2_lane_v: 3056 case ARM::BI__builtin_neon_vld2q_lane_v: 3057 case ARM::BI__builtin_neon_vld3_lane_v: 3058 case ARM::BI__builtin_neon_vld3q_lane_v: 3059 case ARM::BI__builtin_neon_vld4_lane_v: 3060 case ARM::BI__builtin_neon_vld4q_lane_v: 3061 case ARM::BI__builtin_neon_vld2_dup_v: 3062 case ARM::BI__builtin_neon_vld3_dup_v: 3063 case ARM::BI__builtin_neon_vld4_dup_v: 3064 // Get the alignment for the argument in addition to the value; 3065 // we'll use it later. 3066 std::pair<llvm::Value*, unsigned> Src = 3067 EmitPointerWithAlignment(E->getArg(1)); 3068 Ops.push_back(Src.first); 3069 Align = Builder.getInt32(Src.second); 3070 continue; 3071 } 3072 } 3073 Ops.push_back(EmitScalarExpr(E->getArg(i))); 3074 } 3075 3076 // vget_lane and vset_lane are not overloaded and do not have an extra 3077 // argument that specifies the vector type. 
3078 switch (BuiltinID) { 3079 default: break; 3080 case ARM::BI__builtin_neon_vget_lane_i8: 3081 case ARM::BI__builtin_neon_vget_lane_i16: 3082 case ARM::BI__builtin_neon_vget_lane_i32: 3083 case ARM::BI__builtin_neon_vget_lane_i64: 3084 case ARM::BI__builtin_neon_vget_lane_f32: 3085 case ARM::BI__builtin_neon_vgetq_lane_i8: 3086 case ARM::BI__builtin_neon_vgetq_lane_i16: 3087 case ARM::BI__builtin_neon_vgetq_lane_i32: 3088 case ARM::BI__builtin_neon_vgetq_lane_i64: 3089 case ARM::BI__builtin_neon_vgetq_lane_f32: 3090 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 3091 "vget_lane"); 3092 case ARM::BI__builtin_neon_vset_lane_i8: 3093 case ARM::BI__builtin_neon_vset_lane_i16: 3094 case ARM::BI__builtin_neon_vset_lane_i32: 3095 case ARM::BI__builtin_neon_vset_lane_i64: 3096 case ARM::BI__builtin_neon_vset_lane_f32: 3097 case ARM::BI__builtin_neon_vsetq_lane_i8: 3098 case ARM::BI__builtin_neon_vsetq_lane_i16: 3099 case ARM::BI__builtin_neon_vsetq_lane_i32: 3100 case ARM::BI__builtin_neon_vsetq_lane_i64: 3101 case ARM::BI__builtin_neon_vsetq_lane_f32: 3102 Ops.push_back(EmitScalarExpr(E->getArg(2))); 3103 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 3104 } 3105 3106 // Get the last argument, which specifies the vector type. 3107 llvm::APSInt Result; 3108 const Expr *Arg = E->getArg(E->getNumArgs()-1); 3109 if (!Arg->isIntegerConstantExpr(Result, getContext())) 3110 return 0; 3111 3112 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 3113 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 3114 // Determine the overloaded type of this builtin. 3115 llvm::Type *Ty; 3116 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 3117 Ty = FloatTy; 3118 else 3119 Ty = DoubleTy; 3120 3121 // Determine whether this is an unsigned conversion or not. 3122 bool usgn = Result.getZExtValue() == 1; 3123 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 3124 3125 // Call the appropriate intrinsic. 
3126 Function *F = CGM.getIntrinsic(Int, Ty); 3127 return Builder.CreateCall(F, Ops, "vcvtr"); 3128 } 3129 3130 // Determine the type of this overloaded NEON intrinsic. 3131 NeonTypeFlags Type(Result.getZExtValue()); 3132 bool usgn = Type.isUnsigned(); 3133 bool quad = Type.isQuad(); 3134 bool rightShift = false; 3135 3136 llvm::VectorType *VTy = GetNeonType(this, Type); 3137 llvm::Type *Ty = VTy; 3138 if (!Ty) 3139 return 0; 3140 3141 unsigned Int; 3142 switch (BuiltinID) { 3143 default: return 0; 3144 case ARM::BI__builtin_neon_vbsl_v: 3145 case ARM::BI__builtin_neon_vbslq_v: 3146 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty), 3147 Ops, "vbsl"); 3148 case ARM::BI__builtin_neon_vabd_v: 3149 case ARM::BI__builtin_neon_vabdq_v: 3150 Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds; 3151 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 3152 case ARM::BI__builtin_neon_vabs_v: 3153 case ARM::BI__builtin_neon_vabsq_v: 3154 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty), 3155 Ops, "vabs"); 3156 case ARM::BI__builtin_neon_vaddhn_v: { 3157 llvm::VectorType *SrcTy = 3158 llvm::VectorType::getExtendedElementVectorType(VTy); 3159 3160 // %sum = add <4 x i32> %lhs, %rhs 3161 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 3162 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 3163 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); 3164 3165 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 3166 Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), 3167 SrcTy->getScalarSizeInBits() / 2); 3168 ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); 3169 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); 3170 3171 // %res = trunc <4 x i32> %high to <4 x i16> 3172 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); 3173 } 3174 case ARM::BI__builtin_neon_vcale_v: 3175 std::swap(Ops[0], Ops[1]); 3176 case ARM::BI__builtin_neon_vcage_v: { 3177 Function *F = 
CGM.getIntrinsic(Intrinsic::arm_neon_vacged); 3178 return EmitNeonCall(F, Ops, "vcage"); 3179 } 3180 case ARM::BI__builtin_neon_vcaleq_v: 3181 std::swap(Ops[0], Ops[1]); 3182 case ARM::BI__builtin_neon_vcageq_v: { 3183 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq); 3184 return EmitNeonCall(F, Ops, "vcage"); 3185 } 3186 case ARM::BI__builtin_neon_vcalt_v: 3187 std::swap(Ops[0], Ops[1]); 3188 case ARM::BI__builtin_neon_vcagt_v: { 3189 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd); 3190 return EmitNeonCall(F, Ops, "vcagt"); 3191 } 3192 case ARM::BI__builtin_neon_vcaltq_v: 3193 std::swap(Ops[0], Ops[1]); 3194 case ARM::BI__builtin_neon_vcagtq_v: { 3195 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq); 3196 return EmitNeonCall(F, Ops, "vcagt"); 3197 } 3198 case ARM::BI__builtin_neon_vcls_v: 3199 case ARM::BI__builtin_neon_vclsq_v: { 3200 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty); 3201 return EmitNeonCall(F, Ops, "vcls"); 3202 } 3203 case ARM::BI__builtin_neon_vclz_v: 3204 case ARM::BI__builtin_neon_vclzq_v: { 3205 // Generate target-independent intrinsic; also need to add second argument 3206 // for whether or not clz of zero is undefined; on ARM it isn't. 
3207 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty); 3208 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef())); 3209 return EmitNeonCall(F, Ops, "vclz"); 3210 } 3211 case ARM::BI__builtin_neon_vcnt_v: 3212 case ARM::BI__builtin_neon_vcntq_v: { 3213 // generate target-independent intrinsic 3214 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty); 3215 return EmitNeonCall(F, Ops, "vctpop"); 3216 } 3217 case ARM::BI__builtin_neon_vcvt_f16_v: { 3218 assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad && 3219 "unexpected vcvt_f16_v builtin"); 3220 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf); 3221 return EmitNeonCall(F, Ops, "vcvt"); 3222 } 3223 case ARM::BI__builtin_neon_vcvt_f32_f16: { 3224 assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad && 3225 "unexpected vcvt_f32_f16 builtin"); 3226 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp); 3227 return EmitNeonCall(F, Ops, "vcvt"); 3228 } 3229 case ARM::BI__builtin_neon_vcvt_f32_v: 3230 case ARM::BI__builtin_neon_vcvtq_f32_v: 3231 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3232 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad)); 3233 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 3234 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 3235 case ARM::BI__builtin_neon_vcvt_s32_v: 3236 case ARM::BI__builtin_neon_vcvt_u32_v: 3237 case ARM::BI__builtin_neon_vcvtq_s32_v: 3238 case ARM::BI__builtin_neon_vcvtq_u32_v: { 3239 llvm::Type *FloatTy = 3240 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad)); 3241 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 3242 return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") 3243 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); 3244 } 3245 case ARM::BI__builtin_neon_vcvt_n_f32_v: 3246 case ARM::BI__builtin_neon_vcvtq_n_f32_v: { 3247 llvm::Type *FloatTy = 3248 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad)); 3249 llvm::Type *Tys[2] = { FloatTy, Ty }; 3250 Int = usgn ? 
Intrinsic::arm_neon_vcvtfxu2fp 3251 : Intrinsic::arm_neon_vcvtfxs2fp; 3252 Function *F = CGM.getIntrinsic(Int, Tys); 3253 return EmitNeonCall(F, Ops, "vcvt_n"); 3254 } 3255 case ARM::BI__builtin_neon_vcvt_n_s32_v: 3256 case ARM::BI__builtin_neon_vcvt_n_u32_v: 3257 case ARM::BI__builtin_neon_vcvtq_n_s32_v: 3258 case ARM::BI__builtin_neon_vcvtq_n_u32_v: { 3259 llvm::Type *FloatTy = 3260 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad)); 3261 llvm::Type *Tys[2] = { Ty, FloatTy }; 3262 Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu 3263 : Intrinsic::arm_neon_vcvtfp2fxs; 3264 Function *F = CGM.getIntrinsic(Int, Tys); 3265 return EmitNeonCall(F, Ops, "vcvt_n"); 3266 } 3267 case ARM::BI__builtin_neon_vext_v: 3268 case ARM::BI__builtin_neon_vextq_v: { 3269 int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); 3270 SmallVector<Constant*, 16> Indices; 3271 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 3272 Indices.push_back(ConstantInt::get(Int32Ty, i+CV)); 3273 3274 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3275 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3276 Value *SV = llvm::ConstantVector::get(Indices); 3277 return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext"); 3278 } 3279 case ARM::BI__builtin_neon_vhadd_v: 3280 case ARM::BI__builtin_neon_vhaddq_v: 3281 Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds; 3282 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd"); 3283 case ARM::BI__builtin_neon_vhsub_v: 3284 case ARM::BI__builtin_neon_vhsubq_v: 3285 Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs; 3286 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub"); 3287 case ARM::BI__builtin_neon_vld1_v: 3288 case ARM::BI__builtin_neon_vld1q_v: 3289 Ops.push_back(Align); 3290 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty), 3291 Ops, "vld1"); 3292 case ARM::BI__builtin_neon_vld1q_lane_v: 3293 // Handle 64-bit integer elements as a special case. 
Use shuffles of 3294 // one-element vectors to avoid poor code for i64 in the backend. 3295 if (VTy->getElementType()->isIntegerTy(64)) { 3296 // Extract the other lane. 3297 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3298 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 3299 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 3300 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 3301 // Load the value as a one-element vector. 3302 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 3303 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty); 3304 Value *Ld = Builder.CreateCall2(F, Ops[0], Align); 3305 // Combine them. 3306 SmallVector<Constant*, 2> Indices; 3307 Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane)); 3308 Indices.push_back(ConstantInt::get(Int32Ty, Lane)); 3309 SV = llvm::ConstantVector::get(Indices); 3310 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 3311 } 3312 // fall through 3313 case ARM::BI__builtin_neon_vld1_lane_v: { 3314 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3315 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 3316 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3317 LoadInst *Ld = Builder.CreateLoad(Ops[0]); 3318 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 3319 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 3320 } 3321 case ARM::BI__builtin_neon_vld1_dup_v: 3322 case ARM::BI__builtin_neon_vld1q_dup_v: { 3323 Value *V = UndefValue::get(Ty); 3324 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 3325 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3326 LoadInst *Ld = Builder.CreateLoad(Ops[0]); 3327 Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 3328 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 3329 Ops[0] = Builder.CreateInsertElement(V, Ld, CI); 3330 return EmitNeonSplat(Ops[0], CI); 3331 } 3332 case ARM::BI__builtin_neon_vld2_v: 3333 case ARM::BI__builtin_neon_vld2q_v: { 3334 Function *F = 
CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty); 3335 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2"); 3336 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3337 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3338 return Builder.CreateStore(Ops[1], Ops[0]); 3339 } 3340 case ARM::BI__builtin_neon_vld3_v: 3341 case ARM::BI__builtin_neon_vld3q_v: { 3342 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty); 3343 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3"); 3344 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3345 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3346 return Builder.CreateStore(Ops[1], Ops[0]); 3347 } 3348 case ARM::BI__builtin_neon_vld4_v: 3349 case ARM::BI__builtin_neon_vld4q_v: { 3350 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty); 3351 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4"); 3352 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3353 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3354 return Builder.CreateStore(Ops[1], Ops[0]); 3355 } 3356 case ARM::BI__builtin_neon_vld2_lane_v: 3357 case ARM::BI__builtin_neon_vld2q_lane_v: { 3358 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty); 3359 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3360 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 3361 Ops.push_back(Align); 3362 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 3363 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3364 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3365 return Builder.CreateStore(Ops[1], Ops[0]); 3366 } 3367 case ARM::BI__builtin_neon_vld3_lane_v: 3368 case ARM::BI__builtin_neon_vld3q_lane_v: { 3369 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty); 3370 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3371 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 3372 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 3373 Ops.push_back(Align); 3374 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 3375 Ty = 
llvm::PointerType::getUnqual(Ops[1]->getType()); 3376 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3377 return Builder.CreateStore(Ops[1], Ops[0]); 3378 } 3379 case ARM::BI__builtin_neon_vld4_lane_v: 3380 case ARM::BI__builtin_neon_vld4q_lane_v: { 3381 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty); 3382 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3383 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 3384 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 3385 Ops[5] = Builder.CreateBitCast(Ops[5], Ty); 3386 Ops.push_back(Align); 3387 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 3388 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3389 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3390 return Builder.CreateStore(Ops[1], Ops[0]); 3391 } 3392 case ARM::BI__builtin_neon_vld2_dup_v: 3393 case ARM::BI__builtin_neon_vld3_dup_v: 3394 case ARM::BI__builtin_neon_vld4_dup_v: { 3395 // Handle 64-bit elements as a special-case. There is no "dup" needed. 3396 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 3397 switch (BuiltinID) { 3398 case ARM::BI__builtin_neon_vld2_dup_v: 3399 Int = Intrinsic::arm_neon_vld2; 3400 break; 3401 case ARM::BI__builtin_neon_vld3_dup_v: 3402 Int = Intrinsic::arm_neon_vld3; 3403 break; 3404 case ARM::BI__builtin_neon_vld4_dup_v: 3405 Int = Intrinsic::arm_neon_vld4; 3406 break; 3407 default: llvm_unreachable("unknown vld_dup intrinsic?"); 3408 } 3409 Function *F = CGM.getIntrinsic(Int, Ty); 3410 Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup"); 3411 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3412 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3413 return Builder.CreateStore(Ops[1], Ops[0]); 3414 } 3415 switch (BuiltinID) { 3416 case ARM::BI__builtin_neon_vld2_dup_v: 3417 Int = Intrinsic::arm_neon_vld2lane; 3418 break; 3419 case ARM::BI__builtin_neon_vld3_dup_v: 3420 Int = Intrinsic::arm_neon_vld3lane; 3421 break; 3422 case ARM::BI__builtin_neon_vld4_dup_v: 3423 Int = 
Intrinsic::arm_neon_vld4lane; 3424 break; 3425 default: llvm_unreachable("unknown vld_dup intrinsic?"); 3426 } 3427 Function *F = CGM.getIntrinsic(Int, Ty); 3428 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 3429 3430 SmallVector<Value*, 6> Args; 3431 Args.push_back(Ops[1]); 3432 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 3433 3434 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 3435 Args.push_back(CI); 3436 Args.push_back(Align); 3437 3438 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 3439 // splat lane 0 to all elts in each vector of the result. 3440 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 3441 Value *Val = Builder.CreateExtractValue(Ops[1], i); 3442 Value *Elt = Builder.CreateBitCast(Val, Ty); 3443 Elt = EmitNeonSplat(Elt, CI); 3444 Elt = Builder.CreateBitCast(Elt, Val->getType()); 3445 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 3446 } 3447 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3448 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3449 return Builder.CreateStore(Ops[1], Ops[0]); 3450 } 3451 case ARM::BI__builtin_neon_vmax_v: 3452 case ARM::BI__builtin_neon_vmaxq_v: 3453 Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs; 3454 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 3455 case ARM::BI__builtin_neon_vmin_v: 3456 case ARM::BI__builtin_neon_vminq_v: 3457 Int = usgn ? 
Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins; 3458 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 3459 case ARM::BI__builtin_neon_vmovl_v: { 3460 llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy); 3461 Ops[0] = Builder.CreateBitCast(Ops[0], DTy); 3462 if (usgn) 3463 return Builder.CreateZExt(Ops[0], Ty, "vmovl"); 3464 return Builder.CreateSExt(Ops[0], Ty, "vmovl"); 3465 } 3466 case ARM::BI__builtin_neon_vmovn_v: { 3467 llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy); 3468 Ops[0] = Builder.CreateBitCast(Ops[0], QTy); 3469 return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); 3470 } 3471 case ARM::BI__builtin_neon_vmul_v: 3472 case ARM::BI__builtin_neon_vmulq_v: 3473 assert(Type.isPoly() && "vmul builtin only supported for polynomial types"); 3474 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty), 3475 Ops, "vmul"); 3476 case ARM::BI__builtin_neon_vmull_v: 3477 // FIXME: the integer vmull operations could be emitted in terms of pure 3478 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of 3479 // hoisting the exts outside loops. Until global ISel comes along that can 3480 // see through such movement this leads to bad CodeGen. So we need an 3481 // intrinsic for now. 3482 Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls; 3483 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int; 3484 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 3485 case ARM::BI__builtin_neon_vfma_v: 3486 case ARM::BI__builtin_neon_vfmaq_v: { 3487 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 3488 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3489 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3490 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3491 3492 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 
3493 return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 3494 } 3495 case ARM::BI__builtin_neon_vpadal_v: 3496 case ARM::BI__builtin_neon_vpadalq_v: { 3497 Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals; 3498 // The source operand type has twice as many elements of half the size. 3499 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 3500 llvm::Type *EltTy = 3501 llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 3502 llvm::Type *NarrowTy = 3503 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 3504 llvm::Type *Tys[2] = { Ty, NarrowTy }; 3505 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal"); 3506 } 3507 case ARM::BI__builtin_neon_vpadd_v: 3508 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty), 3509 Ops, "vpadd"); 3510 case ARM::BI__builtin_neon_vpaddl_v: 3511 case ARM::BI__builtin_neon_vpaddlq_v: { 3512 Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls; 3513 // The source operand type has twice as many elements of half the size. 3514 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 3515 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 3516 llvm::Type *NarrowTy = 3517 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 3518 llvm::Type *Tys[2] = { Ty, NarrowTy }; 3519 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); 3520 } 3521 case ARM::BI__builtin_neon_vpmax_v: 3522 Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs; 3523 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 3524 case ARM::BI__builtin_neon_vpmin_v: 3525 Int = usgn ? 
Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins; 3526 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 3527 case ARM::BI__builtin_neon_vqabs_v: 3528 case ARM::BI__builtin_neon_vqabsq_v: 3529 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty), 3530 Ops, "vqabs"); 3531 case ARM::BI__builtin_neon_vqadd_v: 3532 case ARM::BI__builtin_neon_vqaddq_v: 3533 Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds; 3534 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd"); 3535 case ARM::BI__builtin_neon_vqdmlal_v: { 3536 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); 3537 Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty), 3538 MulOps, "vqdmlal"); 3539 3540 SmallVector<Value *, 2> AddOps; 3541 AddOps.push_back(Ops[0]); 3542 AddOps.push_back(Mul); 3543 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqadds, Ty), 3544 AddOps, "vqdmlal"); 3545 } 3546 case ARM::BI__builtin_neon_vqdmlsl_v: { 3547 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); 3548 Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty), 3549 MulOps, "vqdmlsl"); 3550 3551 SmallVector<Value *, 2> SubOps; 3552 SubOps.push_back(Ops[0]); 3553 SubOps.push_back(Mul); 3554 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqsubs, Ty), 3555 SubOps, "vqdmlsl"); 3556 } 3557 case ARM::BI__builtin_neon_vqdmulh_v: 3558 case ARM::BI__builtin_neon_vqdmulhq_v: 3559 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty), 3560 Ops, "vqdmulh"); 3561 case ARM::BI__builtin_neon_vqdmull_v: 3562 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty), 3563 Ops, "vqdmull"); 3564 case ARM::BI__builtin_neon_vqmovn_v: 3565 Int = usgn ? 
Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns; 3566 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn"); 3567 case ARM::BI__builtin_neon_vqmovun_v: 3568 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty), 3569 Ops, "vqdmull"); 3570 case ARM::BI__builtin_neon_vqneg_v: 3571 case ARM::BI__builtin_neon_vqnegq_v: 3572 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty), 3573 Ops, "vqneg"); 3574 case ARM::BI__builtin_neon_vqrdmulh_v: 3575 case ARM::BI__builtin_neon_vqrdmulhq_v: 3576 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty), 3577 Ops, "vqrdmulh"); 3578 case ARM::BI__builtin_neon_vqrshl_v: 3579 case ARM::BI__builtin_neon_vqrshlq_v: 3580 Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts; 3581 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl"); 3582 case ARM::BI__builtin_neon_vqrshrn_n_v: 3583 Int = 3584 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 3585 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 3586 1, true); 3587 case ARM::BI__builtin_neon_vqrshrun_n_v: 3588 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 3589 Ops, "vqrshrun_n", 1, true); 3590 case ARM::BI__builtin_neon_vqshl_v: 3591 case ARM::BI__builtin_neon_vqshlq_v: 3592 Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts; 3593 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl"); 3594 case ARM::BI__builtin_neon_vqshl_n_v: 3595 case ARM::BI__builtin_neon_vqshlq_n_v: 3596 Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts; 3597 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 3598 1, false); 3599 case ARM::BI__builtin_neon_vqshlu_n_v: 3600 case ARM::BI__builtin_neon_vqshluq_n_v: 3601 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty), 3602 Ops, "vqshlu", 1, false); 3603 case ARM::BI__builtin_neon_vqshrn_n_v: 3604 Int = usgn ? 
Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; 3605 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 3606 1, true); 3607 case ARM::BI__builtin_neon_vqshrun_n_v: 3608 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), 3609 Ops, "vqshrun_n", 1, true); 3610 case ARM::BI__builtin_neon_vqsub_v: 3611 case ARM::BI__builtin_neon_vqsubq_v: 3612 Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs; 3613 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub"); 3614 case ARM::BI__builtin_neon_vraddhn_v: 3615 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty), 3616 Ops, "vraddhn"); 3617 case ARM::BI__builtin_neon_vrecpe_v: 3618 case ARM::BI__builtin_neon_vrecpeq_v: 3619 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), 3620 Ops, "vrecpe"); 3621 case ARM::BI__builtin_neon_vrecps_v: 3622 case ARM::BI__builtin_neon_vrecpsq_v: 3623 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty), 3624 Ops, "vrecps"); 3625 case ARM::BI__builtin_neon_vrhadd_v: 3626 case ARM::BI__builtin_neon_vrhaddq_v: 3627 Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds; 3628 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd"); 3629 case ARM::BI__builtin_neon_vrshl_v: 3630 case ARM::BI__builtin_neon_vrshlq_v: 3631 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 3632 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl"); 3633 case ARM::BI__builtin_neon_vrshrn_n_v: 3634 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), 3635 Ops, "vrshrn_n", 1, true); 3636 case ARM::BI__builtin_neon_vrshr_n_v: 3637 case ARM::BI__builtin_neon_vrshrq_n_v: 3638 Int = usgn ? 
Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 3639 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true); 3640 case ARM::BI__builtin_neon_vrsqrte_v: 3641 case ARM::BI__builtin_neon_vrsqrteq_v: 3642 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty), 3643 Ops, "vrsqrte"); 3644 case ARM::BI__builtin_neon_vrsqrts_v: 3645 case ARM::BI__builtin_neon_vrsqrtsq_v: 3646 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty), 3647 Ops, "vrsqrts"); 3648 case ARM::BI__builtin_neon_vrsra_n_v: 3649 case ARM::BI__builtin_neon_vrsraq_n_v: 3650 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3651 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3652 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); 3653 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; 3654 Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); 3655 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); 3656 case ARM::BI__builtin_neon_vrsubhn_v: 3657 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty), 3658 Ops, "vrsubhn"); 3659 case ARM::BI__builtin_neon_vshl_v: 3660 case ARM::BI__builtin_neon_vshlq_v: 3661 Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts; 3662 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl"); 3663 case ARM::BI__builtin_neon_vshll_n_v: 3664 Int = usgn ? 
Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls; 3665 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1); 3666 case ARM::BI__builtin_neon_vshl_n_v: 3667 case ARM::BI__builtin_neon_vshlq_n_v: 3668 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); 3669 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], 3670 "vshl_n"); 3671 case ARM::BI__builtin_neon_vshrn_n_v: 3672 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty), 3673 Ops, "vshrn_n", 1, true); 3674 case ARM::BI__builtin_neon_vshr_n_v: 3675 case ARM::BI__builtin_neon_vshrq_n_v: 3676 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, usgn, "vshr_n"); 3677 case ARM::BI__builtin_neon_vsri_n_v: 3678 case ARM::BI__builtin_neon_vsriq_n_v: 3679 rightShift = true; 3680 case ARM::BI__builtin_neon_vsli_n_v: 3681 case ARM::BI__builtin_neon_vsliq_n_v: 3682 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); 3683 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), 3684 Ops, "vsli_n"); 3685 case ARM::BI__builtin_neon_vsra_n_v: 3686 case ARM::BI__builtin_neon_vsraq_n_v: 3687 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3688 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 3689 return Builder.CreateAdd(Ops[0], Ops[1]); 3690 case ARM::BI__builtin_neon_vst1_v: 3691 case ARM::BI__builtin_neon_vst1q_v: 3692 Ops.push_back(Align); 3693 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty), 3694 Ops, ""); 3695 case ARM::BI__builtin_neon_vst1q_lane_v: 3696 // Handle 64-bit integer elements as a special case. Use a shuffle to get 3697 // a one-element vector and avoid poor code for i64 in the backend. 
3698 if (VTy->getElementType()->isIntegerTy(64)) { 3699 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3700 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 3701 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 3702 Ops[2] = Align; 3703 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 3704 Ops[1]->getType()), Ops); 3705 } 3706 // fall through 3707 case ARM::BI__builtin_neon_vst1_lane_v: { 3708 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3709 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 3710 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3711 StoreInst *St = Builder.CreateStore(Ops[1], 3712 Builder.CreateBitCast(Ops[0], Ty)); 3713 St->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); 3714 return St; 3715 } 3716 case ARM::BI__builtin_neon_vst2_v: 3717 case ARM::BI__builtin_neon_vst2q_v: 3718 Ops.push_back(Align); 3719 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty), 3720 Ops, ""); 3721 case ARM::BI__builtin_neon_vst2_lane_v: 3722 case ARM::BI__builtin_neon_vst2q_lane_v: 3723 Ops.push_back(Align); 3724 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty), 3725 Ops, ""); 3726 case ARM::BI__builtin_neon_vst3_v: 3727 case ARM::BI__builtin_neon_vst3q_v: 3728 Ops.push_back(Align); 3729 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty), 3730 Ops, ""); 3731 case ARM::BI__builtin_neon_vst3_lane_v: 3732 case ARM::BI__builtin_neon_vst3q_lane_v: 3733 Ops.push_back(Align); 3734 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty), 3735 Ops, ""); 3736 case ARM::BI__builtin_neon_vst4_v: 3737 case ARM::BI__builtin_neon_vst4q_v: 3738 Ops.push_back(Align); 3739 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty), 3740 Ops, ""); 3741 case ARM::BI__builtin_neon_vst4_lane_v: 3742 case ARM::BI__builtin_neon_vst4q_lane_v: 3743 Ops.push_back(Align); 3744 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty), 3745 Ops, 
""); 3746 case ARM::BI__builtin_neon_vsubhn_v: { 3747 llvm::VectorType *SrcTy = 3748 llvm::VectorType::getExtendedElementVectorType(VTy); 3749 3750 // %sum = add <4 x i32> %lhs, %rhs 3751 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 3752 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 3753 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); 3754 3755 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 3756 Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(), 3757 SrcTy->getScalarSizeInBits() / 2); 3758 ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt); 3759 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); 3760 3761 // %res = trunc <4 x i32> %high to <4 x i16> 3762 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn"); 3763 } 3764 case ARM::BI__builtin_neon_vtbl1_v: 3765 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 3766 Ops, "vtbl1"); 3767 case ARM::BI__builtin_neon_vtbl2_v: 3768 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 3769 Ops, "vtbl2"); 3770 case ARM::BI__builtin_neon_vtbl3_v: 3771 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 3772 Ops, "vtbl3"); 3773 case ARM::BI__builtin_neon_vtbl4_v: 3774 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 3775 Ops, "vtbl4"); 3776 case ARM::BI__builtin_neon_vtbx1_v: 3777 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 3778 Ops, "vtbx1"); 3779 case ARM::BI__builtin_neon_vtbx2_v: 3780 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 3781 Ops, "vtbx2"); 3782 case ARM::BI__builtin_neon_vtbx3_v: 3783 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 3784 Ops, "vtbx3"); 3785 case ARM::BI__builtin_neon_vtbx4_v: 3786 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 3787 Ops, "vtbx4"); 3788 case ARM::BI__builtin_neon_vtst_v: 3789 case ARM::BI__builtin_neon_vtstq_v: { 3790 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3791 Ops[1] = 
Builder.CreateBitCast(Ops[1], Ty); 3792 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 3793 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 3794 ConstantAggregateZero::get(Ty)); 3795 return Builder.CreateSExt(Ops[0], Ty, "vtst"); 3796 } 3797 case ARM::BI__builtin_neon_vtrn_v: 3798 case ARM::BI__builtin_neon_vtrnq_v: { 3799 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 3800 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3801 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3802 Value *SV = 0; 3803 3804 for (unsigned vi = 0; vi != 2; ++vi) { 3805 SmallVector<Constant*, 16> Indices; 3806 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 3807 Indices.push_back(Builder.getInt32(i+vi)); 3808 Indices.push_back(Builder.getInt32(i+e+vi)); 3809 } 3810 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 3811 SV = llvm::ConstantVector::get(Indices); 3812 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); 3813 SV = Builder.CreateStore(SV, Addr); 3814 } 3815 return SV; 3816 } 3817 case ARM::BI__builtin_neon_vuzp_v: 3818 case ARM::BI__builtin_neon_vuzpq_v: { 3819 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 3820 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3821 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3822 Value *SV = 0; 3823 3824 for (unsigned vi = 0; vi != 2; ++vi) { 3825 SmallVector<Constant*, 16> Indices; 3826 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 3827 Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); 3828 3829 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi); 3830 SV = llvm::ConstantVector::get(Indices); 3831 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); 3832 SV = Builder.CreateStore(SV, Addr); 3833 } 3834 return SV; 3835 } 3836 case ARM::BI__builtin_neon_vzip_v: 3837 case ARM::BI__builtin_neon_vzipq_v: { 3838 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 3839 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 
    // (fragment) Tail of the vzip/vzipq case of EmitARMBuiltinExpr: emit both
    // halves of the interleave and store them through the pointer in Ops[0].
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = 0;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // Shuffle mask for half 'vi' of the zip: indices alternate between the
      // first vector ((i + vi*e) >> 1) and the second (same index plus e).
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
      }
      // Store result 'vi' at element offset 'vi' of the output pointer.
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  }
}

/// BuildVector - Build an LLVM vector value from the given scalar operands.
/// If every operand is a Constant, the result is a ConstantVector; otherwise
/// the elements are inserted one by one into an undef vector.  The operand
/// count must be a power of two.
llvm::Value *CodeGenFunction::
BuildVector(ArrayRef<llvm::Value*> Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    SmallVector<llvm::Constant*, 16> CstOps;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      CstOps.push_back(cast<Constant>(Ops[i]));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result =
    llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));

  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));

  return Result;
}

/// EmitX86BuiltinExpr - Emit LLVM IR for an X86 target builtin call.
/// Returns 0 for builtins that have no special expansion here.
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;

  // Find out if any arguments are required to be integer constant expressions.
  // Each set bit of ICEArguments marks an argument position that the
  // builtin's signature requires to be an integer constant expression.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // If this is a normal argument, just emit it as a scalar.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
      continue;
    }

    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    llvm::APSInt Result;
    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
  }

  switch (BuiltinID) {
  default: return 0;
  case X86::BI__builtin_ia32_vec_init_v8qi:
  case X86::BI__builtin_ia32_vec_init_v4hi:
  case X86::BI__builtin_ia32_vec_init_v2si:
    // Assemble the scalars into a vector, then reinterpret it as x86_mmx.
    return Builder.CreateBitCast(BuildVector(Ops),
                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
  case X86::BI__builtin_ia32_vec_ext_v2si:
    // NOTE(review): the lane index is hard-coded to 0; Ops[1] contributes
    // only its type, not its value -- confirm that is intended.
    return Builder.CreateExtractElement(Ops[0],
                                  llvm::ConstantInt::get(Ops[1]->getType(), 0));
  case X86::BI__builtin_ia32_ldmxcsr: {
    // The ldmxcsr intrinsic takes a pointer, so spill the scalar operand to
    // a temporary and pass its address.
    Value *Tmp = CreateMemTemp(E->getArg(0)->getType());
    Builder.CreateStore(Ops[0], Tmp);
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
                              Builder.CreateBitCast(Tmp, Int8PtrTy));
  }
  case X86::BI__builtin_ia32_stmxcsr: {
    // The stmxcsr intrinsic writes through a pointer; give it a temporary
    // and load the stored value back as the result.
    Value *Tmp = CreateMemTemp(E->getType());
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
                       Builder.CreateBitCast(Tmp, Int8PtrTy));
    return Builder.CreateLoad(Tmp, "stmxcsr");
  }
  case X86::BI__builtin_ia32_storehps:
  case X86::BI__builtin_ia32_storelps: {
    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

    // cast val v2i64
    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");

    // extract (0, 1): storelps takes the low i64, storehps the high one.
    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
    llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");

    // cast pointer to i64 & store
    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case X86::BI__builtin_ia32_palignr: {
    // 64-bit (MMX) palignr with an immediate byte shift in Ops[2].
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors less than 9 bytes,
    // emit a shuffle instruction.
    if (shiftVal <= 8) {
      SmallVector<llvm::Constant*, 8> Indices;
      for (unsigned i = 0; i != 8; ++i)
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));

      Value* SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting the pair of input vectors more than 8 but less
    // than 16 bytes, emit a logical right shift of the destination.
    if (shiftVal < 16) {
      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      // The shift amount for psrl.q is expressed in bits: (shiftVal-8)*8.
      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);

      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
    }

    // If palignr is shifting the pair of vectors more than 16 bytes, emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_palignr128: {
    // 128-bit (SSSE3) palignr with an immediate byte shift in Ops[2].
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors less than 17 bytes,
    // emit a shuffle instruction.
    if (shiftVal <= 16) {
      SmallVector<llvm::Constant*, 16> Indices;
      for (unsigned i = 0; i != 16; ++i)
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));

      Value* SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting the pair of input vectors more than 16 but less
    // than 32 bytes, emit a logical right shift of the destination.
    if (shiftVal < 32) {
      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      // Shift amount is passed to psrl.dq in bits, as an i32 immediate.
      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);

      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
    }

    // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_palignr256: {
    // 256-bit (AVX2) palignr; the shift applies per 128-bit lane.
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors less than 17 bytes,
    // emit a shuffle instruction.
    if (shiftVal <= 16) {
      SmallVector<llvm::Constant*, 32> Indices;
      // 256-bit palignr operates on 128-bit lanes so we need to handle that
      for (unsigned l = 0; l != 2; ++l) {
        unsigned LaneStart = l * 16;
        unsigned LaneEnd = (l+1) * 16;
        for (unsigned i = 0; i != 16; ++i) {
          unsigned Idx = shiftVal + i + LaneStart;
          if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand
          Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx));
        }
      }

      Value* SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting the pair of input vectors more than 16 but less
    // than 32 bytes, emit a logical right shift of the destination.
    if (shiftVal < 32) {
      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      // Shift amount is passed to psrl.dq in bits, as an i32 immediate.
      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);

      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq);
      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
    }

    // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_movntps:
  case X86::BI__builtin_ia32_movntps256:
  case X86::BI__builtin_ia32_movntpd:
  case X86::BI__builtin_ia32_movntpd256:
  case X86::BI__builtin_ia32_movntdq:
  case X86::BI__builtin_ia32_movntdq256:
  case X86::BI__builtin_ia32_movnti:
  case X86::BI__builtin_ia32_movnti64: {
    // Emit a plain store tagged with !nontemporal metadata so the backend
    // can select a non-temporal (movnt*) instruction.
    llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
                                           Builder.getInt32(1));

    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(Ops[0],
                                llvm::PointerType::getUnqual(Ops[1]->getType()),
                                      "cast");
    StoreInst *SI = Builder.CreateStore(Ops[1], BC);
    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);

    // If the operand is an integer, we can't assume alignment. Otherwise,
    // assume natural alignment.
    QualType ArgTy = E->getArg(1)->getType();
    unsigned Align;
    if (ArgTy->isIntegerType())
      Align = 1;
    else
      Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
    SI->setAlignment(Align);
    return SI;
  }
  // 3DNow!
  case X86::BI__builtin_ia32_pswapdsf:
  case X86::BI__builtin_ia32_pswapdsi: {
    const char *name = 0;
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    // Both the float and int forms map to the same pswapd intrinsic.
    switch(BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_pswapdsf:
    case X86::BI__builtin_ia32_pswapdsi:
      name = "pswapd";
      ID = Intrinsic::x86_3dnowa_pswapd;
      break;
    }
    // The intrinsic operates on the opaque x86_mmx type; bitcast first.
    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, name);
  }
  case X86::BI__builtin_ia32_rdrand16_step:
  case X86::BI__builtin_ia32_rdrand32_step:
  case X86::BI__builtin_ia32_rdrand64_step:
  case X86::BI__builtin_ia32_rdseed16_step:
  case X86::BI__builtin_ia32_rdseed32_step:
  case X86::BI__builtin_ia32_rdseed64_step: {
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_rdrand16_step:
      ID = Intrinsic::x86_rdrand_16;
      break;
    case X86::BI__builtin_ia32_rdrand32_step:
      ID = Intrinsic::x86_rdrand_32;
      break;
    case X86::BI__builtin_ia32_rdrand64_step:
      ID = Intrinsic::x86_rdrand_64;
      break;
    case X86::BI__builtin_ia32_rdseed16_step:
      ID = Intrinsic::x86_rdseed_16;
      break;
    case X86::BI__builtin_ia32_rdseed32_step:
      ID = Intrinsic::x86_rdseed_32;
      break;
    case X86::BI__builtin_ia32_rdseed64_step:
      ID = Intrinsic::x86_rdseed_64;
      break;
    }

    // The intrinsic returns an aggregate: store field 0 (the random value)
    // through the out-pointer argument and return field 1 (the step result).
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
    Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]);
    return Builder.CreateExtractValue(Call, 1);
  }
  // AVX2 broadcast
  case X86::BI__builtin_ia32_vbroadcastsi256: {
    // The vbroadcasti128 intrinsic takes an i8* operand, so spill the vector
    // argument to a temporary and pass its address.
    Value *VecTmp = CreateMemTemp(E->getArg(0)->getType());
    Builder.CreateStore(Ops[0], VecTmp);
    Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128);
    return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy));
  }
  }
}


/// EmitPPCBuiltinExpr - Emit LLVM IR for a PowerPC (Altivec) target builtin
/// call.  Returns 0 for builtins that have no special handling here.
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

  switch (BuiltinID) {
  default: return 0;

  // vec_ld, vec_lvsl, vec_lvsr
  case PPC::BI__builtin_altivec_lvx:
  case PPC::BI__builtin_altivec_lvxl:
  case PPC::BI__builtin_altivec_lvebx:
  case PPC::BI__builtin_altivec_lvehx:
  case PPC::BI__builtin_altivec_lvewx:
  case PPC::BI__builtin_altivec_lvsl:
  case PPC::BI__builtin_altivec_lvsr:
  {
    // The builtin's operands are (offset, pointer).  Fold them into a single
    // i8* address in Ops[0] and drop the now-unused second operand; the
    // intrinsics each take just the address.
    Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);

    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
    Ops.pop_back();

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
    case PPC::BI__builtin_altivec_lvx:
      ID = Intrinsic::ppc_altivec_lvx;
      break;
    case PPC::BI__builtin_altivec_lvxl:
      ID = Intrinsic::ppc_altivec_lvxl;
      break;
    case PPC::BI__builtin_altivec_lvebx:
      ID = Intrinsic::ppc_altivec_lvebx;
      break;
    case PPC::BI__builtin_altivec_lvehx:
      ID = Intrinsic::ppc_altivec_lvehx;
      break;
    case PPC::BI__builtin_altivec_lvewx:
      ID = Intrinsic::ppc_altivec_lvewx;
      break;
    case PPC::BI__builtin_altivec_lvsl:
      ID = Intrinsic::ppc_altivec_lvsl;
      break;
    case PPC::BI__builtin_altivec_lvsr:
      ID = Intrinsic::ppc_altivec_lvsr;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }

  // vec_st
  case PPC::BI__builtin_altivec_stvx:
  case PPC::BI__builtin_altivec_stvxl:
  case PPC::BI__builtin_altivec_stvebx:
  case PPC::BI__builtin_altivec_stvehx:
  case PPC::BI__builtin_altivec_stvewx:
  {
    // Operands are (value, offset, pointer).  Form the effective i8* address
    // in Ops[1] and drop the raw pointer operand before calling the
    // intrinsic, which takes (value, address).
    Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
    Ops.pop_back();

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported st intrinsic!");
    case PPC::BI__builtin_altivec_stvx:
      ID = Intrinsic::ppc_altivec_stvx;
      break;
    case PPC::BI__builtin_altivec_stvxl:
      ID = Intrinsic::ppc_altivec_stvxl;
      break;
    case PPC::BI__builtin_altivec_stvebx:
      ID = Intrinsic::ppc_altivec_stvebx;
      break;
    case PPC::BI__builtin_altivec_stvehx:
      ID = Intrinsic::ppc_altivec_stvehx;
      break;
    case PPC::BI__builtin_altivec_stvewx:
      ID = Intrinsic::ppc_altivec_stvewx;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }
  }
}