AMDGPUISelLowering.cpp revision 1afaeb1c39125115260b7a06b1dfc8f651d3ac2f
//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This is the parent TargetLowering class for hardware code gen
/// targets.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILIntrinsicInfo.h"
#include "R600MachineFunctionInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"

using namespace llvm;

/// Calling-convention helper referenced by the TableGen'd calling convention
/// (AMDGPUGenCallingConv.inc below): assigns the argument \p ValNo a stack
/// slot sized for \p ValVT at the argument's original alignment and records
/// the location in \p State.  Always reports the value as handled.
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
                          CCValAssign::LocInfo LocInfo,
                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
  unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));

  return true;
}

// Pulls in CC_AMDGPU and friends; must come after allocateStack so the
// generated tables can reference it.
#include "AMDGPUGenCallingConv.inc"

AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
  TargetLowering(TM, new TargetLoweringObjectFileELF()) {

  // Initialize target lowering borrowed from AMDIL
  InitAMDILLowering();

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Library functions. These default to Expand, but we have instructions
  // for them.
  setOperationAction(ISD::FCEIL, MVT::f32, Legal);
  setOperationAction(ISD::FEXP2, MVT::f32, Legal);
  setOperationAction(ISD::FPOW, MVT::f32, Legal);
  setOperationAction(ISD::FLOG2, MVT::f32, Legal);
  setOperationAction(ISD::FABS, MVT::f32, Legal);
  setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
  setOperationAction(ISD::FRINT, MVT::f32, Legal);
  setOperationAction(ISD::FROUND, MVT::f32, Legal);

  // The hardware supports ROTR, but not ROTL
  setOperationAction(ISD::ROTL, MVT::i32, Expand);

  // Lower floating point store/load to integer store/load to reduce the number
  // of patterns in tablegen.
  setOperationAction(ISD::STORE, MVT::f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);

  setOperationAction(ISD::STORE, MVT::v2f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::STORE, MVT::v4f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::STORE, MVT::v8f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);

  setOperationAction(ISD::STORE, MVT::v16f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);

  setOperationAction(ISD::STORE, MVT::f64, Promote);
  AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);

  // Custom lowering of vector stores is required for local address space
  // stores.
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  // XXX: Native v2i32 local address space stores are possible, but not
  // currently implemented.
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);

  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  // XXX: This can be change to Custom, once ExpandVectorStores can
  // handle 64-bit stores.
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);

  // Loads mirror the store promotions above: fp loads become integer loads
  // of the same width.
  setOperationAction(ISD::LOAD, MVT::f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);

  setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);

  setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);

  setOperationAction(ISD::LOAD, MVT::f64, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);

  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);

  setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);

  setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
  setOperationAction(ISD::FNEG, MVT::v4f32, Expand);

  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::MUL, MVT::i64, Expand);

  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
  setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);

  static const MVT::SimpleValueType IntTypes[] = {
    MVT::v2i32, MVT::v4i32
  };
  const size_t NumIntTypes = array_lengthof(IntTypes);

  for (unsigned int x = 0; x < NumIntTypes; ++x) {
    MVT::SimpleValueType VT = IntTypes[x];
    //Expand the following operations for the current type by default
    setOperationAction(ISD::ADD, VT, Expand);
    setOperationAction(ISD::AND, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    setOperationAction(ISD::MUL, VT, Expand);
    setOperationAction(ISD::OR, VT, Expand);
    setOperationAction(ISD::SHL, VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SRL, VT, Expand);
    setOperationAction(ISD::SRA, VT, Expand);
    setOperationAction(ISD::SUB, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::VSELECT, VT, Expand);
    setOperationAction(ISD::XOR, VT, Expand);
  }

  static const MVT::SimpleValueType FloatTypes[] = {
    MVT::v2f32, MVT::v4f32
  };
  const size_t NumFloatTypes = array_lengthof(FloatTypes);

  // Vector float ops are scalarized/expanded by default as well.
  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
    MVT::SimpleValueType VT = FloatTypes[x];
    setOperationAction(ISD::FADD, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Expand);
    setOperationAction(ISD::FFLOOR, VT, Expand);
    setOperationAction(ISD::FMUL, VT, Expand);
    setOperationAction(ISD::FRINT, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
    setOperationAction(ISD::FSUB, VT, Expand);
  }
}
192//===----------------------------------------------------------------------===// 193// Target Information 194//===----------------------------------------------------------------------===// 195 196MVT AMDGPUTargetLowering::getVectorIdxTy() const { 197 return MVT::i32; 198} 199 200bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, 201 EVT CastTy) const { 202 if (LoadTy.getSizeInBits() != CastTy.getSizeInBits()) 203 return true; 204 205 unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits(); 206 unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits(); 207 208 return ((LScalarSize <= CastScalarSize) || 209 (CastScalarSize >= 32) || 210 (LScalarSize < 32)); 211} 212 213//===---------------------------------------------------------------------===// 214// Target Properties 215//===---------------------------------------------------------------------===// 216 217bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { 218 assert(VT.isFloatingPoint()); 219 return VT == MVT::f32; 220} 221 222bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { 223 assert(VT.isFloatingPoint()); 224 return VT == MVT::f32; 225} 226 227//===---------------------------------------------------------------------===// 228// TargetLowering Callbacks 229//===---------------------------------------------------------------------===// 230 231void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State, 232 const SmallVectorImpl<ISD::InputArg> &Ins) const { 233 234 State.AnalyzeFormalArguments(Ins, CC_AMDGPU); 235} 236 237SDValue AMDGPUTargetLowering::LowerReturn( 238 SDValue Chain, 239 CallingConv::ID CallConv, 240 bool isVarArg, 241 const SmallVectorImpl<ISD::OutputArg> &Outs, 242 const SmallVectorImpl<SDValue> &OutVals, 243 SDLoc DL, SelectionDAG &DAG) const { 244 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); 245} 246 247//===---------------------------------------------------------------------===// 248// Target specific lowering 
249//===---------------------------------------------------------------------===// 250 251SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) 252 const { 253 switch (Op.getOpcode()) { 254 default: 255 Op.getNode()->dump(); 256 assert(0 && "Custom lowering code for this" 257 "instruction is not implemented yet!"); 258 break; 259 // AMDIL DAG lowering 260 case ISD::SDIV: return LowerSDIV(Op, DAG); 261 case ISD::SREM: return LowerSREM(Op, DAG); 262 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 263 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 264 // AMDGPU DAG lowering 265 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 266 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); 267 case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); 268 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 269 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); 270 case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); 271 } 272 return Op; 273} 274 275SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, 276 SDValue Op, 277 SelectionDAG &DAG) const { 278 279 const DataLayout *TD = getTargetMachine().getDataLayout(); 280 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); 281 282 assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS); 283 // XXX: What does the value of G->getOffset() mean? 284 assert(G->getOffset() == 0 && 285 "Do not know what to do with an non-zero offset"); 286 287 const GlobalValue *GV = G->getGlobal(); 288 289 unsigned Offset; 290 if (MFI->LocalMemoryObjects.count(GV) == 0) { 291 uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); 292 Offset = MFI->LDSSize; 293 MFI->LocalMemoryObjects[GV] = Offset; 294 // XXX: Account for alignment? 
295 MFI->LDSSize += Size; 296 } else { 297 Offset = MFI->LocalMemoryObjects[GV]; 298 } 299 300 return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace())); 301} 302 303void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG, 304 SmallVectorImpl<SDValue> &Args, 305 unsigned Start, 306 unsigned Count) const { 307 EVT VT = Op.getValueType(); 308 for (unsigned i = Start, e = Start + Count; i != e; ++i) { 309 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 310 VT.getVectorElementType(), 311 Op, DAG.getConstant(i, MVT::i32))); 312 } 313} 314 315SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, 316 SelectionDAG &DAG) const { 317 SmallVector<SDValue, 8> Args; 318 SDValue A = Op.getOperand(0); 319 SDValue B = Op.getOperand(1); 320 321 ExtractVectorElements(A, DAG, Args, 0, 322 A.getValueType().getVectorNumElements()); 323 ExtractVectorElements(B, DAG, Args, 0, 324 B.getValueType().getVectorNumElements()); 325 326 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), 327 &Args[0], Args.size()); 328} 329 330SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, 331 SelectionDAG &DAG) const { 332 333 SmallVector<SDValue, 8> Args; 334 EVT VT = Op.getValueType(); 335 unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 336 ExtractVectorElements(Op.getOperand(0), DAG, Args, Start, 337 VT.getVectorNumElements()); 338 339 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), 340 &Args[0], Args.size()); 341} 342 343SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op, 344 SelectionDAG &DAG) const { 345 346 MachineFunction &MF = DAG.getMachineFunction(); 347 const AMDGPUFrameLowering *TFL = 348 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering()); 349 350 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op); 351 assert(FIN); 352 353 unsigned FrameIndex = FIN->getIndex(); 354 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex); 355 
return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), 356 Op.getValueType()); 357} 358 359SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 360 SelectionDAG &DAG) const { 361 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 362 SDLoc DL(Op); 363 EVT VT = Op.getValueType(); 364 365 switch (IntrinsicID) { 366 default: return Op; 367 case AMDGPUIntrinsic::AMDIL_abs: 368 return LowerIntrinsicIABS(Op, DAG); 369 case AMDGPUIntrinsic::AMDIL_exp: 370 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); 371 case AMDGPUIntrinsic::AMDGPU_lrp: 372 return LowerIntrinsicLRP(Op, DAG); 373 case AMDGPUIntrinsic::AMDIL_fraction: 374 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); 375 case AMDGPUIntrinsic::AMDIL_max: 376 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), 377 Op.getOperand(2)); 378 case AMDGPUIntrinsic::AMDGPU_imax: 379 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), 380 Op.getOperand(2)); 381 case AMDGPUIntrinsic::AMDGPU_umax: 382 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), 383 Op.getOperand(2)); 384 case AMDGPUIntrinsic::AMDIL_min: 385 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), 386 Op.getOperand(2)); 387 case AMDGPUIntrinsic::AMDGPU_imin: 388 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), 389 Op.getOperand(2)); 390 case AMDGPUIntrinsic::AMDGPU_umin: 391 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), 392 Op.getOperand(2)); 393 case AMDGPUIntrinsic::AMDIL_round_nearest: 394 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); 395 } 396} 397 398///IABS(a) = SMAX(sub(0, a), a) 399SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, 400 SelectionDAG &DAG) const { 401 402 SDLoc DL(Op); 403 EVT VT = Op.getValueType(); 404 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 405 Op.getOperand(1)); 406 407 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); 408} 409 
/// Linear Interpolation
/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  // OneSubA = 1.0 - a
  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
                                DAG.getConstantFP(1.0f, MVT::f32),
                                Op.getOperand(1));
  // OneSubAC = (1.0 - a) * c
  SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
                                                    Op.getOperand(3));
  // Result = a * b + (1.0 - a) * c
  return DAG.getNode(ISD::FADD, DL, VT,
      DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
      OneSubAC);
}

/// \brief Generate Min/Max node
///
/// Recognizes an f32 select_cc whose true/false values are its own compare
/// operands and folds it to FMIN/FMAX.  Returns an empty SDValue when the
/// pattern does not match.
SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);

  // Only f32 selects whose operands pair up with the compared values can be
  // turned into min/max.
  if (VT != MVT::f32 ||
      !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
    return SDValue();
  }

  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  switch (CCOpcode) {
  case ISD::SETOEQ:
  case ISD::SETONE:
  case ISD::SETUNE:
  case ISD::SETNE:
  case ISD::SETUEQ:
  case ISD::SETEQ:
  case ISD::SETFALSE:
  case ISD::SETFALSE2:
  case ISD::SETTRUE:
  case ISD::SETTRUE2:
  case ISD::SETUO:
  case ISD::SETO:
    // NOTE(review): no break after this assert, so NDEBUG builds fall
    // through into the less-than group below — confirm this is intended.
    assert(0 && "Operation should already be optimised !");
  case ISD::SETULE:
  case ISD::SETULT:
  case ISD::SETOLE:
  case ISD::SETOLT:
  case ISD::SETLE:
  case ISD::SETLT: {
    // "select (a < b), a, b" is min; with operands swapped it is max.
    if (LHS == True)
      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
    else
      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
  }
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGE:
  case ISD::SETOGE:
  case ISD::SETUGT:
  case ISD::SETOGT: {
    // "select (a > b), a, b" is max; with operands swapped it is min.
    if (LHS == True)
      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
    else
      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
  }
  case ISD::SETCC_INVALID:
    assert(0 && "Invalid setcc condcode !");
  }
  return Op;
}

/// Split a vector load into one scalar extending load per element and
/// reassemble the result with BUILD_VECTOR.
SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
                                              SelectionDAG &DAG) const {
  // NOTE(review): dyn_cast result is dereferenced unchecked; cast<> would
  // assert the node kind — confirm callers only pass LoadSDNodes.
  LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
  EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
  EVT EltVT = Op.getValueType().getVectorElementType();
  EVT PtrVT = Load->getBasePtr().getValueType();
  unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
  SmallVector<SDValue, 8> Loads;
  SDLoc SL(Op);

  for (unsigned i = 0, e = NumElts; i != e; ++i) {
    // Address of element i: base + i * sizeof(element in memory).
    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
                    DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
    Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
                    Load->getChain(), Ptr,
                    MachinePointerInfo(Load->getMemOperand()->getValue()),
                    MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
                    Load->getAlignment()));
  }
  return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
                     Loads.size());
}

/// Pack a small (<= 32-bit) vector truncating store into a single i32-or-
/// smaller integer store.  Returns an empty SDValue when the store is not a
/// vector or is wider than 32 bits.
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
                                               SelectionDAG &DAG) const {
  // NOTE(review): dyn_cast result is dereferenced unchecked; see
  // SplitVectorLoad.
  StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
  EVT MemVT = Store->getMemoryVT();
  unsigned MemBits = MemVT.getSizeInBits();

  // Byte stores are really expensive, so if possible, try to pack
  // 32-bit vector truncating store into an i32 store.
  // XXX: We could also handle optimize other vector bitwidths
  if (!MemVT.isVector() || MemBits > 32) {
    return SDValue();
  }

  SDLoc DL(Op);
  const SDValue &Value = Store->getValue();
  EVT VT = Value.getValueType();
  const SDValue &Ptr = Store->getBasePtr();
  EVT MemEltVT = MemVT.getVectorElementType();
  unsigned MemEltBits = MemEltVT.getSizeInBits();
  unsigned MemNumElements = MemVT.getVectorNumElements();
  // Integer type wide enough to hold the whole packed vector.
  EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
  SDValue Mask;
  switch(MemEltBits) {
  case 8:
    Mask = DAG.getConstant(0xFF, PackedVT);
    break;
  case 16:
    Mask = DAG.getConstant(0xFFFF, PackedVT);
    break;
  default:
    llvm_unreachable("Cannot lower this vector store");
  }
  SDValue PackedValue;
  // OR each masked, shifted element into the packed scalar.
  for (unsigned i = 0; i < MemNumElements; ++i) {
    EVT ElemVT = VT.getVectorElementType();
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
                              DAG.getConstant(i, MVT::i32));
    Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
    Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
    SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
    Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
    if (i == 0) {
      PackedValue = Elt;
    } else {
      PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
    }
  }
  return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
                      MachinePointerInfo(Store->getMemOperand()->getValue()),
                      Store->isVolatile(), Store->isNonTemporal(),
                      Store->getAlignment());
}

/// Split a vector store into one truncating scalar store per element; the
/// chains are joined with a TokenFactor.
SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
                                               SelectionDAG &DAG) const {
  StoreSDNode *Store = cast<StoreSDNode>(Op);
  EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
  EVT EltVT = Store->getValue().getValueType().getVectorElementType();
  EVT PtrVT = Store->getBasePtr().getValueType();
  unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
  SDLoc SL(Op);

  SmallVector<SDValue, 8> Chains;

  for (unsigned i = 0, e = NumElts; i != e; ++i) {
    SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
                              Store->getValue(), DAG.getConstant(i, MVT::i32));
    // Address of element i: base + i * sizeof(element in memory).
    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
                              Store->getBasePtr(),
                            DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
                                            PtrVT));
    Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
                         MachinePointerInfo(Store->getMemOperand()->getValue()),
                         MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
                         Store->getAlignment()));
  }
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
}

/// Custom store lowering: first try to pack the store into a single scalar
/// store; otherwise split vector stores to local/private address spaces.
SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  StoreSDNode *Store = cast<StoreSDNode>(Op);
  if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      Store->getValue().getValueType().isVector()) {
    return SplitVectorStore(Op, DAG);
  }
  return SDValue();
}

/// Expand unsigned i32 divide+remainder via the hardware URECIP
/// approximation followed by error correction and final quotient/remainder
/// adjustment selects.
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Num = Op.getOperand(0);
  SDValue Den = Op.getOperand(1);

  SmallVector<SDValue, 8> Results;

  // RCP = URECIP(Den) = 2^32 / Den + e
  // e is rounding error.
  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);

  // RCP_LO = umulo(RCP, Den)
  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);

  // RCP_HI = mulhu (RCP, Den)
  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);

  // NEG_RCP_LO = -RCP_LO
  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
                                                     RCP_LO);

  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                           NEG_RCP_LO, RCP_LO,
                                           ISD::SETEQ);
  // Calculate the rounding error from the URECIP instruction
  // E = mulhu(ABS_RCP_LO, RCP)
  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);

  // RCP_A_E = RCP + E
  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);

  // RCP_S_E = RCP - E
  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);

  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                     RCP_A_E, RCP_S_E,
                                     ISD::SETEQ);
  // Quotient = mulhu(Tmp0, Num)
  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);

  // Num_S_Remainder = Quotient * Den
  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);

  // Remainder = Num - Num_S_Remainder
  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);

  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
                                                 DAG.getConstant(-1, VT),
                                                 DAG.getConstant(0, VT),
                                                 ISD::SETUGE);
  // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
                                                  Num_S_Remainder,
                                                  DAG.getConstant(-1, VT),
                                                  DAG.getConstant(0, VT),
                                                  ISD::SETUGE);
  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
                                               Remainder_GE_Zero);

  // Calculate Division result:

  // Quotient_A_One = Quotient + 1
  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
                                                         DAG.getConstant(1, VT));

  // Quotient_S_One = Quotient - 1
  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
                                                         DAG.getConstant(1, VT));

  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                    Quotient, Quotient_A_One, ISD::SETEQ);

  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                            Quotient_S_One, Div, ISD::SETEQ);

  // Calculate Rem result:

  // Remainder_S_Den = Remainder - Den
  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);

  // Remainder_A_Den = Remainder + Den
  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);

  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                    Remainder, Remainder_S_Den, ISD::SETEQ);

  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                            Remainder_A_Den, Rem, ISD::SETEQ);
  SDValue Ops[2];
  Ops[0] = Div;
  Ops[1] = Rem;
  return DAG.getMergeValues(Ops, 2, DL);
}

/// Custom-lower i64 -> f32 uint_to_fp as
///   (float)lo + (float)hi * 2^32
/// splitting the i64 into two 32-bit halves.  Other type combinations are
/// left to default lowering.
SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDValue S0 = Op.getOperand(0);
  SDLoc DL(Op);
  if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64)
    return SDValue();

  // f32 uint_to_fp i64
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
                           DAG.getConstant(0, MVT::i32));
  SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
                           DAG.getConstant(1, MVT::i32));
  SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
  FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
                        DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
  return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);

}

//===----------------------------------------------------------------------===//
// Helper functions
730//===----------------------------------------------------------------------===// 731 732void AMDGPUTargetLowering::getOriginalFunctionArgs( 733 SelectionDAG &DAG, 734 const Function *F, 735 const SmallVectorImpl<ISD::InputArg> &Ins, 736 SmallVectorImpl<ISD::InputArg> &OrigIns) const { 737 738 for (unsigned i = 0, e = Ins.size(); i < e; ++i) { 739 if (Ins[i].ArgVT == Ins[i].VT) { 740 OrigIns.push_back(Ins[i]); 741 continue; 742 } 743 744 EVT VT; 745 if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) { 746 // Vector has been split into scalars. 747 VT = Ins[i].ArgVT.getVectorElementType(); 748 } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() && 749 Ins[i].ArgVT.getVectorElementType() != 750 Ins[i].VT.getVectorElementType()) { 751 // Vector elements have been promoted 752 VT = Ins[i].ArgVT; 753 } else { 754 // Vector has been spilt into smaller vectors. 755 VT = Ins[i].VT; 756 } 757 758 ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used, 759 Ins[i].OrigArgIndex, Ins[i].PartOffset); 760 OrigIns.push_back(Arg); 761 } 762} 763 764bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const { 765 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 766 return CFP->isExactlyValue(1.0); 767 } 768 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 769 return C->isAllOnesValue(); 770 } 771 return false; 772} 773 774bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const { 775 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 776 return CFP->getValueAPF().isZero(); 777 } 778 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 779 return C->isNullValue(); 780 } 781 return false; 782} 783 784SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, 785 const TargetRegisterClass *RC, 786 unsigned Reg, EVT VT) const { 787 MachineFunction &MF = DAG.getMachineFunction(); 788 MachineRegisterInfo &MRI = MF.getRegInfo(); 789 unsigned VirtualRegister; 790 if (!MRI.isLiveIn(Reg)) { 791 VirtualRegister = 
MRI.createVirtualRegister(RC); 792 MRI.addLiveIn(Reg, VirtualRegister); 793 } else { 794 VirtualRegister = MRI.getLiveInVirtReg(Reg); 795 } 796 return DAG.getRegister(VirtualRegister, VT); 797} 798 799#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; 800 801const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { 802 switch (Opcode) { 803 default: return 0; 804 // AMDIL DAG nodes 805 NODE_NAME_CASE(CALL); 806 NODE_NAME_CASE(UMUL); 807 NODE_NAME_CASE(DIV_INF); 808 NODE_NAME_CASE(RET_FLAG); 809 NODE_NAME_CASE(BRANCH_COND); 810 811 // AMDGPU DAG nodes 812 NODE_NAME_CASE(DWORDADDR) 813 NODE_NAME_CASE(FRACT) 814 NODE_NAME_CASE(FMAX) 815 NODE_NAME_CASE(SMAX) 816 NODE_NAME_CASE(UMAX) 817 NODE_NAME_CASE(FMIN) 818 NODE_NAME_CASE(SMIN) 819 NODE_NAME_CASE(UMIN) 820 NODE_NAME_CASE(URECIP) 821 NODE_NAME_CASE(EXPORT) 822 NODE_NAME_CASE(CONST_ADDRESS) 823 NODE_NAME_CASE(REGISTER_LOAD) 824 NODE_NAME_CASE(REGISTER_STORE) 825 NODE_NAME_CASE(LOAD_CONSTANT) 826 NODE_NAME_CASE(LOAD_INPUT) 827 NODE_NAME_CASE(SAMPLE) 828 NODE_NAME_CASE(SAMPLEB) 829 NODE_NAME_CASE(SAMPLED) 830 NODE_NAME_CASE(SAMPLEL) 831 NODE_NAME_CASE(STORE_MSKOR) 832 NODE_NAME_CASE(TBUFFER_STORE_FORMAT) 833 } 834} 835