AMDGPUISelLowering.cpp revision 5464a92861c76f1e091cd219dee71ce9858eb195
1//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file 11/// \brief This is the parent TargetLowering class for hardware code gen 12/// targets. 13// 14//===----------------------------------------------------------------------===// 15 16#include "AMDGPUISelLowering.h" 17#include "AMDGPU.h" 18#include "AMDGPURegisterInfo.h" 19#include "AMDGPUSubtarget.h" 20#include "AMDILIntrinsicInfo.h" 21#include "R600MachineFunctionInfo.h" 22#include "SIMachineFunctionInfo.h" 23#include "llvm/CodeGen/CallingConvLower.h" 24#include "llvm/CodeGen/MachineFunction.h" 25#include "llvm/CodeGen/MachineRegisterInfo.h" 26#include "llvm/CodeGen/SelectionDAG.h" 27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 28#include "llvm/IR/DataLayout.h" 29 30using namespace llvm; 31 32#include "AMDGPUGenCallingConv.inc" 33 34AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : 35 TargetLowering(TM, new TargetLoweringObjectFileELF()) { 36 37 // Initialize target lowering borrowed from AMDIL 38 InitAMDILLowering(); 39 40 // We need to custom lower some of the intrinsics 41 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 42 43 // Library functions. These default to Expand, but we have instructions 44 // for them. 45 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 46 setOperationAction(ISD::FEXP2, MVT::f32, Legal); 47 setOperationAction(ISD::FPOW, MVT::f32, Legal); 48 setOperationAction(ISD::FLOG2, MVT::f32, Legal); 49 setOperationAction(ISD::FABS, MVT::f32, Legal); 50 setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 51 setOperationAction(ISD::FRINT, MVT::f32, Legal); 52 53 // The hardware supports ROTR, but not ROTL 54 setOperationAction(ISD::ROTL, MVT::i32, Expand); 55 56 // Lower floating point store/load to integer store/load to reduce the number 57 // of patterns in tablegen. 58 setOperationAction(ISD::STORE, MVT::f32, Promote); 59 AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); 60 61 setOperationAction(ISD::STORE, MVT::v2f32, Promote); 62 AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32); 63 64 setOperationAction(ISD::STORE, MVT::v4f32, Promote); 65 AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); 66 67 setOperationAction(ISD::STORE, MVT::f64, Promote); 68 AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); 69 70 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); 71 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); 72 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); 73 // XXX: This can be change to Custom, once ExpandVectorStores can 74 // handle 64-bit stores. 75 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); 76 77 setOperationAction(ISD::LOAD, MVT::f32, Promote); 78 AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); 79 80 setOperationAction(ISD::LOAD, MVT::v2f32, Promote); 81 AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32); 82 83 setOperationAction(ISD::LOAD, MVT::v4f32, Promote); 84 AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); 85 86 setOperationAction(ISD::LOAD, MVT::f64, Promote); 87 AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64); 88 89 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); 90 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom); 91 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom); 92 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom); 93 94 setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand); 95 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand); 96 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand); 97 setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand); 98 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand); 99 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand); 100 setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand); 101 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand); 102 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand); 103 setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand); 104 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand); 105 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand); 106 107 setOperationAction(ISD::FNEG, MVT::v2f32, Expand); 108 setOperationAction(ISD::FNEG, MVT::v4f32, Expand); 109 110 setOperationAction(ISD::MUL, MVT::i64, Expand); 111 112 setOperationAction(ISD::UDIV, MVT::i32, Expand); 113 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 114 setOperationAction(ISD::UREM, MVT::i32, Expand); 115 setOperationAction(ISD::VSELECT, MVT::v2f32, Expand); 116 setOperationAction(ISD::VSELECT, MVT::v4f32, Expand); 117 118 static const MVT::SimpleValueType IntTypes[] = { 119 MVT::v2i32, MVT::v4i32 120 }; 121 const size_t NumIntTypes = array_lengthof(IntTypes); 122 123 for (unsigned int x = 0; x < NumIntTypes; ++x) { 124 MVT::SimpleValueType VT = IntTypes[x]; 125 //Expand the following operations for the current type by default 126 setOperationAction(ISD::ADD, VT, Expand); 127 setOperationAction(ISD::AND, VT, Expand); 128 setOperationAction(ISD::FP_TO_SINT, VT, Expand); 129 setOperationAction(ISD::FP_TO_UINT, VT, Expand); 130 setOperationAction(ISD::MUL, VT, Expand); 131 setOperationAction(ISD::OR, VT, Expand); 132 setOperationAction(ISD::SHL, VT, Expand); 133 setOperationAction(ISD::SINT_TO_FP, VT, Expand); 134 setOperationAction(ISD::SRL, VT, Expand); 135 setOperationAction(ISD::SRA, VT, Expand); 136 setOperationAction(ISD::SUB, VT, Expand); 137 setOperationAction(ISD::UDIV, VT, Expand); 138 setOperationAction(ISD::UINT_TO_FP, VT, Expand); 139 setOperationAction(ISD::UREM, VT, Expand); 140 setOperationAction(ISD::VSELECT, VT, Expand); 141 setOperationAction(ISD::XOR, VT, Expand); 142 } 143 144 static const MVT::SimpleValueType FloatTypes[] = { 145 MVT::v2f32, MVT::v4f32 146 }; 147 const size_t NumFloatTypes = array_lengthof(FloatTypes); 148 149 for (unsigned int x = 0; x < NumFloatTypes; ++x) { 150 MVT::SimpleValueType VT = FloatTypes[x]; 151 setOperationAction(ISD::FADD, VT, Expand); 152 setOperationAction(ISD::FDIV, VT, Expand); 153 setOperationAction(ISD::FFLOOR, VT, Expand); 154 setOperationAction(ISD::FMUL, VT, Expand); 155 setOperationAction(ISD::FRINT, VT, Expand); 156 setOperationAction(ISD::FSUB, VT, Expand); 157 } 158} 159 160//===----------------------------------------------------------------------===// 161// Target Information 162//===----------------------------------------------------------------------===// 163 164MVT AMDGPUTargetLowering::getVectorIdxTy() const { 165 return MVT::i32; 166} 167 168 169//===---------------------------------------------------------------------===// 170// Target Properties 171//===---------------------------------------------------------------------===// 172 173bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { 174 assert(VT.isFloatingPoint()); 175 return VT == MVT::f32; 176} 177 178bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { 179 assert(VT.isFloatingPoint()); 180 return VT == MVT::f32; 181} 182 183//===---------------------------------------------------------------------===// 184// TargetLowering Callbacks 185//===---------------------------------------------------------------------===// 186 187void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State, 188 const SmallVectorImpl<ISD::InputArg> &Ins) const { 189 190 State.AnalyzeFormalArguments(Ins, CC_AMDGPU); 191} 192 193SDValue AMDGPUTargetLowering::LowerReturn( 194 SDValue Chain, 195 CallingConv::ID CallConv, 196 bool isVarArg, 197 const SmallVectorImpl<ISD::OutputArg> &Outs, 198 const SmallVectorImpl<SDValue> &OutVals, 199 SDLoc DL, SelectionDAG &DAG) const { 200 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); 201} 202 203//===---------------------------------------------------------------------===// 204// Target specific lowering 205//===---------------------------------------------------------------------===// 206 207SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) 208 const { 209 switch (Op.getOpcode()) { 210 default: 211 Op.getNode()->dump(); 212 assert(0 && "Custom lowering code for this" 213 "instruction is not implemented yet!"); 214 break; 215 // AMDIL DAG lowering 216 case ISD::SDIV: return LowerSDIV(Op, DAG); 217 case ISD::SREM: return LowerSREM(Op, DAG); 218 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 219 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 220 // AMDGPU DAG lowering 221 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 222 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); 223 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 224 case ISD::STORE: return LowerVectorStore(Op, DAG); 225 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); 226 } 227 return Op; 228} 229 230SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, 231 SDValue Op, 232 SelectionDAG &DAG) const { 233 234 const DataLayout *TD = getTargetMachine().getDataLayout(); 235 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); 236 // XXX: What does the value of G->getOffset() mean? 237 assert(G->getOffset() == 0 && 238 "Do not know what to do with an non-zero offset"); 239 240 unsigned Offset = MFI->LDSSize; 241 const GlobalValue *GV = G->getGlobal(); 242 uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); 243 244 // XXX: Account for alignment? 245 MFI->LDSSize += Size; 246 247 return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32); 248} 249 250void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG, 251 SmallVectorImpl<SDValue> &Args, 252 unsigned Start, 253 unsigned Count) const { 254 EVT VT = Op.getValueType(); 255 for (unsigned i = Start, e = Start + Count; i != e; ++i) { 256 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 257 VT.getVectorElementType(), 258 Op, DAG.getConstant(i, MVT::i32))); 259 } 260} 261 262SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, 263 SelectionDAG &DAG) const { 264 SmallVector<SDValue, 8> Args; 265 SDValue A = Op.getOperand(0); 266 SDValue B = Op.getOperand(1); 267 268 ExtractVectorElements(A, DAG, Args, 0, 269 A.getValueType().getVectorNumElements()); 270 ExtractVectorElements(B, DAG, Args, 0, 271 B.getValueType().getVectorNumElements()); 272 273 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), 274 &Args[0], Args.size()); 275} 276 277SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, 278 SelectionDAG &DAG) const { 279 280 SmallVector<SDValue, 8> Args; 281 EVT VT = Op.getValueType(); 282 unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 283 ExtractVectorElements(Op.getOperand(0), DAG, Args, Start, 284 VT.getVectorNumElements()); 285 286 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), 287 &Args[0], Args.size()); 288} 289 290 291SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 292 SelectionDAG &DAG) const { 293 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 294 SDLoc DL(Op); 295 EVT VT = Op.getValueType(); 296 297 switch (IntrinsicID) { 298 default: return Op; 299 case AMDGPUIntrinsic::AMDIL_abs: 300 return LowerIntrinsicIABS(Op, DAG); 301 case AMDGPUIntrinsic::AMDIL_exp: 302 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); 303 case AMDGPUIntrinsic::AMDGPU_lrp: 304 return LowerIntrinsicLRP(Op, DAG); 305 case AMDGPUIntrinsic::AMDIL_fraction: 306 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); 307 case AMDGPUIntrinsic::AMDIL_max: 308 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), 309 Op.getOperand(2)); 310 case AMDGPUIntrinsic::AMDGPU_imax: 311 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), 312 Op.getOperand(2)); 313 case AMDGPUIntrinsic::AMDGPU_umax: 314 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), 315 Op.getOperand(2)); 316 case AMDGPUIntrinsic::AMDIL_min: 317 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), 318 Op.getOperand(2)); 319 case AMDGPUIntrinsic::AMDGPU_imin: 320 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), 321 Op.getOperand(2)); 322 case AMDGPUIntrinsic::AMDGPU_umin: 323 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), 324 Op.getOperand(2)); 325 case AMDGPUIntrinsic::AMDIL_round_nearest: 326 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); 327 } 328} 329 330///IABS(a) = SMAX(sub(0, a), a) 331SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, 332 SelectionDAG &DAG) const { 333 334 SDLoc DL(Op); 335 EVT VT = Op.getValueType(); 336 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 337 Op.getOperand(1)); 338 339 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); 340} 341 342/// Linear Interpolation 343/// LRP(a, b, c) = muladd(a, b, (1 - a) * c) 344SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, 345 SelectionDAG &DAG) const { 346 SDLoc DL(Op); 347 EVT VT = Op.getValueType(); 348 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, 349 DAG.getConstantFP(1.0f, MVT::f32), 350 Op.getOperand(1)); 351 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, 352 Op.getOperand(3)); 353 return DAG.getNode(ISD::FADD, DL, VT, 354 DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)), 355 OneSubAC); 356} 357 358/// \brief Generate Min/Max node 359SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, 360 SelectionDAG &DAG) const { 361 SDLoc DL(Op); 362 EVT VT = Op.getValueType(); 363 364 SDValue LHS = Op.getOperand(0); 365 SDValue RHS = Op.getOperand(1); 366 SDValue True = Op.getOperand(2); 367 SDValue False = Op.getOperand(3); 368 SDValue CC = Op.getOperand(4); 369 370 if (VT != MVT::f32 || 371 !((LHS == True && RHS == False) || (LHS == False && RHS == True))) { 372 return SDValue(); 373 } 374 375 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 376 switch (CCOpcode) { 377 case ISD::SETOEQ: 378 case ISD::SETONE: 379 case ISD::SETUNE: 380 case ISD::SETNE: 381 case ISD::SETUEQ: 382 case ISD::SETEQ: 383 case ISD::SETFALSE: 384 case ISD::SETFALSE2: 385 case ISD::SETTRUE: 386 case ISD::SETTRUE2: 387 case ISD::SETUO: 388 case ISD::SETO: 389 assert(0 && "Operation should already be optimised !"); 390 case ISD::SETULE: 391 case ISD::SETULT: 392 case ISD::SETOLE: 393 case ISD::SETOLT: 394 case ISD::SETLE: 395 case ISD::SETLT: { 396 if (LHS == True) 397 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 398 else 399 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 400 } 401 case ISD::SETGT: 402 case ISD::SETGE: 403 case ISD::SETUGE: 404 case ISD::SETOGE: 405 case ISD::SETUGT: 406 case ISD::SETOGT: { 407 if (LHS == True) 408 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 409 else 410 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 411 } 412 case ISD::SETCC_INVALID: 413 assert(0 && "Invalid setcc condcode !"); 414 } 415 return Op; 416} 417 418 419 420SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, 421 SelectionDAG &DAG) const { 422 SDLoc DL(Op); 423 EVT VT = Op.getValueType(); 424 425 SDValue Num = Op.getOperand(0); 426 SDValue Den = Op.getOperand(1); 427 428 SmallVector<SDValue, 8> Results; 429 430 // RCP = URECIP(Den) = 2^32 / Den + e 431 // e is rounding error. 432 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); 433 434 // RCP_LO = umulo(RCP, Den) */ 435 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); 436 437 // RCP_HI = mulhu (RCP, Den) */ 438 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); 439 440 // NEG_RCP_LO = -RCP_LO 441 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 442 RCP_LO); 443 444 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO) 445 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 446 NEG_RCP_LO, RCP_LO, 447 ISD::SETEQ); 448 // Calculate the rounding error from the URECIP instruction 449 // E = mulhu(ABS_RCP_LO, RCP) 450 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); 451 452 // RCP_A_E = RCP + E 453 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); 454 455 // RCP_S_E = RCP - E 456 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); 457 458 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) 459 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 460 RCP_A_E, RCP_S_E, 461 ISD::SETEQ); 462 // Quotient = mulhu(Tmp0, Num) 463 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); 464 465 // Num_S_Remainder = Quotient * Den 466 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); 467 468 // Remainder = Num - Num_S_Remainder 469 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); 470 471 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) 472 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, 473 DAG.getConstant(-1, VT), 474 DAG.getConstant(0, VT), 475 ISD::SETGE); 476 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0) 477 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder, 478 DAG.getConstant(0, VT), 479 DAG.getConstant(-1, VT), 480 DAG.getConstant(0, VT), 481 ISD::SETGE); 482 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero 483 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, 484 Remainder_GE_Zero); 485 486 // Calculate Division result: 487 488 // Quotient_A_One = Quotient + 1 489 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, 490 DAG.getConstant(1, VT)); 491 492 // Quotient_S_One = Quotient - 1 493 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, 494 DAG.getConstant(1, VT)); 495 496 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) 497 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 498 Quotient, Quotient_A_One, ISD::SETEQ); 499 500 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) 501 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 502 Quotient_S_One, Div, ISD::SETEQ); 503 504 // Calculate Rem result: 505 506 // Remainder_S_Den = Remainder - Den 507 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); 508 509 // Remainder_A_Den = Remainder + Den 510 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); 511 512 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) 513 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 514 Remainder, Remainder_S_Den, ISD::SETEQ); 515 516 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) 517 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 518 Remainder_A_Den, Rem, ISD::SETEQ); 519 SDValue Ops[2]; 520 Ops[0] = Div; 521 Ops[1] = Rem; 522 return DAG.getMergeValues(Ops, 2, DL); 523} 524 525SDValue AMDGPUTargetLowering::LowerVectorStore(const SDValue &Op, 526 SelectionDAG &DAG) const { 527 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op); 528 EVT MemVT = Store->getMemoryVT(); 529 unsigned MemBits = MemVT.getSizeInBits(); 530 531 // Byte stores are really expensive, so if possible, try to pack 532 // 32-bit vector truncatating store into an i32 store. 533 // XXX: We could also handle optimize other vector bitwidths 534 if (!MemVT.isVector() || MemBits > 32) { 535 return SDValue(); 536 } 537 538 SDLoc DL(Op); 539 const SDValue &Value = Store->getValue(); 540 EVT VT = Value.getValueType(); 541 const SDValue &Ptr = Store->getBasePtr(); 542 EVT MemEltVT = MemVT.getVectorElementType(); 543 unsigned MemEltBits = MemEltVT.getSizeInBits(); 544 unsigned MemNumElements = MemVT.getVectorNumElements(); 545 EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); 546 SDValue Mask; 547 switch(MemEltBits) { 548 case 8: 549 Mask = DAG.getConstant(0xFF, PackedVT); 550 break; 551 case 16: 552 Mask = DAG.getConstant(0xFFFF, PackedVT); 553 break; 554 default: 555 llvm_unreachable("Cannot lower this vector store"); 556 } 557 SDValue PackedValue; 558 for (unsigned i = 0; i < MemNumElements; ++i) { 559 EVT ElemVT = VT.getVectorElementType(); 560 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value, 561 DAG.getConstant(i, MVT::i32)); 562 Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT); 563 Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask); 564 SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT); 565 Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift); 566 if (i == 0) { 567 PackedValue = Elt; 568 } else { 569 PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt); 570 } 571 } 572 return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr, 573 MachinePointerInfo(Store->getMemOperand()->getValue()), 574 Store->isVolatile(), Store->isNonTemporal(), 575 Store->getAlignment()); 576} 577 578//===----------------------------------------------------------------------===// 579// Helper functions 580//===----------------------------------------------------------------------===// 581 582bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const { 583 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 584 return CFP->isExactlyValue(1.0); 585 } 586 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 587 return C->isAllOnesValue(); 588 } 589 return false; 590} 591 592bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const { 593 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 594 return CFP->getValueAPF().isZero(); 595 } 596 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 597 return C->isNullValue(); 598 } 599 return false; 600} 601 602SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, 603 const TargetRegisterClass *RC, 604 unsigned Reg, EVT VT) const { 605 MachineFunction &MF = DAG.getMachineFunction(); 606 MachineRegisterInfo &MRI = MF.getRegInfo(); 607 unsigned VirtualRegister; 608 if (!MRI.isLiveIn(Reg)) { 609 VirtualRegister = MRI.createVirtualRegister(RC); 610 MRI.addLiveIn(Reg, VirtualRegister); 611 } else { 612 VirtualRegister = MRI.getLiveInVirtReg(Reg); 613 } 614 return DAG.getRegister(VirtualRegister, VT); 615} 616 617#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; 618 619const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { 620 switch (Opcode) { 621 default: return 0; 622 // AMDIL DAG nodes 623 NODE_NAME_CASE(CALL); 624 NODE_NAME_CASE(UMUL); 625 NODE_NAME_CASE(DIV_INF); 626 NODE_NAME_CASE(RET_FLAG); 627 NODE_NAME_CASE(BRANCH_COND); 628 629 // AMDGPU DAG nodes 630 NODE_NAME_CASE(DWORDADDR) 631 NODE_NAME_CASE(FRACT) 632 NODE_NAME_CASE(FMAX) 633 NODE_NAME_CASE(SMAX) 634 NODE_NAME_CASE(UMAX) 635 NODE_NAME_CASE(FMIN) 636 NODE_NAME_CASE(SMIN) 637 NODE_NAME_CASE(UMIN) 638 NODE_NAME_CASE(URECIP) 639 NODE_NAME_CASE(EXPORT) 640 NODE_NAME_CASE(CONST_ADDRESS) 641 NODE_NAME_CASE(REGISTER_LOAD) 642 NODE_NAME_CASE(REGISTER_STORE) 643 NODE_NAME_CASE(LOAD_CONSTANT) 644 NODE_NAME_CASE(LOAD_INPUT) 645 NODE_NAME_CASE(SAMPLE) 646 NODE_NAME_CASE(SAMPLEB) 647 NODE_NAME_CASE(SAMPLED) 648 NODE_NAME_CASE(SAMPLEL) 649 NODE_NAME_CASE(STORE_MSKOR) 650 } 651} 652