AMDGPUISelLowering.cpp revision 30d84d8dfa0433088d541c66b92af0da3855bc9c
1//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file 11/// \brief This is the parent TargetLowering class for hardware code gen 12/// targets. 13// 14//===----------------------------------------------------------------------===// 15 16#include "AMDGPUISelLowering.h" 17#include "AMDGPU.h" 18#include "AMDGPURegisterInfo.h" 19#include "AMDGPUSubtarget.h" 20#include "AMDILIntrinsicInfo.h" 21#include "R600MachineFunctionInfo.h" 22#include "SIMachineFunctionInfo.h" 23#include "llvm/CodeGen/CallingConvLower.h" 24#include "llvm/CodeGen/MachineFunction.h" 25#include "llvm/CodeGen/MachineRegisterInfo.h" 26#include "llvm/CodeGen/SelectionDAG.h" 27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 28#include "llvm/IR/DataLayout.h" 29 30using namespace llvm; 31 32#include "AMDGPUGenCallingConv.inc" 33 34AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : 35 TargetLowering(TM, new TargetLoweringObjectFileELF()) { 36 37 // Initialize target lowering borrowed from AMDIL 38 InitAMDILLowering(); 39 40 // We need to custom lower some of the intrinsics 41 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 42 43 // Library functions. These default to Expand, but we have instructions 44 // for them. 45 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 46 setOperationAction(ISD::FEXP2, MVT::f32, Legal); 47 setOperationAction(ISD::FPOW, MVT::f32, Legal); 48 setOperationAction(ISD::FLOG2, MVT::f32, Legal); 49 setOperationAction(ISD::FABS, MVT::f32, Legal); 50 setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 51 setOperationAction(ISD::FRINT, MVT::f32, Legal); 52 53 // The hardware supports ROTR, but not ROTL 54 setOperationAction(ISD::ROTL, MVT::i32, Expand); 55 56 // Lower floating point store/load to integer store/load to reduce the number 57 // of patterns in tablegen. 58 setOperationAction(ISD::STORE, MVT::f32, Promote); 59 AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); 60 61 setOperationAction(ISD::STORE, MVT::v2f32, Promote); 62 AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32); 63 64 setOperationAction(ISD::STORE, MVT::v4f32, Promote); 65 AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); 66 67 setOperationAction(ISD::STORE, MVT::f64, Promote); 68 AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); 69 70 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); 71 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); 72 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); 73 // XXX: This can be change to Custom, once ExpandVectorStores can 74 // handle 64-bit stores. 75 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); 76 77 setOperationAction(ISD::LOAD, MVT::f32, Promote); 78 AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); 79 80 setOperationAction(ISD::LOAD, MVT::v2f32, Promote); 81 AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32); 82 83 setOperationAction(ISD::LOAD, MVT::v4f32, Promote); 84 AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); 85 86 setOperationAction(ISD::LOAD, MVT::f64, Promote); 87 AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64); 88 89 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); 90 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom); 91 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom); 92 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom); 93 94 setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand); 95 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand); 96 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand); 97 setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand); 98 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand); 99 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand); 100 setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand); 101 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand); 102 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand); 103 setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand); 104 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand); 105 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand); 106 107 setOperationAction(ISD::FNEG, MVT::v2f32, Expand); 108 setOperationAction(ISD::FNEG, MVT::v4f32, Expand); 109 110 setOperationAction(ISD::MUL, MVT::i64, Expand); 111 112 setOperationAction(ISD::UDIV, MVT::i32, Expand); 113 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 114 setOperationAction(ISD::UREM, MVT::i32, Expand); 115 setOperationAction(ISD::VSELECT, MVT::v2f32, Expand); 116 setOperationAction(ISD::VSELECT, MVT::v4f32, Expand); 117 118 static const int types[] = { 119 (int)MVT::v2i32, 120 (int)MVT::v4i32 121 }; 122 const size_t NumTypes = array_lengthof(types); 123 124 for (unsigned int x = 0; x < NumTypes; ++x) { 125 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; 126 //Expand the following operations for the current type by default 127 setOperationAction(ISD::ADD, VT, Expand); 128 setOperationAction(ISD::AND, VT, Expand); 129 setOperationAction(ISD::FP_TO_SINT, VT, Expand); 130 setOperationAction(ISD::FP_TO_UINT, VT, Expand); 131 setOperationAction(ISD::MUL, VT, Expand); 132 setOperationAction(ISD::OR, VT, Expand); 133 setOperationAction(ISD::SHL, VT, Expand); 134 setOperationAction(ISD::SINT_TO_FP, VT, Expand); 135 setOperationAction(ISD::SRL, VT, Expand); 136 setOperationAction(ISD::SRA, VT, Expand); 137 setOperationAction(ISD::SUB, VT, Expand); 138 setOperationAction(ISD::UDIV, VT, Expand); 139 setOperationAction(ISD::UINT_TO_FP, VT, Expand); 140 setOperationAction(ISD::UREM, VT, Expand); 141 setOperationAction(ISD::VSELECT, VT, Expand); 142 setOperationAction(ISD::XOR, VT, Expand); 143 } 144} 145 146//===----------------------------------------------------------------------===// 147// Target Information 148//===----------------------------------------------------------------------===// 149 150MVT AMDGPUTargetLowering::getVectorIdxTy() const { 151 return MVT::i32; 152} 153 154 155//===---------------------------------------------------------------------===// 156// Target Properties 157//===---------------------------------------------------------------------===// 158 159bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { 160 assert(VT.isFloatingPoint()); 161 return VT == MVT::f32; 162} 163 164bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { 165 assert(VT.isFloatingPoint()); 166 return VT == MVT::f32; 167} 168 169//===---------------------------------------------------------------------===// 170// TargetLowering Callbacks 171//===---------------------------------------------------------------------===// 172 173void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State, 174 const SmallVectorImpl<ISD::InputArg> &Ins) const { 175 176 State.AnalyzeFormalArguments(Ins, CC_AMDGPU); 177} 178 179SDValue AMDGPUTargetLowering::LowerReturn( 180 SDValue Chain, 181 CallingConv::ID CallConv, 182 bool isVarArg, 183 const SmallVectorImpl<ISD::OutputArg> &Outs, 184 const SmallVectorImpl<SDValue> &OutVals, 185 SDLoc DL, SelectionDAG &DAG) const { 186 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); 187} 188 189//===---------------------------------------------------------------------===// 190// Target specific lowering 191//===---------------------------------------------------------------------===// 192 193SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) 194 const { 195 switch (Op.getOpcode()) { 196 default: 197 Op.getNode()->dump(); 198 assert(0 && "Custom lowering code for this" 199 "instruction is not implemented yet!"); 200 break; 201 // AMDIL DAG lowering 202 case ISD::SDIV: return LowerSDIV(Op, DAG); 203 case ISD::SREM: return LowerSREM(Op, DAG); 204 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 205 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 206 // AMDGPU DAG lowering 207 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 208 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); 209 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 210 case ISD::STORE: return LowerVectorStore(Op, DAG); 211 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); 212 } 213 return Op; 214} 215 216SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, 217 SDValue Op, 218 SelectionDAG &DAG) const { 219 220 const DataLayout *TD = getTargetMachine().getDataLayout(); 221 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); 222 // XXX: What does the value of G->getOffset() mean? 223 assert(G->getOffset() == 0 && 224 "Do not know what to do with an non-zero offset"); 225 226 unsigned Offset = MFI->LDSSize; 227 const GlobalValue *GV = G->getGlobal(); 228 uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); 229 230 // XXX: Account for alignment? 231 MFI->LDSSize += Size; 232 233 return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32); 234} 235 236void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG, 237 SmallVectorImpl<SDValue> &Args, 238 unsigned Start, 239 unsigned Count) const { 240 EVT VT = Op.getValueType(); 241 for (unsigned i = Start, e = Start + Count; i != e; ++i) { 242 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 243 VT.getVectorElementType(), 244 Op, DAG.getConstant(i, MVT::i32))); 245 } 246} 247 248SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, 249 SelectionDAG &DAG) const { 250 SmallVector<SDValue, 8> Args; 251 SDValue A = Op.getOperand(0); 252 SDValue B = Op.getOperand(1); 253 254 ExtractVectorElements(A, DAG, Args, 0, 255 A.getValueType().getVectorNumElements()); 256 ExtractVectorElements(B, DAG, Args, 0, 257 B.getValueType().getVectorNumElements()); 258 259 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), 260 &Args[0], Args.size()); 261} 262 263SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, 264 SelectionDAG &DAG) const { 265 266 SmallVector<SDValue, 8> Args; 267 EVT VT = Op.getValueType(); 268 unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 269 ExtractVectorElements(Op.getOperand(0), DAG, Args, Start, 270 VT.getVectorNumElements()); 271 272 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), 273 &Args[0], Args.size()); 274} 275 276 277SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 278 SelectionDAG &DAG) const { 279 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 280 SDLoc DL(Op); 281 EVT VT = Op.getValueType(); 282 283 switch (IntrinsicID) { 284 default: return Op; 285 case AMDGPUIntrinsic::AMDIL_abs: 286 return LowerIntrinsicIABS(Op, DAG); 287 case AMDGPUIntrinsic::AMDIL_exp: 288 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); 289 case AMDGPUIntrinsic::AMDGPU_lrp: 290 return LowerIntrinsicLRP(Op, DAG); 291 case AMDGPUIntrinsic::AMDIL_fraction: 292 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); 293 case AMDGPUIntrinsic::AMDIL_max: 294 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), 295 Op.getOperand(2)); 296 case AMDGPUIntrinsic::AMDGPU_imax: 297 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), 298 Op.getOperand(2)); 299 case AMDGPUIntrinsic::AMDGPU_umax: 300 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), 301 Op.getOperand(2)); 302 case AMDGPUIntrinsic::AMDIL_min: 303 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), 304 Op.getOperand(2)); 305 case AMDGPUIntrinsic::AMDGPU_imin: 306 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), 307 Op.getOperand(2)); 308 case AMDGPUIntrinsic::AMDGPU_umin: 309 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), 310 Op.getOperand(2)); 311 case AMDGPUIntrinsic::AMDIL_round_nearest: 312 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); 313 } 314} 315 316///IABS(a) = SMAX(sub(0, a), a) 317SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, 318 SelectionDAG &DAG) const { 319 320 SDLoc DL(Op); 321 EVT VT = Op.getValueType(); 322 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 323 Op.getOperand(1)); 324 325 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); 326} 327 328/// Linear Interpolation 329/// LRP(a, b, c) = muladd(a, b, (1 - a) * c) 330SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, 331 SelectionDAG &DAG) const { 332 SDLoc DL(Op); 333 EVT VT = Op.getValueType(); 334 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, 335 DAG.getConstantFP(1.0f, MVT::f32), 336 Op.getOperand(1)); 337 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, 338 Op.getOperand(3)); 339 return DAG.getNode(ISD::FADD, DL, VT, 340 DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)), 341 OneSubAC); 342} 343 344/// \brief Generate Min/Max node 345SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, 346 SelectionDAG &DAG) const { 347 SDLoc DL(Op); 348 EVT VT = Op.getValueType(); 349 350 SDValue LHS = Op.getOperand(0); 351 SDValue RHS = Op.getOperand(1); 352 SDValue True = Op.getOperand(2); 353 SDValue False = Op.getOperand(3); 354 SDValue CC = Op.getOperand(4); 355 356 if (VT != MVT::f32 || 357 !((LHS == True && RHS == False) || (LHS == False && RHS == True))) { 358 return SDValue(); 359 } 360 361 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 362 switch (CCOpcode) { 363 case ISD::SETOEQ: 364 case ISD::SETONE: 365 case ISD::SETUNE: 366 case ISD::SETNE: 367 case ISD::SETUEQ: 368 case ISD::SETEQ: 369 case ISD::SETFALSE: 370 case ISD::SETFALSE2: 371 case ISD::SETTRUE: 372 case ISD::SETTRUE2: 373 case ISD::SETUO: 374 case ISD::SETO: 375 assert(0 && "Operation should already be optimised !"); 376 case ISD::SETULE: 377 case ISD::SETULT: 378 case ISD::SETOLE: 379 case ISD::SETOLT: 380 case ISD::SETLE: 381 case ISD::SETLT: { 382 if (LHS == True) 383 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 384 else 385 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 386 } 387 case ISD::SETGT: 388 case ISD::SETGE: 389 case ISD::SETUGE: 390 case ISD::SETOGE: 391 case ISD::SETUGT: 392 case ISD::SETOGT: { 393 if (LHS == True) 394 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 395 else 396 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 397 } 398 case ISD::SETCC_INVALID: 399 assert(0 && "Invalid setcc condcode !"); 400 } 401 return Op; 402} 403 404 405 406SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, 407 SelectionDAG &DAG) const { 408 SDLoc DL(Op); 409 EVT VT = Op.getValueType(); 410 411 SDValue Num = Op.getOperand(0); 412 SDValue Den = Op.getOperand(1); 413 414 SmallVector<SDValue, 8> Results; 415 416 // RCP = URECIP(Den) = 2^32 / Den + e 417 // e is rounding error. 418 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); 419 420 // RCP_LO = umulo(RCP, Den) */ 421 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); 422 423 // RCP_HI = mulhu (RCP, Den) */ 424 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); 425 426 // NEG_RCP_LO = -RCP_LO 427 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 428 RCP_LO); 429 430 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO) 431 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 432 NEG_RCP_LO, RCP_LO, 433 ISD::SETEQ); 434 // Calculate the rounding error from the URECIP instruction 435 // E = mulhu(ABS_RCP_LO, RCP) 436 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); 437 438 // RCP_A_E = RCP + E 439 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); 440 441 // RCP_S_E = RCP - E 442 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); 443 444 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) 445 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 446 RCP_A_E, RCP_S_E, 447 ISD::SETEQ); 448 // Quotient = mulhu(Tmp0, Num) 449 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); 450 451 // Num_S_Remainder = Quotient * Den 452 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); 453 454 // Remainder = Num - Num_S_Remainder 455 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); 456 457 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) 458 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, 459 DAG.getConstant(-1, VT), 460 DAG.getConstant(0, VT), 461 ISD::SETGE); 462 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0) 463 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder, 464 DAG.getConstant(0, VT), 465 DAG.getConstant(-1, VT), 466 DAG.getConstant(0, VT), 467 ISD::SETGE); 468 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero 469 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, 470 Remainder_GE_Zero); 471 472 // Calculate Division result: 473 474 // Quotient_A_One = Quotient + 1 475 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, 476 DAG.getConstant(1, VT)); 477 478 // Quotient_S_One = Quotient - 1 479 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, 480 DAG.getConstant(1, VT)); 481 482 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) 483 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 484 Quotient, Quotient_A_One, ISD::SETEQ); 485 486 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) 487 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 488 Quotient_S_One, Div, ISD::SETEQ); 489 490 // Calculate Rem result: 491 492 // Remainder_S_Den = Remainder - Den 493 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); 494 495 // Remainder_A_Den = Remainder + Den 496 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); 497 498 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) 499 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 500 Remainder, Remainder_S_Den, ISD::SETEQ); 501 502 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) 503 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 504 Remainder_A_Den, Rem, ISD::SETEQ); 505 SDValue Ops[2]; 506 Ops[0] = Div; 507 Ops[1] = Rem; 508 return DAG.getMergeValues(Ops, 2, DL); 509} 510 511SDValue AMDGPUTargetLowering::LowerVectorStore(const SDValue &Op, 512 SelectionDAG &DAG) const { 513 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op); 514 EVT MemVT = Store->getMemoryVT(); 515 unsigned MemBits = MemVT.getSizeInBits(); 516 517 // Byte stores are really expensive, so if possible, try to pack 518 // 32-bit vector truncatating store into an i32 store. 519 // XXX: We could also handle optimize other vector bitwidths 520 if (!MemVT.isVector() || MemBits > 32) { 521 return SDValue(); 522 } 523 524 SDLoc DL(Op); 525 const SDValue &Value = Store->getValue(); 526 EVT VT = Value.getValueType(); 527 const SDValue &Ptr = Store->getBasePtr(); 528 EVT MemEltVT = MemVT.getVectorElementType(); 529 unsigned MemEltBits = MemEltVT.getSizeInBits(); 530 unsigned MemNumElements = MemVT.getVectorNumElements(); 531 EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); 532 SDValue Mask; 533 switch(MemEltBits) { 534 case 8: 535 Mask = DAG.getConstant(0xFF, PackedVT); 536 break; 537 case 16: 538 Mask = DAG.getConstant(0xFFFF, PackedVT); 539 break; 540 default: 541 llvm_unreachable("Cannot lower this vector store"); 542 } 543 SDValue PackedValue; 544 for (unsigned i = 0; i < MemNumElements; ++i) { 545 EVT ElemVT = VT.getVectorElementType(); 546 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value, 547 DAG.getConstant(i, MVT::i32)); 548 Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT); 549 Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask); 550 SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT); 551 Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift); 552 if (i == 0) { 553 PackedValue = Elt; 554 } else { 555 PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt); 556 } 557 } 558 return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr, 559 MachinePointerInfo(Store->getMemOperand()->getValue()), 560 Store->isVolatile(), Store->isNonTemporal(), 561 Store->getAlignment()); 562} 563 564//===----------------------------------------------------------------------===// 565// Helper functions 566//===----------------------------------------------------------------------===// 567 568bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const { 569 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 570 return CFP->isExactlyValue(1.0); 571 } 572 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 573 return C->isAllOnesValue(); 574 } 575 return false; 576} 577 578bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const { 579 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 580 return CFP->getValueAPF().isZero(); 581 } 582 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 583 return C->isNullValue(); 584 } 585 return false; 586} 587 588SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, 589 const TargetRegisterClass *RC, 590 unsigned Reg, EVT VT) const { 591 MachineFunction &MF = DAG.getMachineFunction(); 592 MachineRegisterInfo &MRI = MF.getRegInfo(); 593 unsigned VirtualRegister; 594 if (!MRI.isLiveIn(Reg)) { 595 VirtualRegister = MRI.createVirtualRegister(RC); 596 MRI.addLiveIn(Reg, VirtualRegister); 597 } else { 598 VirtualRegister = MRI.getLiveInVirtReg(Reg); 599 } 600 return DAG.getRegister(VirtualRegister, VT); 601} 602 603#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; 604 605const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { 606 switch (Opcode) { 607 default: return 0; 608 // AMDIL DAG nodes 609 NODE_NAME_CASE(CALL); 610 NODE_NAME_CASE(UMUL); 611 NODE_NAME_CASE(DIV_INF); 612 NODE_NAME_CASE(RET_FLAG); 613 NODE_NAME_CASE(BRANCH_COND); 614 615 // AMDGPU DAG nodes 616 NODE_NAME_CASE(DWORDADDR) 617 NODE_NAME_CASE(FRACT) 618 NODE_NAME_CASE(FMAX) 619 NODE_NAME_CASE(SMAX) 620 NODE_NAME_CASE(UMAX) 621 NODE_NAME_CASE(FMIN) 622 NODE_NAME_CASE(SMIN) 623 NODE_NAME_CASE(UMIN) 624 NODE_NAME_CASE(URECIP) 625 NODE_NAME_CASE(EXPORT) 626 NODE_NAME_CASE(CONST_ADDRESS) 627 NODE_NAME_CASE(REGISTER_LOAD) 628 NODE_NAME_CASE(REGISTER_STORE) 629 NODE_NAME_CASE(LOAD_CONSTANT) 630 NODE_NAME_CASE(LOAD_INPUT) 631 NODE_NAME_CASE(SAMPLE) 632 NODE_NAME_CASE(SAMPLEB) 633 NODE_NAME_CASE(SAMPLED) 634 NODE_NAME_CASE(SAMPLEL) 635 NODE_NAME_CASE(STORE_MSKOR) 636 } 637} 638