AMDGPUISelDAGToDAG.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "R600InstrInfo.h"
#include "SIISelLowering.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {
/// AMDGPU-specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget &Subtarget;
public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();

  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  inline SDValue getSmallIPtrImm(unsigned Imm);
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue &R1, SDValue &R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  static bool checkType(const Value *Ptr, unsigned AS);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int CbId) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  /// \returns True if the current basic block being selected is at control
  /// flow depth 0, meaning that the current block dominates the exit block.
  bool isCFDepth0() const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}
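// Illustrative only: this factory is expected to be invoked from the target's
// pass configuration. Assuming the usual AMDGPUPassConfig::addInstSelector()
// hook, the registration would look roughly like:
//
//   bool AMDGPUPassConfig::addInstSelector() {
//     addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
//     return false;
//   }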

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
  : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo, or nullptr if the register class cannot
/// be determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return TM.getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        TM.getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx);
  }
  }
}

SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned Imm) {
  return CurDAG->getTargetConstant(Imm, MVT::i32);
}

bool AMDGPUDAGToDAGISel::SelectADDRParam(
    SDValue Addr, SDValue &R1, SDValue &R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i32);
  }
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}


bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i64);
  }
  return true;
}

SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
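  // Illustrative expansion (pseudo-ISA) of what the code below emits for a
  // 64-bit add at control-flow depth 0:
  //   lo, scc = S_ADD_I32  lhs.sub0, rhs.sub0
  //   hi      = S_ADDC_U32 lhs.sub1, rhs.sub1   ; consumes the carry in SCC
  //   result  = REG_SEQUENCE lo, sub0, hi, sub1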
  case ISD::ADD: {
    if (N->getValueType(0) != MVT::i64 ||
        ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SDLoc DL(N);
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);

    SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
    SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);

    SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                         DL, MVT::i32, LHS, Sub0);
    SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                         DL, MVT::i32, LHS, Sub1);

    SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                         DL, MVT::i32, RHS, Sub0);
    SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                         DL, MVT::i32, RHS, Sub1);

    SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

    SmallVector<SDValue, 8> AddLoArgs;
    AddLoArgs.push_back(SDValue(Lo0, 0));
    AddLoArgs.push_back(SDValue(Lo1, 0));

    SDNode *AddLo = CurDAG->getMachineNode(
        isCFDepth0() ? AMDGPU::S_ADD_I32 : AMDGPU::V_ADD_I32_e32,
        DL, VTList, AddLoArgs);
    SDValue Carry = SDValue(AddLo, 1);
    SDNode *AddHi = CurDAG->getMachineNode(
        isCFDepth0() ? AMDGPU::S_ADDC_U32 : AMDGPU::V_ADDC_U32_e32,
        DL, MVT::i32, SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

    SDValue Args[5] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
      SDValue(AddLo, 0),
      Sub0,
      SDValue(AddHi, 0),
      Sub1,
    };
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
  }
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI =
        static_cast<const AMDGPURegisterInfo *>(TM.getRegisterInfo());
    const SIRegisterInfo *SIRI =
        static_cast<const SIRegisterInfo *>(TM.getRegisterInfo());
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      bool UseVReg = true;
      for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
           U != E; ++U) {
        if (!U->isMachineOpcode()) {
          continue;
        }
        const TargetRegisterClass *RC =
            getOperandRegClass(*U, U.getOperandNo());
        if (!RC) {
          continue;
        }
        if (SIRI->isSGPRClass(RC)) {
          UseVReg = false;
        }
      }
      switch(NumVectorElts) {
      case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
                                     AMDGPU::SReg_32RegClassID;
        break;
      case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
                                     AMDGPU::SReg_64RegClassID;
        break;
      case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
                                     AMDGPU::SReg_128RegClassID;
        break;
      case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
                                     AMDGPU::SReg_256RegClassID;
        break;
      case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
                                      AMDGPU::SReg_512RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);

    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS,
                                  VT.getVectorElementType(),
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(N->getNumOperands() * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      // XXX: Why is this here?
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }
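  // Illustrative: a 64-bit immediate that cannot be encoded inline is split
  // below into two 32-bit moves plus a REG_SEQUENCE. For example,
  // 0x0000000100000002 becomes:
  //   lo     = S_MOV_B32 2          ; Imm & 0xFFFFFFFF
  //   hi     = S_MOV_B32 1          ; Imm >> 32
  //   result = REG_SEQUENCE lo, sub0, hi, sub1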
  case ISD::Constant:
  case ISD::ConstantFP: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                        CurDAG->getConstant(Imm >> 32,
                                                            MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
                                  N->getValueType(0), Ops);
  }

  case AMDGPUISD::REGISTER_LOAD: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;

    SelectADDRIndirect(N->getOperand(1), Addr, Offset);
    const SDValue Ops[] = {
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
                                  CurDAG->getVTList(MVT::i32, MVT::i64,
                                                    MVT::Other),
                                  Ops);
  }
  case AMDGPUISD::REGISTER_STORE: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;
    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
    const SDValue Ops[] = {
      N->getOperand(1),
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
                                  CurDAG->getVTList(MVT::Other),
                                  Ops);
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version,
    // which has separate operands for the offset and width, the scalar
    // version packs the width and offset into a single operand. Try to move
    // to the scalar version if the offsets are constant, so that we can try
    // to keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    // Pack the offset and width of the BFE into the format expected by
    // S_BFE_I32 / S_BFE_U32: in the second source operand, bits [5:0]
    // contain the offset and bits [22:16] the width.
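    //
    // For example, extracting 8 bits starting at bit 4 packs as
    //   PackedVal = 4 | (8 << 16) = 0x00080004.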
    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    uint32_t PackedVal = OffsetVal | WidthVal << 16;

    SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
    return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32
                                         : AMDGPU::S_BFE_U32,
                                  SDLoc(N),
                                  MVT::i32,
                                  N->getOperand(0),
                                  PackedOffsetWidth);

  }
  }
  return SelectCode(N);
}


bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32)) {
      return true;
    }
  }
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load.
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  const Value *MemVal = N->getMemOperand()->getValue();
  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isCFDepth0() const {
  // FIXME: Figure out a way to use DominatorTree analysis here.
  const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
  const Function *Fn = FuncInfo->Fn;
  return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
}


const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
    SDValue &BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
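// The selector below accepts three address shapes (a summary of the cases in
// the function, not a formal pattern spec):
//   constant C          -> Base = INDIRECT_BASE_ADDR, Offset = C
//   (add|or Base, C)    -> Base = Base,               Offset = C
//   anything else       -> Base = Addr,               Offset = 0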
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
  }

  return true;
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering &Lowering =
      *static_cast<const AMDGPUTargetLowering *>(getTargetLowering());
  bool IsModified = false;
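  // Iterate to a fixed point: a successful fold can expose further folding
  // opportunities, so rescan the DAG until a full pass makes no changes.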
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ++I) {

      SDNode *Node = I;

      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}