R600ISelLowering.cpp revision 0eca5fd919b0a31ea926b5f5072e5e56f7a55269
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
// is mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Register the value types / register classes supported by the R600 target
// and declare which DAG operations need custom lowering (handled by
// LowerOperation() below) or expansion into simpler nodes.
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  // 32-bit scalars live in Reg32, 4-element vectors in Reg128.
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  // Intrinsics are dispatched by hand in LowerOperation().
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}

//===----------------------------------------------------------------------===//
// Custom MachineInstr lowering
//===----------------------------------------------------------------------===//

// Expand target pseudo-instructions that could not be selected directly into
// real machine instructions. Unless a case returns early, the original pseudo
// MI is erased after the replacement sequence has been emitted in its place.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  // Implicit kernel parameters are laid out as consecutive dwords:
  // NGROUPS_{X,Y,Z} at dwords 0-2, GLOBAL_SIZE_{X,Y,Z} at 3-5,
  // LOCAL_SIZE_{X,Y,Z} at 6-8.
  case AMDGPU::NGROUPS_X:
    lowerImplicitParameter(MI, *BB, MRI, 0);
    break;
  case AMDGPU::NGROUPS_Y:
    lowerImplicitParameter(MI, *BB, MRI, 1);
    break;
  case AMDGPU::NGROUPS_Z:
    lowerImplicitParameter(MI, *BB, MRI, 2);
    break;
  case AMDGPU::GLOBAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 3);
    break;
  case AMDGPU::GLOBAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 4);
    break;
  case AMDGPU::GLOBAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 5);
    break;
  case AMDGPU::LOCAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 6);
    break;
  case AMDGPU::LOCAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 7);
    break;
  case AMDGPU::LOCAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 8);
    break;

  // CLAMP/FABS/FNEG become a plain MOV whose operand carries the
  // corresponding R600 instruction-word flag.
  case AMDGPU::CLAMP_R600:
    // Clamp is encoded as a flag on the *destination* operand.
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FABS_R600:
    // Absolute-value is encoded as a flag on the *source* operand.
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FNEG_R600:
    // Negation is encoded as a flag on the *source* operand.
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::R600_LOAD_CONST:
  {
    // Operand 1 is an immediate index into the constant register file;
    // materialize the load as a COPY from the selected constant register.
    int64_t RegIndex = MI->getOperand(1).getImm();
    unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
                .addOperand(MI->getOperand(0))
                .addReg(ConstantReg);
    break;
  }

  case AMDGPU::MASK_WRITE:
  {
    // Tag the defining instruction of the masked register with MO_FLAG_MASK
    // rather than emitting any code of our own.
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
    def->addTargetFlag(MO_FLAG_MASK);
    // Return early so the instruction is not erased
    return BB;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_eg:
  {
    // Convert to DWORD address: shift the byte address (operand 1) right by
    // two before re-emitting the RAT write with the new address register.
    unsigned NewAddr = MRI.createVirtualRegister(
                                             AMDGPU::R600_TReg32_XRegisterClass);
    unsigned ShiftValue = MRI.createVirtualRegister(
                                              AMDGPU::R600_TReg32RegisterClass);

    // XXX In theory, we should be able to pass ShiftValue directly to
    // the LSHR_eg instruction as an inline literal, but I tried doing it
    // this way and it didn't produce the correct results.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
            .addReg(AMDGPU::ALU_LITERAL_X)
            .addReg(AMDGPU::PRED_SEL_OFF)
            .addImm(2);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
            .addOperand(MI->getOperand(1))
            .addReg(ShiftValue)
            .addReg(AMDGPU::PRED_SEL_OFF);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addReg(NewAddr);
    break;
  }

  case AMDGPU::RESERVE_REG:
  {
    // Record the named T-register in the function info so it is treated as
    // reserved; no code is emitted for this pseudo.
    R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
    int64_t ReservedIndex = MI->getOperand(0).getImm();
    unsigned ReservedReg =
                         AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
    MFI->ReservedRegs.push_back(ReservedReg);
    break;
  }

  case AMDGPU::TXD:
  {
    // Texture sample with derivatives: first load the horizontal (t0) and
    // vertical (t1) gradients, then issue the gradient sample. t0/t1 are
    // added as implicit uses so the gradient setup cannot be moved or
    // deleted away from the sample.
    unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
    unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addReg(t0, RegState::Implicit)
            .addReg(t1, RegState::Implicit);
    break;
  }
  case AMDGPU::TXD_SHADOW:
  {
    // Same expansion as TXD but using the shadow-compare gradient sample
    // (TEX_SAMPLE_C_G).
    unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
    unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addReg(t0, RegState::Implicit)
            .addReg(t1, RegState::Implicit);
    break;
  }
  case AMDGPU::BRANCH:
    // Unconditional branch: JUMP with a null (register 0) predicate operand.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(0);
    break;
  case AMDGPU::BRANCH_COND_f32:
    // Conditional branch on an f32 condition: PRED_X evaluates the condition
    // (OPCODE_IS_ZERO) into PREDICATE_BIT, which the JUMP then consumes.
    // MO_FLAG_PUSH marks the condition operand for the predicate stack.
    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
            .addReg(AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_ZERO);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  case AMDGPU::BRANCH_COND_i32:
    // Integer variant of the conditional branch (OPCODE_IS_ZERO_INT).
    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
            .addReg(AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_ZERO_INT);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;


  }

  MI->eraseFromParent();
  return BB;
}

// Replace MI (one of the NGROUPS_*/GLOBAL_SIZE_*/LOCAL_SIZE_* pseudos above)
// with a VTX_READ_PARAM_i32_eg that loads the implicit kernel parameter at
// dword_offset dwords from the parameter base (the ZERO register).
// The destination register is re-classed to R600_TReg32_X, the class the
// vertex fetch writes to.
void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
    MachineRegisterInfo & MRI, unsigned dword_offset) const
{
  unsigned ByteOffset = dword_offset * 4;

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  MachineBasicBlock::iterator I = *MI;
  unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass);
  MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass);

  // Base pointer for the fetch: zero, so ByteOffset is an absolute offset.
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::COPY), PtrReg)
          .addReg(AMDGPU::ZERO);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
          .addOperand(MI->getOperand(0))
          .addReg(PtrReg)
          .addImm(ByteOffset);
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

// Dispatch the operations marked Custom in the constructor, plus the R600
// intrinsics. Unhandled opcodes/intrinsics are forwarded to the AMDGPU
// superclass.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Copy the value (operand 2) into the T-register named by the
      // constant index (operand 3) and mark that register live-out.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs arrive pre-loaded in T-registers; expose the one named
      // by the constant index (operand 1) as a live-in.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }
    // Group ids live in T1.{X,Y,Z}, thread ids within a group in T0.{X,Y,Z}.
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

// Lower BR_CC into a SELECT_CC that produces an all-ones (-1) / zero mask,
// followed by an AMDGPUISD::BRANCH_COND on that mask.
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS   = Op.getOperand(2);
  SDValue RHS   = Op.getOperand(3);
  SDValue JumpT  = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}


// Lower ROTL via the BITALIGN instruction:
//   rotl(x, y) == bitalign(x, x, 32 - y)
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

// Lower SELECT_CC. The hardware SET* instructions can only produce the
// "hardware" true/false values (1.0/0.0 for f32, -1/0 for i32), so a
// SELECT_CC whose True/False are exactly those values is kept as-is;
// anything else is rewritten as a hardware SELECT_CC that builds the
// true/false mask, followed by a SELECT on that mask.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.

  if (CompareVT !=  VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type parings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value or
  // vice-versa we can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
  // we can handle this with a native instruction, but we need to swap true
  // and false and change the conditional.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a native
  // operation. (The code below does this but we don't have the Instruction
  // selection patterns to do this yet.
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this for it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  // Convert floating point condition to i1: FNEG turns 1.0/0.0 into
  // -1.0/-0.0, and FP_TO_SINT then yields the -1/0 integer mask SELECT needs.
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

// Lower SETCC (which must produce 0/1 for i32) by first building a
// SELECT_CC that produces the hardware -1/0 mask, then masking the result
// down to bit 0 with an AND.
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC  = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}