R600ISelLowering.cpp revision 8263408a91b6b3beb5af5de6bdc7e5d13197a268
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
// is mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

/// Register the value types the R600 target supports and mark the DAG nodes
/// that require custom lowering (handled by LowerOperation() below) or
/// expansion into other nodes.
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}

/// Expand target pseudo-instructions into real machine instructions after
/// instruction selection.  Unless a case returns early (see MASK_WRITE), the
/// original pseudo MI is erased at the bottom of this function, so each case
/// must build complete replacement instructions before the iterator I.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  // The NGROUPS_*/GLOBAL_SIZE_*/LOCAL_SIZE_* pseudos all read an implicit
  // kernel parameter; the integer passed below is the dword index of that
  // parameter (converted to a byte offset in lowerImplicitParameter()).
  case AMDGPU::NGROUPS_X:
    lowerImplicitParameter(MI, *BB, MRI, 0);
    break;
  case AMDGPU::NGROUPS_Y:
    lowerImplicitParameter(MI, *BB, MRI, 1);
    break;
  case AMDGPU::NGROUPS_Z:
    lowerImplicitParameter(MI, *BB, MRI, 2);
    break;
  case AMDGPU::GLOBAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 3);
    break;
  case AMDGPU::GLOBAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 4);
    break;
  case AMDGPU::GLOBAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 5);
    break;
  case AMDGPU::LOCAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 6);
    break;
  case AMDGPU::LOCAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 7);
    break;
  case AMDGPU::LOCAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 8);
    break;

  // CLAMP/FABS/FNEG become a plain MOV whose relevant operand carries a
  // target flag; the flag (not a separate instruction) encodes the modifier.
  case AMDGPU::CLAMP_R600:
    // Clamp is a flag on the *destination* operand.
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FABS_R600:
    // Abs is a flag on the *source* operand.
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FNEG_R600:
    // Neg is a flag on the *source* operand.
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::R600_LOAD_CONST:
    {
      // Operand 1 is an index into the constant register file; translate it
      // to the concrete C-register and copy from there.
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
                  .addOperand(MI->getOperand(0))
                  .addReg(ConstantReg);
      break;
    }

  case AMDGPU::MASK_WRITE:
    {
      // Instead of emitting anything, tag the instruction that defines the
      // masked register with MO_FLAG_MASK on its def operand.
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
      def->addTargetFlag(MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
                                             AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                              AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addReg(AMDGPU::PRED_SEL_OFF)
              .addImm(2);
      // Byte address >> 2 == dword address.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue)
              .addReg(AMDGPU::PRED_SEL_OFF);
      // Re-emit the same opcode with the converted address; the original MI
      // is erased after the switch.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr);
      break;
    }

  case AMDGPU::RESERVE_REG:
    {
      // Record the T-register named by the immediate operand as reserved in
      // the machine-function info; no instruction is emitted.
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
                         AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

  case AMDGPU::TXD:
    {
      // Texture sample with derivatives: set the H and V gradients first,
      // then sample.  t0/t1 are added as implicit uses so the gradient
      // writes are not dead-code eliminated or reordered past the sample.
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      // Same as TXD but uses the shadow-comparison sample instruction.
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }

  }

  MI->eraseFromParent();
  return BB;
}

/// Replace MI with a VTX_READ of a 32-bit implicit kernel parameter at
/// \p dword_offset (in dwords, converted to a byte offset here).  The read
/// goes through a zero base pointer, so the immediate carries the whole
/// offset.  NOTE(review): the dword_offset -> parameter mapping (NGROUPS,
/// GLOBAL_SIZE, LOCAL_SIZE) is defined by the caller's case table above --
/// the parameter buffer layout itself is not visible in this file.
void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
    MachineRegisterInfo & MRI, unsigned dword_offset) const
{
  unsigned ByteOffset = dword_offset * 4;

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  MachineBasicBlock::iterator I = *MI;
  unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass);
  // The VTX_READ result must live in the X-channel register class.
  MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::COPY), PtrReg)
          .addReg(AMDGPU::ZERO);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
          .addOperand(MI->getOperand(0))
          .addReg(PtrReg)
          .addImm(ByteOffset);
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

/// Dispatch the DAG nodes marked Custom in the constructor to their
/// R600-specific lowering routines, and handle the R600 intrinsics.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Copy the value into the T-register named by the intrinsic's index
      // operand and mark that register live-out of the function.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs are pre-loaded into T-registers; expose the one named
      // by the index operand as a live-in.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }
    // Group id (tgid) lives in T1, thread id within the group (tidig) in T0,
    // one channel per dimension.
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

/// Lower BR_CC by materializing the comparison as a SELECT_CC that yields an
/// all-ones (-1) or zero i32 mask, then branching on that value with the
/// target's BRANCH_COND node.
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}


/// Lower ROTL using the target's BITALIGN node:
/// rotl(x, n) == bitalign(x, x, 32 - n).
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

/// Lower SELECT_CC.  Cases whose True/False values are the hardware's native
/// true/false constants are kept as SELECT_CC (matched by SET* instructions);
/// everything else is rewritten as a native SELECT_CC producing a hardware
/// true/false condition, followed by a SELECT on that condition.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.

  if (CompareVT != VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      // Pick the int->fp conversion matching the signedness of the compare.
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type parings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value or
  // vice-versa we can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
  // we can handle this with a native instruction, but we need to swap true
  // and false and change the conditional.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a native
  // operation. (The code below does this but we don't have the Instruction
  // selection patterns to do this yet.
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  // Convert floating point condition to i1
  if (VT == MVT::f32) {
    // fneg(1.0f) == -1.0f, which FP_TO_SINT turns into the -1 the SELECT
    // expects for "true"; 0.0f stays 0.
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

/// Lower SETCC as a SELECT_CC producing an all-ones/zero i32 mask, then
/// AND with 1 to get the 0/1 value SETCC is defined to produce.
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}