R600ISelLowering.cpp revision 5f82d1924831da7467bfe8025ca18e98b9548ca4
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
// is mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  // No native 64-bit integer multiply; let the legalizer expand it.
  setOperationAction(ISD::MUL, MVT::i64, Expand);

  // Register classes: 32-bit scalars and 128-bit (4 x 32-bit) vectors.
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  // Operations lowered by the Custom hooks implemented below
  // (LowerBR_CC, LowerROTL, LowerSELECT_CC, LowerSETCC, LowerOperation).
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}

// Expand pseudo instructions that were marked usesCustomInserter into real
// machine instructions.  Unless a case returns early (MASK_WRITE), the
// original pseudo MI is erased at the bottom of this function after the
// replacement instructions have been inserted before it.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600:
    // Clamp is encoded as a target flag on the destination operand of a MOV.
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FABS_R600:
    // |src| is encoded as a target flag on the source operand of a MOV.
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FNEG_R600:
    // -src is encoded as a target flag on the source operand of a MOV.
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::R600_LOAD_CONST:
    {
      // Operand 1 is an index into the constant register file; turn the
      // pseudo into a COPY from the corresponding CReg32 register.
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
                  .addOperand(MI->getOperand(0))
                  .addReg(ConstantReg);
      break;
    }

  case AMDGPU::MASK_WRITE:
    {
      // Mark the defining instruction of the masked register with
      // MO_FLAG_MASK instead of emitting anything here.
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
      def->addTargetFlag(MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
                                             AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                              AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addReg(AMDGPU::PRED_SEL_OFF)
              .addImm(2);
      // NewAddr = byte address >> 2.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue)
              .addReg(AMDGPU::PRED_SEL_OFF);
      // Re-emit the same opcode, now with the shifted (DWORD) address.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr);
      break;
    }

  case AMDGPU::RESERVE_REG:
    {
      // Record the named TReg32 register in the function info so later
      // passes treat it as reserved; no instruction is emitted.
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
                           AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

  case AMDGPU::TXD:
    {
      // Texture sample with explicit derivatives: set H and V gradients,
      // then sample.  t0/t1 are added as implicit uses so the scheduler
      // keeps the gradient setup before the sample.
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      // Same as TXD but uses the shadow-compare sample opcode.
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::BRANCH:
    // Unconditional branch: JUMP with a null (0) predicate register.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(0);
    break;
  case AMDGPU::BRANCH_COND_f32:
    // Float condition: PRED_X sets PREDICATE_BIT when the condition is
    // zero, then a predicated JUMP consumes (kills) the predicate bit.
    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
            .addReg(AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_ZERO);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  case AMDGPU::BRANCH_COND_i32:
    // Integer variant of BRANCH_COND_f32 (uses the integer is-zero test).
    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
            .addReg(AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_ZERO_INT);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;


  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

// Dispatch the operations registered as Custom in the constructor to their
// lowering routines, and lower the R600-specific intrinsics inline.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Copy the output value into the TReg32 register selected by the
      // intrinsic's index operand and mark that register live-out.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs live in TReg32 registers indexed by the intrinsic's
      // immediate operand.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    // Dispatch-dimension queries are loads from the implicit parameter
    // buffer; the last argument is the DWORD offset of each value.
    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Group and thread ids are preloaded into fixed T0/T1 components.
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

// Lower BR_CC to a SELECT_CC producing all-ones (-1) / zero, fed into an
// AMDGPUISD::BRANCH_COND node.
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

// Load one DWORD-indexed implicit kernel parameter (ngroups, global/local
// size, ...) from the PARAM_I address space.
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const
{
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

// rotl(x, y) == bitalign(x, x, 32 - y): lower ROTL onto the BITALIGN node.
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

// Lower SELECT_CC: keep it native when True/False are the hardware
// true/false values, otherwise lower it to SELECT_CC + SELECT.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.

  if (CompareVT !=  VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type parings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value or
  // vice-versa we can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
  // we can handle this with a native instruction, but we need to swap true
  // and false and change the conditional.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a native
  // operation. (The code below does this but we don't have the Instruction
  // selection patterns to do this yet.
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this for it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  // Convert floating point condition to i1
  if (VT == MVT::f32) {
    // FNEG turns hardware-true 1.0f into -1.0f so FP_TO_SINT yields -1 (i1 true).
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

// Lower SETCC: build a SELECT_CC producing -1/0, then AND with 1 to obtain
// the 0/1 boolean value SETCC is defined to return.
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}