R600ISelLowering.cpp revision f3480f92349c90f55e2e80d9a4536ab048fb5652
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file 11// is mostly EmitInstrWithCustomInserter(). 12// 13//===----------------------------------------------------------------------===// 14 15#include "R600ISelLowering.h" 16#include "AMDGPUUtil.h" 17#include "R600InstrInfo.h" 18#include "R600MachineFunctionInfo.h" 19#include "llvm/CodeGen/MachineInstrBuilder.h" 20#include "llvm/CodeGen/MachineRegisterInfo.h" 21#include "llvm/CodeGen/SelectionDAG.h" 22 23using namespace llvm; 24 25R600TargetLowering::R600TargetLowering(TargetMachine &TM) : 26 AMDGPUTargetLowering(TM), 27 TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) 28{ 29 setOperationAction(ISD::MUL, MVT::i64, Expand); 30 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); 31 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); 32 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); 33 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); 34 computeRegisterProperties(); 35 36 setOperationAction(ISD::BR_CC, MVT::i32, Custom); 37 38 setOperationAction(ISD::FSUB, MVT::f32, Expand); 39 40 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 41 42 setOperationAction(ISD::ROTL, MVT::i32, Custom); 43 44 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 45 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 46 47 setOperationAction(ISD::SETCC, MVT::i32, Custom); 48 49 setSchedulingPreference(Sched::VLIW); 50} 51 52MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( 53 MachineInstr * MI, MachineBasicBlock * BB) const 54{ 55 MachineFunction * MF = BB->getParent(); 56 MachineRegisterInfo &MRI = MF->getRegInfo(); 57 MachineBasicBlock::iterator I = *MI; 58 59 switch (MI->getOpcode()) { 60 default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); 61 case AMDGPU::TGID_X: 62 addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X); 63 break; 64 case AMDGPU::TGID_Y: 65 addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y); 66 break; 67 case AMDGPU::TGID_Z: 68 addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z); 69 break; 70 case AMDGPU::TIDIG_X: 71 addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X); 72 break; 73 case AMDGPU::TIDIG_Y: 74 addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y); 75 break; 76 case AMDGPU::TIDIG_Z: 77 addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z); 78 break; 79 case AMDGPU::NGROUPS_X: 80 lowerImplicitParameter(MI, *BB, MRI, 0); 81 break; 82 case AMDGPU::NGROUPS_Y: 83 lowerImplicitParameter(MI, *BB, MRI, 1); 84 break; 85 case AMDGPU::NGROUPS_Z: 86 lowerImplicitParameter(MI, *BB, MRI, 2); 87 break; 88 case AMDGPU::GLOBAL_SIZE_X: 89 lowerImplicitParameter(MI, *BB, MRI, 3); 90 break; 91 case AMDGPU::GLOBAL_SIZE_Y: 92 lowerImplicitParameter(MI, *BB, MRI, 4); 93 break; 94 case AMDGPU::GLOBAL_SIZE_Z: 95 lowerImplicitParameter(MI, *BB, MRI, 5); 96 break; 97 case AMDGPU::LOCAL_SIZE_X: 98 lowerImplicitParameter(MI, *BB, MRI, 6); 99 break; 100 case AMDGPU::LOCAL_SIZE_Y: 101 lowerImplicitParameter(MI, *BB, MRI, 7); 102 break; 103 case AMDGPU::LOCAL_SIZE_Z: 104 lowerImplicitParameter(MI, *BB, MRI, 8); 105 break; 106 107 case AMDGPU::CLAMP_R600: 108 MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP); 109 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) 110 .addOperand(MI->getOperand(0)) 111 .addOperand(MI->getOperand(1)); 112 break; 113 114 case AMDGPU::FABS_R600: 115 MI->getOperand(1).addTargetFlag(MO_FLAG_ABS); 116 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) 117 .addOperand(MI->getOperand(0)) 118 .addOperand(MI->getOperand(1)); 119 break; 120 121 case AMDGPU::FNEG_R600: 122 MI->getOperand(1).addTargetFlag(MO_FLAG_NEG); 123 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) 124 .addOperand(MI->getOperand(0)) 125 .addOperand(MI->getOperand(1)); 126 break; 127 128 case AMDGPU::R600_LOAD_CONST: 129 { 130 int64_t RegIndex = MI->getOperand(1).getImm(); 131 unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex); 132 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY)) 133 .addOperand(MI->getOperand(0)) 134 .addReg(ConstantReg); 135 break; 136 } 137 138 case AMDGPU::LOAD_INPUT: 139 { 140 int64_t RegIndex = MI->getOperand(1).getImm(); 141 addLiveIn(MI, MF, MRI, TII, 142 AMDGPU::R600_TReg32RegClass.getRegister(RegIndex)); 143 break; 144 } 145 146 case AMDGPU::MASK_WRITE: 147 { 148 unsigned maskedRegister = MI->getOperand(0).getReg(); 149 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); 150 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); 151 MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister); 152 def->addTargetFlag(MO_FLAG_MASK); 153 // Return early so the instruction is not erased 154 return BB; 155 } 156 157 case AMDGPU::RAT_WRITE_CACHELESS_eg: 158 { 159 // Convert to DWORD address 160 unsigned NewAddr = MRI.createVirtualRegister( 161 AMDGPU::R600_TReg32_XRegisterClass); 162 unsigned ShiftValue = MRI.createVirtualRegister( 163 AMDGPU::R600_TReg32RegisterClass); 164 165 // XXX In theory, we should be able to pass ShiftValue directly to 166 // the LSHR_eg instruction as an inline literal, but I tried doing it 167 // this way and it didn't produce the correct results. 168 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue) 169 .addReg(AMDGPU::ALU_LITERAL_X) 170 .addImm(2); 171 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr) 172 .addOperand(MI->getOperand(1)) 173 .addReg(ShiftValue); 174 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) 175 .addOperand(MI->getOperand(0)) 176 .addReg(NewAddr); 177 break; 178 } 179 180 case AMDGPU::RESERVE_REG: 181 { 182 R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>(); 183 int64_t ReservedIndex = MI->getOperand(0).getImm(); 184 unsigned ReservedReg = 185 AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex); 186 MFI->ReservedRegs.push_back(ReservedReg); 187 break; 188 } 189 190 case AMDGPU::TXD: 191 { 192 unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass); 193 unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass); 194 195 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0) 196 .addOperand(MI->getOperand(3)) 197 .addOperand(MI->getOperand(4)) 198 .addOperand(MI->getOperand(5)); 199 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1) 200 .addOperand(MI->getOperand(2)) 201 .addOperand(MI->getOperand(4)) 202 .addOperand(MI->getOperand(5)); 203 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G)) 204 .addOperand(MI->getOperand(0)) 205 .addOperand(MI->getOperand(1)) 206 .addOperand(MI->getOperand(4)) 207 .addOperand(MI->getOperand(5)) 208 .addReg(t0, RegState::Implicit) 209 .addReg(t1, RegState::Implicit); 210 break; 211 } 212 case AMDGPU::TXD_SHADOW: 213 { 214 unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass); 215 unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass); 216 217 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0) 218 .addOperand(MI->getOperand(3)) 219 .addOperand(MI->getOperand(4)) 220 .addOperand(MI->getOperand(5)); 221 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1) 222 .addOperand(MI->getOperand(2)) 223 .addOperand(MI->getOperand(4)) 224 .addOperand(MI->getOperand(5)); 225 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G)) 226 .addOperand(MI->getOperand(0)) 227 .addOperand(MI->getOperand(1)) 228 .addOperand(MI->getOperand(4)) 229 .addOperand(MI->getOperand(5)) 230 .addReg(t0, RegState::Implicit) 231 .addReg(t1, RegState::Implicit); 232 break; 233 } 234 235 236 } 237 238 MI->eraseFromParent(); 239 return BB; 240} 241 242void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, 243 MachineRegisterInfo & MRI, unsigned dword_offset) const 244{ 245 unsigned ByteOffset = dword_offset * 4; 246 247 // We shouldn't be using an offset wider than 16-bits for implicit parameters. 248 assert(isInt<16>(ByteOffset)); 249 250 MachineBasicBlock::iterator I = *MI; 251 unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass); 252 MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass); 253 254 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::COPY), PtrReg) 255 .addReg(AMDGPU::ZERO); 256 257 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_i32_eg)) 258 .addOperand(MI->getOperand(0)) 259 .addReg(PtrReg) 260 .addImm(ByteOffset); 261} 262 263//===----------------------------------------------------------------------===// 264// Custom DAG Lowering Operations 265//===----------------------------------------------------------------------===// 266 267 268SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 269{ 270 switch (Op.getOpcode()) { 271 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 272 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 273 case ISD::ROTL: return LowerROTL(Op, DAG); 274 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 275 case ISD::SETCC: return LowerSETCC(Op, DAG); 276 case ISD::INTRINSIC_VOID: { 277 SDValue Chain = Op.getOperand(0); 278 unsigned IntrinsicID = 279 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 280 switch (IntrinsicID) { 281 case AMDGPUIntrinsic::AMDGPU_store_output: { 282 MachineFunction &MF = DAG.getMachineFunction(); 283 MachineRegisterInfo &MRI = MF.getRegInfo(); 284 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); 285 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); 286 if (!MRI.isLiveOut(Reg)) { 287 MRI.addLiveOut(Reg); 288 } 289 return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2)); 290 } 291 default: return SDValue(); 292 } 293 break; 294 } 295 } 296} 297 298SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const 299{ 300 SDValue Chain = Op.getOperand(0); 301 SDValue CC = Op.getOperand(1); 302 SDValue LHS = Op.getOperand(2); 303 SDValue RHS = Op.getOperand(3); 304 SDValue JumpT = Op.getOperand(4); 305 SDValue CmpValue; 306 SDValue Result; 307 CmpValue = DAG.getNode( 308 ISD::SELECT_CC, 309 Op.getDebugLoc(), 310 MVT::i32, 311 LHS, RHS, 312 DAG.getConstant(-1, MVT::i32), 313 DAG.getConstant(0, MVT::i32), 314 CC); 315 Result = DAG.getNode( 316 AMDGPUISD::BRANCH_COND, 317 CmpValue.getDebugLoc(), 318 MVT::Other, Chain, 319 JumpT, CmpValue); 320 return Result; 321} 322 323 324SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const 325{ 326 DebugLoc DL = Op.getDebugLoc(); 327 EVT VT = Op.getValueType(); 328 329 return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT, 330 Op.getOperand(0), 331 Op.getOperand(0), 332 DAG.getNode(ISD::SUB, DL, VT, 333 DAG.getConstant(32, MVT::i32), 334 Op.getOperand(1))); 335} 336 337SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const 338{ 339 DebugLoc DL = Op.getDebugLoc(); 340 EVT VT = Op.getValueType(); 341 342 SDValue LHS = Op.getOperand(0); 343 SDValue RHS = Op.getOperand(1); 344 SDValue True = Op.getOperand(2); 345 SDValue False = Op.getOperand(3); 346 SDValue CC = Op.getOperand(4); 347 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 348 SDValue Temp; 349 350 // LHS and RHS are guaranteed to be the same value type 351 EVT CompareVT = LHS.getValueType(); 352 353 // We need all the operands of SELECT_CC to have the same value type, so if 354 // necessary we need to convert LHS and RHS to be the same type True and 355 // False. True and False are guaranteed to have the same type as this 356 // SELECT_CC node. 357 358 if (CompareVT != VT) { 359 ISD::NodeType ConversionOp = ISD::DELETED_NODE; 360 if (VT == MVT::f32 && CompareVT == MVT::i32) { 361 if (isUnsignedIntSetCC(CCOpcode)) { 362 ConversionOp = ISD::UINT_TO_FP; 363 } else { 364 ConversionOp = ISD::SINT_TO_FP; 365 } 366 } else if (VT == MVT::i32 && CompareVT == MVT::f32) { 367 ConversionOp = ISD::FP_TO_SINT; 368 } else { 369 // I don't think there will be any other type pairings. 370 assert(!"Unhandled operand type parings in SELECT_CC"); 371 } 372 // XXX Check the value of LHS and RHS and avoid creating sequences like 373 // (FTOI (ITOF)) 374 LHS = DAG.getNode(ConversionOp, DL, VT, LHS); 375 RHS = DAG.getNode(ConversionOp, DL, VT, RHS); 376 } 377 378 // If True is a hardware TRUE value and False is a hardware FALSE value or 379 // vice-versa we can handle this with a native instruction (SET* instructions). 380 if ((isHWTrueValue(True) && isHWFalseValue(False))) { 381 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); 382 } 383 384 // XXX If True is a hardware TRUE value and False is a hardware FALSE value, 385 // we can handle this with a native instruction, but we need to swap true 386 // and false and change the conditional. 387 if (isHWTrueValue(False) && isHWFalseValue(True)) { 388 } 389 390 // XXX Check if we can lower this to a SELECT or if it is supported by a native 391 // operation. (The code below does this but we don't have the Instruction 392 // selection patterns to do this yet. 393#if 0 394 if (isZero(LHS) || isZero(RHS)) { 395 SDValue Cond = (isZero(LHS) ? RHS : LHS); 396 bool SwapTF = false; 397 switch (CCOpcode) { 398 case ISD::SETOEQ: 399 case ISD::SETUEQ: 400 case ISD::SETEQ: 401 SwapTF = true; 402 // Fall through 403 case ISD::SETONE: 404 case ISD::SETUNE: 405 case ISD::SETNE: 406 // We can lower to select 407 if (SwapTF) { 408 Temp = True; 409 True = False; 410 False = Temp; 411 } 412 // CNDE 413 return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); 414 default: 415 // Supported by a native operation (CNDGE, CNDGT) 416 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); 417 } 418 } 419#endif 420 421 // If we make it this for it means we have no native instructions to handle 422 // this SELECT_CC, so we must lower it. 423 SDValue HWTrue, HWFalse; 424 425 if (VT == MVT::f32) { 426 HWTrue = DAG.getConstantFP(1.0f, VT); 427 HWFalse = DAG.getConstantFP(0.0f, VT); 428 } else if (VT == MVT::i32) { 429 HWTrue = DAG.getConstant(-1, VT); 430 HWFalse = DAG.getConstant(0, VT); 431 } 432 else { 433 assert(!"Unhandled value type in LowerSELECT_CC"); 434 } 435 436 // Lower this unsupported SELECT_CC into a combination of two supported 437 // SELECT_CC operations. 438 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC); 439 440 // Convert floating point condition to i1 441 if (VT == MVT::f32) { 442 Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32, 443 DAG.getNode(ISD::FNEG, DL, VT, Cond)); 444 } 445 446 return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); 447} 448 449SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const 450{ 451 SDValue Cond; 452 SDValue LHS = Op.getOperand(0); 453 SDValue RHS = Op.getOperand(1); 454 SDValue CC = Op.getOperand(2); 455 DebugLoc DL = Op.getDebugLoc(); 456 assert(Op.getValueType() == MVT::i32); 457 Cond = DAG.getNode( 458 ISD::SELECT_CC, 459 Op.getDebugLoc(), 460 MVT::i32, 461 LHS, RHS, 462 DAG.getConstant(-1, MVT::i32), 463 DAG.getConstant(0, MVT::i32), 464 CC); 465 Cond = DAG.getNode( 466 ISD::AND, 467 DL, 468 MVT::i32, 469 DAG.getConstant(1, MVT::i32), 470 Cond); 471 return Cond; 472} 473