R600ISelLowering.cpp revision f7fcaa07df7b3aab124576dec346ae4fa7c6715b
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file 11// is mostly EmitInstrWithCustomInserter(). 12// 13//===----------------------------------------------------------------------===// 14 15#include "R600ISelLowering.h" 16#include "AMDGPUUtil.h" 17#include "R600InstrInfo.h" 18#include "R600MachineFunctionInfo.h" 19#include "llvm/CodeGen/MachineInstrBuilder.h" 20#include "llvm/CodeGen/MachineRegisterInfo.h" 21#include "llvm/CodeGen/SelectionDAG.h" 22 23using namespace llvm; 24 25R600TargetLowering::R600TargetLowering(TargetMachine &TM) : 26 AMDGPUTargetLowering(TM), 27 TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) 28{ 29 setOperationAction(ISD::MUL, MVT::i64, Expand); 30 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); 31 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); 32 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); 33 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); 34 computeRegisterProperties(); 35 36 setOperationAction(ISD::BR_CC, MVT::i32, Custom); 37 38 setOperationAction(ISD::FSUB, MVT::f32, Expand); 39 40 setOperationAction(ISD::ROTL, MVT::i32, Custom); 41 42 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 43 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 44 45 setOperationAction(ISD::SETCC, MVT::i32, Custom); 46 47 setSchedulingPreference(Sched::VLIW); 48} 49 50MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( 51 MachineInstr * MI, MachineBasicBlock * BB) const 52{ 53 MachineFunction * MF = BB->getParent(); 54 MachineRegisterInfo &MRI = MF->getRegInfo(); 55 MachineBasicBlock::iterator I = *MI; 56 57 switch (MI->getOpcode()) { 58 default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); 59 case AMDGPU::TGID_X: 60 addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X); 61 break; 62 case AMDGPU::TGID_Y: 63 addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y); 64 break; 65 case AMDGPU::TGID_Z: 66 addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z); 67 break; 68 case AMDGPU::TIDIG_X: 69 addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X); 70 break; 71 case AMDGPU::TIDIG_Y: 72 addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y); 73 break; 74 case AMDGPU::TIDIG_Z: 75 addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z); 76 break; 77 case AMDGPU::NGROUPS_X: 78 lowerImplicitParameter(MI, *BB, MRI, 0); 79 break; 80 case AMDGPU::NGROUPS_Y: 81 lowerImplicitParameter(MI, *BB, MRI, 1); 82 break; 83 case AMDGPU::NGROUPS_Z: 84 lowerImplicitParameter(MI, *BB, MRI, 2); 85 break; 86 case AMDGPU::GLOBAL_SIZE_X: 87 lowerImplicitParameter(MI, *BB, MRI, 3); 88 break; 89 case AMDGPU::GLOBAL_SIZE_Y: 90 lowerImplicitParameter(MI, *BB, MRI, 4); 91 break; 92 case AMDGPU::GLOBAL_SIZE_Z: 93 lowerImplicitParameter(MI, *BB, MRI, 5); 94 break; 95 case AMDGPU::LOCAL_SIZE_X: 96 lowerImplicitParameter(MI, *BB, MRI, 6); 97 break; 98 case AMDGPU::LOCAL_SIZE_Y: 99 lowerImplicitParameter(MI, *BB, MRI, 7); 100 break; 101 case AMDGPU::LOCAL_SIZE_Z: 102 lowerImplicitParameter(MI, *BB, MRI, 8); 103 break; 104 105 case AMDGPU::CLAMP_R600: 106 MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP); 107 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) 108 .addOperand(MI->getOperand(0)) 109 .addOperand(MI->getOperand(1)); 110 break; 111 112 case AMDGPU::FABS_R600: 113 MI->getOperand(1).addTargetFlag(MO_FLAG_ABS); 114 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) 115 .addOperand(MI->getOperand(0)) 116 .addOperand(MI->getOperand(1)); 117 break; 118 119 case AMDGPU::FNEG_R600: 120 MI->getOperand(1).addTargetFlag(MO_FLAG_NEG); 121 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) 122 .addOperand(MI->getOperand(0)) 123 .addOperand(MI->getOperand(1)); 124 break; 125 126 case AMDGPU::R600_LOAD_CONST: 127 { 128 int64_t RegIndex = MI->getOperand(1).getImm(); 129 unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex); 130 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY)) 131 .addOperand(MI->getOperand(0)) 132 .addReg(ConstantReg); 133 break; 134 } 135 136 case AMDGPU::LOAD_INPUT: 137 { 138 int64_t RegIndex = MI->getOperand(1).getImm(); 139 addLiveIn(MI, MF, MRI, TII, 140 AMDGPU::R600_TReg32RegClass.getRegister(RegIndex)); 141 break; 142 } 143 144 case AMDGPU::MASK_WRITE: 145 { 146 unsigned maskedRegister = MI->getOperand(0).getReg(); 147 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); 148 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); 149 MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister); 150 def->addTargetFlag(MO_FLAG_MASK); 151 // Return early so the instruction is not erased 152 return BB; 153 } 154 155 case AMDGPU::RAT_WRITE_CACHELESS_eg: 156 { 157 // Convert to DWORD address 158 unsigned NewAddr = MRI.createVirtualRegister( 159 AMDGPU::R600_TReg32_XRegisterClass); 160 unsigned ShiftValue = MRI.createVirtualRegister( 161 AMDGPU::R600_TReg32RegisterClass); 162 163 // XXX In theory, we should be able to pass ShiftValue directly to 164 // the LSHR_eg instruction as an inline literal, but I tried doing it 165 // this way and it didn't produce the correct results. 166 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue) 167 .addReg(AMDGPU::ALU_LITERAL_X) 168 .addImm(2); 169 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr) 170 .addOperand(MI->getOperand(1)) 171 .addReg(ShiftValue); 172 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) 173 .addOperand(MI->getOperand(0)) 174 .addReg(NewAddr); 175 break; 176 } 177 178 case AMDGPU::STORE_OUTPUT: 179 { 180 int64_t OutputIndex = MI->getOperand(1).getImm(); 181 unsigned OutputReg = AMDGPU::R600_TReg32RegClass.getRegister(OutputIndex); 182 183 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY), OutputReg) 184 .addOperand(MI->getOperand(0)); 185 186 if (!MRI.isLiveOut(OutputReg)) { 187 MRI.addLiveOut(OutputReg); 188 } 189 break; 190 } 191 192 case AMDGPU::RESERVE_REG: 193 { 194 R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>(); 195 int64_t ReservedIndex = MI->getOperand(0).getImm(); 196 unsigned ReservedReg = 197 AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex); 198 MFI->ReservedRegs.push_back(ReservedReg); 199 break; 200 } 201 202 case AMDGPU::TXD: 203 { 204 unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass); 205 unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass); 206 207 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0) 208 .addOperand(MI->getOperand(3)) 209 .addOperand(MI->getOperand(4)) 210 .addOperand(MI->getOperand(5)); 211 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1) 212 .addOperand(MI->getOperand(2)) 213 .addOperand(MI->getOperand(4)) 214 .addOperand(MI->getOperand(5)); 215 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G)) 216 .addOperand(MI->getOperand(0)) 217 .addOperand(MI->getOperand(1)) 218 .addOperand(MI->getOperand(4)) 219 .addOperand(MI->getOperand(5)) 220 .addReg(t0, RegState::Implicit) 221 .addReg(t1, RegState::Implicit); 222 break; 223 } 224 case AMDGPU::TXD_SHADOW: 225 { 226 unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass); 227 unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass); 228 229 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0) 230 .addOperand(MI->getOperand(3)) 231 .addOperand(MI->getOperand(4)) 232 .addOperand(MI->getOperand(5)); 233 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1) 234 .addOperand(MI->getOperand(2)) 235 .addOperand(MI->getOperand(4)) 236 .addOperand(MI->getOperand(5)); 237 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G)) 238 .addOperand(MI->getOperand(0)) 239 .addOperand(MI->getOperand(1)) 240 .addOperand(MI->getOperand(4)) 241 .addOperand(MI->getOperand(5)) 242 .addReg(t0, RegState::Implicit) 243 .addReg(t1, RegState::Implicit); 244 break; 245 } 246 247 248 } 249 250 MI->eraseFromParent(); 251 return BB; 252} 253 254void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, 255 MachineRegisterInfo & MRI, unsigned dword_offset) const 256{ 257 MachineBasicBlock::iterator I = *MI; 258 unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass); 259 MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass); 260 261 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::MOV), PtrReg) 262 .addReg(AMDGPU::ALU_LITERAL_X) 263 .addImm(dword_offset * 4); 264 265 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_i32_eg)) 266 .addOperand(MI->getOperand(0)) 267 .addReg(PtrReg) 268 .addImm(0); 269} 270 271//===----------------------------------------------------------------------===// 272// Custom DAG Lowering Operations 273//===----------------------------------------------------------------------===// 274 275 276SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 277{ 278 switch (Op.getOpcode()) { 279 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 280 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 281 case ISD::ROTL: return LowerROTL(Op, DAG); 282 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 283 case ISD::SETCC: return LowerSETCC(Op, DAG); 284 } 285} 286 287SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const 288{ 289 SDValue Chain = Op.getOperand(0); 290 SDValue CC = Op.getOperand(1); 291 SDValue LHS = Op.getOperand(2); 292 SDValue RHS = Op.getOperand(3); 293 SDValue JumpT = Op.getOperand(4); 294 SDValue CmpValue; 295 SDValue Result; 296 CmpValue = DAG.getNode( 297 ISD::SELECT_CC, 298 Op.getDebugLoc(), 299 MVT::i32, 300 LHS, RHS, 301 DAG.getConstant(-1, MVT::i32), 302 DAG.getConstant(0, MVT::i32), 303 CC); 304 Result = DAG.getNode( 305 AMDGPUISD::BRANCH_COND, 306 CmpValue.getDebugLoc(), 307 MVT::Other, Chain, 308 JumpT, CmpValue); 309 return Result; 310} 311 312 313SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const 314{ 315 DebugLoc DL = Op.getDebugLoc(); 316 EVT VT = Op.getValueType(); 317 318 return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT, 319 Op.getOperand(0), 320 Op.getOperand(0), 321 DAG.getNode(ISD::SUB, DL, VT, 322 DAG.getConstant(32, MVT::i32), 323 Op.getOperand(1))); 324} 325 326SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const 327{ 328 DebugLoc DL = Op.getDebugLoc(); 329 EVT VT = Op.getValueType(); 330 331 SDValue LHS = Op.getOperand(0); 332 SDValue RHS = Op.getOperand(1); 333 SDValue True = Op.getOperand(2); 334 SDValue False = Op.getOperand(3); 335 SDValue CC = Op.getOperand(4); 336 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 337 SDValue Temp; 338 339 // LHS and RHS are guaranteed to be the same value type 340 EVT CompareVT = LHS.getValueType(); 341 342 // We need all the operands of SELECT_CC to have the same value type, so if 343 // necessary we need to convert LHS and RHS to be the same type True and 344 // False. True and False are guaranteed to have the same type as this 345 // SELECT_CC node. 346 347 if (CompareVT != VT) { 348 ISD::NodeType ConversionOp = ISD::DELETED_NODE; 349 if (VT == MVT::f32 && CompareVT == MVT::i32) { 350 if (isUnsignedIntSetCC(CCOpcode)) { 351 ConversionOp = ISD::UINT_TO_FP; 352 } else { 353 ConversionOp = ISD::SINT_TO_FP; 354 } 355 } else if (VT == MVT::i32 && CompareVT == MVT::f32) { 356 ConversionOp = ISD::FP_TO_SINT; 357 } else { 358 // I don't think there will be any other type pairings. 359 assert(!"Unhandled operand type parings in SELECT_CC"); 360 } 361 // XXX Check the value of LHS and RHS and avoid creating sequences like 362 // (FTOI (ITOF)) 363 LHS = DAG.getNode(ConversionOp, DL, VT, LHS); 364 RHS = DAG.getNode(ConversionOp, DL, VT, RHS); 365 } 366 367 // If True is a hardware TRUE value and False is a hardware FALSE value or 368 // vice-versa we can handle this with a native instruction (SET* instructions). 369 if ((isHWTrueValue(True) && isHWFalseValue(False))) { 370 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); 371 } 372 373 // XXX If True is a hardware TRUE value and False is a hardware FALSE value, 374 // we can handle this with a native instruction, but we need to swap true 375 // and false and change the conditional. 376 if (isHWTrueValue(False) && isHWFalseValue(True)) { 377 } 378 379 // XXX Check if we can lower this to a SELECT or if it is supported by a native 380 // operation. (The code below does this but we don't have the Instruction 381 // selection patterns to do this yet. 382#if 0 383 if (isZero(LHS) || isZero(RHS)) { 384 SDValue Cond = (isZero(LHS) ? RHS : LHS); 385 bool SwapTF = false; 386 switch (CCOpcode) { 387 case ISD::SETOEQ: 388 case ISD::SETUEQ: 389 case ISD::SETEQ: 390 SwapTF = true; 391 // Fall through 392 case ISD::SETONE: 393 case ISD::SETUNE: 394 case ISD::SETNE: 395 // We can lower to select 396 if (SwapTF) { 397 Temp = True; 398 True = False; 399 False = Temp; 400 } 401 // CNDE 402 return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); 403 default: 404 // Supported by a native operation (CNDGE, CNDGT) 405 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); 406 } 407 } 408#endif 409 410 // If we make it this for it means we have no native instructions to handle 411 // this SELECT_CC, so we must lower it. 412 SDValue HWTrue, HWFalse; 413 414 if (VT == MVT::f32) { 415 HWTrue = DAG.getConstantFP(1.0f, VT); 416 HWFalse = DAG.getConstantFP(0.0f, VT); 417 } else if (VT == MVT::i32) { 418 HWTrue = DAG.getConstant(-1, VT); 419 HWFalse = DAG.getConstant(0, VT); 420 } 421 else { 422 assert(!"Unhandled value type in LowerSELECT_CC"); 423 } 424 425 // Lower this unsupported SELECT_CC into a combination of two supported 426 // SELECT_CC operations. 427 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC); 428 429 // Convert floating point condition to i1 430 if (VT == MVT::f32) { 431 Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32, 432 DAG.getNode(ISD::FNEG, DL, VT, Cond)); 433 } 434 435 return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); 436} 437 438SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const 439{ 440 SDValue Cond; 441 SDValue LHS = Op.getOperand(0); 442 SDValue RHS = Op.getOperand(1); 443 SDValue CC = Op.getOperand(2); 444 DebugLoc DL = Op.getDebugLoc(); 445 assert(Op.getValueType() == MVT::i32); 446 Cond = DAG.getNode( 447 ISD::SELECT_CC, 448 Op.getDebugLoc(), 449 MVT::i32, 450 LHS, RHS, 451 DAG.getConstant(-1, MVT::i32), 452 DAG.getConstant(0, MVT::i32), 453 CC); 454 Cond = DAG.getNode( 455 ISD::AND, 456 DL, 457 MVT::i32, 458 DAG.getConstant(1, MVT::i32), 459 Cond); 460 return Cond; 461} 462