R600ISelLowering.cpp revision 228a6641ccddaf24a993f827af1e97379785985a
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file 11// is mostly EmitInstrWithCustomInserter(). 12// 13//===----------------------------------------------------------------------===// 14 15#include "R600ISelLowering.h" 16#include "R600Defines.h" 17#include "R600InstrInfo.h" 18#include "R600MachineFunctionInfo.h" 19#include "llvm/CodeGen/MachineInstrBuilder.h" 20#include "llvm/CodeGen/MachineRegisterInfo.h" 21#include "llvm/CodeGen/SelectionDAG.h" 22 23using namespace llvm; 24 25R600TargetLowering::R600TargetLowering(TargetMachine &TM) : 26 AMDGPUTargetLowering(TM), 27 TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) 28{ 29 setOperationAction(ISD::MUL, MVT::i64, Expand); 30 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); 31 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); 32 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); 33 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); 34 computeRegisterProperties(); 35 36 setOperationAction(ISD::BR_CC, MVT::i32, Custom); 37 38 setOperationAction(ISD::FSUB, MVT::f32, Expand); 39 40 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 41 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 42 43 setOperationAction(ISD::ROTL, MVT::i32, Custom); 44 45 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 46 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 47 48 setOperationAction(ISD::SETCC, MVT::i32, Custom); 49 50 setSchedulingPreference(Sched::VLIW); 51} 52 53MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( 54 MachineInstr * MI, MachineBasicBlock * BB) const 55{ 56 MachineFunction * MF = BB->getParent(); 57 MachineRegisterInfo &MRI = MF->getRegInfo(); 58 MachineBasicBlock::iterator I = *MI; 59 60 switch (MI->getOpcode()) { 61 default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); 62 case AMDGPU::CLAMP_R600: 63 { 64 MachineInstr *NewMI = 65 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) 66 .addOperand(MI->getOperand(0)) 67 .addOperand(MI->getOperand(1)) 68 .addImm(0) // Flags 69 .addReg(AMDGPU::PRED_SEL_OFF); 70 TII->AddFlag(NewMI, 0, MO_FLAG_CLAMP); 71 break; 72 } 73 case AMDGPU::FABS_R600: 74 { 75 MachineInstr *NewMI = 76 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) 77 .addOperand(MI->getOperand(0)) 78 .addOperand(MI->getOperand(1)) 79 .addImm(0) // Flags 80 .addReg(AMDGPU::PRED_SEL_OFF); 81 TII->AddFlag(NewMI, 1, MO_FLAG_ABS); 82 break; 83 } 84 85 case AMDGPU::FNEG_R600: 86 { 87 MachineInstr *NewMI = 88 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) 89 .addOperand(MI->getOperand(0)) 90 .addOperand(MI->getOperand(1)) 91 .addImm(0) // Flags 92 .addReg(AMDGPU::PRED_SEL_OFF); 93 TII->AddFlag(NewMI, 1, MO_FLAG_NEG); 94 break; 95 } 96 97 case AMDGPU::R600_LOAD_CONST: 98 { 99 int64_t RegIndex = MI->getOperand(1).getImm(); 100 unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex); 101 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY)) 102 .addOperand(MI->getOperand(0)) 103 .addReg(ConstantReg); 104 break; 105 } 106 107 case AMDGPU::MASK_WRITE: 108 { 109 unsigned maskedRegister = MI->getOperand(0).getReg(); 110 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); 111 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); 112 TII->AddFlag(defInstr, 0, MO_FLAG_MASK); 113 // Return early so the instruction is not erased 114 return BB; 115 } 116 117 case AMDGPU::RAT_WRITE_CACHELESS_eg: 118 { 119 // Convert to DWORD address 120 unsigned NewAddr = MRI.createVirtualRegister( 121 &AMDGPU::R600_TReg32_XRegClass); 122 unsigned ShiftValue = MRI.createVirtualRegister( 123 &AMDGPU::R600_TReg32RegClass); 124 125 // XXX In theory, we should be able to pass ShiftValue directly to 126 // the LSHR_eg instruction as an inline literal, but I tried doing it 127 // this way and it didn't produce the correct results. 128 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue) 129 .addReg(AMDGPU::ALU_LITERAL_X) 130 .addReg(AMDGPU::PRED_SEL_OFF) 131 .addImm(2); 132 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr) 133 .addOperand(MI->getOperand(1)) 134 .addReg(ShiftValue) 135 .addReg(AMDGPU::PRED_SEL_OFF); 136 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) 137 .addOperand(MI->getOperand(0)) 138 .addReg(NewAddr); 139 break; 140 } 141 142 case AMDGPU::RESERVE_REG: 143 { 144 R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>(); 145 int64_t ReservedIndex = MI->getOperand(0).getImm(); 146 unsigned ReservedReg = 147 AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex); 148 MFI->ReservedRegs.push_back(ReservedReg); 149 break; 150 } 151 152 case AMDGPU::TXD: 153 { 154 unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 155 unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 156 157 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0) 158 .addOperand(MI->getOperand(3)) 159 .addOperand(MI->getOperand(4)) 160 .addOperand(MI->getOperand(5)); 161 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1) 162 .addOperand(MI->getOperand(2)) 163 .addOperand(MI->getOperand(4)) 164 .addOperand(MI->getOperand(5)); 165 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G)) 166 .addOperand(MI->getOperand(0)) 167 .addOperand(MI->getOperand(1)) 168 .addOperand(MI->getOperand(4)) 169 .addOperand(MI->getOperand(5)) 170 .addReg(t0, RegState::Implicit) 171 .addReg(t1, RegState::Implicit); 172 break; 173 } 174 case AMDGPU::TXD_SHADOW: 175 { 176 unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass); 177 unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass); 178 179 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0) 180 .addOperand(MI->getOperand(3)) 181 .addOperand(MI->getOperand(4)) 182 .addOperand(MI->getOperand(5)); 183 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1) 184 .addOperand(MI->getOperand(2)) 185 .addOperand(MI->getOperand(4)) 186 .addOperand(MI->getOperand(5)); 187 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G)) 188 .addOperand(MI->getOperand(0)) 189 .addOperand(MI->getOperand(1)) 190 .addOperand(MI->getOperand(4)) 191 .addOperand(MI->getOperand(5)) 192 .addReg(t0, RegState::Implicit) 193 .addReg(t1, RegState::Implicit); 194 break; 195 } 196 case AMDGPU::BRANCH: 197 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) 198 .addOperand(MI->getOperand(0)) 199 .addReg(0); 200 break; 201 case AMDGPU::BRANCH_COND_f32: 202 { 203 MachineInstr *NewMI = 204 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X)) 205 .addReg(AMDGPU::PREDICATE_BIT) 206 .addOperand(MI->getOperand(1)) 207 .addImm(OPCODE_IS_ZERO) 208 .addImm(0); // Flags 209 TII->AddFlag(NewMI, 1, MO_FLAG_PUSH); 210 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) 211 .addOperand(MI->getOperand(0)) 212 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 213 break; 214 } 215 case AMDGPU::BRANCH_COND_i32: 216 { 217 MachineInstr *NewMI = 218 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X)) 219 .addReg(AMDGPU::PREDICATE_BIT) 220 .addOperand(MI->getOperand(1)) 221 .addImm(OPCODE_IS_ZERO_INT) 222 .addImm(0); // Flags 223 TII->AddFlag(NewMI, 1, MO_FLAG_PUSH); 224 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) 225 .addOperand(MI->getOperand(0)) 226 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 227 break; 228 } 229 } 230 231 MI->eraseFromParent(); 232 return BB; 233} 234 235//===----------------------------------------------------------------------===// 236// Custom DAG Lowering Operations 237//===----------------------------------------------------------------------===// 238 239using namespace llvm::Intrinsic; 240using namespace llvm::AMDGPUIntrinsic; 241 242SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 243{ 244 switch (Op.getOpcode()) { 245 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 246 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 247 case ISD::ROTL: return LowerROTL(Op, DAG); 248 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 249 case ISD::SETCC: return LowerSETCC(Op, DAG); 250 case ISD::INTRINSIC_VOID: { 251 SDValue Chain = Op.getOperand(0); 252 unsigned IntrinsicID = 253 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 254 switch (IntrinsicID) { 255 case AMDGPUIntrinsic::AMDGPU_store_output: { 256 MachineFunction &MF = DAG.getMachineFunction(); 257 MachineRegisterInfo &MRI = MF.getRegInfo(); 258 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); 259 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); 260 if (!MRI.isLiveOut(Reg)) { 261 MRI.addLiveOut(Reg); 262 } 263 return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2)); 264 } 265 // default for switch(IntrinsicID) 266 default: break; 267 } 268 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode()) 269 break; 270 } 271 case ISD::INTRINSIC_WO_CHAIN: { 272 unsigned IntrinsicID = 273 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 274 EVT VT = Op.getValueType(); 275 DebugLoc DL = Op.getDebugLoc(); 276 switch(IntrinsicID) { 277 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 278 case AMDGPUIntrinsic::R600_load_input: { 279 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 280 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); 281 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT); 282 } 283 284 case r600_read_ngroups_x: 285 return LowerImplicitParameter(DAG, VT, DL, 0); 286 case r600_read_ngroups_y: 287 return LowerImplicitParameter(DAG, VT, DL, 1); 288 case r600_read_ngroups_z: 289 return LowerImplicitParameter(DAG, VT, DL, 2); 290 case r600_read_global_size_x: 291 return LowerImplicitParameter(DAG, VT, DL, 3); 292 case r600_read_global_size_y: 293 return LowerImplicitParameter(DAG, VT, DL, 4); 294 case r600_read_global_size_z: 295 return LowerImplicitParameter(DAG, VT, DL, 5); 296 case r600_read_local_size_x: 297 return LowerImplicitParameter(DAG, VT, DL, 6); 298 case r600_read_local_size_y: 299 return LowerImplicitParameter(DAG, VT, DL, 7); 300 case r600_read_local_size_z: 301 return LowerImplicitParameter(DAG, VT, DL, 8); 302 303 case r600_read_tgid_x: 304 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 305 AMDGPU::T1_X, VT); 306 case r600_read_tgid_y: 307 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 308 AMDGPU::T1_Y, VT); 309 case r600_read_tgid_z: 310 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 311 AMDGPU::T1_Z, VT); 312 case r600_read_tidig_x: 313 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 314 AMDGPU::T0_X, VT); 315 case r600_read_tidig_y: 316 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 317 AMDGPU::T0_Y, VT); 318 case r600_read_tidig_z: 319 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 320 AMDGPU::T0_Z, VT); 321 } 322 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) 323 break; 324 } 325 } // end switch(Op.getOpcode()) 326 return SDValue(); 327} 328 329SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const 330{ 331 SDValue Chain = Op.getOperand(0); 332 SDValue CC = Op.getOperand(1); 333 SDValue LHS = Op.getOperand(2); 334 SDValue RHS = Op.getOperand(3); 335 SDValue JumpT = Op.getOperand(4); 336 SDValue CmpValue; 337 SDValue Result; 338 CmpValue = DAG.getNode( 339 ISD::SELECT_CC, 340 Op.getDebugLoc(), 341 MVT::i32, 342 LHS, RHS, 343 DAG.getConstant(-1, MVT::i32), 344 DAG.getConstant(0, MVT::i32), 345 CC); 346 Result = DAG.getNode( 347 AMDGPUISD::BRANCH_COND, 348 CmpValue.getDebugLoc(), 349 MVT::Other, Chain, 350 JumpT, CmpValue); 351 return Result; 352} 353 354SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, 355 DebugLoc DL, 356 unsigned DwordOffset) const 357{ 358 unsigned ByteOffset = DwordOffset * 4; 359 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), 360 AMDGPUAS::PARAM_I_ADDRESS); 361 362 // We shouldn't be using an offset wider than 16-bits for implicit parameters. 363 assert(isInt<16>(ByteOffset)); 364 365 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 366 DAG.getConstant(ByteOffset, MVT::i32), // PTR 367 MachinePointerInfo(ConstantPointerNull::get(PtrType)), 368 false, false, false, 0); 369} 370 371SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const 372{ 373 DebugLoc DL = Op.getDebugLoc(); 374 EVT VT = Op.getValueType(); 375 376 return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT, 377 Op.getOperand(0), 378 Op.getOperand(0), 379 DAG.getNode(ISD::SUB, DL, VT, 380 DAG.getConstant(32, MVT::i32), 381 Op.getOperand(1))); 382} 383 384SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const 385{ 386 DebugLoc DL = Op.getDebugLoc(); 387 EVT VT = Op.getValueType(); 388 389 SDValue LHS = Op.getOperand(0); 390 SDValue RHS = Op.getOperand(1); 391 SDValue True = Op.getOperand(2); 392 SDValue False = Op.getOperand(3); 393 SDValue CC = Op.getOperand(4); 394 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 395 SDValue Temp; 396 397 // LHS and RHS are guaranteed to be the same value type 398 EVT CompareVT = LHS.getValueType(); 399 400 // We need all the operands of SELECT_CC to have the same value type, so if 401 // necessary we need to convert LHS and RHS to be the same type True and 402 // False. True and False are guaranteed to have the same type as this 403 // SELECT_CC node. 404 405 if (CompareVT != VT) { 406 ISD::NodeType ConversionOp = ISD::DELETED_NODE; 407 if (VT == MVT::f32 && CompareVT == MVT::i32) { 408 if (isUnsignedIntSetCC(CCOpcode)) { 409 ConversionOp = ISD::UINT_TO_FP; 410 } else { 411 ConversionOp = ISD::SINT_TO_FP; 412 } 413 } else if (VT == MVT::i32 && CompareVT == MVT::f32) { 414 ConversionOp = ISD::FP_TO_SINT; 415 } else { 416 // I don't think there will be any other type pairings. 417 assert(!"Unhandled operand type parings in SELECT_CC"); 418 } 419 // XXX Check the value of LHS and RHS and avoid creating sequences like 420 // (FTOI (ITOF)) 421 LHS = DAG.getNode(ConversionOp, DL, VT, LHS); 422 RHS = DAG.getNode(ConversionOp, DL, VT, RHS); 423 } 424 425 // If True is a hardware TRUE value and False is a hardware FALSE value or 426 // vice-versa we can handle this with a native instruction (SET* instructions). 427 if ((isHWTrueValue(True) && isHWFalseValue(False))) { 428 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); 429 } 430 431 // XXX If True is a hardware TRUE value and False is a hardware FALSE value, 432 // we can handle this with a native instruction, but we need to swap true 433 // and false and change the conditional. 434 if (isHWTrueValue(False) && isHWFalseValue(True)) { 435 } 436 437 // XXX Check if we can lower this to a SELECT or if it is supported by a native 438 // operation. (The code below does this but we don't have the Instruction 439 // selection patterns to do this yet. 440#if 0 441 if (isZero(LHS) || isZero(RHS)) { 442 SDValue Cond = (isZero(LHS) ? RHS : LHS); 443 bool SwapTF = false; 444 switch (CCOpcode) { 445 case ISD::SETOEQ: 446 case ISD::SETUEQ: 447 case ISD::SETEQ: 448 SwapTF = true; 449 // Fall through 450 case ISD::SETONE: 451 case ISD::SETUNE: 452 case ISD::SETNE: 453 // We can lower to select 454 if (SwapTF) { 455 Temp = True; 456 True = False; 457 False = Temp; 458 } 459 // CNDE 460 return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); 461 default: 462 // Supported by a native operation (CNDGE, CNDGT) 463 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); 464 } 465 } 466#endif 467 468 // If we make it this for it means we have no native instructions to handle 469 // this SELECT_CC, so we must lower it. 470 SDValue HWTrue, HWFalse; 471 472 if (VT == MVT::f32) { 473 HWTrue = DAG.getConstantFP(1.0f, VT); 474 HWFalse = DAG.getConstantFP(0.0f, VT); 475 } else if (VT == MVT::i32) { 476 HWTrue = DAG.getConstant(-1, VT); 477 HWFalse = DAG.getConstant(0, VT); 478 } 479 else { 480 assert(!"Unhandled value type in LowerSELECT_CC"); 481 } 482 483 // Lower this unsupported SELECT_CC into a combination of two supported 484 // SELECT_CC operations. 485 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC); 486 487 // Convert floating point condition to i1 488 if (VT == MVT::f32) { 489 Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32, 490 DAG.getNode(ISD::FNEG, DL, VT, Cond)); 491 } 492 493 return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); 494} 495 496SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const 497{ 498 SDValue Cond; 499 SDValue LHS = Op.getOperand(0); 500 SDValue RHS = Op.getOperand(1); 501 SDValue CC = Op.getOperand(2); 502 DebugLoc DL = Op.getDebugLoc(); 503 assert(Op.getValueType() == MVT::i32); 504 Cond = DAG.getNode( 505 ISD::SELECT_CC, 506 Op.getDebugLoc(), 507 MVT::i32, 508 LHS, RHS, 509 DAG.getConstant(-1, MVT::i32), 510 DAG.getConstant(0, MVT::i32), 511 CC); 512 Cond = DAG.getNode( 513 ISD::AND, 514 DL, 515 MVT::i32, 516 DAG.getConstant(1, MVT::i32), 517 Cond); 518 return Cond; 519} 520