R600ISelLowering.cpp revision 67a47a445b544ac638d10303dc697d70f25d12fb
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
// is mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
                .addOperand(MI->getOperand(0))
                .addOperand(MI->getOperand(1))
                .addImm(0) // Flags
                .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 0, MO_FLAG_CLAMP);
      break;
    }
  case AMDGPU::FABS_R600:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
                .addOperand(MI->getOperand(0))
                .addOperand(MI->getOperand(1))
                .addImm(0) // Flags
                .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 1, MO_FLAG_ABS);
      break;
    }

  case AMDGPU::FNEG_R600:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
                .addOperand(MI->getOperand(0))
                .addOperand(MI->getOperand(1))
                .addImm(0) // Flags
                .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 1, MO_FLAG_NEG);
      break;
    }

  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
              .addOperand(MI->getOperand(0))
              .addReg(ConstantReg);
      break;
    }
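
  // MASK_WRITE is not expanded into any new instructions.  Instead, the
  // instruction that defines the masked register is tagged with MO_FLAG_MASK,
  // and the pseudo itself is left in place: the early return below skips the
  // eraseFromParent() at the bottom of this function.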
  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      TII->AddFlag(defInstr, 0, MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
                                           AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                             AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addReg(AMDGPU::PRED_SEL_OFF)
              .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue)
              .addReg(AMDGPU::PRED_SEL_OFF);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr);
      break;
    }

  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
                        AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::BRANCH:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(0);
      break;
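  // The conditional branch pseudos expand to a PRED_X that compares the
  // condition operand against zero and defines PREDICATE_BIT, followed by a
  // JUMP predicated on that bit.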
  case AMDGPU::BRANCH_COND_f32:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
                .addReg(AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_ZERO)
                .addImm(0); // Flags
      TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  case AMDGPU::BRANCH_COND_i32:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
                .addReg(AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_ZERO_INT)
                .addImm(0); // Flags
      TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }
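
    // The ngroups, global size, and local size values are implicit kernel
    // parameters; each one is fetched from the implicit parameter buffer at
    // a fixed dword offset (see LowerImplicitParameter below).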
    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

// BR_CC is lowered by materializing the comparison result as an all-ones /
// zero i32 with SELECT_CC and branching on it with AMDGPUISD::BRANCH_COND.
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const
{
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}
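
// ROTL is lowered to BITALIGN, which (assuming the hardware's bitalign
// semantics) selects 32 bits from the 64-bit concatenation of its first two
// operands, shifted right by the third.  With both inputs equal to x, a
// funnel shift right by (32 - N) is the same as a left rotate by N.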
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to have the same type as True
  // and False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.

  if (CompareVT != VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type pairings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value we
  // can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
  // we can also handle this with a native instruction, but we need to swap
  // True and False and invert the condition code.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a
  // native operation. (The code below does this, but we don't have the
  // instruction selection patterns to do it yet.)
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse,
                             CC);

  // Convert the floating point condition to an integer (-1 / 0)
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}
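
// SETCC is lowered as a SELECT_CC producing the hardware boolean values
// (-1 / 0), whose result is then ANDed with 1 to give the 0 / 1 value that
// ISD::SETCC is defined to produce.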
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      DL,
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}