R600ISelLowering.cpp revision 3a7a56e7aa56bc6cb847c241ef6bd749713ae6e1
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
// is mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600:
  {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addReg(AMDGPU::PRED_SEL_OFF);
    TII->AddFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }
  case AMDGPU::FABS_R600:
  {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addReg(AMDGPU::PRED_SEL_OFF);
    TII->AddFlag(NewMI, 1, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600:
  {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addReg(AMDGPU::PRED_SEL_OFF);
    TII->AddFlag(NewMI, 1, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::R600_LOAD_CONST:
  {
    int64_t RegIndex = MI->getOperand(1).getImm();
    unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
        .addOperand(MI->getOperand(0))
        .addReg(ConstantReg);
    break;
  }

  case AMDGPU::MASK_WRITE:
  {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->AddFlag(defInstr, 0, MO_FLAG_MASK);
    // Return early so the instruction is not erased.
    return BB;
  }
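  // The pointer operand of RAT_WRITE_CACHELESS_eg comes in as a byte address;
  // the write itself is addressed in DWORDs, so the address is shifted right
  // by two before the instruction is re-emitted with the adjusted pointer.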
  case AMDGPU::RAT_WRITE_CACHELESS_eg:
  {
    // Convert to DWORD address.
    unsigned NewAddr = MRI.createVirtualRegister(
        AMDGPU::R600_TReg32_XRegisterClass);
    unsigned ShiftValue = MRI.createVirtualRegister(
        AMDGPU::R600_TReg32RegisterClass);

    // XXX In theory, we should be able to pass ShiftValue directly to
    // the LSHR_eg instruction as an inline literal, but I tried doing it
    // this way and it didn't produce the correct results.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
        .addReg(AMDGPU::ALU_LITERAL_X)
        .addReg(AMDGPU::PRED_SEL_OFF)
        .addImm(2);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
        .addOperand(MI->getOperand(1))
        .addReg(ShiftValue)
        .addReg(AMDGPU::PRED_SEL_OFF);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addReg(NewAddr);
    break;
  }

  case AMDGPU::RESERVE_REG:
  {
    R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
    int64_t ReservedIndex = MI->getOperand(0).getImm();
    unsigned ReservedReg =
        AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
    MFI->ReservedRegs.push_back(ReservedReg);
    break;
  }

  case AMDGPU::TXD:
  {
    unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
    unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addReg(t0, RegState::Implicit)
        .addReg(t1, RegState::Implicit);
    break;
  }
  case AMDGPU::TXD_SHADOW:
  {
    unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
    unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addReg(t0, RegState::Implicit)
        .addReg(t1, RegState::Implicit);
    break;
  }
  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0))
        .addReg(0);
    break;
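  // Both conditional-branch pseudos expand the same way: a PRED_X compares
  // the condition operand against zero and defines PREDICATE_BIT (with the
  // PUSH flag set), and the JUMP that follows is predicated on that bit.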
  case AMDGPU::BRANCH_COND_f32:
  {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
            .addReg(AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_ZERO);
    TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }
  case AMDGPU::BRANCH_COND_i32:
  {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
            .addReg(AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_ZERO_INT);
    TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);
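    // The group ID (tgid) and the thread ID within the group (tidig) are not
    // read from memory; they are expected to be pre-loaded into T1.xyz and
    // T0.xyz respectively before the kernel starts, so they are modeled here
    // as live-in registers.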
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const
{
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  // rotl(x, y) is lowered as bitalign(x, x, 32 - y).
  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type.
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to the same type as True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.
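  // For example, (select_cc (i32 lhs), (i32 rhs), (f32 1.0), (f32 0.0), setgt)
  // has VT == f32 but CompareVT == i32, so lhs and rhs are converted with
  // SINT_TO_FP before the node is rebuilt.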

  if (CompareVT != VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type pairings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value, we
  // can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
  // we could also handle this with a native instruction, but we would need to
  // swap True and False and invert the condition.  This is not implemented yet.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a
  // native operation.  (The code below does this, but we don't have the
  // instruction selection patterns to do it yet.)
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
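  //   (select_cc lhs, rhs, t, f, cc)
  //     -> (select (select_cc lhs, rhs, HWTrue, HWFalse, cc), t, f)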
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  // Convert a floating-point condition to an integer one (-1 / 0).
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  // SELECT_CC produces all ones (-1) for a true result; mask with 1 so SETCC
  // yields the expected 0 / 1 value.
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}