//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
// is mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addImm(0) // Flags
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
      break;
    }

  case AMDGPU::FABS_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addImm(0) // Flags
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 1, MO_FLAG_ABS);
      break;
    }

  case AMDGPU::FNEG_R600:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addImm(0) // Flags
              .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 1, MO_FLAG_NEG);
      break;
    }

  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
          .addOperand(MI->getOperand(0))
          .addReg(ConstantReg);
      break;
    }

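  // MASK_WRITE expands to no instruction of its own; it only tags the
  // instruction that defines its source register with MO_FLAG_MASK, which
  // suppresses writing the result.  Note the early return below: the pseudo
  // is deliberately not erased here.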
  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      TII->addFlag(defInstr, 0, MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
                                              &AMDGPU::R600_TReg32_XRegClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                              &AMDGPU::R600_TReg32RegClass);
      unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but when I tried it
      // that way it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV_IMM_I32),
              ShiftValue)
          .addReg(AMDGPU::ALU_LITERAL_X)
          .addReg(AMDGPU::PRED_SEL_OFF)
          .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
          .addOperand(MI->getOperand(1))
          .addReg(ShiftValue)
          .addReg(AMDGPU::PRED_SEL_OFF);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
          .addOperand(MI->getOperand(0))
          .addReg(NewAddr)
          .addImm(EOP); // Set End of program bit
      break;
    }

  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
      unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

      BuildMI(*BB, I, BB->findDebugLoc(I),
              TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
          .addOperand(MI->getOperand(3))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I),
              TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
          .addOperand(MI->getOperand(2))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(1))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5))
          .addReg(t0, RegState::Implicit)
          .addReg(t1, RegState::Implicit);
      break;
    }

  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
      unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

      BuildMI(*BB, I, BB->findDebugLoc(I),
              TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
          .addOperand(MI->getOperand(3))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I),
              TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
          .addOperand(MI->getOperand(2))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
          .addOperand(MI->getOperand(0))
          .addOperand(MI->getOperand(1))
          .addOperand(MI->getOperand(4))
          .addOperand(MI->getOperand(5))
          .addReg(t0, RegState::Implicit)
          .addReg(t1, RegState::Implicit);
      break;
    }

  case AMDGPU::BRANCH:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
          .addOperand(MI->getOperand(0))
          .addReg(0);
      break;

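  // Conditional branches expand to two instructions: a PRED_X that compares
  // the condition value against zero and defines PREDICATE_BIT (with the
  // PUSH flag set on the compare), followed by a JUMP predicated on
  // PREDICATE_BIT, which is killed there.  The only difference between the
  // f32 and i32 variants is the compare opcode (OPCODE_IS_ZERO vs.
  // OPCODE_IS_ZERO_INT).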
  case AMDGPU::BRANCH_COND_f32:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
              .addReg(AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_ZERO)
              .addImm(0); // Flags
      TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
          .addOperand(MI->getOperand(0))
          .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }

  case AMDGPU::BRANCH_COND_i32:
    {
      MachineInstr *NewMI =
          BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
              .addReg(AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_ZERO_INT)
              .addImm(0); // Flags
      TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
          .addOperand(MI->getOperand(0))
          .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

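// BR_CC has no direct hardware equivalent, so it is lowered in two steps:
// the comparison is materialized as an all-ones/all-zeroes i32 mask with
// SELECT_CC, and the branch itself becomes an AMDGPUISD::BRANCH_COND on that
// mask.  Schematically:
//
//   (br_cc cc, lhs, rhs, dest)
//     => (BRANCH_COND dest, (select_cc lhs, rhs, -1, 0, cc))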
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const
{
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  // rotl(x, n) == bitalign(x, x, 32 - n): shifting the 64-bit pair x:x right
  // by (32 - n) bits leaves x rotated left by n.
  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we convert LHS and RHS to the type of True and False.  True
  // and False are guaranteed to have the same type as this SELECT_CC node.
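  //
  // For example, with an i32 result and an f32 compare, the operands of
  //
  //   (select_cc f32:lhs, f32:rhs, i32:true, i32:false, cc)
  //
  // are rewritten by the conversion below to
  //
  //   (select_cc (fp_to_sint lhs), (fp_to_sint rhs), true, false, cc)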

  if (CompareVT != VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type pairings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value, we
  // can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
  // we can also handle this with a native instruction, but we need to swap
  // true and false and invert the condition.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a
  // native operation. (The code below does this, but we don't have the
  // instruction selection patterns to do it yet.)
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations:
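  //
  //   (select_cc lhs, rhs, true, false, cc)
  //     => (select (select_cc lhs, rhs, HWTrue, HWFalse, cc), true, false)
  //
  // The inner SELECT_CC produces a hardware true/false value, which the
  // final SELECT then uses as its condition.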
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS,
                             HWTrue, HWFalse, CC);

  // Convert a floating point condition to an integer mask: FNEG maps the
  // hardware TRUE value 1.0f to -1.0f, and FP_TO_SINT then yields -1 for
  // true and 0 for false.
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  // Materialize the comparison as an all-ones/all-zeroes mask...
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      DL,
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  // ...then mask it down to the 0/1 boolean that SETCC returns.
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}