SIISelLowering.cpp revision 40c41fe890e53d99afb4e2c3fbf10043081edd9e
1//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is 11// mostly EmitInstrWithCustomInserter(). 12// 13//===----------------------------------------------------------------------===// 14 15#include "SIISelLowering.h" 16#include "AMDILIntrinsicInfo.h" 17#include "SIInstrInfo.h" 18#include "SIRegisterInfo.h" 19#include "llvm/CodeGen/MachineInstrBuilder.h" 20#include "llvm/CodeGen/MachineRegisterInfo.h" 21#include "llvm/CodeGen/SelectionDAG.h" 22 23using namespace llvm; 24 25SITargetLowering::SITargetLowering(TargetMachine &TM) : 26 AMDGPUTargetLowering(TM), 27 TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())) 28{ 29 addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); 30 addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass); 31 addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass); 32 addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass); 33 addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass); 34 addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass); 35 36 addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); 37 addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass); 38 39 computeRegisterProperties(); 40 41 setOperationAction(ISD::AND, MVT::i1, Custom); 42 43 setOperationAction(ISD::ADD, MVT::i64, Legal); 44 setOperationAction(ISD::ADD, MVT::i32, Legal); 45 46 setOperationAction(ISD::BR_CC, MVT::i32, Custom); 47 48 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 49 50 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 51 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 52 53 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); 54 setTargetDAGCombine(ISD::SELECT_CC); 55 56 setTargetDAGCombine(ISD::SETCC); 57} 58 59MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( 60 MachineInstr * MI, MachineBasicBlock * BB) const 61{ 62 const TargetInstrInfo * TII = getTargetMachine().getInstrInfo(); 63 MachineRegisterInfo & MRI = BB->getParent()->getRegInfo(); 64 MachineBasicBlock::iterator I = MI; 65 66 if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) { 67 AppendS_WAITCNT(MI, *BB, llvm::next(I)); 68 return BB; 69 } 70 71 switch (MI->getOpcode()) { 72 default: 73 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); 74 75 case AMDGPU::CLAMP_SI: 76 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) 77 .addOperand(MI->getOperand(0)) 78 .addOperand(MI->getOperand(1)) 79 // VSRC1-2 are unused, but we still need to fill all the 80 // operand slots, so we just reuse the VSRC0 operand 81 .addOperand(MI->getOperand(1)) 82 .addOperand(MI->getOperand(1)) 83 .addImm(0) // ABS 84 .addImm(1) // CLAMP 85 .addImm(0) // OMOD 86 .addImm(0); // NEG 87 MI->eraseFromParent(); 88 break; 89 90 case AMDGPU::FABS_SI: 91 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) 92 .addOperand(MI->getOperand(0)) 93 .addOperand(MI->getOperand(1)) 94 // VSRC1-2 are unused, but we still need to fill all the 95 // operand slots, so we just reuse the VSRC0 operand 96 .addOperand(MI->getOperand(1)) 97 .addOperand(MI->getOperand(1)) 98 .addImm(1) // ABS 99 .addImm(0) // CLAMP 100 .addImm(0) // OMOD 101 .addImm(0); // NEG 102 MI->eraseFromParent(); 103 break; 104 105 case AMDGPU::FNEG_SI: 106 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) 107 .addOperand(MI->getOperand(0)) 108 .addOperand(MI->getOperand(1)) 109 // VSRC1-2 are unused, but we still need to fill all the 110 // operand slots, so we just reuse the VSRC0 operand 111 .addOperand(MI->getOperand(1)) 112 .addOperand(MI->getOperand(1)) 113 .addImm(0) // ABS 114 .addImm(0) // CLAMP 115 .addImm(0) // OMOD 116 .addImm(1); // NEG 117 MI->eraseFromParent(); 118 break; 119 120 case AMDGPU::SI_INTERP: 121 LowerSI_INTERP(MI, *BB, I, MRI); 122 break; 123 case AMDGPU::SI_INTERP_CONST: 124 LowerSI_INTERP_CONST(MI, *BB, I); 125 break; 126 case AMDGPU::SI_V_CNDLT: 127 LowerSI_V_CNDLT(MI, *BB, I, MRI); 128 break; 129 case AMDGPU::USE_SGPR_32: 130 case AMDGPU::USE_SGPR_64: 131 lowerUSE_SGPR(MI, BB->getParent(), MRI); 132 MI->eraseFromParent(); 133 break; 134 } 135 return BB; 136} 137 138void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, 139 MachineBasicBlock::iterator I) const 140{ 141 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT)) 142 .addImm(0); 143} 144 145void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, 146 MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const 147{ 148 unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); 149 MachineOperand dst = MI->getOperand(0); 150 MachineOperand iReg = MI->getOperand(1); 151 MachineOperand jReg = MI->getOperand(2); 152 MachineOperand attr_chan = MI->getOperand(3); 153 MachineOperand attr = MI->getOperand(4); 154 MachineOperand params = MI->getOperand(5); 155 156 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32)) 157 .addReg(AMDGPU::M0) 158 .addOperand(params); 159 160 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp) 161 .addOperand(iReg) 162 .addOperand(attr_chan) 163 .addOperand(attr); 164 165 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32)) 166 .addOperand(dst) 167 .addReg(tmp) 168 .addOperand(jReg) 169 .addOperand(attr_chan) 170 .addOperand(attr); 171 172 MI->eraseFromParent(); 173} 174 175void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI, 176 MachineBasicBlock &BB, MachineBasicBlock::iterator I) const 177{ 178 MachineOperand dst = MI->getOperand(0); 179 MachineOperand attr_chan = MI->getOperand(1); 180 MachineOperand attr = MI->getOperand(2); 181 MachineOperand params = MI->getOperand(3); 182 183 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32)) 184 .addReg(AMDGPU::M0) 185 .addOperand(params); 186 187 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32)) 188 .addOperand(dst) 189 .addOperand(attr_chan) 190 .addOperand(attr); 191 192 MI->eraseFromParent(); 193} 194 195void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, 196 MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const 197{ 198 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_LT_F32_e32), 199 AMDGPU::VCC) 200 .addOperand(MI->getOperand(1)) 201 .addReg(AMDGPU::SREG_LIT_0); 202 203 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32)) 204 .addOperand(MI->getOperand(0)) 205 .addReg(AMDGPU::VCC) 206 .addOperand(MI->getOperand(2)) 207 .addOperand(MI->getOperand(3)); 208 209 MI->eraseFromParent(); 210} 211 212void SITargetLowering::lowerUSE_SGPR(MachineInstr *MI, 213 MachineFunction * MF, MachineRegisterInfo & MRI) const 214{ 215 const TargetInstrInfo * TII = getTargetMachine().getInstrInfo(); 216 unsigned dstReg = MI->getOperand(0).getReg(); 217 int64_t newIndex = MI->getOperand(1).getImm(); 218 const TargetRegisterClass * dstClass = MRI.getRegClass(dstReg); 219 unsigned DwordWidth = dstClass->getSize() / 4; 220 assert(newIndex % DwordWidth == 0 && "USER_SGPR not properly aligned"); 221 newIndex = newIndex / DwordWidth; 222 223 unsigned newReg = dstClass->getRegister(newIndex); 224 addLiveIn(MI, MF, MRI, TII, newReg); 225} 226 227EVT SITargetLowering::getSetCCResultType(EVT VT) const 228{ 229 return MVT::i1; 230} 231 232//===----------------------------------------------------------------------===// 233// Custom DAG Lowering Operations 234//===----------------------------------------------------------------------===// 235 236SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 237{ 238 switch (Op.getOpcode()) { 239 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 240 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 241 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 242 case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND); 243 case ISD::INTRINSIC_WO_CHAIN: { 244 unsigned IntrinsicID = 245 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 246 EVT VT = Op.getValueType(); 247 switch (IntrinsicID) { 248 case AMDGPUIntrinsic::SI_vs_load_buffer_index: 249 return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass, 250 AMDGPU::VGPR0, VT); 251 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 252 } 253 break; 254 } 255 } 256 return SDValue(); 257} 258 259/// Loweri1ContextSwitch - The function is for lowering i1 operations on the 260/// VCC register. In the VALU context, VCC is a one bit register, but in the 261/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only 262/// the SALU can perform operations on the VCC register, we need to promote 263/// the operand types from i1 to i64 in order for tablegen to be able to match 264/// this operation to the correct SALU instruction. We do this promotion by 265/// wrapping the operands in a CopyToReg node. 266/// 267SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op, 268 SelectionDAG &DAG, 269 unsigned VCCNode) const 270{ 271 DebugLoc DL = Op.getDebugLoc(); 272 273 SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64, 274 DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, 275 Op.getOperand(0)), 276 DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, 277 Op.getOperand(1))); 278 279 return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode); 280} 281 282SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const 283{ 284 SDValue Chain = Op.getOperand(0); 285 SDValue CC = Op.getOperand(1); 286 SDValue LHS = Op.getOperand(2); 287 SDValue RHS = Op.getOperand(3); 288 SDValue JumpT = Op.getOperand(4); 289 SDValue CmpValue; 290 SDValue Result; 291 CmpValue = DAG.getNode( 292 ISD::SETCC, 293 Op.getDebugLoc(), 294 MVT::i1, 295 LHS, RHS, 296 CC); 297 298 Result = DAG.getNode( 299 AMDGPUISD::BRANCH_COND, 300 CmpValue.getDebugLoc(), 301 MVT::Other, Chain, 302 JumpT, CmpValue); 303 return Result; 304} 305 306SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const 307{ 308 SDValue LHS = Op.getOperand(0); 309 SDValue RHS = Op.getOperand(1); 310 SDValue True = Op.getOperand(2); 311 SDValue False = Op.getOperand(3); 312 SDValue CC = Op.getOperand(4); 313 EVT VT = Op.getValueType(); 314 DebugLoc DL = Op.getDebugLoc(); 315 316 SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC); 317 return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); 318} 319 320//===----------------------------------------------------------------------===// 321// Custom DAG optimizations 322//===----------------------------------------------------------------------===// 323 324SDValue SITargetLowering::PerformDAGCombine(SDNode *N, 325 DAGCombinerInfo &DCI) const { 326 SelectionDAG &DAG = DCI.DAG; 327 DebugLoc DL = N->getDebugLoc(); 328 EVT VT = N->getValueType(0); 329 330 switch (N->getOpcode()) { 331 default: break; 332 case ISD::SELECT_CC: { 333 N->dump(); 334 ConstantSDNode *True, *False; 335 // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc) 336 if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2))) 337 && (False = dyn_cast<ConstantSDNode>(N->getOperand(3))) 338 && True->isAllOnesValue() 339 && False->isNullValue() 340 && VT == MVT::i1) { 341 return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0), 342 N->getOperand(1), N->getOperand(4)); 343 344 } 345 break; 346 } 347 case ISD::SETCC: { 348 SDValue Arg0 = N->getOperand(0); 349 SDValue Arg1 = N->getOperand(1); 350 SDValue CC = N->getOperand(2); 351 ConstantSDNode * C = NULL; 352 ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get(); 353 354 // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne) 355 if (VT == MVT::i1 356 && Arg0.getOpcode() == ISD::SIGN_EXTEND 357 && Arg0.getOperand(0).getValueType() == MVT::i1 358 && (C = dyn_cast<ConstantSDNode>(Arg1)) 359 && C->isNullValue() 360 && CCOp == ISD::SETNE) { 361 return SimplifySetCC(VT, Arg0.getOperand(0), 362 DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL); 363 } 364 break; 365 } 366 } 367 return SDValue(); 368} 369 370#define NODE_NAME_CASE(node) case SIISD::node: return #node; 371 372const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const 373{ 374 switch (Opcode) { 375 default: return AMDGPUTargetLowering::getTargetNodeName(Opcode); 376 NODE_NAME_CASE(VCC_AND) 377 NODE_NAME_CASE(VCC_BITCAST) 378 } 379} 380