SIISelLowering.cpp revision a35eea786823f0130b925cb25486d7d162f2d68c
1//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is 11// mostly EmitInstrWithCustomInserter(). 12// 13//===----------------------------------------------------------------------===// 14 15#include "SIISelLowering.h" 16#include "SIInstrInfo.h" 17#include "SIRegisterInfo.h" 18#include "llvm/CodeGen/MachineInstrBuilder.h" 19#include "llvm/CodeGen/MachineRegisterInfo.h" 20#include "llvm/CodeGen/SelectionDAG.h" 21 22using namespace llvm; 23 24SITargetLowering::SITargetLowering(TargetMachine &TM) : 25 AMDGPUTargetLowering(TM), 26 TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())) 27{ 28 addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); 29 addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass); 30 addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass); 31 addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass); 32 addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass); 33 addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass); 34 35 addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); 36 addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass); 37 38 computeRegisterProperties(); 39 40 setOperationAction(ISD::AND, MVT::i1, Custom); 41 42 setOperationAction(ISD::ADD, MVT::i64, Legal); 43 setOperationAction(ISD::ADD, MVT::i32, Legal); 44 45 setOperationAction(ISD::BR_CC, MVT::i32, Custom); 46 47 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 48 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 49 50 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); 51 setTargetDAGCombine(ISD::SELECT_CC); 52 53 setTargetDAGCombine(ISD::SETCC); 54} 55 56MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( 57 MachineInstr * MI, MachineBasicBlock * BB) const 58{ 59 const TargetInstrInfo * TII = getTargetMachine().getInstrInfo(); 60 MachineRegisterInfo & MRI = BB->getParent()->getRegInfo(); 61 MachineBasicBlock::iterator I = MI; 62 63 if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) { 64 AppendS_WAITCNT(MI, *BB, llvm::next(I)); 65 return BB; 66 } 67 68 switch (MI->getOpcode()) { 69 default: 70 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); 71 72 case AMDGPU::CLAMP_SI: 73 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) 74 .addOperand(MI->getOperand(0)) 75 .addOperand(MI->getOperand(1)) 76 // VSRC1-2 are unused, but we still need to fill all the 77 // operand slots, so we just reuse the VSRC0 operand 78 .addOperand(MI->getOperand(1)) 79 .addOperand(MI->getOperand(1)) 80 .addImm(0) // ABS 81 .addImm(1) // CLAMP 82 .addImm(0) // OMOD 83 .addImm(0); // NEG 84 MI->eraseFromParent(); 85 break; 86 87 case AMDGPU::FABS_SI: 88 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) 89 .addOperand(MI->getOperand(0)) 90 .addOperand(MI->getOperand(1)) 91 // VSRC1-2 are unused, but we still need to fill all the 92 // operand slots, so we just reuse the VSRC0 operand 93 .addOperand(MI->getOperand(1)) 94 .addOperand(MI->getOperand(1)) 95 .addImm(1) // ABS 96 .addImm(0) // CLAMP 97 .addImm(0) // OMOD 98 .addImm(0); // NEG 99 MI->eraseFromParent(); 100 break; 101 102 case AMDGPU::FNEG_SI: 103 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) 104 .addOperand(MI->getOperand(0)) 105 .addOperand(MI->getOperand(1)) 106 // VSRC1-2 are unused, but we still need to fill all the 107 // operand slots, so we just reuse the VSRC0 operand 108 .addOperand(MI->getOperand(1)) 109 .addOperand(MI->getOperand(1)) 110 .addImm(0) // ABS 111 .addImm(0) // CLAMP 112 .addImm(0) // OMOD 113 .addImm(1); // NEG 114 MI->eraseFromParent(); 115 break; 116 117 case AMDGPU::SI_INTERP: 118 LowerSI_INTERP(MI, *BB, I, MRI); 119 break; 120 case AMDGPU::SI_INTERP_CONST: 121 LowerSI_INTERP_CONST(MI, *BB, I); 122 break; 123 case AMDGPU::SI_V_CNDLT: 124 LowerSI_V_CNDLT(MI, *BB, I, MRI); 125 break; 126 case AMDGPU::USE_SGPR_32: 127 case AMDGPU::USE_SGPR_64: 128 lowerUSE_SGPR(MI, BB->getParent(), MRI); 129 MI->eraseFromParent(); 130 break; 131 case AMDGPU::VS_LOAD_BUFFER_INDEX: 132 addLiveIn(MI, BB->getParent(), MRI, TII, AMDGPU::VGPR0); 133 MI->eraseFromParent(); 134 break; 135 } 136 return BB; 137} 138 139void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, 140 MachineBasicBlock::iterator I) const 141{ 142 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT)) 143 .addImm(0); 144} 145 146void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, 147 MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const 148{ 149 unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); 150 MachineOperand dst = MI->getOperand(0); 151 MachineOperand iReg = MI->getOperand(1); 152 MachineOperand jReg = MI->getOperand(2); 153 MachineOperand attr_chan = MI->getOperand(3); 154 MachineOperand attr = MI->getOperand(4); 155 MachineOperand params = MI->getOperand(5); 156 157 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32)) 158 .addReg(AMDGPU::M0) 159 .addOperand(params); 160 161 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp) 162 .addOperand(iReg) 163 .addOperand(attr_chan) 164 .addOperand(attr); 165 166 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32)) 167 .addOperand(dst) 168 .addReg(tmp) 169 .addOperand(jReg) 170 .addOperand(attr_chan) 171 .addOperand(attr); 172 173 MI->eraseFromParent(); 174} 175 176void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI, 177 MachineBasicBlock &BB, MachineBasicBlock::iterator I) const 178{ 179 MachineOperand dst = MI->getOperand(0); 180 MachineOperand attr_chan = MI->getOperand(1); 181 MachineOperand attr = MI->getOperand(2); 182 MachineOperand params = MI->getOperand(3); 183 184 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32)) 185 .addReg(AMDGPU::M0) 186 .addOperand(params); 187 188 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32)) 189 .addOperand(dst) 190 .addOperand(attr_chan) 191 .addOperand(attr); 192 193 MI->eraseFromParent(); 194} 195 196void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, 197 MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const 198{ 199 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_LT_F32_e32), 200 AMDGPU::VCC) 201 .addOperand(MI->getOperand(1)) 202 .addReg(AMDGPU::SREG_LIT_0); 203 204 BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32)) 205 .addOperand(MI->getOperand(0)) 206 .addReg(AMDGPU::VCC) 207 .addOperand(MI->getOperand(2)) 208 .addOperand(MI->getOperand(3)); 209 210 MI->eraseFromParent(); 211} 212 213void SITargetLowering::lowerUSE_SGPR(MachineInstr *MI, 214 MachineFunction * MF, MachineRegisterInfo & MRI) const 215{ 216 const TargetInstrInfo * TII = getTargetMachine().getInstrInfo(); 217 unsigned dstReg = MI->getOperand(0).getReg(); 218 int64_t newIndex = MI->getOperand(1).getImm(); 219 const TargetRegisterClass * dstClass = MRI.getRegClass(dstReg); 220 unsigned DwordWidth = dstClass->getSize() / 4; 221 assert(newIndex % DwordWidth == 0 && "USER_SGPR not properly aligned"); 222 newIndex = newIndex / DwordWidth; 223 224 unsigned newReg = dstClass->getRegister(newIndex); 225 addLiveIn(MI, MF, MRI, TII, newReg); 226} 227 228EVT SITargetLowering::getSetCCResultType(EVT VT) const 229{ 230 return MVT::i1; 231} 232 233//===----------------------------------------------------------------------===// 234// Custom DAG Lowering Operations 235//===----------------------------------------------------------------------===// 236 237SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 238{ 239 switch (Op.getOpcode()) { 240 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 241 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 242 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 243 case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND); 244 } 245} 246 247/// Loweri1ContextSwitch - The function is for lowering i1 operations on the 248/// VCC register. In the VALU context, VCC is a one bit register, but in the 249/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only 250/// the SALU can perform operations on the VCC register, we need to promote 251/// the operand types from i1 to i64 in order for tablegen to be able to match 252/// this operation to the correct SALU instruction. We do this promotion by 253/// wrapping the operands in a CopyToReg node. 254/// 255SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op, 256 SelectionDAG &DAG, 257 unsigned VCCNode) const 258{ 259 DebugLoc DL = Op.getDebugLoc(); 260 261 SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64, 262 DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, 263 Op.getOperand(0)), 264 DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, 265 Op.getOperand(1))); 266 267 return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode); 268} 269 270SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const 271{ 272 SDValue Chain = Op.getOperand(0); 273 SDValue CC = Op.getOperand(1); 274 SDValue LHS = Op.getOperand(2); 275 SDValue RHS = Op.getOperand(3); 276 SDValue JumpT = Op.getOperand(4); 277 SDValue CmpValue; 278 SDValue Result; 279 CmpValue = DAG.getNode( 280 ISD::SETCC, 281 Op.getDebugLoc(), 282 MVT::i1, 283 LHS, RHS, 284 CC); 285 286 Result = DAG.getNode( 287 AMDGPUISD::BRANCH_COND, 288 CmpValue.getDebugLoc(), 289 MVT::Other, Chain, 290 JumpT, CmpValue); 291 return Result; 292} 293 294SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const 295{ 296 SDValue LHS = Op.getOperand(0); 297 SDValue RHS = Op.getOperand(1); 298 SDValue True = Op.getOperand(2); 299 SDValue False = Op.getOperand(3); 300 SDValue CC = Op.getOperand(4); 301 EVT VT = Op.getValueType(); 302 DebugLoc DL = Op.getDebugLoc(); 303 304 SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC); 305 return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); 306} 307 308//===----------------------------------------------------------------------===// 309// Custom DAG optimizations 310//===----------------------------------------------------------------------===// 311 312SDValue SITargetLowering::PerformDAGCombine(SDNode *N, 313 DAGCombinerInfo &DCI) const { 314 SelectionDAG &DAG = DCI.DAG; 315 DebugLoc DL = N->getDebugLoc(); 316 EVT VT = N->getValueType(0); 317 318 switch (N->getOpcode()) { 319 default: break; 320 case ISD::SELECT_CC: { 321 N->dump(); 322 ConstantSDNode *True, *False; 323 // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc) 324 if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2))) 325 && (False = dyn_cast<ConstantSDNode>(N->getOperand(3))) 326 && True->isAllOnesValue() 327 && False->isNullValue() 328 && VT == MVT::i1) { 329 return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0), 330 N->getOperand(1), N->getOperand(4)); 331 332 } 333 break; 334 } 335 case ISD::SETCC: { 336 SDValue Arg0 = N->getOperand(0); 337 SDValue Arg1 = N->getOperand(1); 338 SDValue CC = N->getOperand(2); 339 ConstantSDNode * C = NULL; 340 ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get(); 341 342 // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne) 343 if (VT == MVT::i1 344 && Arg0.getOpcode() == ISD::SIGN_EXTEND 345 && Arg0.getOperand(0).getValueType() == MVT::i1 346 && (C = dyn_cast<ConstantSDNode>(Arg1)) 347 && C->isNullValue() 348 && CCOp == ISD::SETNE) { 349 return SimplifySetCC(VT, Arg0.getOperand(0), 350 DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL); 351 } 352 break; 353 } 354 } 355 return SDValue(); 356} 357 358#define NODE_NAME_CASE(node) case SIISD::node: return #node; 359 360const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const 361{ 362 switch (Opcode) { 363 default: return AMDGPUTargetLowering::getTargetNodeName(Opcode); 364 NODE_NAME_CASE(VCC_AND) 365 NODE_NAME_CASE(VCC_BITCAST) 366 } 367} 368