// AMDGPUISelLowering.cpp revision 40c41fe890e53d99afb4e2c3fbf10043081edd9e
1//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This is the parent TargetLowering class for hardware code gen targets. 11// 12//===----------------------------------------------------------------------===// 13 14#include "AMDGPUISelLowering.h" 15#include "AMDILIntrinsicInfo.h" 16#include "AMDGPUUtil.h" 17#include "llvm/CodeGen/SelectionDAG.h" 18#include "llvm/CodeGen/MachineFunction.h" 19#include "llvm/CodeGen/MachineRegisterInfo.h" 20#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 21 22using namespace llvm; 23 24AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : 25 TargetLowering(TM, new TargetLoweringObjectFileELF()) 26{ 27 28 // Initialize target lowering borrowed from AMDIL 29 InitAMDILLowering(); 30 31 // We need to custom lower some of the intrinsics 32 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 33 34 // Library functions. These default to Expand, but we have instructions 35 // for them. 
36 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 37 setOperationAction(ISD::FEXP2, MVT::f32, Legal); 38 setOperationAction(ISD::FRINT, MVT::f32, Legal); 39 40 setOperationAction(ISD::UDIV, MVT::i32, Expand); 41 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 42 setOperationAction(ISD::UREM, MVT::i32, Expand); 43} 44 45//===---------------------------------------------------------------------===// 46// TargetLowering Callbacks 47//===---------------------------------------------------------------------===// 48 49SDValue AMDGPUTargetLowering::LowerFormalArguments( 50 SDValue Chain, 51 CallingConv::ID CallConv, 52 bool isVarArg, 53 const SmallVectorImpl<ISD::InputArg> &Ins, 54 DebugLoc DL, SelectionDAG &DAG, 55 SmallVectorImpl<SDValue> &InVals) const 56{ 57 // Lowering of arguments happens in R600LowerKernelParameters, so we can 58 // ignore the arguments here. 59 for (unsigned i = 0, e = Ins.size(); i < e; ++i) { 60 InVals.push_back(SDValue()); 61 } 62 return Chain; 63} 64 65SDValue AMDGPUTargetLowering::LowerReturn( 66 SDValue Chain, 67 CallingConv::ID CallConv, 68 bool isVarArg, 69 const SmallVectorImpl<ISD::OutputArg> &Outs, 70 const SmallVectorImpl<SDValue> &OutVals, 71 DebugLoc DL, SelectionDAG &DAG) const 72{ 73 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); 74} 75 76//===---------------------------------------------------------------------===// 77// Target specific lowering 78//===---------------------------------------------------------------------===// 79 80SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) 81 const 82{ 83 switch (Op.getOpcode()) { 84 default: 85 Op.getNode()->dump(); 86 assert(0 && "Custom lowering code for this" 87 "instruction is not implemented yet!"); 88 break; 89 // AMDIL DAG lowering 90 case ISD::SDIV: return LowerSDIV(Op, DAG); 91 case ISD::SREM: return LowerSREM(Op, DAG); 92 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 93 case ISD::SIGN_EXTEND_INREG: return 
LowerSIGN_EXTEND_INREG(Op, DAG); 94 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 95 // AMDGPU DAG lowering 96 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 97 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); 98 } 99 return Op; 100} 101 102SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 103 SelectionDAG &DAG) const 104{ 105 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 106 DebugLoc DL = Op.getDebugLoc(); 107 EVT VT = Op.getValueType(); 108 109 switch (IntrinsicID) { 110 default: return Op; 111 case AMDGPUIntrinsic::AMDIL_abs: 112 return LowerIntrinsicIABS(Op, DAG); 113 case AMDGPUIntrinsic::AMDIL_exp: 114 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); 115 case AMDGPUIntrinsic::AMDIL_fabs: 116 return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1)); 117 case AMDGPUIntrinsic::AMDGPU_lrp: 118 return LowerIntrinsicLRP(Op, DAG); 119 case AMDGPUIntrinsic::AMDIL_fraction: 120 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); 121 case AMDGPUIntrinsic::AMDIL_mad: 122 return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1), 123 Op.getOperand(2), Op.getOperand(3)); 124 case AMDGPUIntrinsic::AMDIL_max: 125 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), 126 Op.getOperand(2)); 127 case AMDGPUIntrinsic::AMDGPU_imax: 128 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), 129 Op.getOperand(2)); 130 case AMDGPUIntrinsic::AMDGPU_umax: 131 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), 132 Op.getOperand(2)); 133 case AMDGPUIntrinsic::AMDIL_min: 134 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), 135 Op.getOperand(2)); 136 case AMDGPUIntrinsic::AMDGPU_imin: 137 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), 138 Op.getOperand(2)); 139 case AMDGPUIntrinsic::AMDGPU_umin: 140 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), 141 Op.getOperand(2)); 142 case AMDGPUIntrinsic::AMDIL_round_nearest: 143 return 
DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); 144 case AMDGPUIntrinsic::AMDIL_round_posinf: 145 return DAG.getNode(ISD::FCEIL, DL, VT, Op.getOperand(1)); 146 } 147} 148 149///IABS(a) = SMAX(sub(0, a), a) 150SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, 151 SelectionDAG &DAG) const 152{ 153 154 DebugLoc DL = Op.getDebugLoc(); 155 EVT VT = Op.getValueType(); 156 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 157 Op.getOperand(1)); 158 159 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); 160} 161 162/// Linear Interpolation 163/// LRP(a, b, c) = muladd(a, b, (1 - a) * c) 164SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, 165 SelectionDAG &DAG) const 166{ 167 DebugLoc DL = Op.getDebugLoc(); 168 EVT VT = Op.getValueType(); 169 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, 170 DAG.getConstantFP(1.0f, MVT::f32), 171 Op.getOperand(1)); 172 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, 173 Op.getOperand(3)); 174 return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1), 175 Op.getOperand(2), 176 OneSubAC); 177} 178 179 180 181SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, 182 SelectionDAG &DAG) const 183{ 184 DebugLoc DL = Op.getDebugLoc(); 185 EVT VT = Op.getValueType(); 186 187 SDValue Num = Op.getOperand(0); 188 SDValue Den = Op.getOperand(1); 189 190 SmallVector<SDValue, 8> Results; 191 192 // RCP = URECIP(Den) = 2^32 / Den + e 193 // e is rounding error. 194 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); 195 196 // RCP_LO = umulo(RCP, Den) */ 197 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); 198 199 // RCP_HI = mulhu (RCP, Den) */ 200 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); 201 202 // NEG_RCP_LO = -RCP_LO 203 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 204 RCP_LO); 205 206 // ABS_RCP_LO = (RCP_HI == 0 ? 
NEG_RCP_LO : RCP_LO) 207 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 208 NEG_RCP_LO, RCP_LO, 209 ISD::SETEQ); 210 // Calculate the rounding error from the URECIP instruction 211 // E = mulhu(ABS_RCP_LO, RCP) 212 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); 213 214 // RCP_A_E = RCP + E 215 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); 216 217 // RCP_S_E = RCP - E 218 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); 219 220 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) 221 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 222 RCP_A_E, RCP_S_E, 223 ISD::SETEQ); 224 // Quotient = mulhu(Tmp0, Num) 225 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); 226 227 // Num_S_Remainder = Quotient * Den 228 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); 229 230 // Remainder = Num - Num_S_Remainder 231 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); 232 233 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) 234 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, 235 DAG.getConstant(-1, VT), 236 DAG.getConstant(0, VT), 237 ISD::SETGE); 238 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0) 239 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder, 240 DAG.getConstant(0, VT), 241 DAG.getConstant(-1, VT), 242 DAG.getConstant(0, VT), 243 ISD::SETGE); 244 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero 245 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, 246 Remainder_GE_Zero); 247 248 // Calculate Division result: 249 250 // Quotient_A_One = Quotient + 1 251 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, 252 DAG.getConstant(1, VT)); 253 254 // Quotient_S_One = Quotient - 1 255 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, 256 DAG.getConstant(1, VT)); 257 258 // Div = (Tmp1 == 0 ? 
Quotient : Quotient_A_One) 259 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 260 Quotient, Quotient_A_One, ISD::SETEQ); 261 262 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) 263 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 264 Quotient_S_One, Div, ISD::SETEQ); 265 266 // Calculate Rem result: 267 268 // Remainder_S_Den = Remainder - Den 269 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); 270 271 // Remainder_A_Den = Remainder + Den 272 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); 273 274 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) 275 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 276 Remainder, Remainder_S_Den, ISD::SETEQ); 277 278 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) 279 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 280 Remainder_A_Den, Rem, ISD::SETEQ); 281 282 DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div); 283 DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem); 284 285 return Op; 286} 287 288//===----------------------------------------------------------------------===// 289// Helper functions 290//===----------------------------------------------------------------------===// 291 292bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const 293{ 294 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 295 return CFP->isExactlyValue(1.0); 296 } 297 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 298 return C->isAllOnesValue(); 299 } 300 return false; 301} 302 303bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const 304{ 305 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 306 return CFP->getValueAPF().isZero(); 307 } 308 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 309 return C->isNullValue(); 310 } 311 return false; 312} 313 314void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI, 315 MachineFunction * MF, MachineRegisterInfo & 
MRI, 316 const TargetInstrInfo * TII, unsigned reg) const 317{ 318 AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, MI->getOperand(0).getReg()); 319} 320 321SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, 322 const TargetRegisterClass *RC, 323 unsigned Reg, EVT VT) const { 324 MachineFunction &MF = DAG.getMachineFunction(); 325 MachineRegisterInfo &MRI = MF.getRegInfo(); 326 unsigned VirtualRegister; 327 if (!MRI.isLiveIn(Reg)) { 328 VirtualRegister = MRI.createVirtualRegister(RC); 329 MRI.addLiveIn(Reg, VirtualRegister); 330 } else { 331 VirtualRegister = MRI.getLiveInVirtReg(Reg); 332 } 333 return DAG.getRegister(VirtualRegister, VT); 334} 335 336#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; 337 338const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const 339{ 340 switch (Opcode) { 341 default: return 0; 342 // AMDIL DAG nodes 343 NODE_NAME_CASE(MAD); 344 NODE_NAME_CASE(CALL); 345 NODE_NAME_CASE(UMUL); 346 NODE_NAME_CASE(DIV_INF); 347 NODE_NAME_CASE(VBUILD); 348 NODE_NAME_CASE(RET_FLAG); 349 NODE_NAME_CASE(BRANCH_COND); 350 351 // AMDGPU DAG nodes 352 NODE_NAME_CASE(FRACT) 353 NODE_NAME_CASE(FMAX) 354 NODE_NAME_CASE(SMAX) 355 NODE_NAME_CASE(UMAX) 356 NODE_NAME_CASE(FMIN) 357 NODE_NAME_CASE(SMIN) 358 NODE_NAME_CASE(UMIN) 359 NODE_NAME_CASE(URECIP) 360 } 361} 362