1//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This is the parent TargetLowering class for hardware code gen targets. 11// 12//===----------------------------------------------------------------------===// 13 14#include "AMDGPUISelLowering.h" 15#include "AMDILIntrinsicInfo.h" 16#include "llvm/CodeGen/MachineFunction.h" 17#include "llvm/CodeGen/MachineRegisterInfo.h" 18#include "llvm/CodeGen/SelectionDAG.h" 19#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 20 21using namespace llvm; 22 23AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : 24 TargetLowering(TM, new TargetLoweringObjectFileELF()) 25{ 26 27 // Initialize target lowering borrowed from AMDIL 28 InitAMDILLowering(); 29 30 // We need to custom lower some of the intrinsics 31 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 32 33 // Library functions. These default to Expand, but we have instructions 34 // for them. 35 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 36 setOperationAction(ISD::FEXP2, MVT::f32, Legal); 37 setOperationAction(ISD::FRINT, MVT::f32, Legal); 38 39 setOperationAction(ISD::UDIV, MVT::i32, Expand); 40 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 41 setOperationAction(ISD::UREM, MVT::i32, Expand); 42} 43 44//===---------------------------------------------------------------------===// 45// TargetLowering Callbacks 46//===---------------------------------------------------------------------===// 47 48SDValue AMDGPUTargetLowering::LowerFormalArguments( 49 SDValue Chain, 50 CallingConv::ID CallConv, 51 bool isVarArg, 52 const SmallVectorImpl<ISD::InputArg> &Ins, 53 DebugLoc DL, SelectionDAG &DAG, 54 SmallVectorImpl<SDValue> &InVals) const 55{ 56 // Lowering of arguments happens in R600LowerKernelParameters, so we can 57 // ignore the arguments here. 58 for (unsigned i = 0, e = Ins.size(); i < e; ++i) { 59 InVals.push_back(SDValue()); 60 } 61 return Chain; 62} 63 64SDValue AMDGPUTargetLowering::LowerReturn( 65 SDValue Chain, 66 CallingConv::ID CallConv, 67 bool isVarArg, 68 const SmallVectorImpl<ISD::OutputArg> &Outs, 69 const SmallVectorImpl<SDValue> &OutVals, 70 DebugLoc DL, SelectionDAG &DAG) const 71{ 72 return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); 73} 74 75//===---------------------------------------------------------------------===// 76// Target specific lowering 77//===---------------------------------------------------------------------===// 78 79SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) 80 const 81{ 82 switch (Op.getOpcode()) { 83 default: 84 Op.getNode()->dump(); 85 assert(0 && "Custom lowering code for this" 86 "instruction is not implemented yet!"); 87 break; 88 // AMDIL DAG lowering 89 case ISD::SDIV: return LowerSDIV(Op, DAG); 90 case ISD::SREM: return LowerSREM(Op, DAG); 91 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 92 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 93 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 94 // AMDGPU DAG lowering 95 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 96 case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); 97 } 98 return Op; 99} 100 101SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 102 SelectionDAG &DAG) const 103{ 104 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 105 DebugLoc DL = Op.getDebugLoc(); 106 EVT VT = Op.getValueType(); 107 108 switch (IntrinsicID) { 109 default: return Op; 110 case AMDGPUIntrinsic::AMDIL_abs: 111 return LowerIntrinsicIABS(Op, DAG); 112 case AMDGPUIntrinsic::AMDIL_exp: 113 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); 114 case AMDGPUIntrinsic::AMDIL_fabs: 115 return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1)); 116 case AMDGPUIntrinsic::AMDGPU_lrp: 117 return LowerIntrinsicLRP(Op, DAG); 118 case AMDGPUIntrinsic::AMDIL_fraction: 119 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); 120 case AMDGPUIntrinsic::AMDIL_mad: 121 return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1), 122 Op.getOperand(2), Op.getOperand(3)); 123 case AMDGPUIntrinsic::AMDIL_max: 124 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), 125 Op.getOperand(2)); 126 case AMDGPUIntrinsic::AMDGPU_imax: 127 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), 128 Op.getOperand(2)); 129 case AMDGPUIntrinsic::AMDGPU_umax: 130 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), 131 Op.getOperand(2)); 132 case AMDGPUIntrinsic::AMDIL_min: 133 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), 134 Op.getOperand(2)); 135 case AMDGPUIntrinsic::AMDGPU_imin: 136 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), 137 Op.getOperand(2)); 138 case AMDGPUIntrinsic::AMDGPU_umin: 139 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), 140 Op.getOperand(2)); 141 case AMDGPUIntrinsic::AMDIL_round_nearest: 142 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); 143 case AMDGPUIntrinsic::AMDIL_round_posinf: 144 return DAG.getNode(ISD::FCEIL, DL, VT, Op.getOperand(1)); 145 } 146} 147 148///IABS(a) = SMAX(sub(0, a), a) 149SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, 150 SelectionDAG &DAG) const 151{ 152 153 DebugLoc DL = Op.getDebugLoc(); 154 EVT VT = Op.getValueType(); 155 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 156 Op.getOperand(1)); 157 158 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); 159} 160 161/// Linear Interpolation 162/// LRP(a, b, c) = muladd(a, b, (1 - a) * c) 163SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, 164 SelectionDAG &DAG) const 165{ 166 DebugLoc DL = Op.getDebugLoc(); 167 EVT VT = Op.getValueType(); 168 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, 169 DAG.getConstantFP(1.0f, MVT::f32), 170 Op.getOperand(1)); 171 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, 172 Op.getOperand(3)); 173 return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1), 174 Op.getOperand(2), 175 OneSubAC); 176} 177 178 179 180SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, 181 SelectionDAG &DAG) const 182{ 183 DebugLoc DL = Op.getDebugLoc(); 184 EVT VT = Op.getValueType(); 185 186 SDValue Num = Op.getOperand(0); 187 SDValue Den = Op.getOperand(1); 188 189 SmallVector<SDValue, 8> Results; 190 191 // RCP = URECIP(Den) = 2^32 / Den + e 192 // e is rounding error. 193 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); 194 195 // RCP_LO = umulo(RCP, Den) */ 196 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); 197 198 // RCP_HI = mulhu (RCP, Den) */ 199 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); 200 201 // NEG_RCP_LO = -RCP_LO 202 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 203 RCP_LO); 204 205 // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO) 206 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 207 NEG_RCP_LO, RCP_LO, 208 ISD::SETEQ); 209 // Calculate the rounding error from the URECIP instruction 210 // E = mulhu(ABS_RCP_LO, RCP) 211 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); 212 213 // RCP_A_E = RCP + E 214 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); 215 216 // RCP_S_E = RCP - E 217 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); 218 219 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) 220 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 221 RCP_A_E, RCP_S_E, 222 ISD::SETEQ); 223 // Quotient = mulhu(Tmp0, Num) 224 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); 225 226 // Num_S_Remainder = Quotient * Den 227 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); 228 229 // Remainder = Num - Num_S_Remainder 230 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); 231 232 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) 233 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, 234 DAG.getConstant(-1, VT), 235 DAG.getConstant(0, VT), 236 ISD::SETGE); 237 // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0) 238 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder, 239 DAG.getConstant(0, VT), 240 DAG.getConstant(-1, VT), 241 DAG.getConstant(0, VT), 242 ISD::SETGE); 243 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero 244 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, 245 Remainder_GE_Zero); 246 247 // Calculate Division result: 248 249 // Quotient_A_One = Quotient + 1 250 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, 251 DAG.getConstant(1, VT)); 252 253 // Quotient_S_One = Quotient - 1 254 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, 255 DAG.getConstant(1, VT)); 256 257 // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) 258 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 259 Quotient, Quotient_A_One, ISD::SETEQ); 260 261 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) 262 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 263 Quotient_S_One, Div, ISD::SETEQ); 264 265 // Calculate Rem result: 266 267 // Remainder_S_Den = Remainder - Den 268 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); 269 270 // Remainder_A_Den = Remainder + Den 271 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); 272 273 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) 274 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 275 Remainder, Remainder_S_Den, ISD::SETEQ); 276 277 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) 278 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 279 Remainder_A_Den, Rem, ISD::SETEQ); 280 281 DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div); 282 DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem); 283 284 return Op; 285} 286 287//===----------------------------------------------------------------------===// 288// Helper functions 289//===----------------------------------------------------------------------===// 290 291bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const 292{ 293 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 294 return CFP->isExactlyValue(1.0); 295 } 296 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 297 return C->isAllOnesValue(); 298 } 299 return false; 300} 301 302bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const 303{ 304 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 305 return CFP->getValueAPF().isZero(); 306 } 307 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 308 return C->isNullValue(); 309 } 310 return false; 311} 312 313SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, 314 const TargetRegisterClass *RC, 315 unsigned Reg, EVT VT) const { 316 MachineFunction &MF = DAG.getMachineFunction(); 317 MachineRegisterInfo &MRI = MF.getRegInfo(); 318 unsigned VirtualRegister; 319 if (!MRI.isLiveIn(Reg)) { 320 VirtualRegister = MRI.createVirtualRegister(RC); 321 MRI.addLiveIn(Reg, VirtualRegister); 322 } else { 323 VirtualRegister = MRI.getLiveInVirtReg(Reg); 324 } 325 return DAG.getRegister(VirtualRegister, VT); 326} 327 328#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; 329 330const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const 331{ 332 switch (Opcode) { 333 default: return 0; 334 // AMDIL DAG nodes 335 NODE_NAME_CASE(MAD); 336 NODE_NAME_CASE(CALL); 337 NODE_NAME_CASE(UMUL); 338 NODE_NAME_CASE(DIV_INF); 339 NODE_NAME_CASE(VBUILD); 340 NODE_NAME_CASE(RET_FLAG); 341 NODE_NAME_CASE(BRANCH_COND); 342 343 // AMDGPU DAG nodes 344 NODE_NAME_CASE(FRACT) 345 NODE_NAME_CASE(FMAX) 346 NODE_NAME_CASE(SMAX) 347 NODE_NAME_CASE(UMAX) 348 NODE_NAME_CASE(FMIN) 349 NODE_NAME_CASE(SMIN) 350 NODE_NAME_CASE(UMIN) 351 NODE_NAME_CASE(URECIP) 352 } 353} 354