1//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This is the parent TargetLowering class for hardware code gen targets.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUISelLowering.h"
15#include "AMDILIntrinsicInfo.h"
16#include "llvm/CodeGen/MachineFunction.h"
17#include "llvm/CodeGen/MachineRegisterInfo.h"
18#include "llvm/CodeGen/SelectionDAG.h"
19#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
20
21using namespace llvm;
22
23AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
24  TargetLowering(TM, new TargetLoweringObjectFileELF())
25{
26
27  // Initialize target lowering borrowed from AMDIL
28  InitAMDILLowering();
29
30  // We need to custom lower some of the intrinsics
31  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
32
33  // Library functions.  These default to Expand, but we have instructions
34  // for them.
35  setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
36  setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
37  setOperationAction(ISD::FRINT,  MVT::f32, Legal);
38
39  setOperationAction(ISD::UDIV, MVT::i32, Expand);
40  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
41  setOperationAction(ISD::UREM, MVT::i32, Expand);
42}
43
44//===---------------------------------------------------------------------===//
45// TargetLowering Callbacks
46//===---------------------------------------------------------------------===//
47
48SDValue AMDGPUTargetLowering::LowerFormalArguments(
49                                      SDValue Chain,
50                                      CallingConv::ID CallConv,
51                                      bool isVarArg,
52                                      const SmallVectorImpl<ISD::InputArg> &Ins,
53                                      DebugLoc DL, SelectionDAG &DAG,
54                                      SmallVectorImpl<SDValue> &InVals) const
55{
56  // Lowering of arguments happens in R600LowerKernelParameters, so we can
57  // ignore the arguments here.
58  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
59    InVals.push_back(SDValue());
60  }
61  return Chain;
62}
63
64SDValue AMDGPUTargetLowering::LowerReturn(
65                                     SDValue Chain,
66                                     CallingConv::ID CallConv,
67                                     bool isVarArg,
68                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
69                                     const SmallVectorImpl<SDValue> &OutVals,
70                                     DebugLoc DL, SelectionDAG &DAG) const
71{
72  return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
73}
74
75//===---------------------------------------------------------------------===//
76// Target specific lowering
77//===---------------------------------------------------------------------===//
78
79SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
80    const
81{
82  switch (Op.getOpcode()) {
83  default:
84    Op.getNode()->dump();
85    assert(0 && "Custom lowering code for this"
86        "instruction is not implemented yet!");
87    break;
88  // AMDIL DAG lowering
89  case ISD::SDIV: return LowerSDIV(Op, DAG);
90  case ISD::SREM: return LowerSREM(Op, DAG);
91  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
92  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
93  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
94  // AMDGPU DAG lowering
95  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
96  case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
97  }
98  return Op;
99}
100
101SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
102    SelectionDAG &DAG) const
103{
104  unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
105  DebugLoc DL = Op.getDebugLoc();
106  EVT VT = Op.getValueType();
107
108  switch (IntrinsicID) {
109    default: return Op;
110    case AMDGPUIntrinsic::AMDIL_abs:
111      return LowerIntrinsicIABS(Op, DAG);
112    case AMDGPUIntrinsic::AMDIL_exp:
113      return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
114    case AMDGPUIntrinsic::AMDIL_fabs:
115      return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1));
116    case AMDGPUIntrinsic::AMDGPU_lrp:
117      return LowerIntrinsicLRP(Op, DAG);
118    case AMDGPUIntrinsic::AMDIL_fraction:
119      return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
120    case AMDGPUIntrinsic::AMDIL_mad:
121      return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
122                              Op.getOperand(2), Op.getOperand(3));
123    case AMDGPUIntrinsic::AMDIL_max:
124      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
125                                                  Op.getOperand(2));
126    case AMDGPUIntrinsic::AMDGPU_imax:
127      return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
128                                                  Op.getOperand(2));
129    case AMDGPUIntrinsic::AMDGPU_umax:
130      return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
131                                                  Op.getOperand(2));
132    case AMDGPUIntrinsic::AMDIL_min:
133      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
134                                                  Op.getOperand(2));
135    case AMDGPUIntrinsic::AMDGPU_imin:
136      return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
137                                                  Op.getOperand(2));
138    case AMDGPUIntrinsic::AMDGPU_umin:
139      return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
140                                                  Op.getOperand(2));
141    case AMDGPUIntrinsic::AMDIL_round_nearest:
142      return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
143    case AMDGPUIntrinsic::AMDIL_round_posinf:
144      return DAG.getNode(ISD::FCEIL, DL, VT, Op.getOperand(1));
145  }
146}
147
148///IABS(a) = SMAX(sub(0, a), a)
149SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
150    SelectionDAG &DAG) const
151{
152
153  DebugLoc DL = Op.getDebugLoc();
154  EVT VT = Op.getValueType();
155  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
156                                              Op.getOperand(1));
157
158  return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
159}
160
161/// Linear Interpolation
162/// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
163SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
164    SelectionDAG &DAG) const
165{
166  DebugLoc DL = Op.getDebugLoc();
167  EVT VT = Op.getValueType();
168  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
169                                DAG.getConstantFP(1.0f, MVT::f32),
170                                Op.getOperand(1));
171  SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
172                                                    Op.getOperand(3));
173  return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
174                                               Op.getOperand(2),
175                                               OneSubAC);
176}
177
178
179
180SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
181    SelectionDAG &DAG) const
182{
183  DebugLoc DL = Op.getDebugLoc();
184  EVT VT = Op.getValueType();
185
186  SDValue Num = Op.getOperand(0);
187  SDValue Den = Op.getOperand(1);
188
189  SmallVector<SDValue, 8> Results;
190
191  // RCP =  URECIP(Den) = 2^32 / Den + e
192  // e is rounding error.
193  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
194
195  // RCP_LO = umulo(RCP, Den) */
196  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
197
198  // RCP_HI = mulhu (RCP, Den) */
199  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
200
201  // NEG_RCP_LO = -RCP_LO
202  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
203                                                     RCP_LO);
204
205  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
206  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
207                                           NEG_RCP_LO, RCP_LO,
208                                           ISD::SETEQ);
209  // Calculate the rounding error from the URECIP instruction
210  // E = mulhu(ABS_RCP_LO, RCP)
211  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
212
213  // RCP_A_E = RCP + E
214  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
215
216  // RCP_S_E = RCP - E
217  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
218
219  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
220  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
221                                     RCP_A_E, RCP_S_E,
222                                     ISD::SETEQ);
223  // Quotient = mulhu(Tmp0, Num)
224  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
225
226  // Num_S_Remainder = Quotient * Den
227  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
228
229  // Remainder = Num - Num_S_Remainder
230  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
231
232  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
233  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
234                                                 DAG.getConstant(-1, VT),
235                                                 DAG.getConstant(0, VT),
236                                                 ISD::SETGE);
237  // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
238  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
239                                                  DAG.getConstant(0, VT),
240                                                  DAG.getConstant(-1, VT),
241                                                  DAG.getConstant(0, VT),
242                                                  ISD::SETGE);
243  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
244  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
245                                               Remainder_GE_Zero);
246
247  // Calculate Division result:
248
249  // Quotient_A_One = Quotient + 1
250  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
251                                                         DAG.getConstant(1, VT));
252
253  // Quotient_S_One = Quotient - 1
254  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
255                                                         DAG.getConstant(1, VT));
256
257  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
258  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
259                                     Quotient, Quotient_A_One, ISD::SETEQ);
260
261  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
262  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
263                            Quotient_S_One, Div, ISD::SETEQ);
264
265  // Calculate Rem result:
266
267  // Remainder_S_Den = Remainder - Den
268  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
269
270  // Remainder_A_Den = Remainder + Den
271  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
272
273  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
274  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
275                                    Remainder, Remainder_S_Den, ISD::SETEQ);
276
277  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
278  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
279                            Remainder_A_Den, Rem, ISD::SETEQ);
280
281  DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
282  DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);
283
284  return Op;
285}
286
287//===----------------------------------------------------------------------===//
288// Helper functions
289//===----------------------------------------------------------------------===//
290
291bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
292{
293  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
294    return CFP->isExactlyValue(1.0);
295  }
296  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
297    return C->isAllOnesValue();
298  }
299  return false;
300}
301
302bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
303{
304  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
305    return CFP->getValueAPF().isZero();
306  }
307  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
308    return C->isNullValue();
309  }
310  return false;
311}
312
313SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
314                                                  const TargetRegisterClass *RC,
315                                                   unsigned Reg, EVT VT) const {
316  MachineFunction &MF = DAG.getMachineFunction();
317  MachineRegisterInfo &MRI = MF.getRegInfo();
318  unsigned VirtualRegister;
319  if (!MRI.isLiveIn(Reg)) {
320    VirtualRegister = MRI.createVirtualRegister(RC);
321    MRI.addLiveIn(Reg, VirtualRegister);
322  } else {
323    VirtualRegister = MRI.getLiveInVirtReg(Reg);
324  }
325  return DAG.getRegister(VirtualRegister, VT);
326}
327
328#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
329
330const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
331{
332  switch (Opcode) {
333  default: return 0;
334  // AMDIL DAG nodes
335  NODE_NAME_CASE(MAD);
336  NODE_NAME_CASE(CALL);
337  NODE_NAME_CASE(UMUL);
338  NODE_NAME_CASE(DIV_INF);
339  NODE_NAME_CASE(VBUILD);
340  NODE_NAME_CASE(RET_FLAG);
341  NODE_NAME_CASE(BRANCH_COND);
342
343  // AMDGPU DAG nodes
344  NODE_NAME_CASE(FRACT)
345  NODE_NAME_CASE(FMAX)
346  NODE_NAME_CASE(SMAX)
347  NODE_NAME_CASE(UMAX)
348  NODE_NAME_CASE(FMIN)
349  NODE_NAME_CASE(SMIN)
350  NODE_NAME_CASE(UMIN)
351  NODE_NAME_CASE(URECIP)
352  }
353}
354