AMDGPUISelLowering.cpp revision 40c41fe890e53d99afb4e2c3fbf10043081edd9e
1//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This is the parent TargetLowering class for hardware code gen targets.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUISelLowering.h"
15#include "AMDILIntrinsicInfo.h"
16#include "AMDGPUUtil.h"
17#include "llvm/CodeGen/SelectionDAG.h"
18#include "llvm/CodeGen/MachineFunction.h"
19#include "llvm/CodeGen/MachineRegisterInfo.h"
20#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
21
22using namespace llvm;
23
24AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
25  TargetLowering(TM, new TargetLoweringObjectFileELF())
26{
27
28  // Initialize target lowering borrowed from AMDIL
29  InitAMDILLowering();
30
31  // We need to custom lower some of the intrinsics
32  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
33
34  // Library functions.  These default to Expand, but we have instructions
35  // for them.
36  setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
37  setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
38  setOperationAction(ISD::FRINT,  MVT::f32, Legal);
39
40  setOperationAction(ISD::UDIV, MVT::i32, Expand);
41  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
42  setOperationAction(ISD::UREM, MVT::i32, Expand);
43}
44
45//===---------------------------------------------------------------------===//
46// TargetLowering Callbacks
47//===---------------------------------------------------------------------===//
48
49SDValue AMDGPUTargetLowering::LowerFormalArguments(
50                                      SDValue Chain,
51                                      CallingConv::ID CallConv,
52                                      bool isVarArg,
53                                      const SmallVectorImpl<ISD::InputArg> &Ins,
54                                      DebugLoc DL, SelectionDAG &DAG,
55                                      SmallVectorImpl<SDValue> &InVals) const
56{
57  // Lowering of arguments happens in R600LowerKernelParameters, so we can
58  // ignore the arguments here.
59  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
60    InVals.push_back(SDValue());
61  }
62  return Chain;
63}
64
65SDValue AMDGPUTargetLowering::LowerReturn(
66                                     SDValue Chain,
67                                     CallingConv::ID CallConv,
68                                     bool isVarArg,
69                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
70                                     const SmallVectorImpl<SDValue> &OutVals,
71                                     DebugLoc DL, SelectionDAG &DAG) const
72{
73  return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
74}
75
76//===---------------------------------------------------------------------===//
77// Target specific lowering
78//===---------------------------------------------------------------------===//
79
80SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
81    const
82{
83  switch (Op.getOpcode()) {
84  default:
85    Op.getNode()->dump();
86    assert(0 && "Custom lowering code for this"
87        "instruction is not implemented yet!");
88    break;
89  // AMDIL DAG lowering
90  case ISD::SDIV: return LowerSDIV(Op, DAG);
91  case ISD::SREM: return LowerSREM(Op, DAG);
92  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
93  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
94  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
95  // AMDGPU DAG lowering
96  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
97  case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
98  }
99  return Op;
100}
101
102SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
103    SelectionDAG &DAG) const
104{
105  unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
106  DebugLoc DL = Op.getDebugLoc();
107  EVT VT = Op.getValueType();
108
109  switch (IntrinsicID) {
110    default: return Op;
111    case AMDGPUIntrinsic::AMDIL_abs:
112      return LowerIntrinsicIABS(Op, DAG);
113    case AMDGPUIntrinsic::AMDIL_exp:
114      return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
115    case AMDGPUIntrinsic::AMDIL_fabs:
116      return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1));
117    case AMDGPUIntrinsic::AMDGPU_lrp:
118      return LowerIntrinsicLRP(Op, DAG);
119    case AMDGPUIntrinsic::AMDIL_fraction:
120      return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
121    case AMDGPUIntrinsic::AMDIL_mad:
122      return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
123                              Op.getOperand(2), Op.getOperand(3));
124    case AMDGPUIntrinsic::AMDIL_max:
125      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
126                                                  Op.getOperand(2));
127    case AMDGPUIntrinsic::AMDGPU_imax:
128      return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
129                                                  Op.getOperand(2));
130    case AMDGPUIntrinsic::AMDGPU_umax:
131      return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
132                                                  Op.getOperand(2));
133    case AMDGPUIntrinsic::AMDIL_min:
134      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
135                                                  Op.getOperand(2));
136    case AMDGPUIntrinsic::AMDGPU_imin:
137      return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
138                                                  Op.getOperand(2));
139    case AMDGPUIntrinsic::AMDGPU_umin:
140      return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
141                                                  Op.getOperand(2));
142    case AMDGPUIntrinsic::AMDIL_round_nearest:
143      return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
144    case AMDGPUIntrinsic::AMDIL_round_posinf:
145      return DAG.getNode(ISD::FCEIL, DL, VT, Op.getOperand(1));
146  }
147}
148
149///IABS(a) = SMAX(sub(0, a), a)
150SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
151    SelectionDAG &DAG) const
152{
153
154  DebugLoc DL = Op.getDebugLoc();
155  EVT VT = Op.getValueType();
156  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
157                                              Op.getOperand(1));
158
159  return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
160}
161
162/// Linear Interpolation
163/// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
164SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
165    SelectionDAG &DAG) const
166{
167  DebugLoc DL = Op.getDebugLoc();
168  EVT VT = Op.getValueType();
169  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
170                                DAG.getConstantFP(1.0f, MVT::f32),
171                                Op.getOperand(1));
172  SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
173                                                    Op.getOperand(3));
174  return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
175                                               Op.getOperand(2),
176                                               OneSubAC);
177}
178
179
180
181SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
182    SelectionDAG &DAG) const
183{
184  DebugLoc DL = Op.getDebugLoc();
185  EVT VT = Op.getValueType();
186
187  SDValue Num = Op.getOperand(0);
188  SDValue Den = Op.getOperand(1);
189
190  SmallVector<SDValue, 8> Results;
191
192  // RCP =  URECIP(Den) = 2^32 / Den + e
193  // e is rounding error.
194  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
195
196  // RCP_LO = umulo(RCP, Den) */
197  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
198
199  // RCP_HI = mulhu (RCP, Den) */
200  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
201
202  // NEG_RCP_LO = -RCP_LO
203  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
204                                                     RCP_LO);
205
206  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
207  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
208                                           NEG_RCP_LO, RCP_LO,
209                                           ISD::SETEQ);
210  // Calculate the rounding error from the URECIP instruction
211  // E = mulhu(ABS_RCP_LO, RCP)
212  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
213
214  // RCP_A_E = RCP + E
215  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
216
217  // RCP_S_E = RCP - E
218  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
219
220  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
221  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
222                                     RCP_A_E, RCP_S_E,
223                                     ISD::SETEQ);
224  // Quotient = mulhu(Tmp0, Num)
225  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
226
227  // Num_S_Remainder = Quotient * Den
228  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
229
230  // Remainder = Num - Num_S_Remainder
231  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
232
233  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
234  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
235                                                 DAG.getConstant(-1, VT),
236                                                 DAG.getConstant(0, VT),
237                                                 ISD::SETGE);
238  // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
239  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
240                                                  DAG.getConstant(0, VT),
241                                                  DAG.getConstant(-1, VT),
242                                                  DAG.getConstant(0, VT),
243                                                  ISD::SETGE);
244  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
245  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
246                                               Remainder_GE_Zero);
247
248  // Calculate Division result:
249
250  // Quotient_A_One = Quotient + 1
251  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
252                                                         DAG.getConstant(1, VT));
253
254  // Quotient_S_One = Quotient - 1
255  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
256                                                         DAG.getConstant(1, VT));
257
258  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
259  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
260                                     Quotient, Quotient_A_One, ISD::SETEQ);
261
262  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
263  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
264                            Quotient_S_One, Div, ISD::SETEQ);
265
266  // Calculate Rem result:
267
268  // Remainder_S_Den = Remainder - Den
269  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
270
271  // Remainder_A_Den = Remainder + Den
272  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
273
274  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
275  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
276                                    Remainder, Remainder_S_Den, ISD::SETEQ);
277
278  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
279  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
280                            Remainder_A_Den, Rem, ISD::SETEQ);
281
282  DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
283  DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);
284
285  return Op;
286}
287
288//===----------------------------------------------------------------------===//
289// Helper functions
290//===----------------------------------------------------------------------===//
291
292bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
293{
294  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
295    return CFP->isExactlyValue(1.0);
296  }
297  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
298    return C->isAllOnesValue();
299  }
300  return false;
301}
302
303bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
304{
305  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
306    return CFP->getValueAPF().isZero();
307  }
308  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
309    return C->isNullValue();
310  }
311  return false;
312}
313
314void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI,
315    MachineFunction * MF, MachineRegisterInfo & MRI,
316    const TargetInstrInfo * TII, unsigned reg) const
317{
318  AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, MI->getOperand(0).getReg());
319}
320
321SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
322                                                  const TargetRegisterClass *RC,
323                                                   unsigned Reg, EVT VT) const {
324  MachineFunction &MF = DAG.getMachineFunction();
325  MachineRegisterInfo &MRI = MF.getRegInfo();
326  unsigned VirtualRegister;
327  if (!MRI.isLiveIn(Reg)) {
328    VirtualRegister = MRI.createVirtualRegister(RC);
329    MRI.addLiveIn(Reg, VirtualRegister);
330  } else {
331    VirtualRegister = MRI.getLiveInVirtReg(Reg);
332  }
333  return DAG.getRegister(VirtualRegister, VT);
334}
335
336#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
337
338const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
339{
340  switch (Opcode) {
341  default: return 0;
342  // AMDIL DAG nodes
343  NODE_NAME_CASE(MAD);
344  NODE_NAME_CASE(CALL);
345  NODE_NAME_CASE(UMUL);
346  NODE_NAME_CASE(DIV_INF);
347  NODE_NAME_CASE(VBUILD);
348  NODE_NAME_CASE(RET_FLAG);
349  NODE_NAME_CASE(BRANCH_COND);
350
351  // AMDGPU DAG nodes
352  NODE_NAME_CASE(FRACT)
353  NODE_NAME_CASE(FMAX)
354  NODE_NAME_CASE(SMAX)
355  NODE_NAME_CASE(UMAX)
356  NODE_NAME_CASE(FMIN)
357  NODE_NAME_CASE(SMIN)
358  NODE_NAME_CASE(UMIN)
359  NODE_NAME_CASE(URECIP)
360  }
361}
362