//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file contains TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//
1380bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru
1480bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "AMDGPUISelLowering.h"
1580bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "AMDGPURegisterInfo.h"
1680bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "AMDILDevices.h"
1780bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "AMDILIntrinsicInfo.h"
1880bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "AMDGPUSubtarget.h"
1980bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "AMDILUtilityFunctions.h"
2080bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "llvm/CallingConv.h"
2180bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "llvm/CodeGen/MachineFrameInfo.h"
2280bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "llvm/CodeGen/MachineRegisterInfo.h"
2380bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "llvm/CodeGen/PseudoSourceValue.h"
2480bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "llvm/CodeGen/SelectionDAG.h"
2580bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "llvm/CodeGen/SelectionDAGNodes.h"
2680bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
2780bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "llvm/DerivedTypes.h"
2880bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "llvm/Instructions.h"
2980bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "llvm/Intrinsics.h"
3080bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "llvm/Support/raw_ostream.h"
3180bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "llvm/Target/TargetInstrInfo.h"
3280bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "llvm/Target/TargetOptions.h"
3380bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru
3480bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queruusing namespace llvm;
3580bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru//===----------------------------------------------------------------------===//
3680bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru// Calling Convention Implementation
3780bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru//===----------------------------------------------------------------------===//
3880bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru#include "AMDGPUGenCallingConv.inc"
3980bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru
4080bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru//===----------------------------------------------------------------------===//
4180bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru// TargetLowering Implementation Help Functions End
4280bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru//===----------------------------------------------------------------------===//
4380bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru
4480bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru//===----------------------------------------------------------------------===//
4580bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru// TargetLowering Class Implementation Begins
4680bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru//===----------------------------------------------------------------------===//
4780bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queruvoid AMDGPUTargetLowering::InitAMDILLowering()
4880bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru{
4980bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru  int types[] =
5080bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru  {
5180bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::i8,
5280bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::i16,
5380bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::i32,
5480bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::f32,
5580bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::f64,
5680bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::i64,
5780bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::v2i8,
5880bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::v4i8,
5980bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::v2i16,
6080bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::v4i16,
6180bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::v4f32,
6280bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::v4i32,
6380bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::v2f32,
6480bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::v2i32,
6580bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::v2f64,
6680bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::v2i64
6780bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru  };
6880bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru
6980bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru  int IntTypes[] =
7080bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru  {
7180bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::i8,
7280bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::i16,
7380bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::i32,
7480bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::i64
7580bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru  };
7680bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru
7780bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru  int FloatTypes[] =
7880bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru  {
7980bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::f32,
8080bacfeb4bda06541e8695bd502229727bccfeaJean-Baptiste Queru    (int)MVT::f64
81  };
82
83  int VectorTypes[] =
84  {
85    (int)MVT::v2i8,
86    (int)MVT::v4i8,
87    (int)MVT::v2i16,
88    (int)MVT::v4i16,
89    (int)MVT::v4f32,
90    (int)MVT::v4i32,
91    (int)MVT::v2f32,
92    (int)MVT::v2i32,
93    (int)MVT::v2f64,
94    (int)MVT::v2i64
95  };
96  size_t numTypes = sizeof(types) / sizeof(*types);
97  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
98  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
99  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
100
101  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
102  // These are the current register classes that are
103  // supported
104
105  for (unsigned int x  = 0; x < numTypes; ++x) {
106    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
107
108    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
109    // We cannot sextinreg, expand to shifts
110    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
111    setOperationAction(ISD::SUBE, VT, Expand);
112    setOperationAction(ISD::SUBC, VT, Expand);
113    setOperationAction(ISD::ADDE, VT, Expand);
114    setOperationAction(ISD::ADDC, VT, Expand);
115    setOperationAction(ISD::BRCOND, VT, Custom);
116    setOperationAction(ISD::BR_JT, VT, Expand);
117    setOperationAction(ISD::BRIND, VT, Expand);
118    // TODO: Implement custom UREM/SREM routines
119    setOperationAction(ISD::SREM, VT, Expand);
120    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
121    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
122    if (VT != MVT::i64 && VT != MVT::v2i64) {
123      setOperationAction(ISD::SDIV, VT, Custom);
124    }
125  }
126  for (unsigned int x = 0; x < numFloatTypes; ++x) {
127    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
128
129    // IL does not have these operations for floating point types
130    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
131    setOperationAction(ISD::SETOLT, VT, Expand);
132    setOperationAction(ISD::SETOGE, VT, Expand);
133    setOperationAction(ISD::SETOGT, VT, Expand);
134    setOperationAction(ISD::SETOLE, VT, Expand);
135    setOperationAction(ISD::SETULT, VT, Expand);
136    setOperationAction(ISD::SETUGE, VT, Expand);
137    setOperationAction(ISD::SETUGT, VT, Expand);
138    setOperationAction(ISD::SETULE, VT, Expand);
139  }
140
141  for (unsigned int x = 0; x < numIntTypes; ++x) {
142    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
143
144    // GPU also does not have divrem function for signed or unsigned
145    setOperationAction(ISD::SDIVREM, VT, Expand);
146
147    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
148    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
149    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
150
151    // GPU doesn't have a rotl, rotr, or byteswap instruction
152    setOperationAction(ISD::ROTR, VT, Expand);
153    setOperationAction(ISD::BSWAP, VT, Expand);
154
155    // GPU doesn't have any counting operators
156    setOperationAction(ISD::CTPOP, VT, Expand);
157    setOperationAction(ISD::CTTZ, VT, Expand);
158    setOperationAction(ISD::CTLZ, VT, Expand);
159  }
160
161  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
162  {
163    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
164
165    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
166    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
167    setOperationAction(ISD::SDIVREM, VT, Expand);
168    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
169    // setOperationAction(ISD::VSETCC, VT, Expand);
170    setOperationAction(ISD::SELECT_CC, VT, Expand);
171
172  }
173  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
174    setOperationAction(ISD::MULHU, MVT::i64, Expand);
175    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
176    setOperationAction(ISD::MULHS, MVT::i64, Expand);
177    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
178    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
179    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
180    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
181    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
182    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
183    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
184    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
185    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
186  }
187  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
188    // we support loading/storing v2f64 but not operations on the type
189    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
190    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
191    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
192    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
193    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
194    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
195    // We want to expand vector conversions into their scalar
196    // counterparts.
197    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
198    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
199    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
200    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
201    setOperationAction(ISD::FABS, MVT::f64, Expand);
202    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
203  }
204  // TODO: Fix the UDIV24 algorithm so it works for these
205  // types correctly. This needs vector comparisons
206  // for this to work correctly.
207  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
208  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
209  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
210  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
211  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
212  setOperationAction(ISD::SUBC, MVT::Other, Expand);
213  setOperationAction(ISD::ADDE, MVT::Other, Expand);
214  setOperationAction(ISD::ADDC, MVT::Other, Expand);
215  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
216  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
217  setOperationAction(ISD::BRIND, MVT::Other, Expand);
218  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
219
220  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
221
222  // Use the default implementation.
223  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
224  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
225
226  setSchedulingPreference(Sched::RegPressure);
227  setPow2DivIsCheap(false);
228  setPrefLoopAlignment(16);
229  setSelectIsExpensive(true);
230  setJumpIsExpensive(true);
231
232  maxStoresPerMemcpy  = 4096;
233  maxStoresPerMemmove = 4096;
234  maxStoresPerMemset  = 4096;
235
236#undef numTypes
237#undef numIntTypes
238#undef numVectorTypes
239#undef numFloatTypes
240}
241
242bool
243AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
244    const CallInst &I, unsigned Intrinsic) const
245{
246  return false;
247}
248// The backend supports 32 and 64 bit floating point immediates
249bool
250AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
251{
252  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
253      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
254    return true;
255  } else {
256    return false;
257  }
258}
259
260bool
261AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
262{
263  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
264      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
265    return false;
266  } else {
267    return true;
268  }
269}
270
271
272// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
273// be zero. Op is expected to be a target specific node. Used by DAG
274// combiner.
275
276void
277AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
278    const SDValue Op,
279    APInt &KnownZero,
280    APInt &KnownOne,
281    const SelectionDAG &DAG,
282    unsigned Depth) const
283{
284  APInt KnownZero2;
285  APInt KnownOne2;
286  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
287  switch (Op.getOpcode()) {
288    default: break;
289    case ISD::SELECT_CC:
290             DAG.ComputeMaskedBits(
291                 Op.getOperand(1),
292                 KnownZero,
293                 KnownOne,
294                 Depth + 1
295                 );
296             DAG.ComputeMaskedBits(
297                 Op.getOperand(0),
298                 KnownZero2,
299                 KnownOne2
300                 );
301             assert((KnownZero & KnownOne) == 0
302                 && "Bits known to be one AND zero?");
303             assert((KnownZero2 & KnownOne2) == 0
304                 && "Bits known to be one AND zero?");
305             // Only known if known in both the LHS and RHS
306             KnownOne &= KnownOne2;
307             KnownZero &= KnownZero2;
308             break;
309  };
310}
311
312//===----------------------------------------------------------------------===//
313//                           Other Lowering Hooks
314//===----------------------------------------------------------------------===//
315
316SDValue
317AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
318{
319  EVT OVT = Op.getValueType();
320  SDValue DST;
321  if (OVT.getScalarType() == MVT::i64) {
322    DST = LowerSDIV64(Op, DAG);
323  } else if (OVT.getScalarType() == MVT::i32) {
324    DST = LowerSDIV32(Op, DAG);
325  } else if (OVT.getScalarType() == MVT::i16
326      || OVT.getScalarType() == MVT::i8) {
327    DST = LowerSDIV24(Op, DAG);
328  } else {
329    DST = SDValue(Op.getNode(), 0);
330  }
331  return DST;
332}
333
334SDValue
335AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
336{
337  EVT OVT = Op.getValueType();
338  SDValue DST;
339  if (OVT.getScalarType() == MVT::i64) {
340    DST = LowerSREM64(Op, DAG);
341  } else if (OVT.getScalarType() == MVT::i32) {
342    DST = LowerSREM32(Op, DAG);
343  } else if (OVT.getScalarType() == MVT::i16) {
344    DST = LowerSREM16(Op, DAG);
345  } else if (OVT.getScalarType() == MVT::i8) {
346    DST = LowerSREM8(Op, DAG);
347  } else {
348    DST = SDValue(Op.getNode(), 0);
349  }
350  return DST;
351}
352
353SDValue
354AMDGPUTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
355{
356  EVT VT = Op.getValueType();
357  SDValue Nodes1;
358  SDValue second;
359  SDValue third;
360  SDValue fourth;
361  DebugLoc DL = Op.getDebugLoc();
362  Nodes1 = DAG.getNode(AMDGPUISD::VBUILD,
363      DL,
364      VT, Op.getOperand(0));
365#if 0
366  bool allEqual = true;
367  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
368    if (Op.getOperand(0) != Op.getOperand(x)) {
369      allEqual = false;
370      break;
371    }
372  }
373  if (allEqual) {
374    return Nodes1;
375  }
376#endif
377  switch(Op.getNumOperands()) {
378    default:
379    case 1:
380      break;
381    case 4:
382      fourth = Op.getOperand(3);
383      if (fourth.getOpcode() != ISD::UNDEF) {
384        Nodes1 = DAG.getNode(
385            ISD::INSERT_VECTOR_ELT,
386            DL,
387            Op.getValueType(),
388            Nodes1,
389            fourth,
390            DAG.getConstant(7, MVT::i32));
391      }
392    case 3:
393      third = Op.getOperand(2);
394      if (third.getOpcode() != ISD::UNDEF) {
395        Nodes1 = DAG.getNode(
396            ISD::INSERT_VECTOR_ELT,
397            DL,
398            Op.getValueType(),
399            Nodes1,
400            third,
401            DAG.getConstant(6, MVT::i32));
402      }
403    case 2:
404      second = Op.getOperand(1);
405      if (second.getOpcode() != ISD::UNDEF) {
406        Nodes1 = DAG.getNode(
407            ISD::INSERT_VECTOR_ELT,
408            DL,
409            Op.getValueType(),
410            Nodes1,
411            second,
412            DAG.getConstant(5, MVT::i32));
413      }
414      break;
415  };
416  return Nodes1;
417}
418
419SDValue
420AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
421{
422  SDValue Data = Op.getOperand(0);
423  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
424  DebugLoc DL = Op.getDebugLoc();
425  EVT DVT = Data.getValueType();
426  EVT BVT = BaseType->getVT();
427  unsigned baseBits = BVT.getScalarType().getSizeInBits();
428  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
429  unsigned shiftBits = srcBits - baseBits;
430  if (srcBits < 32) {
431    // If the op is less than 32 bits, then it needs to extend to 32bits
432    // so it can properly keep the upper bits valid.
433    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
434    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
435    shiftBits = 32 - baseBits;
436    DVT = IVT;
437  }
438  SDValue Shift = DAG.getConstant(shiftBits, DVT);
439  // Shift left by 'Shift' bits.
440  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
441  // Signed shift Right by 'Shift' bits.
442  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
443  if (srcBits < 32) {
444    // Once the sign extension is done, the op needs to be converted to
445    // its original type.
446    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
447  }
448  return Data;
449}
450EVT
451AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
452{
453  int iSize = (size * numEle);
454  int vEle = (iSize >> ((size == 64) ? 6 : 5));
455  if (!vEle) {
456    vEle = 1;
457  }
458  if (size == 64) {
459    if (vEle == 1) {
460      return EVT(MVT::i64);
461    } else {
462      return EVT(MVT::getVectorVT(MVT::i64, vEle));
463    }
464  } else {
465    if (vEle == 1) {
466      return EVT(MVT::i32);
467    } else {
468      return EVT(MVT::getVectorVT(MVT::i32, vEle));
469    }
470  }
471}
472
473SDValue
474AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
475{
476  SDValue Chain = Op.getOperand(0);
477  SDValue Cond  = Op.getOperand(1);
478  SDValue Jump  = Op.getOperand(2);
479  SDValue Result;
480  Result = DAG.getNode(
481      AMDGPUISD::BRANCH_COND,
482      Op.getDebugLoc(),
483      Op.getValueType(),
484      Chain, Jump, Cond);
485  return Result;
486}
487
488SDValue
489AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
490{
491  DebugLoc DL = Op.getDebugLoc();
492  EVT OVT = Op.getValueType();
493  SDValue LHS = Op.getOperand(0);
494  SDValue RHS = Op.getOperand(1);
495  MVT INTTY;
496  MVT FLTTY;
497  if (!OVT.isVector()) {
498    INTTY = MVT::i32;
499    FLTTY = MVT::f32;
500  } else if (OVT.getVectorNumElements() == 2) {
501    INTTY = MVT::v2i32;
502    FLTTY = MVT::v2f32;
503  } else if (OVT.getVectorNumElements() == 4) {
504    INTTY = MVT::v4i32;
505    FLTTY = MVT::v4f32;
506  }
507  unsigned bitsize = OVT.getScalarType().getSizeInBits();
508  // char|short jq = ia ^ ib;
509  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
510
511  // jq = jq >> (bitsize - 2)
512  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
513
514  // jq = jq | 0x1
515  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
516
517  // jq = (int)jq
518  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
519
520  // int ia = (int)LHS;
521  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
522
523  // int ib, (int)RHS;
524  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
525
526  // float fa = (float)ia;
527  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
528
529  // float fb = (float)ib;
530  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
531
532  // float fq = native_divide(fa, fb);
533  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
534
535  // fq = trunc(fq);
536  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
537
538  // float fqneg = -fq;
539  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
540
541  // float fr = mad(fqneg, fb, fa);
542  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
543
544  // int iq = (int)fq;
545  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
546
547  // fr = fabs(fr);
548  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
549
550  // fb = fabs(fb);
551  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
552
553  // int cv = fr >= fb;
554  SDValue cv;
555  if (INTTY == MVT::i32) {
556    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
557  } else {
558    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
559  }
560  // jq = (cv ? jq : 0);
561  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
562      DAG.getConstant(0, OVT));
563  // dst = iq + jq;
564  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
565  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
566  return iq;
567}
568
569SDValue
570AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
571{
572  DebugLoc DL = Op.getDebugLoc();
573  EVT OVT = Op.getValueType();
574  SDValue LHS = Op.getOperand(0);
575  SDValue RHS = Op.getOperand(1);
576  // The LowerSDIV32 function generates equivalent to the following IL.
577  // mov r0, LHS
578  // mov r1, RHS
579  // ilt r10, r0, 0
580  // ilt r11, r1, 0
581  // iadd r0, r0, r10
582  // iadd r1, r1, r11
583  // ixor r0, r0, r10
584  // ixor r1, r1, r11
585  // udiv r0, r0, r1
586  // ixor r10, r10, r11
587  // iadd r0, r0, r10
588  // ixor DST, r0, r10
589
590  // mov r0, LHS
591  SDValue r0 = LHS;
592
593  // mov r1, RHS
594  SDValue r1 = RHS;
595
596  // ilt r10, r0, 0
597  SDValue r10 = DAG.getSelectCC(DL,
598      r0, DAG.getConstant(0, OVT),
599      DAG.getConstant(-1, MVT::i32),
600      DAG.getConstant(0, MVT::i32),
601      ISD::SETLT);
602
603  // ilt r11, r1, 0
604  SDValue r11 = DAG.getSelectCC(DL,
605      r1, DAG.getConstant(0, OVT),
606      DAG.getConstant(-1, MVT::i32),
607      DAG.getConstant(0, MVT::i32),
608      ISD::SETLT);
609
610  // iadd r0, r0, r10
611  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
612
613  // iadd r1, r1, r11
614  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
615
616  // ixor r0, r0, r10
617  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
618
619  // ixor r1, r1, r11
620  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
621
622  // udiv r0, r0, r1
623  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
624
625  // ixor r10, r10, r11
626  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
627
628  // iadd r0, r0, r10
629  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
630
631  // ixor DST, r0, r10
632  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
633  return DST;
634}
635
636SDValue
637AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
638{
639  return SDValue(Op.getNode(), 0);
640}
641
642SDValue
643AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
644{
645  DebugLoc DL = Op.getDebugLoc();
646  EVT OVT = Op.getValueType();
647  MVT INTTY = MVT::i32;
648  if (OVT == MVT::v2i8) {
649    INTTY = MVT::v2i32;
650  } else if (OVT == MVT::v4i8) {
651    INTTY = MVT::v4i32;
652  }
653  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
654  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
655  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
656  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
657  return LHS;
658}
659
660SDValue
661AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
662{
663  DebugLoc DL = Op.getDebugLoc();
664  EVT OVT = Op.getValueType();
665  MVT INTTY = MVT::i32;
666  if (OVT == MVT::v2i16) {
667    INTTY = MVT::v2i32;
668  } else if (OVT == MVT::v4i16) {
669    INTTY = MVT::v4i32;
670  }
671  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
672  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
673  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
674  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
675  return LHS;
676}
677
678SDValue
679AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
680{
681  DebugLoc DL = Op.getDebugLoc();
682  EVT OVT = Op.getValueType();
683  SDValue LHS = Op.getOperand(0);
684  SDValue RHS = Op.getOperand(1);
685  // The LowerSREM32 function generates equivalent to the following IL.
686  // mov r0, LHS
687  // mov r1, RHS
688  // ilt r10, r0, 0
689  // ilt r11, r1, 0
690  // iadd r0, r0, r10
691  // iadd r1, r1, r11
692  // ixor r0, r0, r10
693  // ixor r1, r1, r11
694  // udiv r20, r0, r1
695  // umul r20, r20, r1
696  // sub r0, r0, r20
697  // iadd r0, r0, r10
698  // ixor DST, r0, r10
699
700  // mov r0, LHS
701  SDValue r0 = LHS;
702
703  // mov r1, RHS
704  SDValue r1 = RHS;
705
706  // ilt r10, r0, 0
707  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
708
709  // ilt r11, r1, 0
710  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
711
712  // iadd r0, r0, r10
713  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
714
715  // iadd r1, r1, r11
716  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
717
718  // ixor r0, r0, r10
719  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
720
721  // ixor r1, r1, r11
722  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
723
724  // udiv r20, r0, r1
725  SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
726
727  // umul r20, r20, r1
728  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
729
730  // sub r0, r0, r20
731  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
732
733  // iadd r0, r0, r10
734  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
735
736  // ixor DST, r0, r10
737  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
738  return DST;
739}
740
741SDValue
742AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
743{
744  return SDValue(Op.getNode(), 0);
745}
746