AMDGPUISelLowering.cpp revision 5464a92861c76f1e091cd219dee71ce9858eb195
1//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief This is the parent TargetLowering class for hardware code gen
12/// targets.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPUISelLowering.h"
17#include "AMDGPU.h"
18#include "AMDGPURegisterInfo.h"
19#include "AMDGPUSubtarget.h"
20#include "AMDILIntrinsicInfo.h"
21#include "R600MachineFunctionInfo.h"
22#include "SIMachineFunctionInfo.h"
23#include "llvm/CodeGen/CallingConvLower.h"
24#include "llvm/CodeGen/MachineFunction.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/SelectionDAG.h"
27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28#include "llvm/IR/DataLayout.h"
29
30using namespace llvm;
31
32#include "AMDGPUGenCallingConv.inc"
33
/// Construct the common AMDGPU lowering.  Registers which generic DAG
/// operations are Legal/Expand/Custom/Promote for this target family so the
/// legalizer produces nodes the R600/SI backends can select.
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
  TargetLowering(TM, new TargetLoweringObjectFileELF()) {

  // Initialize target lowering borrowed from AMDIL
  InitAMDILLowering();

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Library functions.  These default to Expand, but we have instructions
  // for them.
  setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
  setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
  setOperationAction(ISD::FPOW,   MVT::f32, Legal);
  setOperationAction(ISD::FLOG2,  MVT::f32, Legal);
  setOperationAction(ISD::FABS,   MVT::f32, Legal);
  setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
  setOperationAction(ISD::FRINT,  MVT::f32, Legal);

  // The hardware supports ROTR, but not ROTL
  setOperationAction(ISD::ROTL, MVT::i32, Expand);

  // Lower floating point store/load to integer store/load to reduce the number
  // of patterns in tablegen.
  setOperationAction(ISD::STORE, MVT::f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);

  setOperationAction(ISD::STORE, MVT::v2f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::STORE, MVT::v4f32, Promote);
  AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::STORE, MVT::f64, Promote);
  AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);

  // Small-vector truncating stores are packed into a single integer store
  // by LowerVectorStore (see below).
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  // XXX: This can be change to Custom, once ExpandVectorStores can
  // handle 64-bit stores.
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);

  setOperationAction(ISD::LOAD, MVT::f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);

  setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);

  setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
  AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);

  setOperationAction(ISD::LOAD, MVT::f64, Promote);
  AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);

  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);

  // No native extending loads from small integer vectors; expand them all.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);

  setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
  setOperationAction(ISD::FNEG, MVT::v4f32, Expand);

  setOperationAction(ISD::MUL, MVT::i64, Expand);

  // i32 unsigned division: expand UDIV/UREM into UDIVREM, which is custom
  // lowered by LowerUDIVREM below.
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
  setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);

  static const MVT::SimpleValueType IntTypes[] = {
    MVT::v2i32, MVT::v4i32
  };
  const size_t NumIntTypes = array_lengthof(IntTypes);

  for (unsigned int x  = 0; x < NumIntTypes; ++x) {
    MVT::SimpleValueType VT = IntTypes[x];
    // Expand the following operations for the current integer vector type
    // by default (scalarized by the legalizer).
    setOperationAction(ISD::ADD,  VT, Expand);
    setOperationAction(ISD::AND,  VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    setOperationAction(ISD::MUL,  VT, Expand);
    setOperationAction(ISD::OR,   VT, Expand);
    setOperationAction(ISD::SHL,  VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SRL,  VT, Expand);
    setOperationAction(ISD::SRA,  VT, Expand);
    setOperationAction(ISD::SUB,  VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::VSELECT, VT, Expand);
    setOperationAction(ISD::XOR,  VT, Expand);
  }

  static const MVT::SimpleValueType FloatTypes[] = {
    MVT::v2f32, MVT::v4f32
  };
  const size_t NumFloatTypes = array_lengthof(FloatTypes);

  // Likewise, expand basic arithmetic on the float vector types.
  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
    MVT::SimpleValueType VT = FloatTypes[x];
    setOperationAction(ISD::FADD, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Expand);
    setOperationAction(ISD::FFLOOR, VT, Expand);
    setOperationAction(ISD::FMUL, VT, Expand);
    setOperationAction(ISD::FRINT, VT, Expand);
    setOperationAction(ISD::FSUB, VT, Expand);
  }
}
159
160//===----------------------------------------------------------------------===//
161// Target Information
162//===----------------------------------------------------------------------===//
163
/// Type used for vector element indices (EXTRACT/INSERT_VECTOR_ELT operands).
MVT AMDGPUTargetLowering::getVectorIdxTy() const {
  return MVT::i32;
}
167
168
169//===---------------------------------------------------------------------===//
170// Target Properties
171//===---------------------------------------------------------------------===//
172
173bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
174  assert(VT.isFloatingPoint());
175  return VT == MVT::f32;
176}
177
178bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
179  assert(VT.isFloatingPoint());
180  return VT == MVT::f32;
181}
182
183//===---------------------------------------------------------------------===//
184// TargetLowering Callbacks
185//===---------------------------------------------------------------------===//
186
/// Run the tablegen-generated CC_AMDGPU calling-convention function over the
/// incoming formal arguments, recording the resulting value locations in
/// \p State.
void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
                             const SmallVectorImpl<ISD::InputArg> &Ins) const {

  State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
}
192
/// Lower a function return by emitting only the RET_FLAG chain node.
/// NOTE(review): Outs/OutVals are ignored here -- presumably return values
/// are materialized elsewhere by the subtarget lowering; confirm with callers.
SDValue AMDGPUTargetLowering::LowerReturn(
                                     SDValue Chain,
                                     CallingConv::ID CallConv,
                                     bool isVarArg,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                                     const SmallVectorImpl<SDValue> &OutVals,
                                     SDLoc DL, SelectionDAG &DAG) const {
  return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
}
202
203//===---------------------------------------------------------------------===//
204// Target specific lowering
205//===---------------------------------------------------------------------===//
206
207SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
208    const {
209  switch (Op.getOpcode()) {
210  default:
211    Op.getNode()->dump();
212    assert(0 && "Custom lowering code for this"
213        "instruction is not implemented yet!");
214    break;
215  // AMDIL DAG lowering
216  case ISD::SDIV: return LowerSDIV(Op, DAG);
217  case ISD::SREM: return LowerSREM(Op, DAG);
218  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
219  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
220  // AMDGPU DAG lowering
221  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
222  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
223  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
224  case ISD::STORE: return LowerVectorStore(Op, DAG);
225  case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
226  }
227  return Op;
228}
229
/// Lower a GlobalAddress by allocating space for the global at the current
/// end of the function's LDS region and returning the assigned byte offset
/// as a pointer-sized constant.
/// NOTE(review): each call grows MFI->LDSSize again -- assumes a given global
/// is lowered at most once per function; verify against callers.
SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
                                                 SDValue Op,
                                                 SelectionDAG &DAG) const {

  const DataLayout *TD = getTargetMachine().getDataLayout();
  GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
  // XXX: What does the value of G->getOffset() mean?
  assert(G->getOffset() == 0 &&
         "Do not know what to do with an non-zero offset");

  // Place the global at the current end of the LDS region.
  unsigned Offset = MFI->LDSSize;
  const GlobalValue *GV = G->getGlobal();
  uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());

  // XXX: Account for alignment?
  MFI->LDSSize += Size;

  // The offset constant is i64 on 64-bit-pointer targets, i32 otherwise.
  return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
}
249
250void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
251                                         SmallVectorImpl<SDValue> &Args,
252                                         unsigned Start,
253                                         unsigned Count) const {
254  EVT VT = Op.getValueType();
255  for (unsigned i = Start, e = Start + Count; i != e; ++i) {
256    Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
257                               VT.getVectorElementType(),
258                               Op, DAG.getConstant(i, MVT::i32)));
259  }
260}
261
262SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
263                                                  SelectionDAG &DAG) const {
264  SmallVector<SDValue, 8> Args;
265  SDValue A = Op.getOperand(0);
266  SDValue B = Op.getOperand(1);
267
268  ExtractVectorElements(A, DAG, Args, 0,
269                        A.getValueType().getVectorNumElements());
270  ExtractVectorElements(B, DAG, Args, 0,
271                        B.getValueType().getVectorNumElements());
272
273  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
274                     &Args[0], Args.size());
275}
276
277SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
278                                                     SelectionDAG &DAG) const {
279
280  SmallVector<SDValue, 8> Args;
281  EVT VT = Op.getValueType();
282  unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
283  ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
284                        VT.getVectorNumElements());
285
286  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
287                     &Args[0], Args.size());
288}
289
290
/// Lower chain-less target intrinsics.  AMDIL/AMDGPU min/max/abs-style
/// intrinsics are mapped onto target-specific DAG nodes or generic ISD
/// opcodes; unrecognized intrinsic IDs are returned unchanged.
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
    SelectionDAG &DAG) const {
  // Operand 0 is the intrinsic ID; operands 1.. are the intrinsic arguments.
  unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  switch (IntrinsicID) {
    default: return Op;
    case AMDGPUIntrinsic::AMDIL_abs:
      return LowerIntrinsicIABS(Op, DAG);
    case AMDGPUIntrinsic::AMDIL_exp:
      return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
    case AMDGPUIntrinsic::AMDGPU_lrp:
      return LowerIntrinsicLRP(Op, DAG);
    case AMDGPUIntrinsic::AMDIL_fraction:
      return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
    case AMDGPUIntrinsic::AMDIL_max:
      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
                                                  Op.getOperand(2));
    case AMDGPUIntrinsic::AMDGPU_imax:
      return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
                                                  Op.getOperand(2));
    case AMDGPUIntrinsic::AMDGPU_umax:
      return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
                                                  Op.getOperand(2));
    case AMDGPUIntrinsic::AMDIL_min:
      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
                                                  Op.getOperand(2));
    case AMDGPUIntrinsic::AMDGPU_imin:
      return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
                                                  Op.getOperand(2));
    case AMDGPUIntrinsic::AMDGPU_umin:
      return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
                                                  Op.getOperand(2));
    case AMDGPUIntrinsic::AMDIL_round_nearest:
      return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
  }
}
329
330///IABS(a) = SMAX(sub(0, a), a)
331SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
332    SelectionDAG &DAG) const {
333
334  SDLoc DL(Op);
335  EVT VT = Op.getValueType();
336  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
337                                              Op.getOperand(1));
338
339  return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
340}
341
342/// Linear Interpolation
343/// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
344SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
345    SelectionDAG &DAG) const {
346  SDLoc DL(Op);
347  EVT VT = Op.getValueType();
348  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
349                                DAG.getConstantFP(1.0f, MVT::f32),
350                                Op.getOperand(1));
351  SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
352                                                    Op.getOperand(3));
353  return DAG.getNode(ISD::FADD, DL, VT,
354      DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
355      OneSubAC);
356}
357
358/// \brief Generate Min/Max node
359SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
360    SelectionDAG &DAG) const {
361  SDLoc DL(Op);
362  EVT VT = Op.getValueType();
363
364  SDValue LHS = Op.getOperand(0);
365  SDValue RHS = Op.getOperand(1);
366  SDValue True = Op.getOperand(2);
367  SDValue False = Op.getOperand(3);
368  SDValue CC = Op.getOperand(4);
369
370  if (VT != MVT::f32 ||
371      !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
372    return SDValue();
373  }
374
375  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
376  switch (CCOpcode) {
377  case ISD::SETOEQ:
378  case ISD::SETONE:
379  case ISD::SETUNE:
380  case ISD::SETNE:
381  case ISD::SETUEQ:
382  case ISD::SETEQ:
383  case ISD::SETFALSE:
384  case ISD::SETFALSE2:
385  case ISD::SETTRUE:
386  case ISD::SETTRUE2:
387  case ISD::SETUO:
388  case ISD::SETO:
389    assert(0 && "Operation should already be optimised !");
390  case ISD::SETULE:
391  case ISD::SETULT:
392  case ISD::SETOLE:
393  case ISD::SETOLT:
394  case ISD::SETLE:
395  case ISD::SETLT: {
396    if (LHS == True)
397      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
398    else
399      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
400  }
401  case ISD::SETGT:
402  case ISD::SETGE:
403  case ISD::SETUGE:
404  case ISD::SETOGE:
405  case ISD::SETUGT:
406  case ISD::SETOGT: {
407    if (LHS == True)
408      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
409    else
410      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
411  }
412  case ISD::SETCC_INVALID:
413    assert(0 && "Invalid setcc condcode !");
414  }
415  return Op;
416}
417
418
419
/// Lower unsigned integer division+remainder using the hardware's reciprocal
/// approximation (URECIP) followed by error estimation and two rounds of
/// quotient/remainder correction.  Returns the quotient and remainder as a
/// two-result merged value.
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Num = Op.getOperand(0);
  SDValue Den = Op.getOperand(1);

  SmallVector<SDValue, 8> Results;

  // RCP =  URECIP(Den) = 2^32 / Den + e
  // e is rounding error.
  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);

  // RCP_LO = umulo(RCP, Den)
  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);

  // RCP_HI = mulhu(RCP, Den)
  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);

  // NEG_RCP_LO = -RCP_LO
  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
                                                     RCP_LO);

  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                           NEG_RCP_LO, RCP_LO,
                                           ISD::SETEQ);
  // Calculate the rounding error from the URECIP instruction
  // E = mulhu(ABS_RCP_LO, RCP)
  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);

  // RCP_A_E = RCP + E
  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);

  // RCP_S_E = RCP - E
  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);

  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                     RCP_A_E, RCP_S_E,
                                     ISD::SETEQ);
  // Quotient = mulhu(Tmp0, Num)  -- first quotient estimate
  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);

  // Num_S_Remainder = Quotient * Den
  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);

  // Remainder = Num - Num_S_Remainder
  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);

  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
                                                 DAG.getConstant(-1, VT),
                                                 DAG.getConstant(0, VT),
                                                 ISD::SETGE);
  // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
                                                  DAG.getConstant(0, VT),
                                                  DAG.getConstant(-1, VT),
                                                  DAG.getConstant(0, VT),
                                                  ISD::SETGE);
  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
  // (all-ones iff the estimate was one too small)
  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
                                               Remainder_GE_Zero);

  // Calculate Division result:

  // Quotient_A_One = Quotient + 1
  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
                                                         DAG.getConstant(1, VT));

  // Quotient_S_One = Quotient - 1
  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
                                                         DAG.getConstant(1, VT));

  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                     Quotient, Quotient_A_One, ISD::SETEQ);

  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                            Quotient_S_One, Div, ISD::SETEQ);

  // Calculate Rem result:

  // Remainder_S_Den = Remainder - Den
  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);

  // Remainder_A_Den = Remainder + Den
  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);

  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                    Remainder, Remainder_S_Den, ISD::SETEQ);

  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                            Remainder_A_Den, Rem, ISD::SETEQ);

  // Return {quotient, remainder} as a merged two-value node.
  SDValue Ops[2];
  Ops[0] = Div;
  Ops[1] = Rem;
  return DAG.getMergeValues(Ops, 2, DL);
}
524
525SDValue AMDGPUTargetLowering::LowerVectorStore(const SDValue &Op,
526                                               SelectionDAG &DAG) const {
527  StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
528  EVT MemVT = Store->getMemoryVT();
529  unsigned MemBits = MemVT.getSizeInBits();
530
531  // Byte stores are really expensive, so if possible, try to pack
532  // 32-bit vector truncatating store into an i32 store.
533  // XXX: We could also handle optimize other vector bitwidths
534  if (!MemVT.isVector() || MemBits > 32) {
535    return SDValue();
536  }
537
538  SDLoc DL(Op);
539  const SDValue &Value = Store->getValue();
540  EVT VT = Value.getValueType();
541  const SDValue &Ptr = Store->getBasePtr();
542  EVT MemEltVT = MemVT.getVectorElementType();
543  unsigned MemEltBits = MemEltVT.getSizeInBits();
544  unsigned MemNumElements = MemVT.getVectorNumElements();
545  EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
546  SDValue Mask;
547  switch(MemEltBits) {
548  case 8:
549    Mask = DAG.getConstant(0xFF, PackedVT);
550    break;
551  case 16:
552    Mask = DAG.getConstant(0xFFFF, PackedVT);
553    break;
554  default:
555    llvm_unreachable("Cannot lower this vector store");
556  }
557  SDValue PackedValue;
558  for (unsigned i = 0; i < MemNumElements; ++i) {
559    EVT ElemVT = VT.getVectorElementType();
560    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
561                              DAG.getConstant(i, MVT::i32));
562    Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
563    Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
564    SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
565    Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
566    if (i == 0) {
567      PackedValue = Elt;
568    } else {
569      PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
570    }
571  }
572  return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
573                      MachinePointerInfo(Store->getMemOperand()->getValue()),
574                      Store->isVolatile(),  Store->isNonTemporal(),
575                      Store->getAlignment());
576}
577
578//===----------------------------------------------------------------------===//
579// Helper functions
580//===----------------------------------------------------------------------===//
581
582bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
583  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
584    return CFP->isExactlyValue(1.0);
585  }
586  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
587    return C->isAllOnesValue();
588  }
589  return false;
590}
591
592bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
593  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
594    return CFP->getValueAPF().isZero();
595  }
596  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
597    return C->isNullValue();
598  }
599  return false;
600}
601
602SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
603                                                  const TargetRegisterClass *RC,
604                                                   unsigned Reg, EVT VT) const {
605  MachineFunction &MF = DAG.getMachineFunction();
606  MachineRegisterInfo &MRI = MF.getRegInfo();
607  unsigned VirtualRegister;
608  if (!MRI.isLiveIn(Reg)) {
609    VirtualRegister = MRI.createVirtualRegister(RC);
610    MRI.addLiveIn(Reg, VirtualRegister);
611  } else {
612    VirtualRegister = MRI.getLiveInVirtReg(Reg);
613  }
614  return DAG.getRegister(VirtualRegister, VT);
615}
616
// Expands to a switch case that returns the node's identifier, stringized.
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;

/// Return a human-readable name for the given target-specific DAG node
/// opcode, or 0 for opcodes this target does not define.
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  // AMDIL DAG nodes
  NODE_NAME_CASE(CALL);
  NODE_NAME_CASE(UMUL);
  NODE_NAME_CASE(DIV_INF);
  NODE_NAME_CASE(RET_FLAG);
  NODE_NAME_CASE(BRANCH_COND);

  // AMDGPU DAG nodes
  NODE_NAME_CASE(DWORDADDR)
  NODE_NAME_CASE(FRACT)
  NODE_NAME_CASE(FMAX)
  NODE_NAME_CASE(SMAX)
  NODE_NAME_CASE(UMAX)
  NODE_NAME_CASE(FMIN)
  NODE_NAME_CASE(SMIN)
  NODE_NAME_CASE(UMIN)
  NODE_NAME_CASE(URECIP)
  NODE_NAME_CASE(EXPORT)
  NODE_NAME_CASE(CONST_ADDRESS)
  NODE_NAME_CASE(REGISTER_LOAD)
  NODE_NAME_CASE(REGISTER_STORE)
  NODE_NAME_CASE(LOAD_CONSTANT)
  NODE_NAME_CASE(LOAD_INPUT)
  NODE_NAME_CASE(SAMPLE)
  NODE_NAME_CASE(SAMPLEB)
  NODE_NAME_CASE(SAMPLED)
  NODE_NAME_CASE(SAMPLEL)
  NODE_NAME_CASE(STORE_MSKOR)
  }
}
652