// R600ISelLowering.cpp revision 8263408a91b6b3beb5af5de6bdc7e5d13197a268
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
11// is mostly EmitInstrWithCustomInserter().
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600InstrInfo.h"
17#include "R600MachineFunctionInfo.h"
18#include "llvm/CodeGen/MachineInstrBuilder.h"
19#include "llvm/CodeGen/MachineRegisterInfo.h"
20#include "llvm/CodeGen/SelectionDAG.h"
21
22using namespace llvm;
23
/// R600TargetLowering - Configure how generic DAG operations are legalized
/// for the R600 target: which types live in which register classes, and
/// which ISD nodes need custom lowering in LowerOperation().
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  // No native 64-bit multiply; let the legalizer expand it.
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  // Scalar 32-bit values use the 32-bit class; 4-element vectors use the
  // 128-bit class.
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  // Custom-lowered nodes; see the matching Lower* methods below.
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}
51
/// EmitInstrWithCustomInserter - Expand pseudo instructions after instruction
/// selection.  Each case below replaces MI with real machine instructions
/// inserted before it; MI itself is erased at the bottom unless a case
/// returns early.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  // Anything not handled here falls back to the generic AMDGPU expansion.
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  // The work-group/size pseudos all become a read of one dword from the
  // implicit kernel parameter buffer; the constant is the dword index.
  case AMDGPU::NGROUPS_X:
    lowerImplicitParameter(MI, *BB, MRI, 0);
    break;
  case AMDGPU::NGROUPS_Y:
    lowerImplicitParameter(MI, *BB, MRI, 1);
    break;
  case AMDGPU::NGROUPS_Z:
    lowerImplicitParameter(MI, *BB, MRI, 2);
    break;
  case AMDGPU::GLOBAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 3);
    break;
  case AMDGPU::GLOBAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 4);
    break;
  case AMDGPU::GLOBAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 5);
    break;
  case AMDGPU::LOCAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 6);
    break;
  case AMDGPU::LOCAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 7);
    break;
  case AMDGPU::LOCAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 8);
    break;

  // CLAMP/FABS/FNEG become a MOV that carries the corresponding
  // instruction-modifier flag on the destination (CLAMP) or source
  // (ABS/NEG) operand.
  case AMDGPU::CLAMP_R600:
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FABS_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FNEG_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  // Copy from the constant register selected by the immediate index.
  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
                  .addOperand(MI->getOperand(0))
                  .addReg(ConstantReg);
      break;
    }

  // Mark the defining instruction of the masked register so its result is
  // not written.  No new instructions are emitted.
  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
      def->addTargetFlag(MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address: shift the byte address right by 2 and
      // re-emit the store with the shifted address.
      unsigned NewAddr = MRI.createVirtualRegister(
                                             AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                              AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addReg(AMDGPU::PRED_SEL_OFF)
              .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue)
              .addReg(AMDGPU::PRED_SEL_OFF);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr);
      break;
    }

  // Record a physical T-register as reserved in the function info; emits
  // no code.
  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
                          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

  // Texture sample with explicit derivatives: set the H and V gradients
  // first, then sample.  t0/t1 are threaded through as implicit uses so the
  // scheduler keeps the three instructions together.
  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  // Same expansion as TXD, but using the shadow-compare sample opcode.
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }


  }

  // The pseudo has been fully replaced; remove it.
  MI->eraseFromParent();
  return BB;
}
220
221void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
222    MachineRegisterInfo & MRI, unsigned dword_offset) const
223{
224  unsigned ByteOffset = dword_offset * 4;
225
226  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
227  assert(isInt<16>(ByteOffset));
228
229  MachineBasicBlock::iterator I = *MI;
230  unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass);
231  MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass);
232
233  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::COPY), PtrReg)
234          .addReg(AMDGPU::ZERO);
235
236  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
237          .addOperand(MI->getOperand(0))
238          .addReg(PtrReg)
239          .addImm(ByteOffset);
240}
241
242//===----------------------------------------------------------------------===//
243// Custom DAG Lowering Operations
244//===----------------------------------------------------------------------===//
245
246using namespace llvm::Intrinsic;
247using namespace llvm::AMDGPUIntrinsic;
248
/// LowerOperation - Custom-lower the DAG nodes registered with
/// setOperationAction(..., Custom) in the constructor.  Returning an empty
/// SDValue tells the legalizer the node needs no replacement.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    // Store-output: copy the value into the physical output T-register and
    // mark that register live-out of the function.
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    // Shader inputs are preloaded into T-registers; map the index to the
    // physical register and expose it as a live-in.
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }
    // Group IDs are preloaded into T1, thread IDs into T0 (x/y/z channels).
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
314
315SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
316{
317  SDValue Chain = Op.getOperand(0);
318  SDValue CC = Op.getOperand(1);
319  SDValue LHS   = Op.getOperand(2);
320  SDValue RHS   = Op.getOperand(3);
321  SDValue JumpT  = Op.getOperand(4);
322  SDValue CmpValue;
323  SDValue Result;
324  CmpValue = DAG.getNode(
325      ISD::SELECT_CC,
326      Op.getDebugLoc(),
327      MVT::i32,
328      LHS, RHS,
329      DAG.getConstant(-1, MVT::i32),
330      DAG.getConstant(0, MVT::i32),
331      CC);
332  Result = DAG.getNode(
333      AMDGPUISD::BRANCH_COND,
334      CmpValue.getDebugLoc(),
335      MVT::Other, Chain,
336      JumpT, CmpValue);
337  return Result;
338}
339
340
341SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
342{
343  DebugLoc DL = Op.getDebugLoc();
344  EVT VT = Op.getValueType();
345
346  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
347                     Op.getOperand(0),
348                     Op.getOperand(0),
349                     DAG.getNode(ISD::SUB, DL, VT,
350                                 DAG.getConstant(32, MVT::i32),
351                                 Op.getOperand(1)));
352}
353
/// LowerSELECT_CC - Lower SELECT_CC so the hardware's SET*/CND* style
/// operations can implement it.  Natively-expressible forms are re-emitted
/// unchanged; anything else is lowered to a hardware-true/false SELECT_CC
/// feeding a SELECT.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.

  if (CompareVT !=  VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      // Preserve the comparison semantics: unsigned compares need an
      // unsigned int-to-float conversion.
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type parings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value or
  // vice-versa we can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
  // we can handle this with a native instruction, but we need to swap true
  // and false and change the conditional.
  // NOTE(review): this branch is intentionally empty — the swapped-operand
  // optimization is not implemented yet.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a native
  // operation. (The code below does this but we don't have the Instruction
  // selection patterns to do this yet.
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we reach this point we have no native instruction for this SELECT_CC,
  // so we must lower it manually.  Build the hardware true/false constants
  // for the result type.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  // Convert floating point condition to i1
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}
465
466SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
467{
468  SDValue Cond;
469  SDValue LHS = Op.getOperand(0);
470  SDValue RHS = Op.getOperand(1);
471  SDValue CC  = Op.getOperand(2);
472  DebugLoc DL = Op.getDebugLoc();
473  assert(Op.getValueType() == MVT::i32);
474  Cond = DAG.getNode(
475      ISD::SELECT_CC,
476      Op.getDebugLoc(),
477      MVT::i32,
478      LHS, RHS,
479      DAG.getConstant(-1, MVT::i32),
480      DAG.getConstant(0, MVT::i32),
481      CC);
482  Cond = DAG.getNode(
483      ISD::AND,
484      DL,
485      MVT::i32,
486      DAG.getConstant(1, MVT::i32),
487      Cond);
488  return Cond;
489}
490