R600ISelLowering.cpp revision f3480f92349c90f55e2e80d9a4536ab048fb5652
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
11// is mostly EmitInstrWithCustomInserter().
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "AMDGPUUtil.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/CodeGen/SelectionDAG.h"
22
23using namespace llvm;
24
25R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
26    AMDGPUTargetLowering(TM),
27    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
28{
29  setOperationAction(ISD::MUL, MVT::i64, Expand);
30  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
31  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
32  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
33  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
34  computeRegisterProperties();
35
36  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
37
38  setOperationAction(ISD::FSUB, MVT::f32, Expand);
39
40  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
41
42  setOperationAction(ISD::ROTL, MVT::i32, Custom);
43
44  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
45  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
46
47  setOperationAction(ISD::SETCC, MVT::i32, Custom);
48
49  setSchedulingPreference(Sched::VLIW);
50}
51
// Expand R600 pseudo-instructions that need MachineInstr-level custom
// insertion.  Each case emits its replacement instruction(s) in front of
// the pseudo; the pseudo itself is erased at the bottom of the function
// (MASK_WRITE alone returns early to keep its instruction alive).
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  // Group ids map to the fixed T1 register channels, thread-in-group ids
  // to T0.  addLiveIn() comes from AMDGPUUtil — NOTE(review): presumably
  // it rewires the pseudo's result to the physical register; confirm there.
  case AMDGPU::TGID_X:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X);
    break;
  case AMDGPU::TGID_Y:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y);
    break;
  case AMDGPU::TGID_Z:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z);
    break;
  case AMDGPU::TIDIG_X:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X);
    break;
  case AMDGPU::TIDIG_Y:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y);
    break;
  case AMDGPU::TIDIG_Z:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z);
    break;
  // The remaining dispatch values are read from the implicit-parameter
  // area at consecutive dword offsets 0-8 (see lowerImplicitParameter).
  case AMDGPU::NGROUPS_X:
    lowerImplicitParameter(MI, *BB, MRI, 0);
    break;
  case AMDGPU::NGROUPS_Y:
    lowerImplicitParameter(MI, *BB, MRI, 1);
    break;
  case AMDGPU::NGROUPS_Z:
    lowerImplicitParameter(MI, *BB, MRI, 2);
    break;
  case AMDGPU::GLOBAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 3);
    break;
  case AMDGPU::GLOBAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 4);
    break;
  case AMDGPU::GLOBAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 5);
    break;
  case AMDGPU::LOCAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 6);
    break;
  case AMDGPU::LOCAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 7);
    break;
  case AMDGPU::LOCAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 8);
    break;

  // Clamp is expressed by tagging the destination operand and copying
  // through a MOV that carries the flag.
  case AMDGPU::CLAMP_R600:
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1));
    break;

  // Abs/neg are source-operand modifiers: tag the source and emit a MOV.
  case AMDGPU::FABS_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1));
    break;

  case AMDGPU::FNEG_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1));
    break;

  // Copy from the constant register selected by the immediate index.
  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
                  .addOperand(MI->getOperand(0))
                  .addReg(ConstantReg);
      break;
    }

  // Inputs arrive pre-loaded in T registers; hook up the one selected by
  // the immediate index as a live-in.
  case AMDGPU::LOAD_INPUT:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      addLiveIn(MI, MF, MRI, TII,
                AMDGPU::R600_TReg32RegClass.getRegister(RegIndex));
      break;
    }

  // Tag the def of the masked register on its defining instruction with
  // MO_FLAG_MASK instead of emitting anything new.
  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
      def->addTargetFlag(MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
                                             AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                              AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addImm(2);
      // NewAddr = byte address >> 2.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue);
      // Re-emit the RAT write itself with the dword address; the original
      // (byte-addressed) pseudo is erased below.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr);
      break;
    }

  // Record the register named by the immediate index as reserved in the
  // machine function info.
  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
                          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

  // Derivative-sampling: set H and V gradients, then sample with implicit
  // uses of the two gradient temporaries so they are kept alive.
  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  // Same as TXD but samples with the shadow-compare variant.
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }


  }

  // The pseudo has been replaced; remove it.
  MI->eraseFromParent();
  return BB;
}
241
242void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
243    MachineRegisterInfo & MRI, unsigned dword_offset) const
244{
245  unsigned ByteOffset = dword_offset * 4;
246
247  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
248  assert(isInt<16>(ByteOffset));
249
250  MachineBasicBlock::iterator I = *MI;
251  unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass);
252  MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass);
253
254  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::COPY), PtrReg)
255          .addReg(AMDGPU::ZERO);
256
257  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
258          .addOperand(MI->getOperand(0))
259          .addReg(PtrReg)
260          .addImm(ByteOffset);
261}
262
263//===----------------------------------------------------------------------===//
264// Custom DAG Lowering Operations
265//===----------------------------------------------------------------------===//
266
267
268SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
269{
270  switch (Op.getOpcode()) {
271  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
272  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
273  case ISD::ROTL: return LowerROTL(Op, DAG);
274  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
275  case ISD::SETCC: return LowerSETCC(Op, DAG);
276  case ISD::INTRINSIC_VOID: {
277    SDValue Chain = Op.getOperand(0);
278    unsigned IntrinsicID =
279                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
280    switch (IntrinsicID) {
281    case AMDGPUIntrinsic::AMDGPU_store_output: {
282      MachineFunction &MF = DAG.getMachineFunction();
283      MachineRegisterInfo &MRI = MF.getRegInfo();
284      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
285      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
286      if (!MRI.isLiveOut(Reg)) {
287        MRI.addLiveOut(Reg);
288      }
289      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
290    }
291    default: return SDValue();
292    }
293    break;
294  }
295  }
296}
297
298SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
299{
300  SDValue Chain = Op.getOperand(0);
301  SDValue CC = Op.getOperand(1);
302  SDValue LHS   = Op.getOperand(2);
303  SDValue RHS   = Op.getOperand(3);
304  SDValue JumpT  = Op.getOperand(4);
305  SDValue CmpValue;
306  SDValue Result;
307  CmpValue = DAG.getNode(
308      ISD::SELECT_CC,
309      Op.getDebugLoc(),
310      MVT::i32,
311      LHS, RHS,
312      DAG.getConstant(-1, MVT::i32),
313      DAG.getConstant(0, MVT::i32),
314      CC);
315  Result = DAG.getNode(
316      AMDGPUISD::BRANCH_COND,
317      CmpValue.getDebugLoc(),
318      MVT::Other, Chain,
319      JumpT, CmpValue);
320  return Result;
321}
322
323
324SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
325{
326  DebugLoc DL = Op.getDebugLoc();
327  EVT VT = Op.getValueType();
328
329  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
330                     Op.getOperand(0),
331                     Op.getOperand(0),
332                     DAG.getNode(ISD::SUB, DL, VT,
333                                 DAG.getConstant(32, MVT::i32),
334                                 Op.getOperand(1)));
335}
336
// Lower SELECT_CC into a form the R600 SET*/CND* patterns can select.
// Cases whose True/False values are hardware true/false constants are
// re-emitted as SELECT_CC directly; everything else is decomposed into a
// compare producing a hardware true/false value plus an ISD::SELECT.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  // Only referenced by the disabled lowering in the #if 0 block below.
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.

  if (CompareVT !=  VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      // NOTE(review): with asserts disabled, ConversionOp would stay
      // DELETED_NODE here and the getNode calls below would be bogus.
      assert(!"Unhandled operand type parings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value or
  // vice-versa we can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
  // we can handle this with a native instruction, but we need to swap true
  // and false and change the conditional.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    // Intentionally empty: the swap described above is not implemented yet.
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a native
  // operation. (The code below does this but we don't have the Instruction
  // selection patterns to do this yet.
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this for it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  // Convert floating point condition to i1
  // (Cond is 1.0f or 0.0f here; FNEG then FP_TO_SINT yields -1 or 0.)
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}
448
449SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
450{
451  SDValue Cond;
452  SDValue LHS = Op.getOperand(0);
453  SDValue RHS = Op.getOperand(1);
454  SDValue CC  = Op.getOperand(2);
455  DebugLoc DL = Op.getDebugLoc();
456  assert(Op.getValueType() == MVT::i32);
457  Cond = DAG.getNode(
458      ISD::SELECT_CC,
459      Op.getDebugLoc(),
460      MVT::i32,
461      LHS, RHS,
462      DAG.getConstant(-1, MVT::i32),
463      DAG.getConstant(0, MVT::i32),
464      CC);
465  Cond = DAG.getNode(
466      ISD::AND,
467      DL,
468      MVT::i32,
469      DAG.getConstant(1, MVT::i32),
470      Cond);
471  return Cond;
472}
473