R600ISelLowering.cpp revision f7fcaa07df7b3aab124576dec346ae4fa7c6715b
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
11// is mostly EmitInstrWithCustomInserter().
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "AMDGPUUtil.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/CodeGen/SelectionDAG.h"
22
23using namespace llvm;
24
// R600 target lowering constructor: registers the type-to-register-class
// mapping for the R600 family and records which ISD operations this target
// expands or custom-lowers.
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  // No native 64-bit integer multiply; let the legalizer expand it.
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  // 32-bit scalars live in R600_Reg32, 4-element vectors in R600_Reg128.
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  // Must be called after all addRegisterClass() calls above.
  computeRegisterProperties();

  // Custom-lowered operations; the handlers are dispatched from
  // LowerOperation() below.
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  // f32 subtraction is expanded rather than custom-lowered.
  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // R600 is a VLIW machine, so prefer the VLIW scheduler.
  setSchedulingPreference(Sched::VLIW);
}
49
// Expand R600 pseudo instructions at the machine-instruction level.  Each
// handled case emits the real instruction sequence immediately before the
// pseudo; the pseudo itself is erased at the bottom of the function.  The
// only exception is MASK_WRITE, which returns early (it only tags the
// operand of another instruction).
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  // Insertion point: new instructions go right before the pseudo.
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  // Anything not listed here is handled by the shared AMDGPU inserter.
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  // Group/thread IDs are pre-loaded into fixed channels of T0/T1; mark the
  // physical register live-in and copy it into the pseudo's def.
  case AMDGPU::TGID_X:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X);
    break;
  case AMDGPU::TGID_Y:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y);
    break;
  case AMDGPU::TGID_Z:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z);
    break;
  case AMDGPU::TIDIG_X:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X);
    break;
  case AMDGPU::TIDIG_Y:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y);
    break;
  case AMDGPU::TIDIG_Z:
    addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z);
    break;
  // The remaining dispatch parameters are loaded from the implicit
  // parameter buffer; the last argument is the dword offset (0-8) into it.
  case AMDGPU::NGROUPS_X:
    lowerImplicitParameter(MI, *BB, MRI, 0);
    break;
  case AMDGPU::NGROUPS_Y:
    lowerImplicitParameter(MI, *BB, MRI, 1);
    break;
  case AMDGPU::NGROUPS_Z:
    lowerImplicitParameter(MI, *BB, MRI, 2);
    break;
  case AMDGPU::GLOBAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 3);
    break;
  case AMDGPU::GLOBAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 4);
    break;
  case AMDGPU::GLOBAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 5);
    break;
  case AMDGPU::LOCAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 6);
    break;
  case AMDGPU::LOCAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 7);
    break;
  case AMDGPU::LOCAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 8);
    break;

  // CLAMP/FABS/FNEG become a plain MOV whose source/dest operand carries
  // the corresponding R600 operand modifier flag.
  case AMDGPU::CLAMP_R600:
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1));
    break;

  case AMDGPU::FABS_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1));
    break;

  case AMDGPU::FNEG_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1));
    break;

  // Constant-register load: operand 1 is the index into the constant
  // register file; replace with a COPY from that physical register.
  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
                  .addOperand(MI->getOperand(0))
                  .addReg(ConstantReg);
      break;
    }

  // Shader input: operand 1 indexes the T-register holding the input.
  case AMDGPU::LOAD_INPUT:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      addLiveIn(MI, MF, MRI, TII,
                AMDGPU::R600_TReg32RegClass.getRegister(RegIndex));
      break;
    }

  // MASK_WRITE: tag the defining instruction of the masked register with
  // MO_FLAG_MASK instead of emitting any code.
  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
      def->addTargetFlag(MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

  // Cacheless RAT write (Evergreen): the address operand is in bytes, so
  // shift it right by 2 to get a dword address before re-emitting the
  // write with the converted address.
  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
                                             AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                              AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr);
      break;
    }

  // Shader output: copy the value into the indexed T-register and mark
  // that register live-out of the function.
  case AMDGPU::STORE_OUTPUT:
    {
      int64_t OutputIndex = MI->getOperand(1).getImm();
      unsigned OutputReg = AMDGPU::R600_TReg32RegClass.getRegister(OutputIndex);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY), OutputReg)
                  .addOperand(MI->getOperand(0));

      if (!MRI.isLiveOut(OutputReg)) {
        MRI.addLiveOut(OutputReg);
      }
      break;
    }

  // RESERVE_REG: record the indexed T-register in the function info so the
  // register allocator leaves it alone.  No code is emitted.
  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
                          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

  // Texture sample with derivatives: set the H and V gradients first, then
  // issue the gradient sample, keeping t0/t1 alive as implicit uses.
  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  // Shadow-comparison variant of TXD; identical shape but samples with
  // TEX_SAMPLE_C_G.
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }


  }

  // The pseudo has been fully replaced; remove it.
  MI->eraseFromParent();
  return BB;
}
253
// Replace an implicit-parameter pseudo (NGROUPS_*, GLOBAL_SIZE_*,
// LOCAL_SIZE_*) with a literal MOV of the parameter's byte offset
// (dword_offset * 4) followed by a VTX_READ_PARAM_i32_eg load from that
// address.  The pseudo itself is erased by the caller,
// EmitInstrWithCustomInserter().
void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
    MachineRegisterInfo & MRI, unsigned dword_offset) const
{
  MachineBasicBlock::iterator I = *MI;
  unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass);
  // Constrain the pseudo's def to the X-channel class used by the VTX read.
  MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass);

  // Materialize the byte offset of the parameter as an ALU literal.
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::MOV), PtrReg)
          .addReg(AMDGPU::ALU_LITERAL_X)
          .addImm(dword_offset * 4);

  // Read one dword from the parameter buffer at PtrReg + 0.
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
          .addOperand(MI->getOperand(0))
          .addReg(PtrReg)
          .addImm(0);
}
270
271//===----------------------------------------------------------------------===//
272// Custom DAG Lowering Operations
273//===----------------------------------------------------------------------===//
274
275
276SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
277{
278  switch (Op.getOpcode()) {
279  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
280  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
281  case ISD::ROTL: return LowerROTL(Op, DAG);
282  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
283  case ISD::SETCC: return LowerSETCC(Op, DAG);
284  }
285}
286
287SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
288{
289  SDValue Chain = Op.getOperand(0);
290  SDValue CC = Op.getOperand(1);
291  SDValue LHS   = Op.getOperand(2);
292  SDValue RHS   = Op.getOperand(3);
293  SDValue JumpT  = Op.getOperand(4);
294  SDValue CmpValue;
295  SDValue Result;
296  CmpValue = DAG.getNode(
297      ISD::SELECT_CC,
298      Op.getDebugLoc(),
299      MVT::i32,
300      LHS, RHS,
301      DAG.getConstant(-1, MVT::i32),
302      DAG.getConstant(0, MVT::i32),
303      CC);
304  Result = DAG.getNode(
305      AMDGPUISD::BRANCH_COND,
306      CmpValue.getDebugLoc(),
307      MVT::Other, Chain,
308      JumpT, CmpValue);
309  return Result;
310}
311
312
313SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
314{
315  DebugLoc DL = Op.getDebugLoc();
316  EVT VT = Op.getValueType();
317
318  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
319                     Op.getOperand(0),
320                     Op.getOperand(0),
321                     DAG.getNode(ISD::SUB, DL, VT,
322                                 DAG.getConstant(32, MVT::i32),
323                                 Op.getOperand(1)));
324}
325
326SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
327{
328  DebugLoc DL = Op.getDebugLoc();
329  EVT VT = Op.getValueType();
330
331  SDValue LHS = Op.getOperand(0);
332  SDValue RHS = Op.getOperand(1);
333  SDValue True = Op.getOperand(2);
334  SDValue False = Op.getOperand(3);
335  SDValue CC = Op.getOperand(4);
336  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
337  SDValue Temp;
338
339  // LHS and RHS are guaranteed to be the same value type
340  EVT CompareVT = LHS.getValueType();
341
342  // We need all the operands of SELECT_CC to have the same value type, so if
343  // necessary we need to convert LHS and RHS to be the same type True and
344  // False.  True and False are guaranteed to have the same type as this
345  // SELECT_CC node.
346
347  if (CompareVT !=  VT) {
348    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
349    if (VT == MVT::f32 && CompareVT == MVT::i32) {
350      if (isUnsignedIntSetCC(CCOpcode)) {
351        ConversionOp = ISD::UINT_TO_FP;
352      } else {
353        ConversionOp = ISD::SINT_TO_FP;
354      }
355    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
356      ConversionOp = ISD::FP_TO_SINT;
357    } else {
358      // I don't think there will be any other type pairings.
359      assert(!"Unhandled operand type parings in SELECT_CC");
360    }
361    // XXX Check the value of LHS and RHS and avoid creating sequences like
362    // (FTOI (ITOF))
363    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
364    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
365  }
366
367  // If True is a hardware TRUE value and False is a hardware FALSE value or
368  // vice-versa we can handle this with a native instruction (SET* instructions).
369  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
370    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
371  }
372
373  // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
374  // we can handle this with a native instruction, but we need to swap true
375  // and false and change the conditional.
376  if (isHWTrueValue(False) && isHWFalseValue(True)) {
377  }
378
379  // XXX Check if we can lower this to a SELECT or if it is supported by a native
380  // operation. (The code below does this but we don't have the Instruction
381  // selection patterns to do this yet.
382#if 0
383  if (isZero(LHS) || isZero(RHS)) {
384    SDValue Cond = (isZero(LHS) ? RHS : LHS);
385    bool SwapTF = false;
386    switch (CCOpcode) {
387    case ISD::SETOEQ:
388    case ISD::SETUEQ:
389    case ISD::SETEQ:
390      SwapTF = true;
391      // Fall through
392    case ISD::SETONE:
393    case ISD::SETUNE:
394    case ISD::SETNE:
395      // We can lower to select
396      if (SwapTF) {
397        Temp = True;
398        True = False;
399        False = Temp;
400      }
401      // CNDE
402      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
403    default:
404      // Supported by a native operation (CNDGE, CNDGT)
405      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
406    }
407  }
408#endif
409
410  // If we make it this for it means we have no native instructions to handle
411  // this SELECT_CC, so we must lower it.
412  SDValue HWTrue, HWFalse;
413
414  if (VT == MVT::f32) {
415    HWTrue = DAG.getConstantFP(1.0f, VT);
416    HWFalse = DAG.getConstantFP(0.0f, VT);
417  } else if (VT == MVT::i32) {
418    HWTrue = DAG.getConstant(-1, VT);
419    HWFalse = DAG.getConstant(0, VT);
420  }
421  else {
422    assert(!"Unhandled value type in LowerSELECT_CC");
423  }
424
425  // Lower this unsupported SELECT_CC into a combination of two supported
426  // SELECT_CC operations.
427  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
428
429  // Convert floating point condition to i1
430  if (VT == MVT::f32) {
431    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
432                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
433  }
434
435  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
436}
437
438SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
439{
440  SDValue Cond;
441  SDValue LHS = Op.getOperand(0);
442  SDValue RHS = Op.getOperand(1);
443  SDValue CC  = Op.getOperand(2);
444  DebugLoc DL = Op.getDebugLoc();
445  assert(Op.getValueType() == MVT::i32);
446  Cond = DAG.getNode(
447      ISD::SELECT_CC,
448      Op.getDebugLoc(),
449      MVT::i32,
450      LHS, RHS,
451      DAG.getConstant(-1, MVT::i32),
452      DAG.getConstant(0, MVT::i32),
453      CC);
454  Cond = DAG.getNode(
455      ISD::AND,
456      DL,
457      MVT::i32,
458      DAG.getConstant(1, MVT::i32),
459      Cond);
460  return Cond;
461}
462