R600ISelLowering.cpp revision 0eca5fd919b0a31ea926b5f5072e5e56f7a55269
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
// is mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
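  // The implicit kernel parameters are fetched with VTX reads (see
  // lowerImplicitParameter below); the dword offsets 0-8 passed here select
  // ngroups.{x,y,z}, global_size.{x,y,z} and local_size.{x,y,z} in that order.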
  case AMDGPU::NGROUPS_X:
    lowerImplicitParameter(MI, *BB, MRI, 0);
    break;
  case AMDGPU::NGROUPS_Y:
    lowerImplicitParameter(MI, *BB, MRI, 1);
    break;
  case AMDGPU::NGROUPS_Z:
    lowerImplicitParameter(MI, *BB, MRI, 2);
    break;
  case AMDGPU::GLOBAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 3);
    break;
  case AMDGPU::GLOBAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 4);
    break;
  case AMDGPU::GLOBAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 5);
    break;
  case AMDGPU::LOCAL_SIZE_X:
    lowerImplicitParameter(MI, *BB, MRI, 6);
    break;
  case AMDGPU::LOCAL_SIZE_Y:
    lowerImplicitParameter(MI, *BB, MRI, 7);
    break;
  case AMDGPU::LOCAL_SIZE_Z:
    lowerImplicitParameter(MI, *BB, MRI, 8);
    break;

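  // CLAMP, FABS and FNEG have no instructions of their own; each becomes a
  // MOV whose destination or source operand carries a target flag that is
  // later turned into the corresponding clamp/abs/neg modifier during code
  // emission.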
  case AMDGPU::CLAMP_R600:
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FABS_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FNEG_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
                  .addOperand(MI->getOperand(0))
                  .addReg(ConstantReg);
      break;
    }

  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
      def->addTargetFlag(MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
                                             AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                              AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addReg(AMDGPU::PRED_SEL_OFF)
              .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue)
              .addReg(AMDGPU::PRED_SEL_OFF);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr);
      break;
    }

  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
                          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

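  // TXD and TXD_SHADOW (texture sampling with explicit derivatives) expand to
  // three instructions: load the horizontal and vertical gradients, then issue
  // the gradient sample.  t0 and t1 are added as implicit uses of the sample,
  // presumably to keep the gradient loads from being treated as dead.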
  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::BRANCH:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(0);
      break;
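  // For conditional branches, PRED_X tests the condition (e.g. the -1/0 mask
  // built by LowerBR_CC below) against zero and defines PREDICATE_BIT, and
  // the JUMP to the target block is predicated on that bit.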
  case AMDGPU::BRANCH_COND_f32:
    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
            .addReg(AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_ZERO);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  case AMDGPU::BRANCH_COND_i32:
    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
            .addReg(AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_ZERO_INT);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  MI->eraseFromParent();
  return BB;
}

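// Replace MI with a COPY of ZERO into a pointer register followed by a
// VTX_READ_PARAM of one dword from the implicit kernel-parameter area at
// ByteOffset.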
void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
    MachineRegisterInfo & MRI, unsigned dword_offset) const
{
  unsigned ByteOffset = dword_offset * 4;

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  MachineBasicBlock::iterator I = *MI;
  unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass);
  MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::COPY), PtrReg)
          .addReg(AMDGPU::ZERO);

  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
          .addOperand(MI->getOperand(0))
          .addReg(PtrReg)
          .addImm(ByteOffset);
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

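// Note on the intrinsics handled below: R600_load_input and the tgid/tidig
// reads all lower to live-in register copies; group ids arrive in T1.{x,y,z}
// and per-group thread ids in T0.{x,y,z}.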
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

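// BR_CC is lowered in two steps: a SELECT_CC materializes the comparison as
// an all-ones/zero i32 mask, and an AMDGPUISD::BRANCH_COND node consumes
// that mask (see the BRANCH_COND_* cases in EmitInstrWithCustomInserter).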
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS   = Op.getOperand(2);
  SDValue RHS   = Op.getOperand(3);
  SDValue JumpT  = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}
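// ROTL is expressed with BITALIGN, which presumably maps to the hardware
// funnel-shift that selects 32 bits out of the 64-bit concatenation
// (src0:src1) at a given bit offset, so rotl(x, s) == bitalign(x, x, 32 - s).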
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

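// SELECT_CC lowering: unify the operand types, use a native SET* instruction
// when True/False already are the hardware true/false values, and otherwise
// materialize the condition with one SELECT_CC and feed it to a SELECT.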
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we convert LHS and RHS to the type of True and False.  True and
  // False are guaranteed to have the same type as this SELECT_CC node.

  if (CompareVT != VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type pairings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is the hardware TRUE value and False is the hardware FALSE value,
  // we can handle this with a native instruction (SET* instructions).
  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is the hardware FALSE value and False is the hardware TRUE
  // value, we could handle this with a native instruction as well, but we
  // would need to swap True and False and invert the condition.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a
  // native operation.  (The code below does this, but we don't have the
  // instruction selection patterns to do this yet.)
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of a supported
  // SELECT_CC (which produces a hardware boolean) and a SELECT.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  // Convert a floating point condition to an integer mask: FNEG turns the
  // hardware TRUE value 1.0f into -1.0f, and FP_TO_SINT then yields -1 or 0.
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

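// SETCC must produce 0 or 1, but the hardware comparison yields an all-ones
// mask, so the SELECT_CC result (-1 or 0) is masked down with AND 1.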
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC  = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      DL,
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}