R600ISelLowering.cpp revision 3a7a56e7aa56bc6cb847c241ef6bd749713ae6e1
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
11// is mostly EmitInstrWithCustomInserter().
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600InstrInfo.h"
17#include "R600MachineFunctionInfo.h"
18#include "llvm/CodeGen/MachineInstrBuilder.h"
19#include "llvm/CodeGen/MachineRegisterInfo.h"
20#include "llvm/CodeGen/SelectionDAG.h"
21
22using namespace llvm;
23
24R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
25    AMDGPUTargetLowering(TM),
26    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
27{
28  setOperationAction(ISD::MUL, MVT::i64, Expand);
29  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
30  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
31  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
32  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
33  computeRegisterProperties();
34
35  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
36
37  setOperationAction(ISD::FSUB, MVT::f32, Expand);
38
39  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
40  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
41
42  setOperationAction(ISD::ROTL, MVT::i32, Custom);
43
44  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
45  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
46
47  setOperationAction(ISD::SETCC, MVT::i32, Custom);
48
49  setSchedulingPreference(Sched::VLIW);
50}
51
/// Expand target pseudo-instructions that were flagged for custom insertion.
/// Every case below replaces MI with real machine instructions (the pseudo is
/// erased at the bottom), except MASK_WRITE which only annotates an existing
/// instruction and returns early.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600:
    {
      // Lower to a MOV with the clamp flag set on operand 0 (the def).
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
               .addOperand(MI->getOperand(0))
               .addOperand(MI->getOperand(1))
               .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 0, MO_FLAG_CLAMP);
      break;
    }
  case AMDGPU::FABS_R600:
    {
      // Lower to a MOV with the absolute-value flag set on operand 1 (the src).
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
               .addOperand(MI->getOperand(0))
               .addOperand(MI->getOperand(1))
               .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 1, MO_FLAG_ABS);
      break;
    }

  case AMDGPU::FNEG_R600:
    {
      // Lower to a MOV with the negate flag set on operand 1 (the src).
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
                .addOperand(MI->getOperand(0))
                .addOperand(MI->getOperand(1))
                .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 1, MO_FLAG_NEG);
    break;
    }

  case AMDGPU::R600_LOAD_CONST:
    {
      // Operand 1 is an immediate index into the constant register file;
      // turn the load into a COPY from the corresponding C-register.
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
                  .addOperand(MI->getOperand(0))
                  .addReg(ConstantReg);
      break;
    }

  case AMDGPU::MASK_WRITE:
    {
      // Mark the defining instruction of the masked register instead of
      // emitting anything ourselves.
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      TII->AddFlag(defInstr, 0, MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address: shift the byte address in operand 1 right
      // by 2 before re-emitting the RAT write with the new address.
      unsigned NewAddr = MRI.createVirtualRegister(
                                             AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                              AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addReg(AMDGPU::PRED_SEL_OFF)
              .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue)
              .addReg(AMDGPU::PRED_SEL_OFF);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr);
      break;
    }

  case AMDGPU::RESERVE_REG:
    {
      // Record the T-register named by the immediate as reserved for this
      // function; no machine code is generated.
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
                          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

  case AMDGPU::TXD:
    {
      // Texture sample with explicit derivatives: set the H and V gradients
      // first, then issue the sample; t0/t1 are implicit uses so the
      // scheduler keeps the gradient writes before the sample.
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      // Shadow-map variant of TXD: identical expansion except the sample
      // uses the depth-compare opcode TEX_SAMPLE_C_G.
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::BRANCH:
      // Unconditional branch: JUMP with no predicate register (0).
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(0);
      break;
  case AMDGPU::BRANCH_COND_f32:
    {
      // Predicate on "condition == 0.0f" (the branch-cond value is a mask,
      // see LowerBR_CC), push the predicate, then emit a predicated JUMP.
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
                .addReg(AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_ZERO);
      TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  case AMDGPU::BRANCH_COND_i32:
    {
      // Same as the f32 case but with the integer zero-compare opcode.
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
              .addReg(AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_ZERO_INT);
      TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
             .addOperand(MI->getOperand(0))
              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  }

  // The pseudo has been fully replaced; remove it.
  MI->eraseFromParent();
  return BB;
}
228
229//===----------------------------------------------------------------------===//
230// Custom DAG Lowering Operations
231//===----------------------------------------------------------------------===//
232
233using namespace llvm::Intrinsic;
234using namespace llvm::AMDGPUIntrinsic;
235
/// Custom-lower the DAG nodes this target marked Custom in the constructor.
/// Simple nodes dispatch to dedicated Lower* helpers; intrinsic nodes are
/// handled inline by intrinsic ID.  Falling off the end of an intrinsic
/// switch returns an empty SDValue, which tells the legalizer we did not
/// handle the node.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    // For chained intrinsics the ID is operand 1 (operand 0 is the chain).
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Copy the value into the export T-register selected by the constant
      // index in operand 3, and mark that register live-out of the function.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    // For chainless intrinsics the ID is operand 0.
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs arrive pre-loaded in T-registers; expose the one at
      // the requested index as a live-in vreg.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    // Grid/group geometry queries are loads from the implicit-parameter
    // buffer at fixed dword offsets (see LowerImplicitParameter).
    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Group IDs live in T1.{X,Y,Z}, thread IDs in T0.{X,Y,Z}; expose the
    // appropriate physical register as a live-in.
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
322
323SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
324{
325  SDValue Chain = Op.getOperand(0);
326  SDValue CC = Op.getOperand(1);
327  SDValue LHS   = Op.getOperand(2);
328  SDValue RHS   = Op.getOperand(3);
329  SDValue JumpT  = Op.getOperand(4);
330  SDValue CmpValue;
331  SDValue Result;
332  CmpValue = DAG.getNode(
333      ISD::SELECT_CC,
334      Op.getDebugLoc(),
335      MVT::i32,
336      LHS, RHS,
337      DAG.getConstant(-1, MVT::i32),
338      DAG.getConstant(0, MVT::i32),
339      CC);
340  Result = DAG.getNode(
341      AMDGPUISD::BRANCH_COND,
342      CmpValue.getDebugLoc(),
343      MVT::Other, Chain,
344      JumpT, CmpValue);
345  return Result;
346}
347
348SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
349                                                   DebugLoc DL,
350                                                   unsigned DwordOffset) const
351{
352  unsigned ByteOffset = DwordOffset * 4;
353  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
354                                      AMDGPUAS::PARAM_I_ADDRESS);
355
356  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
357  assert(isInt<16>(ByteOffset));
358
359  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
360                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
361                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
362                     false, false, false, 0);
363}
364
365SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
366{
367  DebugLoc DL = Op.getDebugLoc();
368  EVT VT = Op.getValueType();
369
370  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
371                     Op.getOperand(0),
372                     Op.getOperand(0),
373                     DAG.getNode(ISD::SUB, DL, VT,
374                                 DAG.getConstant(32, MVT::i32),
375                                 Op.getOperand(1)));
376}
377
/// Lower ISD::SELECT_CC.  Tries, in order: (1) make the compare type match
/// the result type, (2) use a native SET* instruction when True/False are the
/// hardware true/false values, and otherwise (3) synthesize the result from a
/// supported SELECT_CC plus a SELECT.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;  // Only used by the disabled (#if 0) lowering below.

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.

  if (CompareVT !=  VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      // Choose signedness of the int->fp conversion from the condition code.
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      // NOTE(review): in a release build (assert compiled out) ConversionOp
      // stays DELETED_NODE and is still used below — verify unreachable.
      assert(!"Unhandled operand type parings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value or
  // vice-versa we can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
  // we can handle this with a native instruction, but we need to swap true
  // and false and change the conditional.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    // Intentionally empty: the swapped-operand optimization described above
    // is not implemented yet.
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a native
  // operation. (The code below does this but we don't have the Instruction
  // selection patterns to do this yet.
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this for it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  // Hardware "true" is all-ones for i32 and 1.0 for f32; "false" is zero.
  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  // Convert floating point condition to i1
  // (f32 "true" is 1.0, so -1.0 converts to the integer -1 mask).
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}
489
490SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
491{
492  SDValue Cond;
493  SDValue LHS = Op.getOperand(0);
494  SDValue RHS = Op.getOperand(1);
495  SDValue CC  = Op.getOperand(2);
496  DebugLoc DL = Op.getDebugLoc();
497  assert(Op.getValueType() == MVT::i32);
498  Cond = DAG.getNode(
499      ISD::SELECT_CC,
500      Op.getDebugLoc(),
501      MVT::i32,
502      LHS, RHS,
503      DAG.getConstant(-1, MVT::i32),
504      DAG.getConstant(0, MVT::i32),
505      CC);
506  Cond = DAG.getNode(
507      ISD::AND,
508      DL,
509      MVT::i32,
510      DAG.getConstant(1, MVT::i32),
511      Cond);
512  return Cond;
513}
514