//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
// mostly implements EmitInstrWithCustomInserter() and custom lowering for a
// few R600-specific operations.
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

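// Set up the register classes for the supported value types and mark the
// operations that need to be expanded or custom lowered.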
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}

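// Expand pseudo instructions that the instruction selector emits (clamp,
// abs and neg modifiers, constant loads, texture sampling with gradients,
// branches, etc.) into real R600 instructions.  Unless a case returns
// early, the original pseudo instruction is erased once it has been
// replaced.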
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
               .addOperand(MI->getOperand(0))
               .addOperand(MI->getOperand(1))
               .addImm(0) // Flags
               .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
      break;
    }
  case AMDGPU::FABS_R600:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
               .addOperand(MI->getOperand(0))
               .addOperand(MI->getOperand(1))
               .addImm(0) // Flags
               .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 1, MO_FLAG_ABS);
      break;
    }

  case AMDGPU::FNEG_R600:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
               .addOperand(MI->getOperand(0))
               .addOperand(MI->getOperand(1))
               .addImm(0) // Flags
               .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 1, MO_FLAG_NEG);
      break;
    }

  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
                  .addOperand(MI->getOperand(0))
                  .addReg(ConstantReg);
      break;
    }

  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      TII->addFlag(defInstr, 0, MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
                                             &AMDGPU::R600_TReg32_XRegClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                              &AMDGPU::R600_TReg32RegClass);
      unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV_IMM_I32),
              ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addReg(AMDGPU::PRED_SEL_OFF)
              .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue)
              .addReg(AMDGPU::PRED_SEL_OFF);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr)
              .addImm(EOP); // Set End of program bit
      break;
    }

  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
                          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

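  // TXD and TXD_SHADOW sample a texture using user-supplied gradients.
  // The horizontal and vertical gradients are loaded first, then the
  // gradient sample is issued.  The implicit uses of t0 and t1 tie the
  // gradient writes to the sample so they are not reordered or removed.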
  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
      unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
      unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::BRANCH:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(0);
      break;
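  // Conditional branches are emitted in two steps: a PRED_X that evaluates
  // the condition into the predicate bit, followed by a JUMP predicated on
  // that bit.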
  case AMDGPU::BRANCH_COND_f32:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
                .addReg(AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_ZERO)
                .addImm(0); // Flags
      TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  case AMDGPU::BRANCH_COND_i32:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
                .addReg(AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_ZERO_INT)
                .addImm(0); // Flags
      TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

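// Dispatch the operations that were marked Custom in the constructor.
// Anything not handled here falls through to AMDGPUTargetLowering.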
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

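// Lower BR_CC by materializing the condition with a SELECT_CC that produces
// -1 for true and 0 for false, then branching on that value with
// AMDGPUISD::BRANCH_COND.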
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

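// Implicit kernel parameters (work group counts, global and local sizes)
// live at fixed dword offsets in the PARAM_I address space, so reading one
// lowers to a constant-address load.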
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const
{
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                      AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

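// Lower ROTL using the BITALIGN operation: rotl(x, y) is equivalent to
// bitalign(x, x, 32 - y).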
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

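// Lower SELECT_CC into a form the hardware can select: the compare operands
// are converted to the result type if necessary, and any SELECT_CC whose
// true/false values are not the hardware TRUE/FALSE constants is split into
// a supported SELECT_CC followed by a SELECT.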
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we convert LHS and RHS to the same type as True and False.
  // True and False are guaranteed to have the same type as this SELECT_CC
  // node.

  if (CompareVT != VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type pairings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value, we
  // can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
  // we could still handle this with a native instruction, but we would need
  // to swap True and False and invert the condition.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a
  // native operation. (The code below does this, but we don't have the
  // instruction selection patterns to do it yet.)
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of a supported
  // SELECT_CC and a SELECT.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue,
                             HWFalse, CC);

  // Convert the floating point condition to an integer (-1 for true,
  // 0 for false).
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

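// Lower SETCC by emitting a SELECT_CC that produces -1 for true and 0 for
// false, then masking the result down to the 0 or 1 that SETCC is expected
// to produce.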
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC  = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      DL,
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
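  // The SELECT_CC above produces -1 (all bits set) for true; AND with 1 to
  // narrow the result to 0 or 1.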
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}
