R600ISelLowering.cpp revision 228a6641ccddaf24a993f827af1e97379785985a
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
11// is mostly EmitInstrWithCustomInserter().
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600ISelLowering.h"
16#include "R600Defines.h"
17#include "R600InstrInfo.h"
18#include "R600MachineFunctionInfo.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/CodeGen/SelectionDAG.h"
22
23using namespace llvm;
24
25R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
26    AMDGPUTargetLowering(TM),
27    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
28{
29  setOperationAction(ISD::MUL, MVT::i64, Expand);
30  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
31  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
32  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
33  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
34  computeRegisterProperties();
35
36  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
37
38  setOperationAction(ISD::FSUB, MVT::f32, Expand);
39
40  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
41  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
42
43  setOperationAction(ISD::ROTL, MVT::i32, Custom);
44
45  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
46  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
47
48  setOperationAction(ISD::SETCC, MVT::i32, Custom);
49
50  setSchedulingPreference(Sched::VLIW);
51}
52
53MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
54    MachineInstr * MI, MachineBasicBlock * BB) const
55{
56  MachineFunction * MF = BB->getParent();
57  MachineRegisterInfo &MRI = MF->getRegInfo();
58  MachineBasicBlock::iterator I = *MI;
59
60  switch (MI->getOpcode()) {
61  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
62  case AMDGPU::CLAMP_R600:
63    {
64      MachineInstr *NewMI =
65        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
66               .addOperand(MI->getOperand(0))
67               .addOperand(MI->getOperand(1))
68               .addImm(0) // Flags
69               .addReg(AMDGPU::PRED_SEL_OFF);
70      TII->AddFlag(NewMI, 0, MO_FLAG_CLAMP);
71      break;
72    }
73  case AMDGPU::FABS_R600:
74    {
75      MachineInstr *NewMI =
76        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
77               .addOperand(MI->getOperand(0))
78               .addOperand(MI->getOperand(1))
79               .addImm(0) // Flags
80               .addReg(AMDGPU::PRED_SEL_OFF);
81      TII->AddFlag(NewMI, 1, MO_FLAG_ABS);
82      break;
83    }
84
85  case AMDGPU::FNEG_R600:
86    {
87      MachineInstr *NewMI =
88        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
89                .addOperand(MI->getOperand(0))
90                .addOperand(MI->getOperand(1))
91                .addImm(0) // Flags
92                .addReg(AMDGPU::PRED_SEL_OFF);
93      TII->AddFlag(NewMI, 1, MO_FLAG_NEG);
94    break;
95    }
96
97  case AMDGPU::R600_LOAD_CONST:
98    {
99      int64_t RegIndex = MI->getOperand(1).getImm();
100      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
101      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
102                  .addOperand(MI->getOperand(0))
103                  .addReg(ConstantReg);
104      break;
105    }
106
107  case AMDGPU::MASK_WRITE:
108    {
109      unsigned maskedRegister = MI->getOperand(0).getReg();
110      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
111      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
112      TII->AddFlag(defInstr, 0, MO_FLAG_MASK);
113      // Return early so the instruction is not erased
114      return BB;
115    }
116
117  case AMDGPU::RAT_WRITE_CACHELESS_eg:
118    {
119      // Convert to DWORD address
120      unsigned NewAddr = MRI.createVirtualRegister(
121                                             &AMDGPU::R600_TReg32_XRegClass);
122      unsigned ShiftValue = MRI.createVirtualRegister(
123                                              &AMDGPU::R600_TReg32RegClass);
124
125      // XXX In theory, we should be able to pass ShiftValue directly to
126      // the LSHR_eg instruction as an inline literal, but I tried doing it
127      // this way and it didn't produce the correct results.
128      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
129              .addReg(AMDGPU::ALU_LITERAL_X)
130              .addReg(AMDGPU::PRED_SEL_OFF)
131              .addImm(2);
132      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
133              .addOperand(MI->getOperand(1))
134              .addReg(ShiftValue)
135              .addReg(AMDGPU::PRED_SEL_OFF);
136      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
137              .addOperand(MI->getOperand(0))
138              .addReg(NewAddr);
139      break;
140    }
141
142  case AMDGPU::RESERVE_REG:
143    {
144      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
145      int64_t ReservedIndex = MI->getOperand(0).getImm();
146      unsigned ReservedReg =
147                          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
148      MFI->ReservedRegs.push_back(ReservedReg);
149      break;
150    }
151
152  case AMDGPU::TXD:
153    {
154      unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
155      unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
156
157      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
158              .addOperand(MI->getOperand(3))
159              .addOperand(MI->getOperand(4))
160              .addOperand(MI->getOperand(5));
161      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
162              .addOperand(MI->getOperand(2))
163              .addOperand(MI->getOperand(4))
164              .addOperand(MI->getOperand(5));
165      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
166              .addOperand(MI->getOperand(0))
167              .addOperand(MI->getOperand(1))
168              .addOperand(MI->getOperand(4))
169              .addOperand(MI->getOperand(5))
170              .addReg(t0, RegState::Implicit)
171              .addReg(t1, RegState::Implicit);
172      break;
173    }
174  case AMDGPU::TXD_SHADOW:
175    {
176      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
177      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
178
179      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
180              .addOperand(MI->getOperand(3))
181              .addOperand(MI->getOperand(4))
182              .addOperand(MI->getOperand(5));
183      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
184              .addOperand(MI->getOperand(2))
185              .addOperand(MI->getOperand(4))
186              .addOperand(MI->getOperand(5));
187      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
188              .addOperand(MI->getOperand(0))
189              .addOperand(MI->getOperand(1))
190              .addOperand(MI->getOperand(4))
191              .addOperand(MI->getOperand(5))
192              .addReg(t0, RegState::Implicit)
193              .addReg(t1, RegState::Implicit);
194      break;
195    }
196  case AMDGPU::BRANCH:
197      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
198              .addOperand(MI->getOperand(0))
199              .addReg(0);
200      break;
201  case AMDGPU::BRANCH_COND_f32:
202    {
203      MachineInstr *NewMI =
204        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
205                .addReg(AMDGPU::PREDICATE_BIT)
206                .addOperand(MI->getOperand(1))
207                .addImm(OPCODE_IS_ZERO)
208                .addImm(0); // Flags
209      TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
210      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
211              .addOperand(MI->getOperand(0))
212              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
213      break;
214    }
215  case AMDGPU::BRANCH_COND_i32:
216    {
217      MachineInstr *NewMI =
218        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
219              .addReg(AMDGPU::PREDICATE_BIT)
220              .addOperand(MI->getOperand(1))
221              .addImm(OPCODE_IS_ZERO_INT)
222              .addImm(0); // Flags
223      TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
224      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
225             .addOperand(MI->getOperand(0))
226              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
227      break;
228    }
229  }
230
231  MI->eraseFromParent();
232  return BB;
233}
234
235//===----------------------------------------------------------------------===//
236// Custom DAG Lowering Operations
237//===----------------------------------------------------------------------===//
238
239using namespace llvm::Intrinsic;
240using namespace llvm::AMDGPUIntrinsic;
241
// Dispatch custom-lowered DAG nodes (registered in the constructor) to their
// lowering helpers.  Intrinsics are handled inline: AMDGPU_store_output is
// turned into a CopyToReg of a live-out register, and the R600 read intrinsics
// become live-in registers or implicit-parameter loads.  Unhandled intrinsic
// cases fall through and return an empty SDValue.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Copy the value into the output register and mark it live-out so it
      // survives until the end of the function.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Shader inputs arrive in T registers; expose the requested one as a
      // live-in virtual register.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    // Grid/group dimensions are passed as implicit kernel parameters; the
    // constant is the dword offset into the implicit parameter buffer.
    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Group IDs live in T1 and thread IDs in T0 on function entry.
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
328
329SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
330{
331  SDValue Chain = Op.getOperand(0);
332  SDValue CC = Op.getOperand(1);
333  SDValue LHS   = Op.getOperand(2);
334  SDValue RHS   = Op.getOperand(3);
335  SDValue JumpT  = Op.getOperand(4);
336  SDValue CmpValue;
337  SDValue Result;
338  CmpValue = DAG.getNode(
339      ISD::SELECT_CC,
340      Op.getDebugLoc(),
341      MVT::i32,
342      LHS, RHS,
343      DAG.getConstant(-1, MVT::i32),
344      DAG.getConstant(0, MVT::i32),
345      CC);
346  Result = DAG.getNode(
347      AMDGPUISD::BRANCH_COND,
348      CmpValue.getDebugLoc(),
349      MVT::Other, Chain,
350      JumpT, CmpValue);
351  return Result;
352}
353
354SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
355                                                   DebugLoc DL,
356                                                   unsigned DwordOffset) const
357{
358  unsigned ByteOffset = DwordOffset * 4;
359  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
360                                      AMDGPUAS::PARAM_I_ADDRESS);
361
362  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
363  assert(isInt<16>(ByteOffset));
364
365  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
366                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
367                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
368                     false, false, false, 0);
369}
370
371SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
372{
373  DebugLoc DL = Op.getDebugLoc();
374  EVT VT = Op.getValueType();
375
376  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
377                     Op.getOperand(0),
378                     Op.getOperand(0),
379                     DAG.getNode(ISD::SUB, DL, VT,
380                                 DAG.getConstant(32, MVT::i32),
381                                 Op.getOperand(1)));
382}
383
// Lower SELECT_CC for f32/i32.  If True/False already match the hardware's
// native true (-1 / 1.0f) and false (0) values, the node is emitted directly
// so instruction selection can use the SET* instructions.  Otherwise the
// condition is first evaluated into a hardware true/false value and the
// result is selected from True/False with a plain SELECT.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type True and
  // False.  True and False are guaranteed to have the same type as this
  // SELECT_CC node.

  if (CompareVT !=  VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      // Choose the int->float conversion matching the comparison's signedness.
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      // NOTE(review): in a release build (asserts off) ConversionOp would
      // stay DELETED_NODE here and the getNode calls below would be invalid.
      assert(!"Unhandled operand type parings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value or
  // vice-versa we can handle this with a native instruction (SET* instructions).
  if ((isHWTrueValue(True) && isHWFalseValue(False))) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
  // we can handle this with a native instruction, but we need to swap true
  // and false and change the conditional.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    // Intentionally empty: optimization not implemented yet (see XXX above).
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a native
  // operation. (The code below does this but we don't have the Instruction
  // selection patterns to do this yet.
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  // Convert floating point condition to i1
  // (negating 1.0f gives -1.0f, which FP_TO_SINT turns into the all-ones
  // integer mask SELECT expects for "true").
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}
495
496SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
497{
498  SDValue Cond;
499  SDValue LHS = Op.getOperand(0);
500  SDValue RHS = Op.getOperand(1);
501  SDValue CC  = Op.getOperand(2);
502  DebugLoc DL = Op.getDebugLoc();
503  assert(Op.getValueType() == MVT::i32);
504  Cond = DAG.getNode(
505      ISD::SELECT_CC,
506      Op.getDebugLoc(),
507      MVT::i32,
508      LHS, RHS,
509      DAG.getConstant(-1, MVT::i32),
510      DAG.getConstant(0, MVT::i32),
511      CC);
512  Cond = DAG.getNode(
513      ISD::AND,
514      DL,
515      MVT::i32,
516      DAG.getConstant(1, MVT::i32),
517      Cond);
518  return Cond;
519}
520