R600ISelLowering.cpp revision 5f82d1924831da7467bfe8025ca18e98b9548ca4
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
// consists mostly of EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
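  // CLAMP, FABS and FNEG are pseudo instructions: the R600 ISA expresses
  // clamping as a destination modifier and abs/neg as source modifiers, so
  // each pseudo lowers to a plain MOV with the corresponding target flag set
  // on the relevant operand.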
  case AMDGPU::CLAMP_R600:
    MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FABS_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

  case AMDGPU::FNEG_R600:
    MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
           .addOperand(MI->getOperand(0))
           .addOperand(MI->getOperand(1))
           .addReg(AMDGPU::PRED_SEL_OFF);
    break;

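  // R600_LOAD_CONST carries an index into the constant register file;
  // materialize it as a COPY from the corresponding C-register.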
  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
                  .addOperand(MI->getOperand(0))
                  .addReg(ConstantReg);
      break;
    }

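  // MASK_WRITE suppresses the write of its operand's register: instead of
  // emitting any code, flag the instruction that defines the masked register
  // with MO_FLAG_MASK.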
  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
      def->addTargetFlag(MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

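  // Global stores on Evergreen go through a RAT (Random Access Target)
  // instruction, which expects its address in 32-bit words rather than
  // bytes, so the byte address must be shifted right by two first.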
  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
                                             AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                              AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addReg(AMDGPU::PRED_SEL_OFF)
              .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue)
              .addReg(AMDGPU::PRED_SEL_OFF);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr);
      break;
    }

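  // RESERVE_REG records a T-register in R600MachineFunctionInfo so later
  // passes can keep it out of the allocatable set (presumably because the
  // hardware pre-loads it, e.g. with thread or group IDs as below).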
  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
                          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

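  // TXD and TXD_SHADOW sample a texture with user-supplied derivatives: the
  // horizontal and vertical gradients are loaded with TEX_SET_GRADIENTS_H/V,
  // and the sample instruction takes implicit uses of both results so the
  // three instructions stay together.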
  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
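  // Branches: an unconditional BRANCH becomes an unpredicated JUMP, while a
  // conditional branch becomes a PRED_X that computes PREDICATE_BIT from the
  // condition value, followed by a JUMP predicated on that bit.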
  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(0);
    break;
  case AMDGPU::BRANCH_COND_f32:
    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
            .addReg(AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_ZERO);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  case AMDGPU::BRANCH_COND_i32:
    MI->getOperand(1).addTargetFlag(MO_FLAG_PUSH);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
            .addReg(AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_ZERO_INT);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

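// BR_CC has no direct hardware equivalent, so lower it to a SELECT_CC that
// produces an all-ones/zero mask and branch on that value with BRANCH_COND.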
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS   = Op.getOperand(2);
  SDValue RHS   = Op.getOperand(3);
  SDValue JumpT  = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

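// Implicit kernel parameters (ngroups, global size, local size) are
// presumably written by the runtime into the implicit parameter constant
// space (PARAM_I_ADDRESS) at fixed dword offsets; load the requested one
// from there.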
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const
{
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                      AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

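// ROTL lowers to BITALIGN, which is expected to behave like the hardware
// bit-align op: select 32 bits from the 64-bit concatenation src0:src1
// shifted right by src2. With both sources equal to x, a shift amount of
// (32 - n) is a left rotate by n.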
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we convert LHS and RHS to the same type as True and False.
  // True and False are guaranteed to have the same type as this SELECT_CC
  // node.

  if (CompareVT != VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type pairings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value, we
  // can handle this with a native SET* instruction.
  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
  // we could also handle this with a native instruction, but we would need to
  // swap True and False and invert the condition.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a
  // native operation. (The code below does this, but we don't have the
  // instruction selection patterns to do it yet.)
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a SELECT_CC that produces a
  // hardware boolean, followed by a SELECT on that boolean.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  // Convert the floating-point condition (1.0f / 0.0f) to an integer
  // (-1 / 0) so it can feed the SELECT.
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

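// SETCC expects a 0/1 boolean, but the SELECT_CC below follows the hardware
// convention of producing 0/-1, so mask the result with 1 to normalize it.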
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC  = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}