R600ISelLowering.cpp revision 67a47a445b544ac638d10303dc697d70f25d12fb
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp.  This file
// consists mostly of EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  setSchedulingPreference(Sched::VLIW);
}

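// Expand pseudo instructions that are easier to emit here, after instruction
// selection, than to match with TableGen patterns.  Unless a case returns
// early, the original pseudo instruction is erased at the end of the switch.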
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
               .addOperand(MI->getOperand(0))
               .addOperand(MI->getOperand(1))
               .addImm(0) // Flags
               .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 0, MO_FLAG_CLAMP);
      break;
    }
  case AMDGPU::FABS_R600:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
               .addOperand(MI->getOperand(0))
               .addOperand(MI->getOperand(1))
               .addImm(0) // Flags
               .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 1, MO_FLAG_ABS);
      break;
    }

  case AMDGPU::FNEG_R600:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
               .addOperand(MI->getOperand(0))
               .addOperand(MI->getOperand(1))
               .addImm(0) // Flags
               .addReg(AMDGPU::PRED_SEL_OFF);
      TII->AddFlag(NewMI, 1, MO_FLAG_NEG);
      break;
    }

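  // R600_LOAD_CONST carries the index of a constant register as an
  // immediate; rewrite it as a plain COPY from the corresponding physical
  // register in the R600_CReg32 class.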
  case AMDGPU::R600_LOAD_CONST:
    {
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
                  .addOperand(MI->getOperand(0))
                  .addReg(ConstantReg);
      break;
    }

  case AMDGPU::MASK_WRITE:
    {
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      TII->AddFlag(defInstr, 0, MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

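  // The cacheless RAT (Random Access Target) write takes a dword address,
  // while the pseudo instruction's address operand is in bytes, so divide
  // the address by 4 (shift right by 2) before re-emitting the write.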
  case AMDGPU::RAT_WRITE_CACHELESS_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
                                             AMDGPU::R600_TReg32_XRegisterClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
                                              AMDGPU::R600_TReg32RegisterClass);

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but doing so did not
      // produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addReg(AMDGPU::PRED_SEL_OFF)
              .addImm(2);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue)
              .addReg(AMDGPU::PRED_SEL_OFF);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr);
      break;
    }

  case AMDGPU::RESERVE_REG:
    {
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
                          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

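  // TXD is a texture sample with user-supplied derivatives.  Lower it to a
  // pair of TEX_SET_GRADIENTS_H/V instructions that load the horizontal and
  // vertical gradients, followed by the gradient sample itself.  The
  // implicit uses of t0 and t1 keep the gradient setup live and ordered
  // before the sample.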
  case AMDGPU::TXD:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::BRANCH:
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(0);
      break;
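  // Conditional branches become a PRED_X that compares the condition value
  // against zero and defines PREDICATE_BIT, followed by a JUMP predicated
  // on that bit.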
  case AMDGPU::BRANCH_COND_f32:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
                .addReg(AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_ZERO)
                .addImm(0); // Flags
      TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  case AMDGPU::BRANCH_COND_i32:
    {
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
                .addReg(AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_ZERO_INT)
                .addImm(0); // Flags
      TII->AddFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

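// Entry point for the operations marked Custom in the constructor.  Anything
// not handled here falls through to the generic AMDGPU lowering.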
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

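// Lower BR_CC by materializing the condition as an i32 that is all ones
// (true) or zero (false), then branching on that value:
//   (br_cc cc, lhs, rhs, dest)
//     -> (BRANCH_COND dest, (select_cc lhs, rhs, -1, 0, cc))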
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

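// Implicit kernel parameters (the work-group counts and the global/local
// sizes) live at fixed dword offsets in the PARAM_I_ADDRESS address space.
// Load the requested dword from that space.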
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const
{
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                      AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

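// Lower ROTL through BITALIGN, which extracts a 32-bit window from the
// 64-bit concatenation of its first two source operands, starting at the
// bit position given by the third.  Passing the same value twice turns the
// extraction into a rotate:
//   rotl(x, y) == bitalign(x, x, 32 - y)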
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

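// SELECT_CC lowering: first make the compare operands match the result type,
// then, if the true/false values are already the hardware TRUE/FALSE
// constants (1.0f / -1 and 0.0f / 0), emit a native SELECT_CC.  Otherwise,
// compute a hardware TRUE/FALSE mask with a supported SELECT_CC and use it
// to SELECT between the original values.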
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we convert LHS and RHS to the type of True and False.  True and
  // False are guaranteed to have the same type as this SELECT_CC node.

  if (CompareVT != VT) {
    ISD::NodeType ConversionOp = ISD::DELETED_NODE;
    if (VT == MVT::f32 && CompareVT == MVT::i32) {
      if (isUnsignedIntSetCC(CCOpcode)) {
        ConversionOp = ISD::UINT_TO_FP;
      } else {
        ConversionOp = ISD::SINT_TO_FP;
      }
    } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
      ConversionOp = ISD::FP_TO_SINT;
    } else {
      // I don't think there will be any other type pairings.
      assert(!"Unhandled operand type pairings in SELECT_CC");
    }
    // XXX Check the value of LHS and RHS and avoid creating sequences like
    // (FTOI (ITOF))
    LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
    RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
  }

  // If True is a hardware TRUE value and False is a hardware FALSE value we
  // can handle this with a native instruction (SET* instructions).
  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
  // we could also handle this with a native instruction, but we would need to
  // swap True and False and invert the condition.  This is not implemented yet.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
  }

  // XXX Check if we can lower this to a SELECT or if it is supported by a
  // native operation. (The code below does this, but we don't have the
  // instruction selection patterns to do this yet.)
#if 0
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    bool SwapTF = false;
    switch (CCOpcode) {
    case ISD::SETOEQ:
    case ISD::SETUEQ:
    case ISD::SETEQ:
      SwapTF = true;
      // Fall through
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      // We can lower to select
      if (SwapTF) {
        Temp = True;
        True = False;
        False = Temp;
      }
      // CNDE
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
    default:
      // Supported by a native operation (CNDGE, CNDGT)
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }
#endif

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (VT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, VT);
    HWFalse = DAG.getConstantFP(0.0f, VT);
  } else if (VT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, VT);
    HWFalse = DAG.getConstant(0, VT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);

  // Convert floating point condition to i1
  if (VT == MVT::f32) {
    Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
                       DAG.getNode(ISD::FNEG, DL, VT, Cond));
  }

  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

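// Lower SETCC through SELECT_CC: the hardware comparison produces -1/0, so
// mask the result down to the 0/1 that SETCC is defined to produce:
//   (setcc lhs, rhs, cc) -> (and 1, (select_cc lhs, rhs, -1, 0, cc))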
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC  = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      DL,
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}
519