R600ISelLowering.cpp revision a499d2bcef0c1001c60d752d356e50eed2402ca8
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

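  // Expand all vector arithmetic: legalization scalarizes these operations
  // into per-channel instructions.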
  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
  setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
  setOperationAction(ISD::FSUB, MVT::v4f32, Expand);

  setOperationAction(ISD::ADD,  MVT::v4i32, Expand);
  setOperationAction(ISD::AND,  MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
  setOperationAction(ISD::UREM, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
  setOperationAction(ISD::FPOW, MVT::f32, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);

  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);

  setSchedulingPreference(Sched::VLIW);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::SHADER_TYPE: break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                    AMDGPU::MOV,
                                                    MI->getOperand(0).getReg(),
                                                    MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
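    // MASK_WRITE is a pseudo instruction: flag the instruction that defines
    // the masked register with MO_FLAG_MASK so its destination write is
    // masked out.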
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
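    // Cacheless global stores (Evergreen). If the next instruction is a
    // RETURN, set the end-of-program bit on this store.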
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set the end-of-program bit
    break;
  }

  case AMDGPU::TXD: {
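    // Texture sampling with explicit derivatives: load the horizontal and
    // vertical gradients into temporaries, then issue the gradient sample,
    // which reads them as implicit operands.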
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
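    // Same expansion as TXD, but ends with the shadow-compare gradient
    // sample (TEX_SAMPLE_C_G).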
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(0);
    break;

  case AMDGPU::BRANCH_COND_f32: {
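    // Conditional branch: PRED_X materializes the f32 condition into the
    // predicate bit, and the JUMP is predicated on that bit.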
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
              AMDGPU::PREDICATE_BIT)
              .addOperand(MI->getOperand(1))
              .addImm(OPCODE_IS_NOT_ZERO_INT)
              .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // The instruction is left unmodified if it is not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

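// Merge per-channel scalar exports into one EXPORT node per output slot.
// The first scalar written to a slot creates the EXPORT; subsequent scalars
// are inserted into that node's source vector and OR'd into its write mask.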
static SDValue
InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
    unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
    SDValue Scalar, SDValue Chain) {
  if (!ExportMap[Slot]) {
    SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
      DL, MVT::v4f32,
      DAG.getUNDEF(MVT::v4f32),
      Scalar,
      DAG.getConstant(Channel, MVT::i32));

    unsigned Mask = 1 << Channel;

    const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
        DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
        DAG.getConstant(Mask, MVT::i32)};

    SDValue Res = DAG.getNode(
        AMDGPUISD::EXPORT,
        DL,
        MVT::Other,
        Ops, 6);
    ExportMap[Slot] = Res.getNode();
    return Res;
  }

  SDNode *ExportInstruction = ExportMap[Slot];
  SDValue PreviousVector = ExportInstruction->getOperand(1);
  SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
      DL, MVT::v4f32,
      PreviousVector,
      Scalar,
      DAG.getConstant(Channel, MVT::i32));

  unsigned Mask = cast<ConstantSDNode>(ExportInstruction->getOperand(5))
      ->getZExtValue();
  Mask |= (1 << Channel);

  const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
      DAG.getConstant(Inst, MVT::i32),
      DAG.getConstant(Type, MVT::i32),
      DAG.getConstant(Slot, MVT::i32),
      DAG.getConstant(Mask, MVT::i32)};

  DAG.UpdateNodeOperands(ExportInstruction,
      Ops, 6);

  return Chain;
}

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FPOW: return LowerFPOW(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_pixel_color: {
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();

      SDNode **OutputsMap = MFI->Outputs;
      return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
          RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
          Chain);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4, MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));

      return SDValue(interp, slot % 2);
    }

    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM does not seem to replace the chain value inside
    // CustomWidenLowerNode.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(Node, 1));
    return;
  }
  }
}

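// FP_TO_UINT is only custom lowered for i1 results (see the constructor), so
// the conversion reduces to a comparison against 0.0f.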
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      Op.getDebugLoc(),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue CC    = Op.getOperand(1);
  SDValue LHS   = Op.getOperand(2);
  SDValue RHS   = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;

  if (LHS.getValueType() == MVT::i32) {
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
  } else {
    assert(0 && "Invalid type for br_cc");
  }
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

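// Implicit kernel parameters (group counts, global and local sizes) live at
// fixed dword offsets in the PARAM_I address space; see the r600_read_*
// intrinsics in LowerOperation for the offset assignments.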
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType *PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

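// Lower ROTL via BITALIGN: rotl(x, n) == bitalign(x, x, 32 - n).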
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a CND* instruction:
  // CND* instructions require RHS to be zero.  Some SELECT_CC nodes that
  // can be lowered to CND* instructions can also be lowered to SET*
  // instructions.  CND* instructions are cheaper, because they don't
  // require additional instructions to convert their result to the correct
  // value type, so this check should be first.
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False.
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // Try to lower to a SET* instruction:
  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to change True and False to be the same type as LHS and
  // RHS, and then convert the result of the select_cc back to the correct type.

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    if (CompareVT != VT) {
      if (VT == MVT::f32 && CompareVT == MVT::i32) {
        SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstant(-1, MVT::i32),
            DAG.getConstant(0, MVT::i32),
            CC);
        // Convert integer values of true (-1) and false (0) to fp values of
        // true (1.0f) and false (0.0f).
        SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
                                                  DAG.getConstant(1, MVT::i32));
        return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
      } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
        SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstantFP(1.0f, MVT::f32),
            DAG.getConstantFP(0.0f, MVT::f32),
            CC);
        // Convert fp values of true (1.0f) and false (0.0f) to integer values
        // of true (-1) and false (0).
        SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
        return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
      } else {
        // I don't think there will be any other type pairings.
        assert(!"Unhandled operand type pairings in SELECT_CC");
      }
    } else {
      // This SELECT_CC is already legal.
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }

  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS,
                             HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

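// Lower SELECT to SELECT_CC: select cond, t, f --> select_cc cond, 0, t, f, setne.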
SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(ISD::SELECT_CC,
      Op.getDebugLoc(),
      Op.getValueType(),
      Op.getOperand(0),
      DAG.getConstant(0, MVT::i32),
      Op.getOperand(1),
      Op.getOperand(2),
      DAG.getCondCode(ISD::SETNE));
}

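// Lower SETCC through SELECT_CC, then normalize the hardware true value
// (-1 for i32, 1.0f converted via FP_TO_SINT for f32) to 1 with an AND.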
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC  = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  if (LHS.getValueType() == MVT::i32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        DL,
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        DL,
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
    Cond = DAG.getNode(
        ISD::FP_TO_SINT,
        DL,
        MVT::i32,
        Cond);
  } else {
    assert(0 && "Invalid type for set_cc");
  }
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
      Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
    // Convert the pointer from a byte address to a dword address.
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                      DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                  Ptr, DAG.getConstant(2, MVT::i32)));

    if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
      assert(!"Truncating and indexed stores not supported yet");
    } else {
      Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    }
    return Chain;
  }
  return SDValue();
}

// Returns 512 + (kc_bank << 12).
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  DebugLoc DL = Op.getDebugLoc();
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
        isa<Constant>(LoadNode->getSrcValue())) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want the const position encoded with the following formula:
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr, computed by LLVM using an alignment of 16.
        // Thus we add (512 + (kc_bank << 12)) * 16 + chan * 4 here and
        // then divide by 4 at the ISel step.
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
    } else {
      // A non-constant Ptr can't be folded; keep it as a v4f32 load.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)));
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
        Result,
        Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  return SDValue();
}

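// Expand pow: pow(x, y) == exp2(y * log2(x)).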
SDValue R600TargetLowering::LowerFPOW(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
  SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
  return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      DebugLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
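  // Kernel arguments start at byte 36: the first nine dwords of the parameter
  // space hold the implicit parameters read by LowerImplicitParameter.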
  unsigned ParamOffsetBytes = 36;
  Function::const_arg_iterator FuncArg =
                            DAG.getMachineFunction().getFunction()->arg_begin();
  for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
    EVT VT = Ins[i].VT;
    Type *ArgType = FuncArg->getType();
    unsigned ArgSizeInBits = ArgType->isPointerTy() ?
                             32 : ArgType->getPrimitiveSizeInBits();
    unsigned ArgBytes = ArgSizeInBits >> 3;
    EVT ArgVT;
    if (ArgSizeInBits < VT.getSizeInBits()) {
      assert(!ArgType->isFloatTy() &&
             "Extending floating point arguments not supported yet");
      ArgVT = MVT::getIntegerVT(ArgSizeInBits);
    } else {
      ArgVT = VT;
    }
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
                                 MachinePointerInfo(new Argument(PtrTy)),
                                 ArgVT, false, false, ArgBytes);
    InVals.push_back(Arg);
    ParamOffsetBytes += ArgBytes;
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
  if (!VT.isVector()) return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }
  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be custom combined.
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
            Arg->getOperand(0).getOperand(Element));
      }
    }
    break;
  }
  }
  return SDValue();
}
975