1//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Defines an instruction selector for the AMDGPU target.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUDiagnosticInfoUnsupported.h"
16#include "AMDGPUInstrInfo.h"
17#include "AMDGPUISelLowering.h" // For AMDGPUISD
18#include "AMDGPURegisterInfo.h"
19#include "AMDGPUSubtarget.h"
20#include "R600InstrInfo.h"
21#include "SIDefines.h"
22#include "SIISelLowering.h"
23#include "SIMachineFunctionInfo.h"
24#include "llvm/CodeGen/FunctionLoweringInfo.h"
25#include "llvm/CodeGen/MachineFrameInfo.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/PseudoSourceValue.h"
28#include "llvm/CodeGen/SelectionDAG.h"
29#include "llvm/CodeGen/SelectionDAGISel.h"
30#include "llvm/IR/Function.h"
31
32using namespace llvm;
33
34//===----------------------------------------------------------------------===//
35// Instruction Selector Implementation
36//===----------------------------------------------------------------------===//
37
namespace {
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  // R600 operand-folding helpers.
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  // Address-space classification helpers for memory nodes.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isFlatStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isFlatLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  // Glues a CopyToReg of m0 onto memory nodes that access local memory.
  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  // DS (local memory) addressing-mode selectors.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  // MUBUF (buffer memory) addressing-mode selectors.
  void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;
  // SMRD (scalar memory read) addressing-mode selectors.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  SDNode *SelectAddrSpaceCast(SDNode *N);
  // VOP3 source-modifier (neg/abs/clamp/omod) operand selectors.
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  SDNode *SelectS_BFEFromShifts(SDNode *N);
  SDNode *SelectS_BFE(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
}  // end anonymous namespace
151
/// \brief This pass converts a legalized DAG into a AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}
157
// The Subtarget pointer is populated per-function in runOnMachineFunction().
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM) {}
160
161bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
162  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
163  return SelectionDAGISel::runOnMachineFunction(MF);
164}
165
// Out-of-line destructor; no resources to release explicitly.
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
168
169bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
170  const SITargetLowering *TL
171      = static_cast<const SITargetLowering *>(getTargetLowering());
172  return TL->analyzeImmediate(N) == 0;
173}
174
175/// \brief Determine the register class for \p OpNo
176/// \returns The register class of the virtual register that will be used for
177/// the given operand number \OpNo or NULL if the register class cannot be
178/// determined.
179const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
180                                                          unsigned OpNo) const {
181  if (!N->isMachineOpcode())
182    return nullptr;
183
184  switch (N->getMachineOpcode()) {
185  default: {
186    const MCInstrDesc &Desc =
187        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
188    unsigned OpIdx = Desc.getNumDefs() + OpNo;
189    if (OpIdx >= Desc.getNumOperands())
190      return nullptr;
191    int RegClass = Desc.OpInfo[OpIdx].RegClass;
192    if (RegClass == -1)
193      return nullptr;
194
195    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
196  }
197  case AMDGPU::REG_SEQUENCE: {
198    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
199    const TargetRegisterClass *SuperRC =
200        Subtarget->getRegisterInfo()->getRegClass(RCID);
201
202    SDValue SubRegOp = N->getOperand(OpNo + 1);
203    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
204    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
205                                                              SubRegIdx);
206  }
207  }
208}
209
210bool AMDGPUDAGToDAGISel::SelectADDRParam(
211  SDValue Addr, SDValue& R1, SDValue& R2) {
212
213  if (Addr.getOpcode() == ISD::FrameIndex) {
214    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
215      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
216      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
217    } else {
218      R1 = Addr;
219      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
220    }
221  } else if (Addr.getOpcode() == ISD::ADD) {
222    R1 = Addr.getOperand(0);
223    R2 = Addr.getOperand(1);
224  } else {
225    R1 = Addr;
226    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
227  }
228  return true;
229}
230
231bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
232  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
233      Addr.getOpcode() == ISD::TargetGlobalAddress) {
234    return false;
235  }
236  return SelectADDRParam(Addr, R1, R2);
237}
238
239
240bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
241  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
242      Addr.getOpcode() == ISD::TargetGlobalAddress) {
243    return false;
244  }
245
246  if (Addr.getOpcode() == ISD::FrameIndex) {
247    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
248      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
249      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
250    } else {
251      R1 = Addr;
252      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
253    }
254  } else if (Addr.getOpcode() == ISD::ADD) {
255    R1 = Addr.getOperand(0);
256    R2 = Addr.getOperand(1);
257  } else {
258    R1 = Addr;
259    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
260  }
261  return true;
262}
263
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  // Only SI+ targets set up m0 here, and only for local-address memory
  // accesses; any other node is returned unchanged.
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
                 AMDGPUAS::LOCAL_ADDRESS))
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  // Rebuild N's operand list with the m0 copy glued on the end, so the copy
  // is ordered before the memory operation. N is morphed in place.
  SmallVector <SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}
289
290static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
291  switch (NumVectorElts) {
292  case 1:
293    return AMDGPU::SReg_32RegClassID;
294  case 2:
295    return AMDGPU::SReg_64RegClassID;
296  case 4:
297    return AMDGPU::SReg_128RegClassID;
298  case 8:
299    return AMDGPU::SReg_256RegClassID;
300  case 16:
301    return AMDGPU::SReg_512RegClassID;
302  }
303
304  llvm_unreachable("invalid vector size");
305}
306
// Main entry point: select one SelectionDAG node into machine nodes.
// Cases handled here either cannot be expressed in tablegen patterns or
// benefit from custom handling; everything else falls through to the
// generated matcher via SelectCode().
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  // Atomics touching local memory need m0 initialized (glueCopyToM0 decides).
  if (isa<AtomicSDNode>(N))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    // Lower vector construction to a REG_SEQUENCE over 32-bit elements.
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // A one-element vector is just a copy into the target register class.
    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                        MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    // Combine two scalars into one wider register with a REG_SEQUENCE.
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Materialize 64-bit constants that are not inline immediates as two
    // 32-bit S_MOV_B32s recombined with a REG_SEQUENCE.
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                  N->getValueType(0), Ops);
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Local-memory loads/stores also need m0 glued on before the generic
    // matcher selects them.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
                    N->getOperand(0), OffsetVal, WidthVal);
  }
  case AMDGPUISD::DIV_SCALE: {
    // Multi-result node; tablegen cannot match it (see SelectDIV_SCALE).
    return SelectDIV_SCALE(N);
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::ADDRSPACECAST:
    return SelectAddrSpaceCast(N);
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
    // Try to turn 32-bit mask/shift patterns into S_BFE on SI+.
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectS_BFE(N);
  }

  // Fall back to the tablegen-generated matcher.
  return SelectCode(N);
}
516
517bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
518  assert(AS != 0 && "Use checkPrivateAddress instead.");
519  if (!Ptr)
520    return false;
521
522  return Ptr->getType()->getPointerAddressSpace() == AS;
523}
524
525bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
526  if (Op->getPseudoValue())
527    return true;
528
529  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
530    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
531
532  return false;
533}
534
535bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
536  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
537}
538
539bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
540  const Value *MemVal = N->getMemOperand()->getValue();
541  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
542          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
543          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
544}
545
546bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
547  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
548}
549
550bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
551  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
552}
553
554bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
555  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
556}
557
558bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
559  const Value *MemVal = N->getMemOperand()->getValue();
560  if (CbId == -1)
561    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
562
563  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
564}
565
566bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
567  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
568    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
569        N->getMemoryVT().bitsLT(MVT::i32))
570      return true;
571
572  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
573}
574
575bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
576  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
577}
578
579bool AMDGPUDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) const {
580  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
581}
582
583bool AMDGPUDAGToDAGISel::isFlatLoad(const  LoadSDNode *N) const {
584  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
585}
586
587bool AMDGPUDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) const {
588  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
589}
590
591bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
592  MachineMemOperand *MMO = N->getMemOperand();
593  if (checkPrivateAddress(N->getMemOperand())) {
594    if (MMO) {
595      const PseudoSourceValue *PSV = MMO->getPseudoValue();
596      if (PSV && PSV->isConstantPool()) {
597        return true;
598      }
599    }
600  }
601  return false;
602}
603
604bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
605  if (checkPrivateAddress(N->getMemOperand())) {
606    // Check to make sure we are not a constant pool load or a constant load
607    // that is marked as a private load
608    if (isCPLoad(N) || isConstantLoad(N, -1)) {
609      return false;
610    }
611  }
612
613  const Value *MemVal = N->getMemOperand()->getValue();
614  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
615      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
616      !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
617      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
618      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
619      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
620      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
621    return true;
622  }
623  return false;
624}
625
// Human-readable pass name shown in pass listings and crash diagnostics.
const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
629
630#ifdef DEBUGTMP
631#undef INT64_C
632#endif
633#undef DEBUGTMP
634
635//===----------------------------------------------------------------------===//
636// Complex Patterns
637//===----------------------------------------------------------------------===//
638
639bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
640                                                         SDValue& IntPtr) {
641  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
642    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
643                                       true);
644    return true;
645  }
646  return false;
647}
648
649bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
650    SDValue& BaseReg, SDValue &Offset) {
651  if (!isa<ConstantSDNode>(Addr)) {
652    BaseReg = Addr;
653    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
654    return true;
655  }
656  return false;
657}
658
659bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
660                                           SDValue &Offset) {
661  ConstantSDNode *IMMOffset;
662
663  if (Addr.getOpcode() == ISD::ADD
664      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
665      && isInt<16>(IMMOffset->getZExtValue())) {
666
667      Base = Addr.getOperand(0);
668      Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
669                                         MVT::i32);
670      return true;
671  // If the pointer address is constant, we can move it to the offset field.
672  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
673             && isInt<16>(IMMOffset->getZExtValue())) {
674    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
675                                  SDLoc(CurDAG->getEntryNode()),
676                                  AMDGPU::ZERO, MVT::i32);
677    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
678                                       MVT::i32);
679    return true;
680  }
681
682  // Default case, no offset
683  Base = Addr;
684  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
685  return true;
686}
687
688bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
689                                            SDValue &Offset) {
690  ConstantSDNode *C;
691  SDLoc DL(Addr);
692
693  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
694    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
695    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
696  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
697            (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
698    Base = Addr.getOperand(0);
699    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
700  } else {
701    Base = Addr;
702    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
703  }
704
705  return true;
706}
707
708SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
709  SDLoc DL(N);
710  SDValue LHS = N->getOperand(0);
711  SDValue RHS = N->getOperand(1);
712
713  bool IsAdd = (N->getOpcode() == ISD::ADD);
714
715  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
716  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
717
718  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
719                                       DL, MVT::i32, LHS, Sub0);
720  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
721                                       DL, MVT::i32, LHS, Sub1);
722
723  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
724                                       DL, MVT::i32, RHS, Sub0);
725  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
726                                       DL, MVT::i32, RHS, Sub1);
727
728  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
729  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
730
731
732  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
733  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
734
735  SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
736  SDValue Carry(AddLo, 1);
737  SDNode *AddHi
738    = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
739                             SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
740
741  SDValue Args[5] = {
742    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
743    SDValue(AddLo,0),
744    Sub0,
745    SDValue(AddHi,0),
746    Sub1,
747  };
748  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
749}
750
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  // Only f32 and f64 variants of V_DIV_SCALE exist.
  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  // Each SelectVOP3Mods* call fills a (modifiers, value) operand pair; the
  // first call additionally supplies the trailing clamp and omod operands.
  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
771
772bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
773                                         unsigned OffsetBits) const {
774  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
775      (OffsetBits == 8 && !isUInt<8>(Offset)))
776    return false;
777
778  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
779      Subtarget->unsafeDSOffsetFoldingEnabled())
780    return true;
781
782  // On Southern Islands instruction with a negative base value and an offset
783  // don't seem to work.
784  return CurDAG->SignBitIsZero(Base);
785}
786
// Select a (base, 16-bit offset) addressing pair for single-address DS ops.
// Always succeeds: the fallback uses the whole address with a zero offset.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = N1;
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // Emit the real (machine) subtract used as the base.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = Addr.getOperand(0);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = Addr;
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
847
// TODO: If offset is too big, put low 16-bit into offset.
// Match the two-offset form used by DS read2/write2: Base plus two 8-bit
// dword-unit offsets (Offset1 = Offset0 + 1, i.e. two adjacent dwords).
// Always succeeds via the default (offsets 0 and 1) case.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // Offsets are encoded in dword (4-byte) units.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    // Checking the larger of the two offsets suffices for both.
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Emit the negated base as a real machine node.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // The constant address must be dword-aligned for this form.
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      // Use a zero base register so multiple accesses can share it.
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
918
919static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
920  return isUInt<12>(Imm->getZExtValue());
921}
922
// Decompose a MUBUF address into its full operand set: base pointer (Ptr),
// VGPR address (VAddr), SGPR offset (SOffset), immediate offset (Offset),
// plus the offen/idxen/addr64 addressing-mode flags and glc/slc/tfe cache
// flags. All flags default to 0 and are overridden per matched pattern.
void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  SDLoc DL(Addr);

  // Defaults: no cache modifiers, base-only addressing, zero SGPR offset.
  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
        Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
        return;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return;
    }
    // Otherwise (offset doesn't fit in 32 bits) fall through to the generic
    // cases below.
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
}
988
// Match a MUBUF addr64-mode access: succeeds only when SelectMUBUF chose the
// addr64 form, wrapping the 64-bit pointer into a full resource descriptor.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  // Only accept the result if SelectMUBUF actually picked addr64 addressing.
  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    // Build the SRsrc descriptor around the selected pointer.
    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}
1015
1016bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1017                                           SDValue &VAddr, SDValue &SOffset,
1018                                           SDValue &Offset,
1019                                           SDValue &SLC) const {
1020  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1021  SDValue GLC, TFE;
1022
1023  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
1024}
1025
// Match a scratch (private) MUBUF access. Rsrc and SOffset come from the
// function's reserved scratch resource / wave-offset registers; the remaining
// address is split into VAddr plus a legal immediate offset when possible.
// Always succeeds.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
  SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    // Offsets in vaddr must be positive.
    if (CurDAG->SignBitIsZero(N0)) {
      ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
      if (isLegalMUBUFImmOffset(C1)) {
        VAddr = N0;
        ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
        return true;
      }
    }
  }

  // (node)
  VAddr = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}
1057
// Match a MUBUF offset-mode access (no vaddr / index / addr64). Succeeds only
// when SelectMUBUF chose plain offset addressing, building a full resource
// descriptor with an all-ones size field around the pointer.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  // Reject any addressing mode other than plain offset.
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    // Descriptor word: default data format plus maximum (2^32 - 1) size.
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}
1084
1085bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1086                                           SDValue &Soffset, SDValue &Offset,
1087                                           SDValue &GLC) const {
1088  SDValue SLC, TFE;
1089
1090  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1091}
1092
1093///
1094/// \param EncodedOffset This is the immediate value that will be encoded
1095///        directly into the instruction.  On SI/CI the \p EncodedOffset
1096///        will be in units of dwords and on VI+ it will be units of bytes.
1097static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
1098                                 int64_t EncodedOffset) {
1099  return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1100     isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
1101}
1102
1103bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1104                                          SDValue &Offset, bool &Imm) const {
1105
1106  // FIXME: Handle non-constant offsets.
1107  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1108  if (!C)
1109    return false;
1110
1111  SDLoc SL(ByteOffsetNode);
1112  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1113  int64_t ByteOffset = C->getSExtValue();
1114  int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1115      ByteOffset >> 2 : ByteOffset;
1116
1117  if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
1118    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1119    Imm = true;
1120    return true;
1121  }
1122
1123  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1124    return false;
1125
1126  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1127    // 32-bit Immediates are supported on Sea Islands.
1128    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1129  } else {
1130    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1131    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1132                                            C32Bit), 0);
1133  }
1134  Imm = false;
1135  return true;
1136}
1137
1138bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1139                                     SDValue &Offset, bool &Imm) const {
1140
1141  SDLoc SL(Addr);
1142  if (CurDAG->isBaseWithConstantOffset(Addr)) {
1143    SDValue N0 = Addr.getOperand(0);
1144    SDValue N1 = Addr.getOperand(1);
1145
1146    if (SelectSMRDOffset(N1, Offset, Imm)) {
1147      SBase = N0;
1148      return true;
1149    }
1150  }
1151  SBase = Addr;
1152  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1153  Imm = true;
1154  return true;
1155}
1156
1157bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1158                                       SDValue &Offset) const {
1159  bool Imm;
1160  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1161}
1162
1163bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1164                                         SDValue &Offset) const {
1165
1166  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1167    return false;
1168
1169  bool Imm;
1170  if (!SelectSMRD(Addr, SBase, Offset, Imm))
1171    return false;
1172
1173  return !Imm && isa<ConstantSDNode>(Offset);
1174}
1175
1176bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1177                                        SDValue &Offset) const {
1178  bool Imm;
1179  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1180         !isa<ConstantSDNode>(Offset);
1181}
1182
1183bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1184                                             SDValue &Offset) const {
1185  bool Imm;
1186  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1187}
1188
1189bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1190                                               SDValue &Offset) const {
1191  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1192    return false;
1193
1194  bool Imm;
1195  if (!SelectSMRDOffset(Addr, Offset, Imm))
1196    return false;
1197
1198  return !Imm && isa<ConstantSDNode>(Offset);
1199}
1200
1201bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1202                                              SDValue &Offset) const {
1203  bool Imm;
1204  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1205         !isa<ConstantSDNode>(Offset);
1206}
1207
// FIXME: This is incorrect and only enough to be able to compile.
// Lowers addrspacecast by reinterpreting the pointer value: truncate for
// 64->32, zero-extend via REG_SEQUENCE for 32->64, bitcast for same size. A
// diagnostic is always emitted since this lowering is known-incomplete.
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
  SDLoc DL(N);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  DiagnosticInfoUnsupported NotImplemented(*MF.getFunction(),
                                           "addrspacecast not implemented");
  CurDAG->getContext()->diagnose(NotImplemented);

  assert(Subtarget->hasFlatAddressSpace() &&
         "addrspacecast only supported with flat address space!");

  assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
          ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
         "Can only cast to / from flat address space!");

  // The flat instructions read the address as the index of the VGPR holding the
  // address, so casting should just be reinterpreting the base VGPR, so just
  // insert trunc / bitcast / zext.

  SDValue Src = ASC->getOperand(0);
  EVT DestVT = ASC->getValueType(0);
  EVT SrcVT = Src.getValueType();

  unsigned SrcSize = SrcVT.getSizeInBits();
  unsigned DestSize = DestVT.getSizeInBits();

  if (SrcSize > DestSize) {
    // 64 -> 32: keep the low half of the pointer.
    assert(SrcSize == 64 && DestSize == 32);
    return CurDAG->getMachineNode(
      TargetOpcode::EXTRACT_SUBREG,
      DL,
      DestVT,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
  }

  if (DestSize > SrcSize) {
    // 32 -> 64: zero-extend by pairing the source with an s_mov of 0.
    assert(SrcSize == 32 && DestSize == 64);

    // FIXME: This is probably wrong, we should never be defining
    // a register class with both VGPRs and SGPRs
    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
                                           MVT::i32);

    const SDValue Ops[] = {
      RC,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                     CurDAG->getConstant(0, DL, MVT::i32)), 0),
      CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  // Same width: a bitcast is a no-op reinterpretation.
  assert(SrcSize == 64 && DestSize == 64);
  return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}
1270
1271SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
1272                                     uint32_t Offset, uint32_t Width) {
1273  // Transformation function, pack the offset and width of a BFE into
1274  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1275  // source, bits [5:0] contain the offset and bits [22:16] the width.
1276  uint32_t PackedVal = Offset | (Width << 16);
1277  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1278
1279  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1280}
1281
1282SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1283  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1284  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1285  // Predicate: 0 < b <= c < 32
1286
1287  const SDValue &Shl = N->getOperand(0);
1288  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1289  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1290
1291  if (B && C) {
1292    uint32_t BVal = B->getZExtValue();
1293    uint32_t CVal = C->getZExtValue();
1294
1295    if (0 < BVal && BVal <= CVal && CVal < 32) {
1296      bool Signed = N->getOpcode() == ISD::SRA;
1297      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1298
1299      return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
1300                      CVal - BVal, 32 - CVal);
1301    }
1302  }
1303  return SelectCode(N);
1304}
1305
// Try to select and/srl/sra nodes as S_BFE_U32 / S_BFE_I32 bitfield extracts;
// falls back to generic pattern selection when no BFE form matches.
SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        // The mask must be a contiguous run of low bits; its popcount is the
        // extracted field's width.
        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        // Shift the mask down first; it must be contiguous after the shift.
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  }

  // No BFE pattern matched; use the TableGen-generated selector.
  return SelectCode(N);
}
1359
1360bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1361                                        SDValue &SrcMods) const {
1362
1363  unsigned Mods = 0;
1364
1365  Src = In;
1366
1367  if (Src.getOpcode() == ISD::FNEG) {
1368    Mods |= SISrcMods::NEG;
1369    Src = Src.getOperand(0);
1370  }
1371
1372  if (Src.getOpcode() == ISD::FABS) {
1373    Mods |= SISrcMods::ABS;
1374    Src = Src.getOperand(0);
1375  }
1376
1377  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1378
1379  return true;
1380}
1381
1382bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1383                                         SDValue &SrcMods) const {
1384  bool Res = SelectVOP3Mods(In, Src, SrcMods);
1385  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1386}
1387
1388bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1389                                         SDValue &SrcMods, SDValue &Clamp,
1390                                         SDValue &Omod) const {
1391  SDLoc DL(In);
1392  // FIXME: Handle Clamp and Omod
1393  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1394  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1395
1396  return SelectVOP3Mods(In, Src, SrcMods);
1397}
1398
1399bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1400                                           SDValue &SrcMods, SDValue &Clamp,
1401                                           SDValue &Omod) const {
1402  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1403
1404  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1405                cast<ConstantSDNode>(Clamp)->isNullValue() &&
1406                cast<ConstantSDNode>(Omod)->isNullValue();
1407}
1408
1409bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1410                                              SDValue &SrcMods,
1411                                              SDValue &Omod) const {
1412  // FIXME: Handle Omod
1413  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1414
1415  return SelectVOP3Mods(In, Src, SrcMods);
1416}
1417
1418bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1419                                                   SDValue &SrcMods,
1420                                                   SDValue &Clamp,
1421                                                   SDValue &Omod) const {
1422  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1423  return SelectVOP3Mods(In, Src, SrcMods);
1424}
1425
// Rewrite i64 non-extending loads and i64 non-truncating stores as v2i32
// operations (with bitcasts) before selection, so the TableGen patterns only
// need to match the vector forms.
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  bool Modified = false;

  // XXX - Other targets seem to be able to do this without a worklist.
  // Collect first, mutate after, so the allnodes() walk is not invalidated.
  SmallVector<LoadSDNode *, 8> LoadsToReplace;
  SmallVector<StoreSDNode *, 8> StoresToReplace;

  for (SDNode &Node : CurDAG->allnodes()) {
    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
      EVT VT = LD->getValueType(0);
      if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
        continue;

      // To simplify the TableGen patters, we replace all i64 loads with v2i32
      // loads.  Alternatively, we could promote i64 loads to v2i32 during DAG
      // legalization, however, so places (ExpandUnalignedLoad) in the DAG
      // legalizer assume that if i64 is legal, so doing this promotion early
      // can cause problems.
      LoadsToReplace.push_back(LD);
    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
      // Handle i64 stores here for the same reason mentioned above for loads.
      SDValue Value = ST->getValue();
      if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
        continue;
      StoresToReplace.push_back(ST);
    }
  }

  for (LoadSDNode *LD : LoadsToReplace) {
    SDLoc SL(LD);

    // New v2i32 load reuses the original memory operand; the bitcast restores
    // the i64 value for existing users.
    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
                                      LD->getBasePtr(), LD->getMemOperand());
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
                                      MVT::i64, NewLoad);
    // Rewire the chain result first, then the value result.
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
    Modified = true;
  }

  for (StoreSDNode *ST : StoresToReplace) {
    // Stores are updated in place: only the stored value changes (to the
    // bitcast v2i32 form).
    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
                                       MVT::v2i32, ST->getValue());
    const SDValue StoreOps[] = {
      ST->getChain(),
      NewValue,
      ST->getBasePtr(),
      ST->getOffset()
    };

    CurDAG->UpdateNodeOperands(ST, StoreOps);
    Modified = true;
  }

  // XXX - Is this necessary?
  if (Modified)
    CurDAG->RemoveDeadNodes();
}
1484
// Run target-specific post-selection folding (PostISelFolding) over every
// machine node, repeating until the DAG reaches a fixed point.
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      // PostISelFolding returns the original node when nothing was folded.
      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}
1506