AMDGPUISelDAGToDAG.cpp revision cd81d94322a39503e4a3e87b6ee03d4fcb3465fb
//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIISelLowering.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {
/// AMDGPU-specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget &Subtarget;
public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();

  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  inline SDValue getSmallIPtrImm(unsigned Imm);
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue &R1, SDValue &R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  static bool checkType(const Value *Ptr, unsigned AS);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int CbId) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  /// \returns True if the current basic block being selected is at control
  ///          flow depth 0, meaning that the current block dominates the
  ///          exit block.
  bool isCFDepth0() const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr, SDValue &Offset,
                         SDValue &ImmOffset) const;

  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
  : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

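/// \returns True if \p N is a constant that can be encoded as an inline
/// immediate (SITargetLowering::analyzeImmediate classifies such values as 0).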
bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo, or nullptr if the register class cannot
/// be determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return TM.getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        TM.getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx);
  }
  }
}

SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned Imm) {
  return CurDAG->getTargetConstant(Imm, MVT::i32);
}

bool AMDGPUDAGToDAGISel::SelectADDRParam(
    SDValue Addr, SDValue &R1, SDValue &R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i32);
  }
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}

bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i64);
  }
  return true;
}

SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr; // Already selected.
  }

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
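  // Illustrative example (not from the original source): keeping the i64 add
  // visible here lets an address like (load (add i64 %ptr, %idx)) reach the
  // MUBUF addressing-mode matcher (SelectMUBUFAddr64) later in this file.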
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI =
        static_cast<const AMDGPURegisterInfo *>(TM.getRegisterInfo());
    const SIRegisterInfo *SIRI =
        static_cast<const SIRegisterInfo *>(TM.getRegisterInfo());
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      bool UseVReg = true;
      for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
           U != E; ++U) {
        if (!U->isMachineOpcode()) {
          continue;
        }
        const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
        if (!RC) {
          continue;
        }
        if (SIRI->isSGPRClass(RC)) {
          UseVReg = false;
        }
      }
      switch (NumVectorElts) {
      case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
                                     AMDGPU::SReg_32RegClassID;
        break;
      case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
                                     AMDGPU::SReg_64RegClassID;
        break;
      case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
                                     AMDGPU::SReg_128RegClassID;
        break;
      case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
                                     AMDGPU::SReg_256RegClassID;
        break;
      case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
                                      AMDGPU::SReg_512RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF plus 4 INSERT_SUBREGs,
      // which adds a 128-bit register copy when going through the
      // TwoAddressInstruction pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
      switch (NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);

    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
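    // For example, a 4-element vector selects to a single node of the form
    //   REG_SEQUENCE RegClassID, op0, sub0, op1, sub1, op2, sub2, op3, sub3.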
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     SDLoc(N), EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

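    // Materialize the 64-bit immediate as two S_MOV_B32 halves and stitch
    // them back together with a REG_SEQUENCE.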
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                CurDAG->getConstant(Imm >> 32, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
                                  N->getValueType(0), Ops);
  }

  case AMDGPUISD::REGISTER_LOAD: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;

    SelectADDRIndirect(N->getOperand(1), Addr, Offset);
    const SDValue Ops[] = {
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
                                  CurDAG->getVTList(MVT::i32, MVT::i64,
                                                    MVT::Other),
                                  Ops);
  }
  case AMDGPUISD::REGISTER_STORE: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;
    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
    const SDValue Ops[] = {
      N->getOperand(1),
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
                                  CurDAG->getVTList(MVT::Other),
                                  Ops);
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version,
    // which has separate operands for the offset and width, the scalar
    // version packs the width and offset into a single operand. Try to move
    // to the scalar version if the offsets are constant, so that we can try
    // to keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    // Pack the offset and width of the BFE into the format expected by
    // S_BFE_I32 / S_BFE_U32: in the second source operand, bits [5:0]
    // contain the offset and bits [22:16] the width.
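    // For example, offset = 3 and width = 8 pack to (8 << 16) | 3,
    // i.e. 0x00080003.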

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    uint32_t PackedVal = OffsetVal | WidthVal << 16;

    SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
    return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                                  SDLoc(N),
                                  MVT::i32,
                                  N->getOperand(0),
                                  PackedOffsetWidth);
  }
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  }
  return SelectCode(N);
}

bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
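  // Constant-address loads are treated as global on pre-SI targets and for
  // types narrower than 32 bits (presumably because the scalar load path
  // cannot handle them).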
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32)) {
      return true;
    }
  }
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load.
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  const Value *MemVal = N->getMemOperand()->getValue();
  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isCFDepth0() const {
  // FIXME: Figure out a way to use DominatorTree analysis here.
  const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
  const Function *Fn = FuncInfo->Fn;
  return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
}

const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
    SDValue &BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  }

  // Default case, no offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
  }

  return true;
}

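// Split a 64-bit add/sub into 32-bit halves: the low half uses the plain
// add/sub opcode, the high half consumes the carry via the matching
// add/sub-with-carry opcode, and the two halves are recombined into an
// SReg_64 with a REG_SEQUENCE.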
SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

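  // Under non-trivial control flow, fall back to the VALU add/sub with
  // carry, presumably because the SCC-based scalar carry chain is not safe
  // there (the original code carries no comment at this point).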
  if (!isCFDepth0()) {
    Opc = IsAdd ? AMDGPU::V_ADD_I32_e32 : AMDGPU::V_SUB_I32_e32;
    CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e32 : AMDGPU::V_SUBB_U32_e32;
  }

  SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
    = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                             SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}

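// Select DIV_SCALE to V_DIV_SCALE_F32 / V_DIV_SCALE_F64, zeroing the four
// trailing VOP3 operands.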
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);

  SDValue Ops[] = {
    N->getOperand(0),
    N->getOperand(1),
    N->getOperand(2),
    Zero,
    Zero,
    Zero,
    Zero
  };

  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}

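// Wrap a 64-bit pointer into the v4i32 resource descriptor that MUBUF addr64
// addressing expects, via the SI_ADDR64_RSRC pseudo.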
static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
  return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
                                     Ptr), 0);
}

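// Match the MUBUF addr64 addressing mode: Ptr receives the base pointer
// wrapped in a resource descriptor, Offset the remaining variable offset,
// and ImmOffset an unsigned 12-bit immediate (emitted as an i16 constant).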
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
                                           SDValue &Offset,
                                           SDValue &ImmOffset) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (isUInt<12>(C1->getZExtValue())) {
      if (N0.getOpcode() == ISD::ADD) {
        // (add (add N2, N3), C1)
        SDValue N2 = N0.getOperand(0);
        SDValue N3 = N0.getOperand(1);
        Ptr = wrapAddr64Rsrc(CurDAG, DL, N2);
        Offset = N3;
        ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
        return true;
      }

      // (add N0, C1)
      Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getTargetConstant(0, MVT::i64));
      Offset = N0;
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
      return true;
    }
  }
  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1)
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Ptr = wrapAddr64Rsrc(CurDAG, DL, N0);
    Offset = N1;
    ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
    return true;
  }

  // Default case.
  Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getConstant(0, MVT::i64));
  Offset = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
  return true;
}

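// Run AMDGPUTargetLowering::PostISelFolding over every selected machine node,
// repeating until a pass over the DAG makes no further changes.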
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering &Lowering =
      *static_cast<const AMDGPUTargetLowering *>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more.
    for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ++I) {

      SDNode *Node = I;

      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}