AMDGPUISelDAGToDAG.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "R600InstrInfo.h"
#include "SIISelLowering.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget &Subtarget;
public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();

  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  inline SDValue getSmallIPtrImm(unsigned Imm);
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  static bool checkType(const Value *ptr, unsigned int addrspace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  /// \returns True if the current basic block being selected is at control
  /// flow depth 0, meaning that the current block dominates the exit block.
  bool isCFDepth0() const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
}  // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
  : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}

/// \brief Determine the register class for \p OpNo.
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo, or nullptr if the register class cannot
/// be determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return TM.getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC = TM.getRegisterInfo()->getRegClass(RCID);

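    // A REG_SEQUENCE machine node's source operands are the register class ID
    // followed by (value, subregister index) pairs, so the subregister index
    // paired with source operand OpNo is at operand OpNo + 1.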
    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx);
  }
  }
}

SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
  return CurDAG->getTargetConstant(Imm, MVT::i32);
}

bool AMDGPUDAGToDAGISel::SelectADDRParam(
  SDValue Addr, SDValue& R1, SDValue& R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i32);
  }
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}


bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i64);
  }
  return true;
}

SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom-lowering it during
  // DAG legalization, so that we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
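  // The i64 ADD below is split into two 32-bit halves: the low halves are
  // added first (producing a carry in the glue result), the high halves are
  // added with the carry-consuming variant, and the two 32-bit results are
  // recombined into an i64 with a REG_SEQUENCE.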
  case ISD::ADD: {
    if (N->getValueType(0) != MVT::i64 ||
        ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SDLoc DL(N);
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);

    SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
    SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);

    SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                         DL, MVT::i32, LHS, Sub0);
    SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                         DL, MVT::i32, LHS, Sub1);

    SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                         DL, MVT::i32, RHS, Sub0);
    SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                         DL, MVT::i32, RHS, Sub1);

    SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

    SmallVector<SDValue, 8> AddLoArgs;
    AddLoArgs.push_back(SDValue(Lo0, 0));
    AddLoArgs.push_back(SDValue(Lo1, 0));

    SDNode *AddLo = CurDAG->getMachineNode(
        isCFDepth0() ? AMDGPU::S_ADD_I32 : AMDGPU::V_ADD_I32_e32,
        DL, VTList, AddLoArgs);
    SDValue Carry = SDValue(AddLo, 1);
    SDNode *AddHi = CurDAG->getMachineNode(
        isCFDepth0() ? AMDGPU::S_ADDC_U32 : AMDGPU::V_ADDC_U32_e32,
        DL, MVT::i32, SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

    SDValue Args[5] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
      SDValue(AddLo,0),
      Sub0,
      SDValue(AddHi,0),
      Sub1,
    };
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
  }
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI =
                   static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo());
    const SIRegisterInfo *SIRI =
                   static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      bool UseVReg = true;
      for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
                                                    U != E; ++U) {
        if (!U->isMachineOpcode()) {
          continue;
        }
        const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
        if (!RC) {
          continue;
        }
        if (SIRI->isSGPRClass(RC)) {
          UseVReg = false;
        }
      }
      switch(NumVectorElts) {
      case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
                                     AMDGPU::SReg_32RegClassID;
        break;
      case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
                                     AMDGPU::SReg_64RegClassID;
        break;
      case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
                                     AMDGPU::SReg_128RegClassID;
        break;
      case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
                                     AMDGPU::SReg_256RegClassID;
        break;
      case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
                                      AMDGPU::SReg_512RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREGs,
      // which adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);

    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS,
                                  VT.getVectorElementType(),
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
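    // For example, a 4-element BUILD_VECTOR uses 4 * 2 + 1 = 9 slots: the
    // register class ID followed by four (value, subreg index) pairs. The
    // SmallVector's inline capacity below covers the 16-element maximum.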
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(N->getNumOperands() * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
    bool IsRegSeq = true;
    for (unsigned i = 0; i < N->getNumOperands(); i++) {
      // XXX: Why is this here?
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
    }
    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                CurDAG->getConstant(Imm >> 32, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
                                  N->getValueType(0), Ops);
  }

  case AMDGPUISD::REGISTER_LOAD: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;

    SelectADDRIndirect(N->getOperand(1), Addr, Offset);
    const SDValue Ops[] = {
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
                                  CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other),
                                  Ops);
  }
  case AMDGPUISD::REGISTER_STORE: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;
    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
    const SDValue Ops[] = {
      N->getOperand(1),
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
                                        CurDAG->getVTList(MVT::Other),
                                        Ops);
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version,
    // which has separate operands for the offset and width, the scalar version
    // packs the width and offset into a single operand. Try to move to the
    // scalar version if the offset and width are constant, so that we can try
    // to keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    // Pack the offset and width of the BFE into the format expected by
    // S_BFE_I32 / S_BFE_U32: in the second source operand, bits [5:0]
    // contain the offset and bits [22:16] the width.
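    // For example (illustrative values): an offset of 8 and a width of 16
    // pack to 8 | (16 << 16) == 0x00100008.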

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    uint32_t PackedVal = OffsetVal | WidthVal << 16;

    SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
    return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                                  SDLoc(N),
                                  MVT::i32,
                                  N->getOperand(0),
                                  PackedOffsetWidth);

  }
  }
  return SelectCode(N);
}


bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32)) {
      return true;
    }
  }
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load.
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  const Value *MemVal = N->getMemOperand()->getValue();
  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isCFDepth0() const {
  // FIXME: Figure out a way to use DominatorTree analysis here.
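  // As an approximation, treat only the function's entry block and its final
  // block as being at control flow depth 0; any other block is conservatively
  // assumed to be inside control flow.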
  const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
  const Function *Fn = FuncInfo->Fn;
  return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
}


const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
    SDValue& BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                           SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

      Base = Addr.getOperand(0);
      Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
      return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
            (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
  }

  return true;
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ++I) {

      SDNode *Node = I;

      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}