1//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXISelDAGToDAG.h"
15#include "llvm/IR/GlobalValue.h"
16#include "llvm/IR/Instructions.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Support/Debug.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/raw_ostream.h"
21#include "llvm/Target/TargetIntrinsicInfo.h"
22
23using namespace llvm;
24
25#define DEBUG_TYPE "nvptx-isel"
26
27unsigned FMAContractLevel = 0;
28
29static cl::opt<unsigned, true>
30FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
31                    cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
32                             " 1: do it  2: do it aggressively"),
33                    cl::location(FMAContractLevel),
34                    cl::init(2));
35
36static cl::opt<int> UsePrecDivF32(
37    "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
38    cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
39             " IEEE Compliant F32 div.rnd if avaiable."),
40    cl::init(2));
41
42static cl::opt<bool>
43UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
44          cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
45          cl::init(true));
46
47static cl::opt<bool>
48FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
49           cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
50           cl::init(false));
51
52
53/// createNVPTXISelDag - This pass converts a legalized DAG into a
54/// NVPTX-specific DAG, ready for instruction scheduling.
55FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
56                                       llvm::CodeGenOpt::Level OptLevel) {
57  return new NVPTXDAGToDAGISel(TM, OptLevel);
58}
59
60NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
61                                     CodeGenOpt::Level OptLevel)
62    : SelectionDAGISel(tm, OptLevel),
63      Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
64
65  doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
66  doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
67  doFMAF32AGG =
68      (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
69  doFMAF64AGG =
70      (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
71
72  allowFMA = (FMAContractLevel >= 1);
73
74  doMulWide = (OptLevel > 0);
75}
76
77int NVPTXDAGToDAGISel::getDivF32Level() const {
78  if (UsePrecDivF32.getNumOccurrences() > 0) {
79    // If nvptx-prec-div32=N is used on the command-line, always honor it
80    return UsePrecDivF32;
81  } else {
82    // Otherwise, use div.approx if fast math is enabled
83    if (TM.Options.UnsafeFPMath)
84      return 0;
85    else
86      return 2;
87  }
88}
89
90bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
91  if (UsePrecSqrtF32.getNumOccurrences() > 0) {
92    // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
93    return UsePrecSqrtF32;
94  } else {
95    // Otherwise, use sqrt.approx if fast math is enabled
96    if (TM.Options.UnsafeFPMath)
97      return false;
98    else
99      return true;
100  }
101}
102
103bool NVPTXDAGToDAGISel::useF32FTZ() const {
104  if (FtzEnabled.getNumOccurrences() > 0) {
105    // If nvptx-f32ftz is used on the command-line, always honor it
106    return FtzEnabled;
107  } else {
108    const Function *F = MF->getFunction();
109    // Otherwise, check for an nvptx-f32ftz attribute on the function
110    if (F->hasFnAttribute("nvptx-f32ftz"))
111      return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
112                                              "nvptx-f32ftz")
113                                              .getValueAsString() == "true");
114    else
115      return false;
116  }
117}
118
119/// Select - Select instructions not customized! Used for
120/// expanded, promoted and normal instructions.
121SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
122
123  if (N->isMachineOpcode()) {
124    N->setNodeId(-1);
125    return nullptr; // Already selected.
126  }
127
128  SDNode *ResNode = nullptr;
129  switch (N->getOpcode()) {
130  case ISD::LOAD:
131    ResNode = SelectLoad(N);
132    break;
133  case ISD::STORE:
134    ResNode = SelectStore(N);
135    break;
136  case NVPTXISD::LoadV2:
137  case NVPTXISD::LoadV4:
138    ResNode = SelectLoadVector(N);
139    break;
140  case NVPTXISD::LDGV2:
141  case NVPTXISD::LDGV4:
142  case NVPTXISD::LDUV2:
143  case NVPTXISD::LDUV4:
144    ResNode = SelectLDGLDU(N);
145    break;
146  case NVPTXISD::StoreV2:
147  case NVPTXISD::StoreV4:
148    ResNode = SelectStoreVector(N);
149    break;
150  case NVPTXISD::LoadParam:
151  case NVPTXISD::LoadParamV2:
152  case NVPTXISD::LoadParamV4:
153    ResNode = SelectLoadParam(N);
154    break;
155  case NVPTXISD::StoreRetval:
156  case NVPTXISD::StoreRetvalV2:
157  case NVPTXISD::StoreRetvalV4:
158    ResNode = SelectStoreRetval(N);
159    break;
160  case NVPTXISD::StoreParam:
161  case NVPTXISD::StoreParamV2:
162  case NVPTXISD::StoreParamV4:
163  case NVPTXISD::StoreParamS32:
164  case NVPTXISD::StoreParamU32:
165    ResNode = SelectStoreParam(N);
166    break;
167  case ISD::INTRINSIC_WO_CHAIN:
168    ResNode = SelectIntrinsicNoChain(N);
169    break;
170  case ISD::INTRINSIC_W_CHAIN:
171    ResNode = SelectIntrinsicChain(N);
172    break;
173  case NVPTXISD::Tex1DFloatI32:
174  case NVPTXISD::Tex1DFloatFloat:
175  case NVPTXISD::Tex1DFloatFloatLevel:
176  case NVPTXISD::Tex1DFloatFloatGrad:
177  case NVPTXISD::Tex1DI32I32:
178  case NVPTXISD::Tex1DI32Float:
179  case NVPTXISD::Tex1DI32FloatLevel:
180  case NVPTXISD::Tex1DI32FloatGrad:
181  case NVPTXISD::Tex1DArrayFloatI32:
182  case NVPTXISD::Tex1DArrayFloatFloat:
183  case NVPTXISD::Tex1DArrayFloatFloatLevel:
184  case NVPTXISD::Tex1DArrayFloatFloatGrad:
185  case NVPTXISD::Tex1DArrayI32I32:
186  case NVPTXISD::Tex1DArrayI32Float:
187  case NVPTXISD::Tex1DArrayI32FloatLevel:
188  case NVPTXISD::Tex1DArrayI32FloatGrad:
189  case NVPTXISD::Tex2DFloatI32:
190  case NVPTXISD::Tex2DFloatFloat:
191  case NVPTXISD::Tex2DFloatFloatLevel:
192  case NVPTXISD::Tex2DFloatFloatGrad:
193  case NVPTXISD::Tex2DI32I32:
194  case NVPTXISD::Tex2DI32Float:
195  case NVPTXISD::Tex2DI32FloatLevel:
196  case NVPTXISD::Tex2DI32FloatGrad:
197  case NVPTXISD::Tex2DArrayFloatI32:
198  case NVPTXISD::Tex2DArrayFloatFloat:
199  case NVPTXISD::Tex2DArrayFloatFloatLevel:
200  case NVPTXISD::Tex2DArrayFloatFloatGrad:
201  case NVPTXISD::Tex2DArrayI32I32:
202  case NVPTXISD::Tex2DArrayI32Float:
203  case NVPTXISD::Tex2DArrayI32FloatLevel:
204  case NVPTXISD::Tex2DArrayI32FloatGrad:
205  case NVPTXISD::Tex3DFloatI32:
206  case NVPTXISD::Tex3DFloatFloat:
207  case NVPTXISD::Tex3DFloatFloatLevel:
208  case NVPTXISD::Tex3DFloatFloatGrad:
209  case NVPTXISD::Tex3DI32I32:
210  case NVPTXISD::Tex3DI32Float:
211  case NVPTXISD::Tex3DI32FloatLevel:
212  case NVPTXISD::Tex3DI32FloatGrad:
213    ResNode = SelectTextureIntrinsic(N);
214    break;
215  case NVPTXISD::Suld1DI8Trap:
216  case NVPTXISD::Suld1DI16Trap:
217  case NVPTXISD::Suld1DI32Trap:
218  case NVPTXISD::Suld1DV2I8Trap:
219  case NVPTXISD::Suld1DV2I16Trap:
220  case NVPTXISD::Suld1DV2I32Trap:
221  case NVPTXISD::Suld1DV4I8Trap:
222  case NVPTXISD::Suld1DV4I16Trap:
223  case NVPTXISD::Suld1DV4I32Trap:
224  case NVPTXISD::Suld1DArrayI8Trap:
225  case NVPTXISD::Suld1DArrayI16Trap:
226  case NVPTXISD::Suld1DArrayI32Trap:
227  case NVPTXISD::Suld1DArrayV2I8Trap:
228  case NVPTXISD::Suld1DArrayV2I16Trap:
229  case NVPTXISD::Suld1DArrayV2I32Trap:
230  case NVPTXISD::Suld1DArrayV4I8Trap:
231  case NVPTXISD::Suld1DArrayV4I16Trap:
232  case NVPTXISD::Suld1DArrayV4I32Trap:
233  case NVPTXISD::Suld2DI8Trap:
234  case NVPTXISD::Suld2DI16Trap:
235  case NVPTXISD::Suld2DI32Trap:
236  case NVPTXISD::Suld2DV2I8Trap:
237  case NVPTXISD::Suld2DV2I16Trap:
238  case NVPTXISD::Suld2DV2I32Trap:
239  case NVPTXISD::Suld2DV4I8Trap:
240  case NVPTXISD::Suld2DV4I16Trap:
241  case NVPTXISD::Suld2DV4I32Trap:
242  case NVPTXISD::Suld2DArrayI8Trap:
243  case NVPTXISD::Suld2DArrayI16Trap:
244  case NVPTXISD::Suld2DArrayI32Trap:
245  case NVPTXISD::Suld2DArrayV2I8Trap:
246  case NVPTXISD::Suld2DArrayV2I16Trap:
247  case NVPTXISD::Suld2DArrayV2I32Trap:
248  case NVPTXISD::Suld2DArrayV4I8Trap:
249  case NVPTXISD::Suld2DArrayV4I16Trap:
250  case NVPTXISD::Suld2DArrayV4I32Trap:
251  case NVPTXISD::Suld3DI8Trap:
252  case NVPTXISD::Suld3DI16Trap:
253  case NVPTXISD::Suld3DI32Trap:
254  case NVPTXISD::Suld3DV2I8Trap:
255  case NVPTXISD::Suld3DV2I16Trap:
256  case NVPTXISD::Suld3DV2I32Trap:
257  case NVPTXISD::Suld3DV4I8Trap:
258  case NVPTXISD::Suld3DV4I16Trap:
259  case NVPTXISD::Suld3DV4I32Trap:
260    ResNode = SelectSurfaceIntrinsic(N);
261    break;
262  case ISD::AND:
263  case ISD::SRA:
264  case ISD::SRL:
265    // Try to select BFE
266    ResNode = SelectBFE(N);
267    break;
268  case ISD::ADDRSPACECAST:
269    ResNode = SelectAddrSpaceCast(N);
270    break;
271  default:
272    break;
273  }
274  if (ResNode)
275    return ResNode;
276  return SelectCode(N);
277}
278
279SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
280  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
281  switch (IID) {
282  default:
283    return NULL;
284  case Intrinsic::nvvm_ldg_global_f:
285  case Intrinsic::nvvm_ldg_global_i:
286  case Intrinsic::nvvm_ldg_global_p:
287  case Intrinsic::nvvm_ldu_global_f:
288  case Intrinsic::nvvm_ldu_global_i:
289  case Intrinsic::nvvm_ldu_global_p:
290    return SelectLDGLDU(N);
291  }
292}
293
294static unsigned int getCodeAddrSpace(MemSDNode *N,
295                                     const NVPTXSubtarget &Subtarget) {
296  const Value *Src = N->getMemOperand()->getValue();
297
298  if (!Src)
299    return NVPTX::PTXLdStInstCode::GENERIC;
300
301  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
302    switch (PT->getAddressSpace()) {
303    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
304    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
305    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
306    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
307    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
308    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
309    default: break;
310    }
311  }
312  return NVPTX::PTXLdStInstCode::GENERIC;
313}
314
315SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
316  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
317  switch (IID) {
318  default:
319    return nullptr;
320  case Intrinsic::nvvm_texsurf_handle_internal:
321    return SelectTexSurfHandle(N);
322  }
323}
324
325SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
326  // Op 0 is the intrinsic ID
327  SDValue Wrapper = N->getOperand(1);
328  SDValue GlobalVal = Wrapper.getOperand(0);
329  return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
330                                GlobalVal);
331}
332
333SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
334  SDValue Src = N->getOperand(0);
335  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
336  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
337  unsigned DstAddrSpace = CastN->getDestAddressSpace();
338
339  assert(SrcAddrSpace != DstAddrSpace &&
340         "addrspacecast must be between different address spaces");
341
342  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
343    // Specific to generic
344    unsigned Opc;
345    switch (SrcAddrSpace) {
346    default: report_fatal_error("Bad address space in addrspacecast");
347    case ADDRESS_SPACE_GLOBAL:
348      Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
349                                : NVPTX::cvta_global_yes;
350      break;
351    case ADDRESS_SPACE_SHARED:
352      Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
353                                : NVPTX::cvta_shared_yes;
354      break;
355    case ADDRESS_SPACE_CONST:
356      Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
357                                : NVPTX::cvta_const_yes;
358      break;
359    case ADDRESS_SPACE_LOCAL:
360      Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
361                                : NVPTX::cvta_local_yes;
362      break;
363    }
364    return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
365  } else {
366    // Generic to specific
367    if (SrcAddrSpace != 0)
368      report_fatal_error("Cannot cast between two non-generic address spaces");
369    unsigned Opc;
370    switch (DstAddrSpace) {
371    default: report_fatal_error("Bad address space in addrspacecast");
372    case ADDRESS_SPACE_GLOBAL:
373      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
374                                : NVPTX::cvta_to_global_yes;
375      break;
376    case ADDRESS_SPACE_SHARED:
377      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
378                                : NVPTX::cvta_to_shared_yes;
379      break;
380    case ADDRESS_SPACE_CONST:
381      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
382                                : NVPTX::cvta_to_const_yes;
383      break;
384    case ADDRESS_SPACE_LOCAL:
385      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
386                                : NVPTX::cvta_to_local_yes;
387      break;
388    }
389    return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
390  }
391}
392
393SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
394  SDLoc dl(N);
395  LoadSDNode *LD = cast<LoadSDNode>(N);
396  EVT LoadedVT = LD->getMemoryVT();
397  SDNode *NVPTXLD = nullptr;
398
399  // do not support pre/post inc/dec
400  if (LD->isIndexed())
401    return nullptr;
402
403  if (!LoadedVT.isSimple())
404    return nullptr;
405
406  // Address Space Setting
407  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
408
409  // Volatile Setting
410  // - .volatile is only availalble for .global and .shared
411  bool isVolatile = LD->isVolatile();
412  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
413      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
414      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
415    isVolatile = false;
416
417  // Vector Setting
418  MVT SimpleVT = LoadedVT.getSimpleVT();
419  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
420  if (SimpleVT.isVector()) {
421    unsigned num = SimpleVT.getVectorNumElements();
422    if (num == 2)
423      vecType = NVPTX::PTXLdStInstCode::V2;
424    else if (num == 4)
425      vecType = NVPTX::PTXLdStInstCode::V4;
426    else
427      return nullptr;
428  }
429
430  // Type Setting: fromType + fromTypeWidth
431  //
432  // Sign   : ISD::SEXTLOAD
433  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
434  //          type is integer
435  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
436  MVT ScalarVT = SimpleVT.getScalarType();
437  // Read at least 8 bits (predicates are stored as 8-bit values)
438  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
439  unsigned int fromType;
440  if ((LD->getExtensionType() == ISD::SEXTLOAD))
441    fromType = NVPTX::PTXLdStInstCode::Signed;
442  else if (ScalarVT.isFloatingPoint())
443    fromType = NVPTX::PTXLdStInstCode::Float;
444  else
445    fromType = NVPTX::PTXLdStInstCode::Unsigned;
446
447  // Create the machine instruction DAG
448  SDValue Chain = N->getOperand(0);
449  SDValue N1 = N->getOperand(1);
450  SDValue Addr;
451  SDValue Offset, Base;
452  unsigned Opcode;
453  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
454
455  if (SelectDirectAddr(N1, Addr)) {
456    switch (TargetVT) {
457    case MVT::i8:
458      Opcode = NVPTX::LD_i8_avar;
459      break;
460    case MVT::i16:
461      Opcode = NVPTX::LD_i16_avar;
462      break;
463    case MVT::i32:
464      Opcode = NVPTX::LD_i32_avar;
465      break;
466    case MVT::i64:
467      Opcode = NVPTX::LD_i64_avar;
468      break;
469    case MVT::f32:
470      Opcode = NVPTX::LD_f32_avar;
471      break;
472    case MVT::f64:
473      Opcode = NVPTX::LD_f64_avar;
474      break;
475    default:
476      return nullptr;
477    }
478    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
479                      getI32Imm(vecType), getI32Imm(fromType),
480                      getI32Imm(fromTypeWidth), Addr, Chain };
481    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
482  } else if (Subtarget.is64Bit()
483                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
484                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
485    switch (TargetVT) {
486    case MVT::i8:
487      Opcode = NVPTX::LD_i8_asi;
488      break;
489    case MVT::i16:
490      Opcode = NVPTX::LD_i16_asi;
491      break;
492    case MVT::i32:
493      Opcode = NVPTX::LD_i32_asi;
494      break;
495    case MVT::i64:
496      Opcode = NVPTX::LD_i64_asi;
497      break;
498    case MVT::f32:
499      Opcode = NVPTX::LD_f32_asi;
500      break;
501    case MVT::f64:
502      Opcode = NVPTX::LD_f64_asi;
503      break;
504    default:
505      return nullptr;
506    }
507    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
508                      getI32Imm(vecType), getI32Imm(fromType),
509                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
510    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
511  } else if (Subtarget.is64Bit()
512                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
513                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
514    if (Subtarget.is64Bit()) {
515      switch (TargetVT) {
516      case MVT::i8:
517        Opcode = NVPTX::LD_i8_ari_64;
518        break;
519      case MVT::i16:
520        Opcode = NVPTX::LD_i16_ari_64;
521        break;
522      case MVT::i32:
523        Opcode = NVPTX::LD_i32_ari_64;
524        break;
525      case MVT::i64:
526        Opcode = NVPTX::LD_i64_ari_64;
527        break;
528      case MVT::f32:
529        Opcode = NVPTX::LD_f32_ari_64;
530        break;
531      case MVT::f64:
532        Opcode = NVPTX::LD_f64_ari_64;
533        break;
534      default:
535        return nullptr;
536      }
537    } else {
538      switch (TargetVT) {
539      case MVT::i8:
540        Opcode = NVPTX::LD_i8_ari;
541        break;
542      case MVT::i16:
543        Opcode = NVPTX::LD_i16_ari;
544        break;
545      case MVT::i32:
546        Opcode = NVPTX::LD_i32_ari;
547        break;
548      case MVT::i64:
549        Opcode = NVPTX::LD_i64_ari;
550        break;
551      case MVT::f32:
552        Opcode = NVPTX::LD_f32_ari;
553        break;
554      case MVT::f64:
555        Opcode = NVPTX::LD_f64_ari;
556        break;
557      default:
558        return nullptr;
559      }
560    }
561    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
562                      getI32Imm(vecType), getI32Imm(fromType),
563                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
564    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
565  } else {
566    if (Subtarget.is64Bit()) {
567      switch (TargetVT) {
568      case MVT::i8:
569        Opcode = NVPTX::LD_i8_areg_64;
570        break;
571      case MVT::i16:
572        Opcode = NVPTX::LD_i16_areg_64;
573        break;
574      case MVT::i32:
575        Opcode = NVPTX::LD_i32_areg_64;
576        break;
577      case MVT::i64:
578        Opcode = NVPTX::LD_i64_areg_64;
579        break;
580      case MVT::f32:
581        Opcode = NVPTX::LD_f32_areg_64;
582        break;
583      case MVT::f64:
584        Opcode = NVPTX::LD_f64_areg_64;
585        break;
586      default:
587        return nullptr;
588      }
589    } else {
590      switch (TargetVT) {
591      case MVT::i8:
592        Opcode = NVPTX::LD_i8_areg;
593        break;
594      case MVT::i16:
595        Opcode = NVPTX::LD_i16_areg;
596        break;
597      case MVT::i32:
598        Opcode = NVPTX::LD_i32_areg;
599        break;
600      case MVT::i64:
601        Opcode = NVPTX::LD_i64_areg;
602        break;
603      case MVT::f32:
604        Opcode = NVPTX::LD_f32_areg;
605        break;
606      case MVT::f64:
607        Opcode = NVPTX::LD_f64_areg;
608        break;
609      default:
610        return nullptr;
611      }
612    }
613    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
614                      getI32Imm(vecType), getI32Imm(fromType),
615                      getI32Imm(fromTypeWidth), N1, Chain };
616    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
617  }
618
619  if (NVPTXLD) {
620    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
621    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
622    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
623  }
624
625  return NVPTXLD;
626}
627
628SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
629
630  SDValue Chain = N->getOperand(0);
631  SDValue Op1 = N->getOperand(1);
632  SDValue Addr, Offset, Base;
633  unsigned Opcode;
634  SDLoc DL(N);
635  SDNode *LD;
636  MemSDNode *MemSD = cast<MemSDNode>(N);
637  EVT LoadedVT = MemSD->getMemoryVT();
638
639  if (!LoadedVT.isSimple())
640    return nullptr;
641
642  // Address Space Setting
643  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
644
645  // Volatile Setting
646  // - .volatile is only availalble for .global and .shared
647  bool IsVolatile = MemSD->isVolatile();
648  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
649      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
650      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
651    IsVolatile = false;
652
653  // Vector Setting
654  MVT SimpleVT = LoadedVT.getSimpleVT();
655
656  // Type Setting: fromType + fromTypeWidth
657  //
658  // Sign   : ISD::SEXTLOAD
659  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
660  //          type is integer
661  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
662  MVT ScalarVT = SimpleVT.getScalarType();
663  // Read at least 8 bits (predicates are stored as 8-bit values)
664  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
665  unsigned int FromType;
666  // The last operand holds the original LoadSDNode::getExtensionType() value
667  unsigned ExtensionType = cast<ConstantSDNode>(
668      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
669  if (ExtensionType == ISD::SEXTLOAD)
670    FromType = NVPTX::PTXLdStInstCode::Signed;
671  else if (ScalarVT.isFloatingPoint())
672    FromType = NVPTX::PTXLdStInstCode::Float;
673  else
674    FromType = NVPTX::PTXLdStInstCode::Unsigned;
675
676  unsigned VecType;
677
678  switch (N->getOpcode()) {
679  case NVPTXISD::LoadV2:
680    VecType = NVPTX::PTXLdStInstCode::V2;
681    break;
682  case NVPTXISD::LoadV4:
683    VecType = NVPTX::PTXLdStInstCode::V4;
684    break;
685  default:
686    return nullptr;
687  }
688
689  EVT EltVT = N->getValueType(0);
690
691  if (SelectDirectAddr(Op1, Addr)) {
692    switch (N->getOpcode()) {
693    default:
694      return nullptr;
695    case NVPTXISD::LoadV2:
696      switch (EltVT.getSimpleVT().SimpleTy) {
697      default:
698        return nullptr;
699      case MVT::i8:
700        Opcode = NVPTX::LDV_i8_v2_avar;
701        break;
702      case MVT::i16:
703        Opcode = NVPTX::LDV_i16_v2_avar;
704        break;
705      case MVT::i32:
706        Opcode = NVPTX::LDV_i32_v2_avar;
707        break;
708      case MVT::i64:
709        Opcode = NVPTX::LDV_i64_v2_avar;
710        break;
711      case MVT::f32:
712        Opcode = NVPTX::LDV_f32_v2_avar;
713        break;
714      case MVT::f64:
715        Opcode = NVPTX::LDV_f64_v2_avar;
716        break;
717      }
718      break;
719    case NVPTXISD::LoadV4:
720      switch (EltVT.getSimpleVT().SimpleTy) {
721      default:
722        return nullptr;
723      case MVT::i8:
724        Opcode = NVPTX::LDV_i8_v4_avar;
725        break;
726      case MVT::i16:
727        Opcode = NVPTX::LDV_i16_v4_avar;
728        break;
729      case MVT::i32:
730        Opcode = NVPTX::LDV_i32_v4_avar;
731        break;
732      case MVT::f32:
733        Opcode = NVPTX::LDV_f32_v4_avar;
734        break;
735      }
736      break;
737    }
738
739    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
740                      getI32Imm(VecType), getI32Imm(FromType),
741                      getI32Imm(FromTypeWidth), Addr, Chain };
742    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
743  } else if (Subtarget.is64Bit()
744                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
745                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
746    switch (N->getOpcode()) {
747    default:
748      return nullptr;
749    case NVPTXISD::LoadV2:
750      switch (EltVT.getSimpleVT().SimpleTy) {
751      default:
752        return nullptr;
753      case MVT::i8:
754        Opcode = NVPTX::LDV_i8_v2_asi;
755        break;
756      case MVT::i16:
757        Opcode = NVPTX::LDV_i16_v2_asi;
758        break;
759      case MVT::i32:
760        Opcode = NVPTX::LDV_i32_v2_asi;
761        break;
762      case MVT::i64:
763        Opcode = NVPTX::LDV_i64_v2_asi;
764        break;
765      case MVT::f32:
766        Opcode = NVPTX::LDV_f32_v2_asi;
767        break;
768      case MVT::f64:
769        Opcode = NVPTX::LDV_f64_v2_asi;
770        break;
771      }
772      break;
773    case NVPTXISD::LoadV4:
774      switch (EltVT.getSimpleVT().SimpleTy) {
775      default:
776        return nullptr;
777      case MVT::i8:
778        Opcode = NVPTX::LDV_i8_v4_asi;
779        break;
780      case MVT::i16:
781        Opcode = NVPTX::LDV_i16_v4_asi;
782        break;
783      case MVT::i32:
784        Opcode = NVPTX::LDV_i32_v4_asi;
785        break;
786      case MVT::f32:
787        Opcode = NVPTX::LDV_f32_v4_asi;
788        break;
789      }
790      break;
791    }
792
793    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
794                      getI32Imm(VecType), getI32Imm(FromType),
795                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
796    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
797  } else if (Subtarget.is64Bit()
798                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
799                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
800    if (Subtarget.is64Bit()) {
801      switch (N->getOpcode()) {
802      default:
803        return nullptr;
804      case NVPTXISD::LoadV2:
805        switch (EltVT.getSimpleVT().SimpleTy) {
806        default:
807          return nullptr;
808        case MVT::i8:
809          Opcode = NVPTX::LDV_i8_v2_ari_64;
810          break;
811        case MVT::i16:
812          Opcode = NVPTX::LDV_i16_v2_ari_64;
813          break;
814        case MVT::i32:
815          Opcode = NVPTX::LDV_i32_v2_ari_64;
816          break;
817        case MVT::i64:
818          Opcode = NVPTX::LDV_i64_v2_ari_64;
819          break;
820        case MVT::f32:
821          Opcode = NVPTX::LDV_f32_v2_ari_64;
822          break;
823        case MVT::f64:
824          Opcode = NVPTX::LDV_f64_v2_ari_64;
825          break;
826        }
827        break;
828      case NVPTXISD::LoadV4:
829        switch (EltVT.getSimpleVT().SimpleTy) {
830        default:
831          return nullptr;
832        case MVT::i8:
833          Opcode = NVPTX::LDV_i8_v4_ari_64;
834          break;
835        case MVT::i16:
836          Opcode = NVPTX::LDV_i16_v4_ari_64;
837          break;
838        case MVT::i32:
839          Opcode = NVPTX::LDV_i32_v4_ari_64;
840          break;
841        case MVT::f32:
842          Opcode = NVPTX::LDV_f32_v4_ari_64;
843          break;
844        }
845        break;
846      }
847    } else {
848      switch (N->getOpcode()) {
849      default:
850        return nullptr;
851      case NVPTXISD::LoadV2:
852        switch (EltVT.getSimpleVT().SimpleTy) {
853        default:
854          return nullptr;
855        case MVT::i8:
856          Opcode = NVPTX::LDV_i8_v2_ari;
857          break;
858        case MVT::i16:
859          Opcode = NVPTX::LDV_i16_v2_ari;
860          break;
861        case MVT::i32:
862          Opcode = NVPTX::LDV_i32_v2_ari;
863          break;
864        case MVT::i64:
865          Opcode = NVPTX::LDV_i64_v2_ari;
866          break;
867        case MVT::f32:
868          Opcode = NVPTX::LDV_f32_v2_ari;
869          break;
870        case MVT::f64:
871          Opcode = NVPTX::LDV_f64_v2_ari;
872          break;
873        }
874        break;
875      case NVPTXISD::LoadV4:
876        switch (EltVT.getSimpleVT().SimpleTy) {
877        default:
878          return nullptr;
879        case MVT::i8:
880          Opcode = NVPTX::LDV_i8_v4_ari;
881          break;
882        case MVT::i16:
883          Opcode = NVPTX::LDV_i16_v4_ari;
884          break;
885        case MVT::i32:
886          Opcode = NVPTX::LDV_i32_v4_ari;
887          break;
888        case MVT::f32:
889          Opcode = NVPTX::LDV_f32_v4_ari;
890          break;
891        }
892        break;
893      }
894    }
895
896    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
897                      getI32Imm(VecType), getI32Imm(FromType),
898                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
899
900    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
901  } else {
902    if (Subtarget.is64Bit()) {
903      switch (N->getOpcode()) {
904      default:
905        return nullptr;
906      case NVPTXISD::LoadV2:
907        switch (EltVT.getSimpleVT().SimpleTy) {
908        default:
909          return nullptr;
910        case MVT::i8:
911          Opcode = NVPTX::LDV_i8_v2_areg_64;
912          break;
913        case MVT::i16:
914          Opcode = NVPTX::LDV_i16_v2_areg_64;
915          break;
916        case MVT::i32:
917          Opcode = NVPTX::LDV_i32_v2_areg_64;
918          break;
919        case MVT::i64:
920          Opcode = NVPTX::LDV_i64_v2_areg_64;
921          break;
922        case MVT::f32:
923          Opcode = NVPTX::LDV_f32_v2_areg_64;
924          break;
925        case MVT::f64:
926          Opcode = NVPTX::LDV_f64_v2_areg_64;
927          break;
928        }
929        break;
930      case NVPTXISD::LoadV4:
931        switch (EltVT.getSimpleVT().SimpleTy) {
932        default:
933          return nullptr;
934        case MVT::i8:
935          Opcode = NVPTX::LDV_i8_v4_areg_64;
936          break;
937        case MVT::i16:
938          Opcode = NVPTX::LDV_i16_v4_areg_64;
939          break;
940        case MVT::i32:
941          Opcode = NVPTX::LDV_i32_v4_areg_64;
942          break;
943        case MVT::f32:
944          Opcode = NVPTX::LDV_f32_v4_areg_64;
945          break;
946        }
947        break;
948      }
949    } else {
950      switch (N->getOpcode()) {
951      default:
952        return nullptr;
953      case NVPTXISD::LoadV2:
954        switch (EltVT.getSimpleVT().SimpleTy) {
955        default:
956          return nullptr;
957        case MVT::i8:
958          Opcode = NVPTX::LDV_i8_v2_areg;
959          break;
960        case MVT::i16:
961          Opcode = NVPTX::LDV_i16_v2_areg;
962          break;
963        case MVT::i32:
964          Opcode = NVPTX::LDV_i32_v2_areg;
965          break;
966        case MVT::i64:
967          Opcode = NVPTX::LDV_i64_v2_areg;
968          break;
969        case MVT::f32:
970          Opcode = NVPTX::LDV_f32_v2_areg;
971          break;
972        case MVT::f64:
973          Opcode = NVPTX::LDV_f64_v2_areg;
974          break;
975        }
976        break;
977      case NVPTXISD::LoadV4:
978        switch (EltVT.getSimpleVT().SimpleTy) {
979        default:
980          return nullptr;
981        case MVT::i8:
982          Opcode = NVPTX::LDV_i8_v4_areg;
983          break;
984        case MVT::i16:
985          Opcode = NVPTX::LDV_i16_v4_areg;
986          break;
987        case MVT::i32:
988          Opcode = NVPTX::LDV_i32_v4_areg;
989          break;
990        case MVT::f32:
991          Opcode = NVPTX::LDV_f32_v4_areg;
992          break;
993        }
994        break;
995      }
996    }
997
998    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
999                      getI32Imm(VecType), getI32Imm(FromType),
1000                      getI32Imm(FromTypeWidth), Op1, Chain };
1001    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1002  }
1003
1004  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1005  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1006  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1007
1008  return LD;
1009}
1010
1011SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1012
1013  SDValue Chain = N->getOperand(0);
1014  SDValue Op1;
1015  MemSDNode *Mem;
1016  bool IsLDG = true;
1017
1018  // If this is an LDG intrinsic, the address is the third operand. Its its an
1019  // LDG/LDU SD node (from custom vector handling), then its the second operand
1020  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1021    Op1 = N->getOperand(2);
1022    Mem = cast<MemIntrinsicSDNode>(N);
1023    unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1024    switch (IID) {
1025    default:
1026      return NULL;
1027    case Intrinsic::nvvm_ldg_global_f:
1028    case Intrinsic::nvvm_ldg_global_i:
1029    case Intrinsic::nvvm_ldg_global_p:
1030      IsLDG = true;
1031      break;
1032    case Intrinsic::nvvm_ldu_global_f:
1033    case Intrinsic::nvvm_ldu_global_i:
1034    case Intrinsic::nvvm_ldu_global_p:
1035      IsLDG = false;
1036      break;
1037    }
1038  } else {
1039    Op1 = N->getOperand(1);
1040    Mem = cast<MemSDNode>(N);
1041  }
1042
1043  unsigned Opcode;
1044  SDLoc DL(N);
1045  SDNode *LD;
1046  SDValue Base, Offset, Addr;
1047
1048  EVT EltVT = Mem->getMemoryVT();
1049  if (EltVT.isVector()) {
1050    EltVT = EltVT.getVectorElementType();
1051  }
1052
1053  if (SelectDirectAddr(Op1, Addr)) {
1054    switch (N->getOpcode()) {
1055    default:
1056      return nullptr;
1057    case ISD::INTRINSIC_W_CHAIN:
1058      if (IsLDG) {
1059        switch (EltVT.getSimpleVT().SimpleTy) {
1060        default:
1061          return nullptr;
1062        case MVT::i8:
1063          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1064          break;
1065        case MVT::i16:
1066          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1067          break;
1068        case MVT::i32:
1069          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1070          break;
1071        case MVT::i64:
1072          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1073          break;
1074        case MVT::f32:
1075          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1076          break;
1077        case MVT::f64:
1078          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1079          break;
1080        }
1081      } else {
1082        switch (EltVT.getSimpleVT().SimpleTy) {
1083        default:
1084          return nullptr;
1085        case MVT::i8:
1086          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1087          break;
1088        case MVT::i16:
1089          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1090          break;
1091        case MVT::i32:
1092          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1093          break;
1094        case MVT::i64:
1095          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1096          break;
1097        case MVT::f32:
1098          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1099          break;
1100        case MVT::f64:
1101          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1102          break;
1103        }
1104      }
1105      break;
1106    case NVPTXISD::LDGV2:
1107      switch (EltVT.getSimpleVT().SimpleTy) {
1108      default:
1109        return nullptr;
1110      case MVT::i8:
1111        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1112        break;
1113      case MVT::i16:
1114        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1115        break;
1116      case MVT::i32:
1117        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1118        break;
1119      case MVT::i64:
1120        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1121        break;
1122      case MVT::f32:
1123        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1124        break;
1125      case MVT::f64:
1126        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1127        break;
1128      }
1129      break;
1130    case NVPTXISD::LDUV2:
1131      switch (EltVT.getSimpleVT().SimpleTy) {
1132      default:
1133        return nullptr;
1134      case MVT::i8:
1135        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1136        break;
1137      case MVT::i16:
1138        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1139        break;
1140      case MVT::i32:
1141        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1142        break;
1143      case MVT::i64:
1144        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1145        break;
1146      case MVT::f32:
1147        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1148        break;
1149      case MVT::f64:
1150        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1151        break;
1152      }
1153      break;
1154    case NVPTXISD::LDGV4:
1155      switch (EltVT.getSimpleVT().SimpleTy) {
1156      default:
1157        return nullptr;
1158      case MVT::i8:
1159        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1160        break;
1161      case MVT::i16:
1162        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1163        break;
1164      case MVT::i32:
1165        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1166        break;
1167      case MVT::f32:
1168        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1169        break;
1170      }
1171      break;
1172    case NVPTXISD::LDUV4:
1173      switch (EltVT.getSimpleVT().SimpleTy) {
1174      default:
1175        return nullptr;
1176      case MVT::i8:
1177        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1178        break;
1179      case MVT::i16:
1180        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1181        break;
1182      case MVT::i32:
1183        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1184        break;
1185      case MVT::f32:
1186        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1187        break;
1188      }
1189      break;
1190    }
1191
1192    SDValue Ops[] = { Addr, Chain };
1193    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1194  } else if (Subtarget.is64Bit()
1195                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1196                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1197    if (Subtarget.is64Bit()) {
1198      switch (N->getOpcode()) {
1199      default:
1200        return nullptr;
1201      case ISD::INTRINSIC_W_CHAIN:
1202        if (IsLDG) {
1203          switch (EltVT.getSimpleVT().SimpleTy) {
1204          default:
1205            return nullptr;
1206          case MVT::i8:
1207            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1208            break;
1209          case MVT::i16:
1210            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1211            break;
1212          case MVT::i32:
1213            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1214            break;
1215          case MVT::i64:
1216            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1217            break;
1218          case MVT::f32:
1219            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1220            break;
1221          case MVT::f64:
1222            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1223            break;
1224          }
1225        } else {
1226          switch (EltVT.getSimpleVT().SimpleTy) {
1227          default:
1228            return nullptr;
1229          case MVT::i8:
1230            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1231            break;
1232          case MVT::i16:
1233            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1234            break;
1235          case MVT::i32:
1236            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1237            break;
1238          case MVT::i64:
1239            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1240            break;
1241          case MVT::f32:
1242            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1243            break;
1244          case MVT::f64:
1245            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1246            break;
1247          }
1248        }
1249        break;
1250      case NVPTXISD::LDGV2:
1251        switch (EltVT.getSimpleVT().SimpleTy) {
1252        default:
1253          return nullptr;
1254        case MVT::i8:
1255          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1256          break;
1257        case MVT::i16:
1258          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1259          break;
1260        case MVT::i32:
1261          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1262          break;
1263        case MVT::i64:
1264          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1265          break;
1266        case MVT::f32:
1267          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1268          break;
1269        case MVT::f64:
1270          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1271          break;
1272        }
1273        break;
1274      case NVPTXISD::LDUV2:
1275        switch (EltVT.getSimpleVT().SimpleTy) {
1276        default:
1277          return nullptr;
1278        case MVT::i8:
1279          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1280          break;
1281        case MVT::i16:
1282          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1283          break;
1284        case MVT::i32:
1285          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1286          break;
1287        case MVT::i64:
1288          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1289          break;
1290        case MVT::f32:
1291          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1292          break;
1293        case MVT::f64:
1294          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1295          break;
1296        }
1297        break;
1298      case NVPTXISD::LDGV4:
1299        switch (EltVT.getSimpleVT().SimpleTy) {
1300        default:
1301          return nullptr;
1302        case MVT::i8:
1303          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1304          break;
1305        case MVT::i16:
1306          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1307          break;
1308        case MVT::i32:
1309          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1310          break;
1311        case MVT::f32:
1312          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1313          break;
1314        }
1315        break;
1316      case NVPTXISD::LDUV4:
1317        switch (EltVT.getSimpleVT().SimpleTy) {
1318        default:
1319          return nullptr;
1320        case MVT::i8:
1321          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1322          break;
1323        case MVT::i16:
1324          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1325          break;
1326        case MVT::i32:
1327          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1328          break;
1329        case MVT::f32:
1330          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1331          break;
1332        }
1333        break;
1334      }
1335    } else {
1336      switch (N->getOpcode()) {
1337      default:
1338        return nullptr;
1339      case ISD::INTRINSIC_W_CHAIN:
1340        if (IsLDG) {
1341          switch (EltVT.getSimpleVT().SimpleTy) {
1342          default:
1343            return nullptr;
1344          case MVT::i8:
1345            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1346            break;
1347          case MVT::i16:
1348            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1349            break;
1350          case MVT::i32:
1351            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1352            break;
1353          case MVT::i64:
1354            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1355            break;
1356          case MVT::f32:
1357            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1358            break;
1359          case MVT::f64:
1360            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1361            break;
1362          }
1363        } else {
1364          switch (EltVT.getSimpleVT().SimpleTy) {
1365          default:
1366            return nullptr;
1367          case MVT::i8:
1368            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1369            break;
1370          case MVT::i16:
1371            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1372            break;
1373          case MVT::i32:
1374            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1375            break;
1376          case MVT::i64:
1377            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1378            break;
1379          case MVT::f32:
1380            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1381            break;
1382          case MVT::f64:
1383            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1384            break;
1385          }
1386        }
1387        break;
1388      case NVPTXISD::LDGV2:
1389        switch (EltVT.getSimpleVT().SimpleTy) {
1390        default:
1391          return nullptr;
1392        case MVT::i8:
1393          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1394          break;
1395        case MVT::i16:
1396          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1397          break;
1398        case MVT::i32:
1399          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1400          break;
1401        case MVT::i64:
1402          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1403          break;
1404        case MVT::f32:
1405          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1406          break;
1407        case MVT::f64:
1408          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1409          break;
1410        }
1411        break;
1412      case NVPTXISD::LDUV2:
1413        switch (EltVT.getSimpleVT().SimpleTy) {
1414        default:
1415          return nullptr;
1416        case MVT::i8:
1417          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1418          break;
1419        case MVT::i16:
1420          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1421          break;
1422        case MVT::i32:
1423          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1424          break;
1425        case MVT::i64:
1426          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1427          break;
1428        case MVT::f32:
1429          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1430          break;
1431        case MVT::f64:
1432          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1433          break;
1434        }
1435        break;
1436      case NVPTXISD::LDGV4:
1437        switch (EltVT.getSimpleVT().SimpleTy) {
1438        default:
1439          return nullptr;
1440        case MVT::i8:
1441          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1442          break;
1443        case MVT::i16:
1444          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1445          break;
1446        case MVT::i32:
1447          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1448          break;
1449        case MVT::f32:
1450          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1451          break;
1452        }
1453        break;
1454      case NVPTXISD::LDUV4:
1455        switch (EltVT.getSimpleVT().SimpleTy) {
1456        default:
1457          return nullptr;
1458        case MVT::i8:
1459          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1460          break;
1461        case MVT::i16:
1462          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1463          break;
1464        case MVT::i32:
1465          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1466          break;
1467        case MVT::f32:
1468          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1469          break;
1470        }
1471        break;
1472      }
1473    }
1474
1475    SDValue Ops[] = { Base, Offset, Chain };
1476
1477    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1478  } else {
1479    if (Subtarget.is64Bit()) {
1480      switch (N->getOpcode()) {
1481      default:
1482        return nullptr;
1483      case ISD::INTRINSIC_W_CHAIN:
1484        if (IsLDG) {
1485          switch (EltVT.getSimpleVT().SimpleTy) {
1486          default:
1487            return nullptr;
1488          case MVT::i8:
1489            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1490            break;
1491          case MVT::i16:
1492            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1493            break;
1494          case MVT::i32:
1495            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1496            break;
1497          case MVT::i64:
1498            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1499            break;
1500          case MVT::f32:
1501            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1502            break;
1503          case MVT::f64:
1504            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1505            break;
1506          }
1507        } else {
1508          switch (EltVT.getSimpleVT().SimpleTy) {
1509          default:
1510            return nullptr;
1511          case MVT::i8:
1512            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1513            break;
1514          case MVT::i16:
1515            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1516            break;
1517          case MVT::i32:
1518            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1519            break;
1520          case MVT::i64:
1521            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1522            break;
1523          case MVT::f32:
1524            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1525            break;
1526          case MVT::f64:
1527            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1528            break;
1529          }
1530        }
1531        break;
1532      case NVPTXISD::LDGV2:
1533        switch (EltVT.getSimpleVT().SimpleTy) {
1534        default:
1535          return nullptr;
1536        case MVT::i8:
1537          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1538          break;
1539        case MVT::i16:
1540          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1541          break;
1542        case MVT::i32:
1543          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1544          break;
1545        case MVT::i64:
1546          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1547          break;
1548        case MVT::f32:
1549          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1550          break;
1551        case MVT::f64:
1552          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1553          break;
1554        }
1555        break;
1556      case NVPTXISD::LDUV2:
1557        switch (EltVT.getSimpleVT().SimpleTy) {
1558        default:
1559          return nullptr;
1560        case MVT::i8:
1561          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1562          break;
1563        case MVT::i16:
1564          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1565          break;
1566        case MVT::i32:
1567          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1568          break;
1569        case MVT::i64:
1570          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1571          break;
1572        case MVT::f32:
1573          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1574          break;
1575        case MVT::f64:
1576          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1577          break;
1578        }
1579        break;
1580      case NVPTXISD::LDGV4:
1581        switch (EltVT.getSimpleVT().SimpleTy) {
1582        default:
1583          return nullptr;
1584        case MVT::i8:
1585          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1586          break;
1587        case MVT::i16:
1588          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1589          break;
1590        case MVT::i32:
1591          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1592          break;
1593        case MVT::f32:
1594          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1595          break;
1596        }
1597        break;
1598      case NVPTXISD::LDUV4:
1599        switch (EltVT.getSimpleVT().SimpleTy) {
1600        default:
1601          return nullptr;
1602        case MVT::i8:
1603          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1604          break;
1605        case MVT::i16:
1606          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1607          break;
1608        case MVT::i32:
1609          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1610          break;
1611        case MVT::f32:
1612          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1613          break;
1614        }
1615        break;
1616      }
1617    } else {
1618      switch (N->getOpcode()) {
1619      default:
1620        return nullptr;
1621      case ISD::INTRINSIC_W_CHAIN:
1622        if (IsLDG) {
1623          switch (EltVT.getSimpleVT().SimpleTy) {
1624          default:
1625            return nullptr;
1626          case MVT::i8:
1627            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1628            break;
1629          case MVT::i16:
1630            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1631            break;
1632          case MVT::i32:
1633            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1634            break;
1635          case MVT::i64:
1636            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1637            break;
1638          case MVT::f32:
1639            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1640            break;
1641          case MVT::f64:
1642            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1643            break;
1644          }
1645        } else {
1646          switch (EltVT.getSimpleVT().SimpleTy) {
1647          default:
1648            return nullptr;
1649          case MVT::i8:
1650            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1651            break;
1652          case MVT::i16:
1653            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1654            break;
1655          case MVT::i32:
1656            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1657            break;
1658          case MVT::i64:
1659            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1660            break;
1661          case MVT::f32:
1662            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1663            break;
1664          case MVT::f64:
1665            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1666            break;
1667          }
1668        }
1669        break;
1670      case NVPTXISD::LDGV2:
1671        switch (EltVT.getSimpleVT().SimpleTy) {
1672        default:
1673          return nullptr;
1674        case MVT::i8:
1675          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1676          break;
1677        case MVT::i16:
1678          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1679          break;
1680        case MVT::i32:
1681          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1682          break;
1683        case MVT::i64:
1684          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1685          break;
1686        case MVT::f32:
1687          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1688          break;
1689        case MVT::f64:
1690          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1691          break;
1692        }
1693        break;
1694      case NVPTXISD::LDUV2:
1695        switch (EltVT.getSimpleVT().SimpleTy) {
1696        default:
1697          return nullptr;
1698        case MVT::i8:
1699          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1700          break;
1701        case MVT::i16:
1702          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1703          break;
1704        case MVT::i32:
1705          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1706          break;
1707        case MVT::i64:
1708          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1709          break;
1710        case MVT::f32:
1711          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1712          break;
1713        case MVT::f64:
1714          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1715          break;
1716        }
1717        break;
1718      case NVPTXISD::LDGV4:
1719        switch (EltVT.getSimpleVT().SimpleTy) {
1720        default:
1721          return nullptr;
1722        case MVT::i8:
1723          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1724          break;
1725        case MVT::i16:
1726          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1727          break;
1728        case MVT::i32:
1729          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1730          break;
1731        case MVT::f32:
1732          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1733          break;
1734        }
1735        break;
1736      case NVPTXISD::LDUV4:
1737        switch (EltVT.getSimpleVT().SimpleTy) {
1738        default:
1739          return nullptr;
1740        case MVT::i8:
1741          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1742          break;
1743        case MVT::i16:
1744          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1745          break;
1746        case MVT::i32:
1747          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1748          break;
1749        case MVT::f32:
1750          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1751          break;
1752        }
1753        break;
1754      }
1755    }
1756
1757    SDValue Ops[] = { Op1, Chain };
1758    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1759  }
1760
1761  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1762  MemRefs0[0] = Mem->getMemOperand();
1763  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1764
1765  return LD;
1766}
1767
1768SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
1769  SDLoc dl(N);
1770  StoreSDNode *ST = cast<StoreSDNode>(N);
1771  EVT StoreVT = ST->getMemoryVT();
1772  SDNode *NVPTXST = nullptr;
1773
1774  // do not support pre/post inc/dec
1775  if (ST->isIndexed())
1776    return nullptr;
1777
1778  if (!StoreVT.isSimple())
1779    return nullptr;
1780
1781  // Address Space Setting
1782  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
1783
1784  // Volatile Setting
  // - .volatile is only available for .global, .shared and generic
1786  bool isVolatile = ST->isVolatile();
1787  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1788      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1789      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1790    isVolatile = false;
1791
1792  // Vector Setting
1793  MVT SimpleVT = StoreVT.getSimpleVT();
1794  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1795  if (SimpleVT.isVector()) {
1796    unsigned num = SimpleVT.getVectorNumElements();
1797    if (num == 2)
1798      vecType = NVPTX::PTXLdStInstCode::V2;
1799    else if (num == 4)
1800      vecType = NVPTX::PTXLdStInstCode::V4;
1801    else
1802      return nullptr;
1803  }
1804
1805  // Type Setting: toType + toTypeWidth
1806  // - for integer type, always use 'u'
1807  //
1808  MVT ScalarVT = SimpleVT.getScalarType();
1809  unsigned toTypeWidth = ScalarVT.getSizeInBits();
1810  unsigned int toType;
1811  if (ScalarVT.isFloatingPoint())
1812    toType = NVPTX::PTXLdStInstCode::Float;
1813  else
1814    toType = NVPTX::PTXLdStInstCode::Unsigned;
1815
1816  // Create the machine instruction DAG
1817  SDValue Chain = N->getOperand(0);
1818  SDValue N1 = N->getOperand(1);
1819  SDValue N2 = N->getOperand(2);
1820  SDValue Addr;
1821  SDValue Offset, Base;
1822  unsigned Opcode;
1823  MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
1824
1825  if (SelectDirectAddr(N2, Addr)) {
1826    switch (SourceVT) {
1827    case MVT::i8:
1828      Opcode = NVPTX::ST_i8_avar;
1829      break;
1830    case MVT::i16:
1831      Opcode = NVPTX::ST_i16_avar;
1832      break;
1833    case MVT::i32:
1834      Opcode = NVPTX::ST_i32_avar;
1835      break;
1836    case MVT::i64:
1837      Opcode = NVPTX::ST_i64_avar;
1838      break;
1839    case MVT::f32:
1840      Opcode = NVPTX::ST_f32_avar;
1841      break;
1842    case MVT::f64:
1843      Opcode = NVPTX::ST_f64_avar;
1844      break;
1845    default:
1846      return nullptr;
1847    }
1848    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1849                      getI32Imm(vecType), getI32Imm(toType),
1850                      getI32Imm(toTypeWidth), Addr, Chain };
1851    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1852  } else if (Subtarget.is64Bit()
1853                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1854                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1855    switch (SourceVT) {
1856    case MVT::i8:
1857      Opcode = NVPTX::ST_i8_asi;
1858      break;
1859    case MVT::i16:
1860      Opcode = NVPTX::ST_i16_asi;
1861      break;
1862    case MVT::i32:
1863      Opcode = NVPTX::ST_i32_asi;
1864      break;
1865    case MVT::i64:
1866      Opcode = NVPTX::ST_i64_asi;
1867      break;
1868    case MVT::f32:
1869      Opcode = NVPTX::ST_f32_asi;
1870      break;
1871    case MVT::f64:
1872      Opcode = NVPTX::ST_f64_asi;
1873      break;
1874    default:
1875      return nullptr;
1876    }
1877    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1878                      getI32Imm(vecType), getI32Imm(toType),
1879                      getI32Imm(toTypeWidth), Base, Offset, Chain };
1880    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1881  } else if (Subtarget.is64Bit()
1882                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1883                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1884    if (Subtarget.is64Bit()) {
1885      switch (SourceVT) {
1886      case MVT::i8:
1887        Opcode = NVPTX::ST_i8_ari_64;
1888        break;
1889      case MVT::i16:
1890        Opcode = NVPTX::ST_i16_ari_64;
1891        break;
1892      case MVT::i32:
1893        Opcode = NVPTX::ST_i32_ari_64;
1894        break;
1895      case MVT::i64:
1896        Opcode = NVPTX::ST_i64_ari_64;
1897        break;
1898      case MVT::f32:
1899        Opcode = NVPTX::ST_f32_ari_64;
1900        break;
1901      case MVT::f64:
1902        Opcode = NVPTX::ST_f64_ari_64;
1903        break;
1904      default:
1905        return nullptr;
1906      }
1907    } else {
1908      switch (SourceVT) {
1909      case MVT::i8:
1910        Opcode = NVPTX::ST_i8_ari;
1911        break;
1912      case MVT::i16:
1913        Opcode = NVPTX::ST_i16_ari;
1914        break;
1915      case MVT::i32:
1916        Opcode = NVPTX::ST_i32_ari;
1917        break;
1918      case MVT::i64:
1919        Opcode = NVPTX::ST_i64_ari;
1920        break;
1921      case MVT::f32:
1922        Opcode = NVPTX::ST_f32_ari;
1923        break;
1924      case MVT::f64:
1925        Opcode = NVPTX::ST_f64_ari;
1926        break;
1927      default:
1928        return nullptr;
1929      }
1930    }
1931    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1932                      getI32Imm(vecType), getI32Imm(toType),
1933                      getI32Imm(toTypeWidth), Base, Offset, Chain };
1934    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1935  } else {
1936    if (Subtarget.is64Bit()) {
1937      switch (SourceVT) {
1938      case MVT::i8:
1939        Opcode = NVPTX::ST_i8_areg_64;
1940        break;
1941      case MVT::i16:
1942        Opcode = NVPTX::ST_i16_areg_64;
1943        break;
1944      case MVT::i32:
1945        Opcode = NVPTX::ST_i32_areg_64;
1946        break;
1947      case MVT::i64:
1948        Opcode = NVPTX::ST_i64_areg_64;
1949        break;
1950      case MVT::f32:
1951        Opcode = NVPTX::ST_f32_areg_64;
1952        break;
1953      case MVT::f64:
1954        Opcode = NVPTX::ST_f64_areg_64;
1955        break;
1956      default:
1957        return nullptr;
1958      }
1959    } else {
1960      switch (SourceVT) {
1961      case MVT::i8:
1962        Opcode = NVPTX::ST_i8_areg;
1963        break;
1964      case MVT::i16:
1965        Opcode = NVPTX::ST_i16_areg;
1966        break;
1967      case MVT::i32:
1968        Opcode = NVPTX::ST_i32_areg;
1969        break;
1970      case MVT::i64:
1971        Opcode = NVPTX::ST_i64_areg;
1972        break;
1973      case MVT::f32:
1974        Opcode = NVPTX::ST_f32_areg;
1975        break;
1976      case MVT::f64:
1977        Opcode = NVPTX::ST_f64_areg;
1978        break;
1979      default:
1980        return nullptr;
1981      }
1982    }
1983    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1984                      getI32Imm(vecType), getI32Imm(toType),
1985                      getI32Imm(toTypeWidth), N2, Chain };
1986    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1987  }
1988
1989  if (NVPTXST) {
1990    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1991    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1992    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1993  }
1994
1995  return NVPTXST;
1996}
1997
1998SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1999  SDValue Chain = N->getOperand(0);
2000  SDValue Op1 = N->getOperand(1);
2001  SDValue Addr, Offset, Base;
2002  unsigned Opcode;
2003  SDLoc DL(N);
2004  SDNode *ST;
2005  EVT EltVT = Op1.getValueType();
2006  MemSDNode *MemSD = cast<MemSDNode>(N);
2007  EVT StoreVT = MemSD->getMemoryVT();
2008
2009  // Address Space Setting
2010  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
2011
2012  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2013    report_fatal_error("Cannot store to pointer that points to constant "
2014                       "memory space");
2015  }
2016
2017  // Volatile Setting
2018  // - .volatile is only availalble for .global and .shared
2019  bool IsVolatile = MemSD->isVolatile();
2020  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2021      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2022      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2023    IsVolatile = false;
2024
2025  // Type Setting: toType + toTypeWidth
2026  // - for integer type, always use 'u'
2027  assert(StoreVT.isSimple() && "Store value is not simple");
2028  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2029  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2030  unsigned ToType;
2031  if (ScalarVT.isFloatingPoint())
2032    ToType = NVPTX::PTXLdStInstCode::Float;
2033  else
2034    ToType = NVPTX::PTXLdStInstCode::Unsigned;
2035
2036  SmallVector<SDValue, 12> StOps;
2037  SDValue N2;
2038  unsigned VecType;
2039
2040  switch (N->getOpcode()) {
2041  case NVPTXISD::StoreV2:
2042    VecType = NVPTX::PTXLdStInstCode::V2;
2043    StOps.push_back(N->getOperand(1));
2044    StOps.push_back(N->getOperand(2));
2045    N2 = N->getOperand(3);
2046    break;
2047  case NVPTXISD::StoreV4:
2048    VecType = NVPTX::PTXLdStInstCode::V4;
2049    StOps.push_back(N->getOperand(1));
2050    StOps.push_back(N->getOperand(2));
2051    StOps.push_back(N->getOperand(3));
2052    StOps.push_back(N->getOperand(4));
2053    N2 = N->getOperand(5);
2054    break;
2055  default:
2056    return nullptr;
2057  }
2058
2059  StOps.push_back(getI32Imm(IsVolatile));
2060  StOps.push_back(getI32Imm(CodeAddrSpace));
2061  StOps.push_back(getI32Imm(VecType));
2062  StOps.push_back(getI32Imm(ToType));
2063  StOps.push_back(getI32Imm(ToTypeWidth));
2064
2065  if (SelectDirectAddr(N2, Addr)) {
2066    switch (N->getOpcode()) {
2067    default:
2068      return nullptr;
2069    case NVPTXISD::StoreV2:
2070      switch (EltVT.getSimpleVT().SimpleTy) {
2071      default:
2072        return nullptr;
2073      case MVT::i8:
2074        Opcode = NVPTX::STV_i8_v2_avar;
2075        break;
2076      case MVT::i16:
2077        Opcode = NVPTX::STV_i16_v2_avar;
2078        break;
2079      case MVT::i32:
2080        Opcode = NVPTX::STV_i32_v2_avar;
2081        break;
2082      case MVT::i64:
2083        Opcode = NVPTX::STV_i64_v2_avar;
2084        break;
2085      case MVT::f32:
2086        Opcode = NVPTX::STV_f32_v2_avar;
2087        break;
2088      case MVT::f64:
2089        Opcode = NVPTX::STV_f64_v2_avar;
2090        break;
2091      }
2092      break;
2093    case NVPTXISD::StoreV4:
2094      switch (EltVT.getSimpleVT().SimpleTy) {
2095      default:
2096        return nullptr;
2097      case MVT::i8:
2098        Opcode = NVPTX::STV_i8_v4_avar;
2099        break;
2100      case MVT::i16:
2101        Opcode = NVPTX::STV_i16_v4_avar;
2102        break;
2103      case MVT::i32:
2104        Opcode = NVPTX::STV_i32_v4_avar;
2105        break;
2106      case MVT::f32:
2107        Opcode = NVPTX::STV_f32_v4_avar;
2108        break;
2109      }
2110      break;
2111    }
2112    StOps.push_back(Addr);
2113  } else if (Subtarget.is64Bit()
2114                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2115                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2116    switch (N->getOpcode()) {
2117    default:
2118      return nullptr;
2119    case NVPTXISD::StoreV2:
2120      switch (EltVT.getSimpleVT().SimpleTy) {
2121      default:
2122        return nullptr;
2123      case MVT::i8:
2124        Opcode = NVPTX::STV_i8_v2_asi;
2125        break;
2126      case MVT::i16:
2127        Opcode = NVPTX::STV_i16_v2_asi;
2128        break;
2129      case MVT::i32:
2130        Opcode = NVPTX::STV_i32_v2_asi;
2131        break;
2132      case MVT::i64:
2133        Opcode = NVPTX::STV_i64_v2_asi;
2134        break;
2135      case MVT::f32:
2136        Opcode = NVPTX::STV_f32_v2_asi;
2137        break;
2138      case MVT::f64:
2139        Opcode = NVPTX::STV_f64_v2_asi;
2140        break;
2141      }
2142      break;
2143    case NVPTXISD::StoreV4:
2144      switch (EltVT.getSimpleVT().SimpleTy) {
2145      default:
2146        return nullptr;
2147      case MVT::i8:
2148        Opcode = NVPTX::STV_i8_v4_asi;
2149        break;
2150      case MVT::i16:
2151        Opcode = NVPTX::STV_i16_v4_asi;
2152        break;
2153      case MVT::i32:
2154        Opcode = NVPTX::STV_i32_v4_asi;
2155        break;
2156      case MVT::f32:
2157        Opcode = NVPTX::STV_f32_v4_asi;
2158        break;
2159      }
2160      break;
2161    }
2162    StOps.push_back(Base);
2163    StOps.push_back(Offset);
2164  } else if (Subtarget.is64Bit()
2165                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2166                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2167    if (Subtarget.is64Bit()) {
2168      switch (N->getOpcode()) {
2169      default:
2170        return nullptr;
2171      case NVPTXISD::StoreV2:
2172        switch (EltVT.getSimpleVT().SimpleTy) {
2173        default:
2174          return nullptr;
2175        case MVT::i8:
2176          Opcode = NVPTX::STV_i8_v2_ari_64;
2177          break;
2178        case MVT::i16:
2179          Opcode = NVPTX::STV_i16_v2_ari_64;
2180          break;
2181        case MVT::i32:
2182          Opcode = NVPTX::STV_i32_v2_ari_64;
2183          break;
2184        case MVT::i64:
2185          Opcode = NVPTX::STV_i64_v2_ari_64;
2186          break;
2187        case MVT::f32:
2188          Opcode = NVPTX::STV_f32_v2_ari_64;
2189          break;
2190        case MVT::f64:
2191          Opcode = NVPTX::STV_f64_v2_ari_64;
2192          break;
2193        }
2194        break;
2195      case NVPTXISD::StoreV4:
2196        switch (EltVT.getSimpleVT().SimpleTy) {
2197        default:
2198          return nullptr;
2199        case MVT::i8:
2200          Opcode = NVPTX::STV_i8_v4_ari_64;
2201          break;
2202        case MVT::i16:
2203          Opcode = NVPTX::STV_i16_v4_ari_64;
2204          break;
2205        case MVT::i32:
2206          Opcode = NVPTX::STV_i32_v4_ari_64;
2207          break;
2208        case MVT::f32:
2209          Opcode = NVPTX::STV_f32_v4_ari_64;
2210          break;
2211        }
2212        break;
2213      }
2214    } else {
2215      switch (N->getOpcode()) {
2216      default:
2217        return nullptr;
2218      case NVPTXISD::StoreV2:
2219        switch (EltVT.getSimpleVT().SimpleTy) {
2220        default:
2221          return nullptr;
2222        case MVT::i8:
2223          Opcode = NVPTX::STV_i8_v2_ari;
2224          break;
2225        case MVT::i16:
2226          Opcode = NVPTX::STV_i16_v2_ari;
2227          break;
2228        case MVT::i32:
2229          Opcode = NVPTX::STV_i32_v2_ari;
2230          break;
2231        case MVT::i64:
2232          Opcode = NVPTX::STV_i64_v2_ari;
2233          break;
2234        case MVT::f32:
2235          Opcode = NVPTX::STV_f32_v2_ari;
2236          break;
2237        case MVT::f64:
2238          Opcode = NVPTX::STV_f64_v2_ari;
2239          break;
2240        }
2241        break;
2242      case NVPTXISD::StoreV4:
2243        switch (EltVT.getSimpleVT().SimpleTy) {
2244        default:
2245          return nullptr;
2246        case MVT::i8:
2247          Opcode = NVPTX::STV_i8_v4_ari;
2248          break;
2249        case MVT::i16:
2250          Opcode = NVPTX::STV_i16_v4_ari;
2251          break;
2252        case MVT::i32:
2253          Opcode = NVPTX::STV_i32_v4_ari;
2254          break;
2255        case MVT::f32:
2256          Opcode = NVPTX::STV_f32_v4_ari;
2257          break;
2258        }
2259        break;
2260      }
2261    }
2262    StOps.push_back(Base);
2263    StOps.push_back(Offset);
2264  } else {
2265    if (Subtarget.is64Bit()) {
2266      switch (N->getOpcode()) {
2267      default:
2268        return nullptr;
2269      case NVPTXISD::StoreV2:
2270        switch (EltVT.getSimpleVT().SimpleTy) {
2271        default:
2272          return nullptr;
2273        case MVT::i8:
2274          Opcode = NVPTX::STV_i8_v2_areg_64;
2275          break;
2276        case MVT::i16:
2277          Opcode = NVPTX::STV_i16_v2_areg_64;
2278          break;
2279        case MVT::i32:
2280          Opcode = NVPTX::STV_i32_v2_areg_64;
2281          break;
2282        case MVT::i64:
2283          Opcode = NVPTX::STV_i64_v2_areg_64;
2284          break;
2285        case MVT::f32:
2286          Opcode = NVPTX::STV_f32_v2_areg_64;
2287          break;
2288        case MVT::f64:
2289          Opcode = NVPTX::STV_f64_v2_areg_64;
2290          break;
2291        }
2292        break;
2293      case NVPTXISD::StoreV4:
2294        switch (EltVT.getSimpleVT().SimpleTy) {
2295        default:
2296          return nullptr;
2297        case MVT::i8:
2298          Opcode = NVPTX::STV_i8_v4_areg_64;
2299          break;
2300        case MVT::i16:
2301          Opcode = NVPTX::STV_i16_v4_areg_64;
2302          break;
2303        case MVT::i32:
2304          Opcode = NVPTX::STV_i32_v4_areg_64;
2305          break;
2306        case MVT::f32:
2307          Opcode = NVPTX::STV_f32_v4_areg_64;
2308          break;
2309        }
2310        break;
2311      }
2312    } else {
2313      switch (N->getOpcode()) {
2314      default:
2315        return nullptr;
2316      case NVPTXISD::StoreV2:
2317        switch (EltVT.getSimpleVT().SimpleTy) {
2318        default:
2319          return nullptr;
2320        case MVT::i8:
2321          Opcode = NVPTX::STV_i8_v2_areg;
2322          break;
2323        case MVT::i16:
2324          Opcode = NVPTX::STV_i16_v2_areg;
2325          break;
2326        case MVT::i32:
2327          Opcode = NVPTX::STV_i32_v2_areg;
2328          break;
2329        case MVT::i64:
2330          Opcode = NVPTX::STV_i64_v2_areg;
2331          break;
2332        case MVT::f32:
2333          Opcode = NVPTX::STV_f32_v2_areg;
2334          break;
2335        case MVT::f64:
2336          Opcode = NVPTX::STV_f64_v2_areg;
2337          break;
2338        }
2339        break;
2340      case NVPTXISD::StoreV4:
2341        switch (EltVT.getSimpleVT().SimpleTy) {
2342        default:
2343          return nullptr;
2344        case MVT::i8:
2345          Opcode = NVPTX::STV_i8_v4_areg;
2346          break;
2347        case MVT::i16:
2348          Opcode = NVPTX::STV_i16_v4_areg;
2349          break;
2350        case MVT::i32:
2351          Opcode = NVPTX::STV_i32_v4_areg;
2352          break;
2353        case MVT::f32:
2354          Opcode = NVPTX::STV_f32_v4_areg;
2355          break;
2356        }
2357        break;
2358      }
2359    }
2360    StOps.push_back(N2);
2361  }
2362
2363  StOps.push_back(Chain);
2364
2365  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
2366
2367  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2368  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2369  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2370
2371  return ST;
2372}
2373
2374SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2375  SDValue Chain = Node->getOperand(0);
2376  SDValue Offset = Node->getOperand(2);
2377  SDValue Flag = Node->getOperand(3);
2378  SDLoc DL(Node);
2379  MemSDNode *Mem = cast<MemSDNode>(Node);
2380
2381  unsigned VecSize;
2382  switch (Node->getOpcode()) {
2383  default:
2384    return nullptr;
2385  case NVPTXISD::LoadParam:
2386    VecSize = 1;
2387    break;
2388  case NVPTXISD::LoadParamV2:
2389    VecSize = 2;
2390    break;
2391  case NVPTXISD::LoadParamV4:
2392    VecSize = 4;
2393    break;
2394  }
2395
2396  EVT EltVT = Node->getValueType(0);
2397  EVT MemVT = Mem->getMemoryVT();
2398
2399  unsigned Opc = 0;
2400
2401  switch (VecSize) {
2402  default:
2403    return nullptr;
2404  case 1:
2405    switch (MemVT.getSimpleVT().SimpleTy) {
2406    default:
2407      return nullptr;
2408    case MVT::i1:
2409      Opc = NVPTX::LoadParamMemI8;
2410      break;
2411    case MVT::i8:
2412      Opc = NVPTX::LoadParamMemI8;
2413      break;
2414    case MVT::i16:
2415      Opc = NVPTX::LoadParamMemI16;
2416      break;
2417    case MVT::i32:
2418      Opc = NVPTX::LoadParamMemI32;
2419      break;
2420    case MVT::i64:
2421      Opc = NVPTX::LoadParamMemI64;
2422      break;
2423    case MVT::f32:
2424      Opc = NVPTX::LoadParamMemF32;
2425      break;
2426    case MVT::f64:
2427      Opc = NVPTX::LoadParamMemF64;
2428      break;
2429    }
2430    break;
2431  case 2:
2432    switch (MemVT.getSimpleVT().SimpleTy) {
2433    default:
2434      return nullptr;
2435    case MVT::i1:
2436      Opc = NVPTX::LoadParamMemV2I8;
2437      break;
2438    case MVT::i8:
2439      Opc = NVPTX::LoadParamMemV2I8;
2440      break;
2441    case MVT::i16:
2442      Opc = NVPTX::LoadParamMemV2I16;
2443      break;
2444    case MVT::i32:
2445      Opc = NVPTX::LoadParamMemV2I32;
2446      break;
2447    case MVT::i64:
2448      Opc = NVPTX::LoadParamMemV2I64;
2449      break;
2450    case MVT::f32:
2451      Opc = NVPTX::LoadParamMemV2F32;
2452      break;
2453    case MVT::f64:
2454      Opc = NVPTX::LoadParamMemV2F64;
2455      break;
2456    }
2457    break;
2458  case 4:
2459    switch (MemVT.getSimpleVT().SimpleTy) {
2460    default:
2461      return nullptr;
2462    case MVT::i1:
2463      Opc = NVPTX::LoadParamMemV4I8;
2464      break;
2465    case MVT::i8:
2466      Opc = NVPTX::LoadParamMemV4I8;
2467      break;
2468    case MVT::i16:
2469      Opc = NVPTX::LoadParamMemV4I16;
2470      break;
2471    case MVT::i32:
2472      Opc = NVPTX::LoadParamMemV4I32;
2473      break;
2474    case MVT::f32:
2475      Opc = NVPTX::LoadParamMemV4F32;
2476      break;
2477    }
2478    break;
2479  }
2480
2481  SDVTList VTs;
2482  if (VecSize == 1) {
2483    VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2484  } else if (VecSize == 2) {
2485    VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2486  } else {
2487    EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2488    VTs = CurDAG->getVTList(EVTs);
2489  }
2490
2491  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2492
2493  SmallVector<SDValue, 2> Ops;
2494  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2495  Ops.push_back(Chain);
2496  Ops.push_back(Flag);
2497
2498  SDNode *Ret =
2499      CurDAG->getMachineNode(Opc, DL, VTs, Ops);
2500  return Ret;
2501}
2502
2503SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2504  SDLoc DL(N);
2505  SDValue Chain = N->getOperand(0);
2506  SDValue Offset = N->getOperand(1);
2507  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2508  MemSDNode *Mem = cast<MemSDNode>(N);
2509
2510  // How many elements do we have?
2511  unsigned NumElts = 1;
2512  switch (N->getOpcode()) {
2513  default:
2514    return nullptr;
2515  case NVPTXISD::StoreRetval:
2516    NumElts = 1;
2517    break;
2518  case NVPTXISD::StoreRetvalV2:
2519    NumElts = 2;
2520    break;
2521  case NVPTXISD::StoreRetvalV4:
2522    NumElts = 4;
2523    break;
2524  }
2525
2526  // Build vector of operands
2527  SmallVector<SDValue, 6> Ops;
2528  for (unsigned i = 0; i < NumElts; ++i)
2529    Ops.push_back(N->getOperand(i + 2));
2530  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2531  Ops.push_back(Chain);
2532
2533  // Determine target opcode
2534  // If we have an i1, use an 8-bit store. The lowering code in
2535  // NVPTXISelLowering will have already emitted an upcast.
2536  unsigned Opcode = 0;
2537  switch (NumElts) {
2538  default:
2539    return nullptr;
2540  case 1:
2541    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2542    default:
2543      return nullptr;
2544    case MVT::i1:
2545      Opcode = NVPTX::StoreRetvalI8;
2546      break;
2547    case MVT::i8:
2548      Opcode = NVPTX::StoreRetvalI8;
2549      break;
2550    case MVT::i16:
2551      Opcode = NVPTX::StoreRetvalI16;
2552      break;
2553    case MVT::i32:
2554      Opcode = NVPTX::StoreRetvalI32;
2555      break;
2556    case MVT::i64:
2557      Opcode = NVPTX::StoreRetvalI64;
2558      break;
2559    case MVT::f32:
2560      Opcode = NVPTX::StoreRetvalF32;
2561      break;
2562    case MVT::f64:
2563      Opcode = NVPTX::StoreRetvalF64;
2564      break;
2565    }
2566    break;
2567  case 2:
2568    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2569    default:
2570      return nullptr;
2571    case MVT::i1:
2572      Opcode = NVPTX::StoreRetvalV2I8;
2573      break;
2574    case MVT::i8:
2575      Opcode = NVPTX::StoreRetvalV2I8;
2576      break;
2577    case MVT::i16:
2578      Opcode = NVPTX::StoreRetvalV2I16;
2579      break;
2580    case MVT::i32:
2581      Opcode = NVPTX::StoreRetvalV2I32;
2582      break;
2583    case MVT::i64:
2584      Opcode = NVPTX::StoreRetvalV2I64;
2585      break;
2586    case MVT::f32:
2587      Opcode = NVPTX::StoreRetvalV2F32;
2588      break;
2589    case MVT::f64:
2590      Opcode = NVPTX::StoreRetvalV2F64;
2591      break;
2592    }
2593    break;
2594  case 4:
2595    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2596    default:
2597      return nullptr;
2598    case MVT::i1:
2599      Opcode = NVPTX::StoreRetvalV4I8;
2600      break;
2601    case MVT::i8:
2602      Opcode = NVPTX::StoreRetvalV4I8;
2603      break;
2604    case MVT::i16:
2605      Opcode = NVPTX::StoreRetvalV4I16;
2606      break;
2607    case MVT::i32:
2608      Opcode = NVPTX::StoreRetvalV4I32;
2609      break;
2610    case MVT::f32:
2611      Opcode = NVPTX::StoreRetvalV4F32;
2612      break;
2613    }
2614    break;
2615  }
2616
2617  SDNode *Ret =
2618      CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2619  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2620  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2621  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2622
2623  return Ret;
2624}
2625
2626SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2627  SDLoc DL(N);
2628  SDValue Chain = N->getOperand(0);
2629  SDValue Param = N->getOperand(1);
2630  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2631  SDValue Offset = N->getOperand(2);
2632  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2633  MemSDNode *Mem = cast<MemSDNode>(N);
2634  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2635
2636  // How many elements do we have?
2637  unsigned NumElts = 1;
2638  switch (N->getOpcode()) {
2639  default:
2640    return nullptr;
2641  case NVPTXISD::StoreParamU32:
2642  case NVPTXISD::StoreParamS32:
2643  case NVPTXISD::StoreParam:
2644    NumElts = 1;
2645    break;
2646  case NVPTXISD::StoreParamV2:
2647    NumElts = 2;
2648    break;
2649  case NVPTXISD::StoreParamV4:
2650    NumElts = 4;
2651    break;
2652  }
2653
2654  // Build vector of operands
2655  SmallVector<SDValue, 8> Ops;
2656  for (unsigned i = 0; i < NumElts; ++i)
2657    Ops.push_back(N->getOperand(i + 3));
2658  Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2659  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2660  Ops.push_back(Chain);
2661  Ops.push_back(Flag);
2662
2663  // Determine target opcode
2664  // If we have an i1, use an 8-bit store. The lowering code in
2665  // NVPTXISelLowering will have already emitted an upcast.
2666  unsigned Opcode = 0;
2667  switch (N->getOpcode()) {
2668  default:
2669    switch (NumElts) {
2670    default:
2671      return nullptr;
2672    case 1:
2673      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2674      default:
2675        return nullptr;
2676      case MVT::i1:
2677        Opcode = NVPTX::StoreParamI8;
2678        break;
2679      case MVT::i8:
2680        Opcode = NVPTX::StoreParamI8;
2681        break;
2682      case MVT::i16:
2683        Opcode = NVPTX::StoreParamI16;
2684        break;
2685      case MVT::i32:
2686        Opcode = NVPTX::StoreParamI32;
2687        break;
2688      case MVT::i64:
2689        Opcode = NVPTX::StoreParamI64;
2690        break;
2691      case MVT::f32:
2692        Opcode = NVPTX::StoreParamF32;
2693        break;
2694      case MVT::f64:
2695        Opcode = NVPTX::StoreParamF64;
2696        break;
2697      }
2698      break;
2699    case 2:
2700      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2701      default:
2702        return nullptr;
2703      case MVT::i1:
2704        Opcode = NVPTX::StoreParamV2I8;
2705        break;
2706      case MVT::i8:
2707        Opcode = NVPTX::StoreParamV2I8;
2708        break;
2709      case MVT::i16:
2710        Opcode = NVPTX::StoreParamV2I16;
2711        break;
2712      case MVT::i32:
2713        Opcode = NVPTX::StoreParamV2I32;
2714        break;
2715      case MVT::i64:
2716        Opcode = NVPTX::StoreParamV2I64;
2717        break;
2718      case MVT::f32:
2719        Opcode = NVPTX::StoreParamV2F32;
2720        break;
2721      case MVT::f64:
2722        Opcode = NVPTX::StoreParamV2F64;
2723        break;
2724      }
2725      break;
2726    case 4:
2727      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2728      default:
2729        return nullptr;
2730      case MVT::i1:
2731        Opcode = NVPTX::StoreParamV4I8;
2732        break;
2733      case MVT::i8:
2734        Opcode = NVPTX::StoreParamV4I8;
2735        break;
2736      case MVT::i16:
2737        Opcode = NVPTX::StoreParamV4I16;
2738        break;
2739      case MVT::i32:
2740        Opcode = NVPTX::StoreParamV4I32;
2741        break;
2742      case MVT::f32:
2743        Opcode = NVPTX::StoreParamV4F32;
2744        break;
2745      }
2746      break;
2747    }
2748    break;
2749  // Special case: if we have a sign-extend/zero-extend node, insert the
2750  // conversion instruction first, and use that as the value operand to
2751  // the selected StoreParam node.
2752  case NVPTXISD::StoreParamU32: {
2753    Opcode = NVPTX::StoreParamI32;
2754    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2755                                                MVT::i32);
2756    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2757                                         MVT::i32, Ops[0], CvtNone);
2758    Ops[0] = SDValue(Cvt, 0);
2759    break;
2760  }
2761  case NVPTXISD::StoreParamS32: {
2762    Opcode = NVPTX::StoreParamI32;
2763    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2764                                                MVT::i32);
2765    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2766                                         MVT::i32, Ops[0], CvtNone);
2767    Ops[0] = SDValue(Cvt, 0);
2768    break;
2769  }
2770  }
2771
2772  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2773  SDNode *Ret =
2774      CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
2775  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2776  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2777  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2778
2779  return Ret;
2780}
2781
2782SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
2783  SDValue Chain = N->getOperand(0);
2784  SDValue TexRef = N->getOperand(1);
2785  SDValue SampRef = N->getOperand(2);
2786  SDNode *Ret = nullptr;
2787  unsigned Opc = 0;
2788  SmallVector<SDValue, 8> Ops;
2789
2790  switch (N->getOpcode()) {
2791  default: return nullptr;
2792  case NVPTXISD::Tex1DFloatI32:
2793    Opc = NVPTX::TEX_1D_F32_I32;
2794    break;
2795  case NVPTXISD::Tex1DFloatFloat:
2796    Opc = NVPTX::TEX_1D_F32_F32;
2797    break;
2798  case NVPTXISD::Tex1DFloatFloatLevel:
2799    Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2800    break;
2801  case NVPTXISD::Tex1DFloatFloatGrad:
2802    Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2803    break;
2804  case NVPTXISD::Tex1DI32I32:
2805    Opc = NVPTX::TEX_1D_I32_I32;
2806    break;
2807  case NVPTXISD::Tex1DI32Float:
2808    Opc = NVPTX::TEX_1D_I32_F32;
2809    break;
2810  case NVPTXISD::Tex1DI32FloatLevel:
2811    Opc = NVPTX::TEX_1D_I32_F32_LEVEL;
2812    break;
2813  case NVPTXISD::Tex1DI32FloatGrad:
2814    Opc = NVPTX::TEX_1D_I32_F32_GRAD;
2815    break;
2816  case NVPTXISD::Tex1DArrayFloatI32:
2817    Opc = NVPTX::TEX_1D_ARRAY_F32_I32;
2818    break;
2819  case NVPTXISD::Tex1DArrayFloatFloat:
2820    Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2821    break;
2822  case NVPTXISD::Tex1DArrayFloatFloatLevel:
2823    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2824    break;
2825  case NVPTXISD::Tex1DArrayFloatFloatGrad:
2826    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2827    break;
2828  case NVPTXISD::Tex1DArrayI32I32:
2829    Opc = NVPTX::TEX_1D_ARRAY_I32_I32;
2830    break;
2831  case NVPTXISD::Tex1DArrayI32Float:
2832    Opc = NVPTX::TEX_1D_ARRAY_I32_F32;
2833    break;
2834  case NVPTXISD::Tex1DArrayI32FloatLevel:
2835    Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL;
2836    break;
2837  case NVPTXISD::Tex1DArrayI32FloatGrad:
2838    Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD;
2839    break;
2840  case NVPTXISD::Tex2DFloatI32:
2841    Opc = NVPTX::TEX_2D_F32_I32;
2842    break;
2843  case NVPTXISD::Tex2DFloatFloat:
2844    Opc = NVPTX::TEX_2D_F32_F32;
2845    break;
2846  case NVPTXISD::Tex2DFloatFloatLevel:
2847    Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2848    break;
2849  case NVPTXISD::Tex2DFloatFloatGrad:
2850    Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2851    break;
2852  case NVPTXISD::Tex2DI32I32:
2853    Opc = NVPTX::TEX_2D_I32_I32;
2854    break;
2855  case NVPTXISD::Tex2DI32Float:
2856    Opc = NVPTX::TEX_2D_I32_F32;
2857    break;
2858  case NVPTXISD::Tex2DI32FloatLevel:
2859    Opc = NVPTX::TEX_2D_I32_F32_LEVEL;
2860    break;
2861  case NVPTXISD::Tex2DI32FloatGrad:
2862    Opc = NVPTX::TEX_2D_I32_F32_GRAD;
2863    break;
2864  case NVPTXISD::Tex2DArrayFloatI32:
2865    Opc = NVPTX::TEX_2D_ARRAY_F32_I32;
2866    break;
2867  case NVPTXISD::Tex2DArrayFloatFloat:
2868    Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2869    break;
2870  case NVPTXISD::Tex2DArrayFloatFloatLevel:
2871    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2872    break;
2873  case NVPTXISD::Tex2DArrayFloatFloatGrad:
2874    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2875    break;
2876  case NVPTXISD::Tex2DArrayI32I32:
2877    Opc = NVPTX::TEX_2D_ARRAY_I32_I32;
2878    break;
2879  case NVPTXISD::Tex2DArrayI32Float:
2880    Opc = NVPTX::TEX_2D_ARRAY_I32_F32;
2881    break;
2882  case NVPTXISD::Tex2DArrayI32FloatLevel:
2883    Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL;
2884    break;
2885  case NVPTXISD::Tex2DArrayI32FloatGrad:
2886    Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD;
2887    break;
2888  case NVPTXISD::Tex3DFloatI32:
2889    Opc = NVPTX::TEX_3D_F32_I32;
2890    break;
2891  case NVPTXISD::Tex3DFloatFloat:
2892    Opc = NVPTX::TEX_3D_F32_F32;
2893    break;
2894  case NVPTXISD::Tex3DFloatFloatLevel:
2895    Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2896    break;
2897  case NVPTXISD::Tex3DFloatFloatGrad:
2898    Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2899    break;
2900  case NVPTXISD::Tex3DI32I32:
2901    Opc = NVPTX::TEX_3D_I32_I32;
2902    break;
2903  case NVPTXISD::Tex3DI32Float:
2904    Opc = NVPTX::TEX_3D_I32_F32;
2905    break;
2906  case NVPTXISD::Tex3DI32FloatLevel:
2907    Opc = NVPTX::TEX_3D_I32_F32_LEVEL;
2908    break;
2909  case NVPTXISD::Tex3DI32FloatGrad:
2910    Opc = NVPTX::TEX_3D_I32_F32_GRAD;
2911    break;
2912  }
2913
2914  Ops.push_back(TexRef);
2915  Ops.push_back(SampRef);
2916
2917  // Copy over indices
2918  for (unsigned i = 3; i < N->getNumOperands(); ++i) {
2919    Ops.push_back(N->getOperand(i));
2920  }
2921
2922  Ops.push_back(Chain);
2923  Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
2924  return Ret;
2925}
2926
2927SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
2928  SDValue Chain = N->getOperand(0);
2929  SDValue TexHandle = N->getOperand(1);
2930  SDNode *Ret = nullptr;
2931  unsigned Opc = 0;
2932  SmallVector<SDValue, 8> Ops;
2933  switch (N->getOpcode()) {
2934  default: return nullptr;
2935  case NVPTXISD::Suld1DI8Trap:
2936    Opc = NVPTX::SULD_1D_I8_TRAP;
2937    Ops.push_back(TexHandle);
2938    Ops.push_back(N->getOperand(2));
2939    Ops.push_back(Chain);
2940    break;
2941  case NVPTXISD::Suld1DI16Trap:
2942    Opc = NVPTX::SULD_1D_I16_TRAP;
2943    Ops.push_back(TexHandle);
2944    Ops.push_back(N->getOperand(2));
2945    Ops.push_back(Chain);
2946    break;
2947  case NVPTXISD::Suld1DI32Trap:
2948    Opc = NVPTX::SULD_1D_I32_TRAP;
2949    Ops.push_back(TexHandle);
2950    Ops.push_back(N->getOperand(2));
2951    Ops.push_back(Chain);
2952    break;
2953  case NVPTXISD::Suld1DV2I8Trap:
2954    Opc = NVPTX::SULD_1D_V2I8_TRAP;
2955    Ops.push_back(TexHandle);
2956    Ops.push_back(N->getOperand(2));
2957    Ops.push_back(Chain);
2958    break;
2959  case NVPTXISD::Suld1DV2I16Trap:
2960    Opc = NVPTX::SULD_1D_V2I16_TRAP;
2961    Ops.push_back(TexHandle);
2962    Ops.push_back(N->getOperand(2));
2963    Ops.push_back(Chain);
2964    break;
2965  case NVPTXISD::Suld1DV2I32Trap:
2966    Opc = NVPTX::SULD_1D_V2I32_TRAP;
2967    Ops.push_back(TexHandle);
2968    Ops.push_back(N->getOperand(2));
2969    Ops.push_back(Chain);
2970    break;
2971  case NVPTXISD::Suld1DV4I8Trap:
2972    Opc = NVPTX::SULD_1D_V4I8_TRAP;
2973    Ops.push_back(TexHandle);
2974    Ops.push_back(N->getOperand(2));
2975    Ops.push_back(Chain);
2976    break;
2977  case NVPTXISD::Suld1DV4I16Trap:
2978    Opc = NVPTX::SULD_1D_V4I16_TRAP;
2979    Ops.push_back(TexHandle);
2980    Ops.push_back(N->getOperand(2));
2981    Ops.push_back(Chain);
2982    break;
2983  case NVPTXISD::Suld1DV4I32Trap:
2984    Opc = NVPTX::SULD_1D_V4I32_TRAP;
2985    Ops.push_back(TexHandle);
2986    Ops.push_back(N->getOperand(2));
2987    Ops.push_back(Chain);
2988    break;
2989  case NVPTXISD::Suld1DArrayI8Trap:
2990    Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
2991    Ops.push_back(TexHandle);
2992    Ops.push_back(N->getOperand(2));
2993    Ops.push_back(N->getOperand(3));
2994    Ops.push_back(Chain);
2995    break;
2996  case NVPTXISD::Suld1DArrayI16Trap:
2997    Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
2998    Ops.push_back(TexHandle);
2999    Ops.push_back(N->getOperand(2));
3000    Ops.push_back(N->getOperand(3));
3001    Ops.push_back(Chain);
3002    break;
3003  case NVPTXISD::Suld1DArrayI32Trap:
3004    Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
3005    Ops.push_back(TexHandle);
3006    Ops.push_back(N->getOperand(2));
3007    Ops.push_back(N->getOperand(3));
3008    Ops.push_back(Chain);
3009    break;
3010  case NVPTXISD::Suld1DArrayV2I8Trap:
3011    Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
3012    Ops.push_back(TexHandle);
3013    Ops.push_back(N->getOperand(2));
3014    Ops.push_back(N->getOperand(3));
3015    Ops.push_back(Chain);
3016    break;
3017  case NVPTXISD::Suld1DArrayV2I16Trap:
3018    Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
3019    Ops.push_back(TexHandle);
3020    Ops.push_back(N->getOperand(2));
3021    Ops.push_back(N->getOperand(3));
3022    Ops.push_back(Chain);
3023    break;
3024  case NVPTXISD::Suld1DArrayV2I32Trap:
3025    Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
3026    Ops.push_back(TexHandle);
3027    Ops.push_back(N->getOperand(2));
3028    Ops.push_back(N->getOperand(3));
3029    Ops.push_back(Chain);
3030    break;
3031  case NVPTXISD::Suld1DArrayV4I8Trap:
3032    Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
3033    Ops.push_back(TexHandle);
3034    Ops.push_back(N->getOperand(2));
3035    Ops.push_back(N->getOperand(3));
3036    Ops.push_back(Chain);
3037    break;
3038  case NVPTXISD::Suld1DArrayV4I16Trap:
3039    Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
3040    Ops.push_back(TexHandle);
3041    Ops.push_back(N->getOperand(2));
3042    Ops.push_back(N->getOperand(3));
3043    Ops.push_back(Chain);
3044    break;
3045  case NVPTXISD::Suld1DArrayV4I32Trap:
3046    Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
3047    Ops.push_back(TexHandle);
3048    Ops.push_back(N->getOperand(2));
3049    Ops.push_back(N->getOperand(3));
3050    Ops.push_back(Chain);
3051    break;
3052  case NVPTXISD::Suld2DI8Trap:
3053    Opc = NVPTX::SULD_2D_I8_TRAP;
3054    Ops.push_back(TexHandle);
3055    Ops.push_back(N->getOperand(2));
3056    Ops.push_back(N->getOperand(3));
3057    Ops.push_back(Chain);
3058    break;
3059  case NVPTXISD::Suld2DI16Trap:
3060    Opc = NVPTX::SULD_2D_I16_TRAP;
3061    Ops.push_back(TexHandle);
3062    Ops.push_back(N->getOperand(2));
3063    Ops.push_back(N->getOperand(3));
3064    Ops.push_back(Chain);
3065    break;
3066  case NVPTXISD::Suld2DI32Trap:
3067    Opc = NVPTX::SULD_2D_I32_TRAP;
3068    Ops.push_back(TexHandle);
3069    Ops.push_back(N->getOperand(2));
3070    Ops.push_back(N->getOperand(3));
3071    Ops.push_back(Chain);
3072    break;
3073  case NVPTXISD::Suld2DV2I8Trap:
3074    Opc = NVPTX::SULD_2D_V2I8_TRAP;
3075    Ops.push_back(TexHandle);
3076    Ops.push_back(N->getOperand(2));
3077    Ops.push_back(N->getOperand(3));
3078    Ops.push_back(Chain);
3079    break;
3080  case NVPTXISD::Suld2DV2I16Trap:
3081    Opc = NVPTX::SULD_2D_V2I16_TRAP;
3082    Ops.push_back(TexHandle);
3083    Ops.push_back(N->getOperand(2));
3084    Ops.push_back(N->getOperand(3));
3085    Ops.push_back(Chain);
3086    break;
3087  case NVPTXISD::Suld2DV2I32Trap:
3088    Opc = NVPTX::SULD_2D_V2I32_TRAP;
3089    Ops.push_back(TexHandle);
3090    Ops.push_back(N->getOperand(2));
3091    Ops.push_back(N->getOperand(3));
3092    Ops.push_back(Chain);
3093    break;
3094  case NVPTXISD::Suld2DV4I8Trap:
3095    Opc = NVPTX::SULD_2D_V4I8_TRAP;
3096    Ops.push_back(TexHandle);
3097    Ops.push_back(N->getOperand(2));
3098    Ops.push_back(N->getOperand(3));
3099    Ops.push_back(Chain);
3100    break;
3101  case NVPTXISD::Suld2DV4I16Trap:
3102    Opc = NVPTX::SULD_2D_V4I16_TRAP;
3103    Ops.push_back(TexHandle);
3104    Ops.push_back(N->getOperand(2));
3105    Ops.push_back(N->getOperand(3));
3106    Ops.push_back(Chain);
3107    break;
3108  case NVPTXISD::Suld2DV4I32Trap:
3109    Opc = NVPTX::SULD_2D_V4I32_TRAP;
3110    Ops.push_back(TexHandle);
3111    Ops.push_back(N->getOperand(2));
3112    Ops.push_back(N->getOperand(3));
3113    Ops.push_back(Chain);
3114    break;
3115  case NVPTXISD::Suld2DArrayI8Trap:
3116    Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
3117    Ops.push_back(TexHandle);
3118    Ops.push_back(N->getOperand(2));
3119    Ops.push_back(N->getOperand(3));
3120    Ops.push_back(N->getOperand(4));
3121    Ops.push_back(Chain);
3122    break;
3123  case NVPTXISD::Suld2DArrayI16Trap:
3124    Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
3125    Ops.push_back(TexHandle);
3126    Ops.push_back(N->getOperand(2));
3127    Ops.push_back(N->getOperand(3));
3128    Ops.push_back(N->getOperand(4));
3129    Ops.push_back(Chain);
3130    break;
3131  case NVPTXISD::Suld2DArrayI32Trap:
3132    Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
3133    Ops.push_back(TexHandle);
3134    Ops.push_back(N->getOperand(2));
3135    Ops.push_back(N->getOperand(3));
3136    Ops.push_back(N->getOperand(4));
3137    Ops.push_back(Chain);
3138    break;
3139  case NVPTXISD::Suld2DArrayV2I8Trap:
3140    Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
3141    Ops.push_back(TexHandle);
3142    Ops.push_back(N->getOperand(2));
3143    Ops.push_back(N->getOperand(3));
3144    Ops.push_back(N->getOperand(4));
3145    Ops.push_back(Chain);
3146    break;
3147  case NVPTXISD::Suld2DArrayV2I16Trap:
3148    Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
3149    Ops.push_back(TexHandle);
3150    Ops.push_back(N->getOperand(2));
3151    Ops.push_back(N->getOperand(3));
3152    Ops.push_back(N->getOperand(4));
3153    Ops.push_back(Chain);
3154    break;
3155  case NVPTXISD::Suld2DArrayV2I32Trap:
3156    Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
3157    Ops.push_back(TexHandle);
3158    Ops.push_back(N->getOperand(2));
3159    Ops.push_back(N->getOperand(3));
3160    Ops.push_back(N->getOperand(4));
3161    Ops.push_back(Chain);
3162    break;
3163  case NVPTXISD::Suld2DArrayV4I8Trap:
3164    Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
3165    Ops.push_back(TexHandle);
3166    Ops.push_back(N->getOperand(2));
3167    Ops.push_back(N->getOperand(3));
3168    Ops.push_back(N->getOperand(4));
3169    Ops.push_back(Chain);
3170    break;
3171  case NVPTXISD::Suld2DArrayV4I16Trap:
3172    Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
3173    Ops.push_back(TexHandle);
3174    Ops.push_back(N->getOperand(2));
3175    Ops.push_back(N->getOperand(3));
3176    Ops.push_back(N->getOperand(4));
3177    Ops.push_back(Chain);
3178    break;
3179  case NVPTXISD::Suld2DArrayV4I32Trap:
3180    Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
3181    Ops.push_back(TexHandle);
3182    Ops.push_back(N->getOperand(2));
3183    Ops.push_back(N->getOperand(3));
3184    Ops.push_back(N->getOperand(4));
3185    Ops.push_back(Chain);
3186    break;
3187  case NVPTXISD::Suld3DI8Trap:
3188    Opc = NVPTX::SULD_3D_I8_TRAP;
3189    Ops.push_back(TexHandle);
3190    Ops.push_back(N->getOperand(2));
3191    Ops.push_back(N->getOperand(3));
3192    Ops.push_back(N->getOperand(4));
3193    Ops.push_back(Chain);
3194    break;
3195  case NVPTXISD::Suld3DI16Trap:
3196    Opc = NVPTX::SULD_3D_I16_TRAP;
3197    Ops.push_back(TexHandle);
3198    Ops.push_back(N->getOperand(2));
3199    Ops.push_back(N->getOperand(3));
3200    Ops.push_back(N->getOperand(4));
3201    Ops.push_back(Chain);
3202    break;
3203  case NVPTXISD::Suld3DI32Trap:
3204    Opc = NVPTX::SULD_3D_I32_TRAP;
3205    Ops.push_back(TexHandle);
3206    Ops.push_back(N->getOperand(2));
3207    Ops.push_back(N->getOperand(3));
3208    Ops.push_back(N->getOperand(4));
3209    Ops.push_back(Chain);
3210    break;
3211  case NVPTXISD::Suld3DV2I8Trap:
3212    Opc = NVPTX::SULD_3D_V2I8_TRAP;
3213    Ops.push_back(TexHandle);
3214    Ops.push_back(N->getOperand(2));
3215    Ops.push_back(N->getOperand(3));
3216    Ops.push_back(N->getOperand(4));
3217    Ops.push_back(Chain);
3218    break;
3219  case NVPTXISD::Suld3DV2I16Trap:
3220    Opc = NVPTX::SULD_3D_V2I16_TRAP;
3221    Ops.push_back(TexHandle);
3222    Ops.push_back(N->getOperand(2));
3223    Ops.push_back(N->getOperand(3));
3224    Ops.push_back(N->getOperand(4));
3225    Ops.push_back(Chain);
3226    break;
3227  case NVPTXISD::Suld3DV2I32Trap:
3228    Opc = NVPTX::SULD_3D_V2I32_TRAP;
3229    Ops.push_back(TexHandle);
3230    Ops.push_back(N->getOperand(2));
3231    Ops.push_back(N->getOperand(3));
3232    Ops.push_back(N->getOperand(4));
3233    Ops.push_back(Chain);
3234    break;
3235  case NVPTXISD::Suld3DV4I8Trap:
3236    Opc = NVPTX::SULD_3D_V4I8_TRAP;
3237    Ops.push_back(TexHandle);
3238    Ops.push_back(N->getOperand(2));
3239    Ops.push_back(N->getOperand(3));
3240    Ops.push_back(N->getOperand(4));
3241    Ops.push_back(Chain);
3242    break;
3243  case NVPTXISD::Suld3DV4I16Trap:
3244    Opc = NVPTX::SULD_3D_V4I16_TRAP;
3245    Ops.push_back(TexHandle);
3246    Ops.push_back(N->getOperand(2));
3247    Ops.push_back(N->getOperand(3));
3248    Ops.push_back(N->getOperand(4));
3249    Ops.push_back(Chain);
3250    break;
3251  case NVPTXISD::Suld3DV4I32Trap:
3252    Opc = NVPTX::SULD_3D_V4I32_TRAP;
3253    Ops.push_back(TexHandle);
3254    Ops.push_back(N->getOperand(2));
3255    Ops.push_back(N->getOperand(3));
3256    Ops.push_back(N->getOperand(4));
3257    Ops.push_back(Chain);
3258    break;
3259  }
3260  Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3261  return Ret;
3262}
3263
3264/// SelectBFE - Look for instruction sequences that can be made more efficient
3265/// by using the 'bfe' (bit-field extract) PTX instruction
3266SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
3267  SDValue LHS = N->getOperand(0);
3268  SDValue RHS = N->getOperand(1);
3269  SDValue Len;
3270  SDValue Start;
3271  SDValue Val;
3272  bool IsSigned = false;
3273
3274  if (N->getOpcode() == ISD::AND) {
3275    // Canonicalize the operands
3276    // We want 'and %val, %mask'
3277    if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3278      std::swap(LHS, RHS);
3279    }
3280
3281    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3282    if (!Mask) {
3283      // We need a constant mask on the RHS of the AND
3284      return NULL;
3285    }
3286
3287    // Extract the mask bits
3288    uint64_t MaskVal = Mask->getZExtValue();
3289    if (!isMask_64(MaskVal)) {
3290      // We *could* handle shifted masks here, but doing so would require an
3291      // 'and' operation to fix up the low-order bits so we would trade
3292      // shr+and for bfe+and, which has the same throughput
3293      return NULL;
3294    }
3295
3296    // How many bits are in our mask?
3297    uint64_t NumBits = CountTrailingOnes_64(MaskVal);
3298    Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
3299
3300    if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3301      // We have a 'srl/and' pair, extract the effective start bit and length
3302      Val = LHS.getNode()->getOperand(0);
3303      Start = LHS.getNode()->getOperand(1);
3304      ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3305      if (StartConst) {
3306        uint64_t StartVal = StartConst->getZExtValue();
3307        // How many "good" bits do we have left?  "good" is defined here as bits
3308        // that exist in the original value, not shifted in.
3309        uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
3310        if (NumBits > GoodBits) {
3311          // Do not handle the case where bits have been shifted in. In theory
3312          // we could handle this, but the cost is likely higher than just
3313          // emitting the srl/and pair.
3314          return NULL;
3315        }
3316        Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
3317      } else {
3318        // Do not handle the case where the shift amount (can be zero if no srl
3319        // was found) is not constant. We could handle this case, but it would
3320        // require run-time logic that would be more expensive than just
3321        // emitting the srl/and pair.
3322        return NULL;
3323      }
3324    } else {
3325      // Do not handle the case where the LHS of the and is not a shift. While
3326      // it would be trivial to handle this case, it would just transform
3327      // 'and' -> 'bfe', but 'and' has higher-throughput.
3328      return NULL;
3329    }
3330  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3331    if (LHS->getOpcode() == ISD::AND) {
3332      ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3333      if (!ShiftCnst) {
3334        // Shift amount must be constant
3335        return NULL;
3336      }
3337
3338      uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3339
3340      SDValue AndLHS = LHS->getOperand(0);
3341      SDValue AndRHS = LHS->getOperand(1);
3342
3343      // Canonicalize the AND to have the mask on the RHS
3344      if (isa<ConstantSDNode>(AndLHS)) {
3345        std::swap(AndLHS, AndRHS);
3346      }
3347
3348      ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3349      if (!MaskCnst) {
3350        // Mask must be constant
3351        return NULL;
3352      }
3353
3354      uint64_t MaskVal = MaskCnst->getZExtValue();
3355      uint64_t NumZeros;
3356      uint64_t NumBits;
3357      if (isMask_64(MaskVal)) {
3358        NumZeros = 0;
3359        // The number of bits in the result bitfield will be the number of
3360        // trailing ones (the AND) minus the number of bits we shift off
3361        NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
3362      } else if (isShiftedMask_64(MaskVal)) {
3363        NumZeros = countTrailingZeros(MaskVal);
3364        unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
3365        // The number of bits in the result bitfield will be the number of
3366        // trailing zeros plus the number of set bits in the mask minus the
3367        // number of bits we shift off
3368        NumBits = NumZeros + NumOnes - ShiftAmt;
3369      } else {
3370        // This is not a mask we can handle
3371        return NULL;
3372      }
3373
3374      if (ShiftAmt < NumZeros) {
3375        // Handling this case would require extra logic that would make this
3376        // transformation non-profitable
3377        return NULL;
3378      }
3379
3380      Val = AndLHS;
3381      Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
3382      Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
3383    } else if (LHS->getOpcode() == ISD::SHL) {
3384      // Here, we have a pattern like:
3385      //
3386      // (sra (shl val, NN), MM)
3387      // or
3388      // (srl (shl val, NN), MM)
3389      //
3390      // If MM >= NN, we can efficiently optimize this with bfe
3391      Val = LHS->getOperand(0);
3392
3393      SDValue ShlRHS = LHS->getOperand(1);
3394      ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3395      if (!ShlCnst) {
3396        // Shift amount must be constant
3397        return NULL;
3398      }
3399      uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3400
3401      SDValue ShrRHS = RHS;
3402      ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3403      if (!ShrCnst) {
3404        // Shift amount must be constant
3405        return NULL;
3406      }
3407      uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3408
3409      // To avoid extra codegen and be profitable, we need Outer >= Inner
3410      if (OuterShiftAmt < InnerShiftAmt) {
3411        return NULL;
3412      }
3413
3414      // If the outer shift is more than the type size, we have no bitfield to
3415      // extract (since we also check that the inner shift is <= the outer shift
3416      // then this also implies that the inner shift is < the type size)
3417      if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
3418        return NULL;
3419      }
3420
3421      Start =
3422        CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
3423      Len =
3424        CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
3425                                  OuterShiftAmt, MVT::i32);
3426
3427      if (N->getOpcode() == ISD::SRA) {
3428        // If we have a arithmetic right shift, we need to use the signed bfe
3429        // variant
3430        IsSigned = true;
3431      }
3432    } else {
3433      // No can do...
3434      return NULL;
3435    }
3436  } else {
3437    // No can do...
3438    return NULL;
3439  }
3440
3441
3442  unsigned Opc;
3443  // For the BFE operations we form here from "and" and "srl", always use the
3444  // unsigned variants.
3445  if (Val.getValueType() == MVT::i32) {
3446    if (IsSigned) {
3447      Opc = NVPTX::BFE_S32rii;
3448    } else {
3449      Opc = NVPTX::BFE_U32rii;
3450    }
3451  } else if (Val.getValueType() == MVT::i64) {
3452    if (IsSigned) {
3453      Opc = NVPTX::BFE_S64rii;
3454    } else {
3455      Opc = NVPTX::BFE_U64rii;
3456    }
3457  } else {
3458    // We cannot handle this type
3459    return NULL;
3460  }
3461
3462  SDValue Ops[] = {
3463    Val, Start, Len
3464  };
3465
3466  SDNode *Ret =
3467    CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3468
3469  return Ret;
3470}
3471
3472// SelectDirectAddr - Match a direct address for DAG.
3473// A direct address could be a globaladdress or externalsymbol.
3474bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3475  // Return true if TGA or ES.
3476  if (N.getOpcode() == ISD::TargetGlobalAddress ||
3477      N.getOpcode() == ISD::TargetExternalSymbol) {
3478    Address = N;
3479    return true;
3480  }
3481  if (N.getOpcode() == NVPTXISD::Wrapper) {
3482    Address = N.getOperand(0);
3483    return true;
3484  }
3485  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3486    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
3487    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
3488      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
3489        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
3490  }
3491  return false;
3492}
3493
3494// symbol+offset
3495bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3496    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3497  if (Addr.getOpcode() == ISD::ADD) {
3498    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3499      SDValue base = Addr.getOperand(0);
3500      if (SelectDirectAddr(base, Base)) {
3501        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
3502        return true;
3503      }
3504    }
3505  }
3506  return false;
3507}
3508
3509// symbol+offset
3510bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3511                                     SDValue &Base, SDValue &Offset) {
3512  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3513}
3514
3515// symbol+offset
3516bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3517                                       SDValue &Base, SDValue &Offset) {
3518  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3519}
3520
3521// register+offset
3522bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3523    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3524  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3525    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3526    Offset = CurDAG->getTargetConstant(0, mvt);
3527    return true;
3528  }
3529  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3530      Addr.getOpcode() == ISD::TargetGlobalAddress)
3531    return false; // direct calls.
3532
3533  if (Addr.getOpcode() == ISD::ADD) {
3534    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3535      return false;
3536    }
3537    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3538      if (FrameIndexSDNode *FIN =
3539              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3540        // Constant offset from frame ref.
3541        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3542      else
3543        Base = Addr.getOperand(0);
3544      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
3545      return true;
3546    }
3547  }
3548  return false;
3549}
3550
3551// register+offset
3552bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3553                                     SDValue &Base, SDValue &Offset) {
3554  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3555}
3556
3557// register+offset
3558bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3559                                       SDValue &Base, SDValue &Offset) {
3560  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3561}
3562
3563bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3564                                                 unsigned int spN) const {
3565  const Value *Src = nullptr;
3566  // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
3567  // the classof() for MemSDNode does not include MemIntrinsicSDNode
3568  // (See SelectionDAGNodes.h). So we need to check for both.
3569  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3570    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3571      return true;
3572    Src = mN->getMemOperand()->getValue();
3573  } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
3574    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3575      return true;
3576    Src = mN->getMemOperand()->getValue();
3577  }
3578  if (!Src)
3579    return false;
3580  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
3581    return (PT->getAddressSpace() == spN);
3582  return false;
3583}
3584
3585/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3586/// inline asm expressions.
3587bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
3588    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
3589  SDValue Op0, Op1;
3590  switch (ConstraintCode) {
3591  default:
3592    return true;
3593  case 'm': // memory
3594    if (SelectDirectAddr(Op, Op0)) {
3595      OutOps.push_back(Op0);
3596      OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
3597      return false;
3598    }
3599    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3600      OutOps.push_back(Op0);
3601      OutOps.push_back(Op1);
3602      return false;
3603    }
3604    break;
3605  }
3606  return true;
3607}
3608