NVPTXISelDAGToDAG.cpp revision 9bc8feeb4fd15883949900194c93fd1704c404b4
1//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXISelDAGToDAG.h"
15#include "llvm/IR/GlobalValue.h"
16#include "llvm/IR/Instructions.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Support/Debug.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/raw_ostream.h"
21#include "llvm/Target/TargetIntrinsicInfo.h"
22
23#undef DEBUG_TYPE
24#define DEBUG_TYPE "nvptx-isel"
25
26using namespace llvm;
27
28static cl::opt<bool> UseFMADInstruction(
29    "nvptx-mad-enable", cl::ZeroOrMore,
30    cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
31    cl::init(false));
32
33static cl::opt<int>
34FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
35                 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
36                          " 1: do it  2: do it aggressively"),
37                 cl::init(2));
38
39static cl::opt<int> UsePrecDivF32(
40    "nvptx-prec-divf32", cl::ZeroOrMore,
41    cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
42             " IEEE Compliant F32 div.rnd if avaiable."),
43    cl::init(2));
44
45static cl::opt<bool>
46UsePrecSqrtF32("nvptx-prec-sqrtf32",
47          cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
48          cl::init(true));
49
50/// createNVPTXISelDag - This pass converts a legalized DAG into a
51/// NVPTX-specific DAG, ready for instruction scheduling.
52FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
53                                       llvm::CodeGenOpt::Level OptLevel) {
54  return new NVPTXDAGToDAGISel(TM, OptLevel);
55}
56
57NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
58                                     CodeGenOpt::Level OptLevel)
59    : SelectionDAGISel(tm, OptLevel),
60      Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
61  // Always do fma.f32 fpcontract if the target supports the instruction.
62  // Always do fma.f64 fpcontract if the target supports the instruction.
63  // Do mad.f32 is nvptx-mad-enable is specified and the target does not
64  // support fma.f32.
65
66  doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
67  doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
68  doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
69  doFMAF32AGG =
70      (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
71  doFMAF64AGG =
72      (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
73
74  allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
75
76  UseF32FTZ = false;
77
78  doMulWide = (OptLevel > 0);
79
80  // Decide how to translate f32 div
81  do_DIVF32_PREC = UsePrecDivF32;
82  // Decide how to translate f32 sqrt
83  do_SQRTF32_PREC = UsePrecSqrtF32;
84  // sm less than sm_20 does not support div.rnd. Use div.full.
85  if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
86    do_DIVF32_PREC = 1;
87
88}
89
90/// Select - Select instructions not customized! Used for
91/// expanded, promoted and normal instructions.
92SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
93
94  if (N->isMachineOpcode())
95    return NULL; // Already selected.
96
97  SDNode *ResNode = NULL;
98  switch (N->getOpcode()) {
99  case ISD::LOAD:
100    ResNode = SelectLoad(N);
101    break;
102  case ISD::STORE:
103    ResNode = SelectStore(N);
104    break;
105  case NVPTXISD::LoadV2:
106  case NVPTXISD::LoadV4:
107    ResNode = SelectLoadVector(N);
108    break;
109  case NVPTXISD::LDGV2:
110  case NVPTXISD::LDGV4:
111  case NVPTXISD::LDUV2:
112  case NVPTXISD::LDUV4:
113    ResNode = SelectLDGLDUVector(N);
114    break;
115  case NVPTXISD::StoreV2:
116  case NVPTXISD::StoreV4:
117    ResNode = SelectStoreVector(N);
118    break;
119  case NVPTXISD::LoadParam:
120  case NVPTXISD::LoadParamV2:
121  case NVPTXISD::LoadParamV4:
122    ResNode = SelectLoadParam(N);
123    break;
124  case NVPTXISD::StoreRetval:
125  case NVPTXISD::StoreRetvalV2:
126  case NVPTXISD::StoreRetvalV4:
127    ResNode = SelectStoreRetval(N);
128    break;
129  case NVPTXISD::StoreParam:
130  case NVPTXISD::StoreParamV2:
131  case NVPTXISD::StoreParamV4:
132  case NVPTXISD::StoreParamS32:
133  case NVPTXISD::StoreParamU32:
134    ResNode = SelectStoreParam(N);
135    break;
136  default:
137    break;
138  }
139  if (ResNode)
140    return ResNode;
141  return SelectCode(N);
142}
143
144static unsigned int getCodeAddrSpace(MemSDNode *N,
145                                     const NVPTXSubtarget &Subtarget) {
146  const Value *Src = N->getSrcValue();
147
148  if (!Src)
149    return NVPTX::PTXLdStInstCode::GENERIC;
150
151  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
152    switch (PT->getAddressSpace()) {
153    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
154    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
155    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
156    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
157    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
158    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
159    default: break;
160    }
161  }
162  return NVPTX::PTXLdStInstCode::GENERIC;
163}
164
165SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
166  SDLoc dl(N);
167  LoadSDNode *LD = cast<LoadSDNode>(N);
168  EVT LoadedVT = LD->getMemoryVT();
169  SDNode *NVPTXLD = NULL;
170
171  // do not support pre/post inc/dec
172  if (LD->isIndexed())
173    return NULL;
174
175  if (!LoadedVT.isSimple())
176    return NULL;
177
178  // Address Space Setting
179  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
180
181  // Volatile Setting
182  // - .volatile is only availalble for .global and .shared
183  bool isVolatile = LD->isVolatile();
184  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
185      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
186      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
187    isVolatile = false;
188
189  // Vector Setting
190  MVT SimpleVT = LoadedVT.getSimpleVT();
191  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
192  if (SimpleVT.isVector()) {
193    unsigned num = SimpleVT.getVectorNumElements();
194    if (num == 2)
195      vecType = NVPTX::PTXLdStInstCode::V2;
196    else if (num == 4)
197      vecType = NVPTX::PTXLdStInstCode::V4;
198    else
199      return NULL;
200  }
201
202  // Type Setting: fromType + fromTypeWidth
203  //
204  // Sign   : ISD::SEXTLOAD
205  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
206  //          type is integer
207  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
208  MVT ScalarVT = SimpleVT.getScalarType();
209  // Read at least 8 bits (predicates are stored as 8-bit values)
210  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
211  unsigned int fromType;
212  if ((LD->getExtensionType() == ISD::SEXTLOAD))
213    fromType = NVPTX::PTXLdStInstCode::Signed;
214  else if (ScalarVT.isFloatingPoint())
215    fromType = NVPTX::PTXLdStInstCode::Float;
216  else
217    fromType = NVPTX::PTXLdStInstCode::Unsigned;
218
219  // Create the machine instruction DAG
220  SDValue Chain = N->getOperand(0);
221  SDValue N1 = N->getOperand(1);
222  SDValue Addr;
223  SDValue Offset, Base;
224  unsigned Opcode;
225  MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
226
227  if (SelectDirectAddr(N1, Addr)) {
228    switch (TargetVT) {
229    case MVT::i8:
230      Opcode = NVPTX::LD_i8_avar;
231      break;
232    case MVT::i16:
233      Opcode = NVPTX::LD_i16_avar;
234      break;
235    case MVT::i32:
236      Opcode = NVPTX::LD_i32_avar;
237      break;
238    case MVT::i64:
239      Opcode = NVPTX::LD_i64_avar;
240      break;
241    case MVT::f32:
242      Opcode = NVPTX::LD_f32_avar;
243      break;
244    case MVT::f64:
245      Opcode = NVPTX::LD_f64_avar;
246      break;
247    default:
248      return NULL;
249    }
250    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
251                      getI32Imm(vecType), getI32Imm(fromType),
252                      getI32Imm(fromTypeWidth), Addr, Chain };
253    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
254  } else if (Subtarget.is64Bit()
255                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
256                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
257    switch (TargetVT) {
258    case MVT::i8:
259      Opcode = NVPTX::LD_i8_asi;
260      break;
261    case MVT::i16:
262      Opcode = NVPTX::LD_i16_asi;
263      break;
264    case MVT::i32:
265      Opcode = NVPTX::LD_i32_asi;
266      break;
267    case MVT::i64:
268      Opcode = NVPTX::LD_i64_asi;
269      break;
270    case MVT::f32:
271      Opcode = NVPTX::LD_f32_asi;
272      break;
273    case MVT::f64:
274      Opcode = NVPTX::LD_f64_asi;
275      break;
276    default:
277      return NULL;
278    }
279    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
280                      getI32Imm(vecType), getI32Imm(fromType),
281                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
282    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
283  } else if (Subtarget.is64Bit()
284                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
285                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
286    if (Subtarget.is64Bit()) {
287      switch (TargetVT) {
288      case MVT::i8:
289        Opcode = NVPTX::LD_i8_ari_64;
290        break;
291      case MVT::i16:
292        Opcode = NVPTX::LD_i16_ari_64;
293        break;
294      case MVT::i32:
295        Opcode = NVPTX::LD_i32_ari_64;
296        break;
297      case MVT::i64:
298        Opcode = NVPTX::LD_i64_ari_64;
299        break;
300      case MVT::f32:
301        Opcode = NVPTX::LD_f32_ari_64;
302        break;
303      case MVT::f64:
304        Opcode = NVPTX::LD_f64_ari_64;
305        break;
306      default:
307        return NULL;
308      }
309    } else {
310      switch (TargetVT) {
311      case MVT::i8:
312        Opcode = NVPTX::LD_i8_ari;
313        break;
314      case MVT::i16:
315        Opcode = NVPTX::LD_i16_ari;
316        break;
317      case MVT::i32:
318        Opcode = NVPTX::LD_i32_ari;
319        break;
320      case MVT::i64:
321        Opcode = NVPTX::LD_i64_ari;
322        break;
323      case MVT::f32:
324        Opcode = NVPTX::LD_f32_ari;
325        break;
326      case MVT::f64:
327        Opcode = NVPTX::LD_f64_ari;
328        break;
329      default:
330        return NULL;
331      }
332    }
333    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
334                      getI32Imm(vecType), getI32Imm(fromType),
335                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
336    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
337  } else {
338    if (Subtarget.is64Bit()) {
339      switch (TargetVT) {
340      case MVT::i8:
341        Opcode = NVPTX::LD_i8_areg_64;
342        break;
343      case MVT::i16:
344        Opcode = NVPTX::LD_i16_areg_64;
345        break;
346      case MVT::i32:
347        Opcode = NVPTX::LD_i32_areg_64;
348        break;
349      case MVT::i64:
350        Opcode = NVPTX::LD_i64_areg_64;
351        break;
352      case MVT::f32:
353        Opcode = NVPTX::LD_f32_areg_64;
354        break;
355      case MVT::f64:
356        Opcode = NVPTX::LD_f64_areg_64;
357        break;
358      default:
359        return NULL;
360      }
361    } else {
362      switch (TargetVT) {
363      case MVT::i8:
364        Opcode = NVPTX::LD_i8_areg;
365        break;
366      case MVT::i16:
367        Opcode = NVPTX::LD_i16_areg;
368        break;
369      case MVT::i32:
370        Opcode = NVPTX::LD_i32_areg;
371        break;
372      case MVT::i64:
373        Opcode = NVPTX::LD_i64_areg;
374        break;
375      case MVT::f32:
376        Opcode = NVPTX::LD_f32_areg;
377        break;
378      case MVT::f64:
379        Opcode = NVPTX::LD_f64_areg;
380        break;
381      default:
382        return NULL;
383      }
384    }
385    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
386                      getI32Imm(vecType), getI32Imm(fromType),
387                      getI32Imm(fromTypeWidth), N1, Chain };
388    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
389  }
390
391  if (NVPTXLD != NULL) {
392    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
393    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
394    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
395  }
396
397  return NVPTXLD;
398}
399
400SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
401
402  SDValue Chain = N->getOperand(0);
403  SDValue Op1 = N->getOperand(1);
404  SDValue Addr, Offset, Base;
405  unsigned Opcode;
406  SDLoc DL(N);
407  SDNode *LD;
408  MemSDNode *MemSD = cast<MemSDNode>(N);
409  EVT LoadedVT = MemSD->getMemoryVT();
410
411  if (!LoadedVT.isSimple())
412    return NULL;
413
414  // Address Space Setting
415  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
416
417  // Volatile Setting
418  // - .volatile is only availalble for .global and .shared
419  bool IsVolatile = MemSD->isVolatile();
420  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
421      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
422      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
423    IsVolatile = false;
424
425  // Vector Setting
426  MVT SimpleVT = LoadedVT.getSimpleVT();
427
428  // Type Setting: fromType + fromTypeWidth
429  //
430  // Sign   : ISD::SEXTLOAD
431  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
432  //          type is integer
433  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
434  MVT ScalarVT = SimpleVT.getScalarType();
435  // Read at least 8 bits (predicates are stored as 8-bit values)
436  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
437  unsigned int FromType;
438  // The last operand holds the original LoadSDNode::getExtensionType() value
439  unsigned ExtensionType = cast<ConstantSDNode>(
440      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
441  if (ExtensionType == ISD::SEXTLOAD)
442    FromType = NVPTX::PTXLdStInstCode::Signed;
443  else if (ScalarVT.isFloatingPoint())
444    FromType = NVPTX::PTXLdStInstCode::Float;
445  else
446    FromType = NVPTX::PTXLdStInstCode::Unsigned;
447
448  unsigned VecType;
449
450  switch (N->getOpcode()) {
451  case NVPTXISD::LoadV2:
452    VecType = NVPTX::PTXLdStInstCode::V2;
453    break;
454  case NVPTXISD::LoadV4:
455    VecType = NVPTX::PTXLdStInstCode::V4;
456    break;
457  default:
458    return NULL;
459  }
460
461  EVT EltVT = N->getValueType(0);
462
463  if (SelectDirectAddr(Op1, Addr)) {
464    switch (N->getOpcode()) {
465    default:
466      return NULL;
467    case NVPTXISD::LoadV2:
468      switch (EltVT.getSimpleVT().SimpleTy) {
469      default:
470        return NULL;
471      case MVT::i8:
472        Opcode = NVPTX::LDV_i8_v2_avar;
473        break;
474      case MVT::i16:
475        Opcode = NVPTX::LDV_i16_v2_avar;
476        break;
477      case MVT::i32:
478        Opcode = NVPTX::LDV_i32_v2_avar;
479        break;
480      case MVT::i64:
481        Opcode = NVPTX::LDV_i64_v2_avar;
482        break;
483      case MVT::f32:
484        Opcode = NVPTX::LDV_f32_v2_avar;
485        break;
486      case MVT::f64:
487        Opcode = NVPTX::LDV_f64_v2_avar;
488        break;
489      }
490      break;
491    case NVPTXISD::LoadV4:
492      switch (EltVT.getSimpleVT().SimpleTy) {
493      default:
494        return NULL;
495      case MVT::i8:
496        Opcode = NVPTX::LDV_i8_v4_avar;
497        break;
498      case MVT::i16:
499        Opcode = NVPTX::LDV_i16_v4_avar;
500        break;
501      case MVT::i32:
502        Opcode = NVPTX::LDV_i32_v4_avar;
503        break;
504      case MVT::f32:
505        Opcode = NVPTX::LDV_f32_v4_avar;
506        break;
507      }
508      break;
509    }
510
511    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
512                      getI32Imm(VecType), getI32Imm(FromType),
513                      getI32Imm(FromTypeWidth), Addr, Chain };
514    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
515  } else if (Subtarget.is64Bit()
516                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
517                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
518    switch (N->getOpcode()) {
519    default:
520      return NULL;
521    case NVPTXISD::LoadV2:
522      switch (EltVT.getSimpleVT().SimpleTy) {
523      default:
524        return NULL;
525      case MVT::i8:
526        Opcode = NVPTX::LDV_i8_v2_asi;
527        break;
528      case MVT::i16:
529        Opcode = NVPTX::LDV_i16_v2_asi;
530        break;
531      case MVT::i32:
532        Opcode = NVPTX::LDV_i32_v2_asi;
533        break;
534      case MVT::i64:
535        Opcode = NVPTX::LDV_i64_v2_asi;
536        break;
537      case MVT::f32:
538        Opcode = NVPTX::LDV_f32_v2_asi;
539        break;
540      case MVT::f64:
541        Opcode = NVPTX::LDV_f64_v2_asi;
542        break;
543      }
544      break;
545    case NVPTXISD::LoadV4:
546      switch (EltVT.getSimpleVT().SimpleTy) {
547      default:
548        return NULL;
549      case MVT::i8:
550        Opcode = NVPTX::LDV_i8_v4_asi;
551        break;
552      case MVT::i16:
553        Opcode = NVPTX::LDV_i16_v4_asi;
554        break;
555      case MVT::i32:
556        Opcode = NVPTX::LDV_i32_v4_asi;
557        break;
558      case MVT::f32:
559        Opcode = NVPTX::LDV_f32_v4_asi;
560        break;
561      }
562      break;
563    }
564
565    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
566                      getI32Imm(VecType), getI32Imm(FromType),
567                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
568    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
569  } else if (Subtarget.is64Bit()
570                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
571                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
572    if (Subtarget.is64Bit()) {
573      switch (N->getOpcode()) {
574      default:
575        return NULL;
576      case NVPTXISD::LoadV2:
577        switch (EltVT.getSimpleVT().SimpleTy) {
578        default:
579          return NULL;
580        case MVT::i8:
581          Opcode = NVPTX::LDV_i8_v2_ari_64;
582          break;
583        case MVT::i16:
584          Opcode = NVPTX::LDV_i16_v2_ari_64;
585          break;
586        case MVT::i32:
587          Opcode = NVPTX::LDV_i32_v2_ari_64;
588          break;
589        case MVT::i64:
590          Opcode = NVPTX::LDV_i64_v2_ari_64;
591          break;
592        case MVT::f32:
593          Opcode = NVPTX::LDV_f32_v2_ari_64;
594          break;
595        case MVT::f64:
596          Opcode = NVPTX::LDV_f64_v2_ari_64;
597          break;
598        }
599        break;
600      case NVPTXISD::LoadV4:
601        switch (EltVT.getSimpleVT().SimpleTy) {
602        default:
603          return NULL;
604        case MVT::i8:
605          Opcode = NVPTX::LDV_i8_v4_ari_64;
606          break;
607        case MVT::i16:
608          Opcode = NVPTX::LDV_i16_v4_ari_64;
609          break;
610        case MVT::i32:
611          Opcode = NVPTX::LDV_i32_v4_ari_64;
612          break;
613        case MVT::f32:
614          Opcode = NVPTX::LDV_f32_v4_ari_64;
615          break;
616        }
617        break;
618      }
619    } else {
620      switch (N->getOpcode()) {
621      default:
622        return NULL;
623      case NVPTXISD::LoadV2:
624        switch (EltVT.getSimpleVT().SimpleTy) {
625        default:
626          return NULL;
627        case MVT::i8:
628          Opcode = NVPTX::LDV_i8_v2_ari;
629          break;
630        case MVT::i16:
631          Opcode = NVPTX::LDV_i16_v2_ari;
632          break;
633        case MVT::i32:
634          Opcode = NVPTX::LDV_i32_v2_ari;
635          break;
636        case MVT::i64:
637          Opcode = NVPTX::LDV_i64_v2_ari;
638          break;
639        case MVT::f32:
640          Opcode = NVPTX::LDV_f32_v2_ari;
641          break;
642        case MVT::f64:
643          Opcode = NVPTX::LDV_f64_v2_ari;
644          break;
645        }
646        break;
647      case NVPTXISD::LoadV4:
648        switch (EltVT.getSimpleVT().SimpleTy) {
649        default:
650          return NULL;
651        case MVT::i8:
652          Opcode = NVPTX::LDV_i8_v4_ari;
653          break;
654        case MVT::i16:
655          Opcode = NVPTX::LDV_i16_v4_ari;
656          break;
657        case MVT::i32:
658          Opcode = NVPTX::LDV_i32_v4_ari;
659          break;
660        case MVT::f32:
661          Opcode = NVPTX::LDV_f32_v4_ari;
662          break;
663        }
664        break;
665      }
666    }
667
668    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
669                      getI32Imm(VecType), getI32Imm(FromType),
670                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
671
672    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
673  } else {
674    if (Subtarget.is64Bit()) {
675      switch (N->getOpcode()) {
676      default:
677        return NULL;
678      case NVPTXISD::LoadV2:
679        switch (EltVT.getSimpleVT().SimpleTy) {
680        default:
681          return NULL;
682        case MVT::i8:
683          Opcode = NVPTX::LDV_i8_v2_areg_64;
684          break;
685        case MVT::i16:
686          Opcode = NVPTX::LDV_i16_v2_areg_64;
687          break;
688        case MVT::i32:
689          Opcode = NVPTX::LDV_i32_v2_areg_64;
690          break;
691        case MVT::i64:
692          Opcode = NVPTX::LDV_i64_v2_areg_64;
693          break;
694        case MVT::f32:
695          Opcode = NVPTX::LDV_f32_v2_areg_64;
696          break;
697        case MVT::f64:
698          Opcode = NVPTX::LDV_f64_v2_areg_64;
699          break;
700        }
701        break;
702      case NVPTXISD::LoadV4:
703        switch (EltVT.getSimpleVT().SimpleTy) {
704        default:
705          return NULL;
706        case MVT::i8:
707          Opcode = NVPTX::LDV_i8_v4_areg_64;
708          break;
709        case MVT::i16:
710          Opcode = NVPTX::LDV_i16_v4_areg_64;
711          break;
712        case MVT::i32:
713          Opcode = NVPTX::LDV_i32_v4_areg_64;
714          break;
715        case MVT::f32:
716          Opcode = NVPTX::LDV_f32_v4_areg_64;
717          break;
718        }
719        break;
720      }
721    } else {
722      switch (N->getOpcode()) {
723      default:
724        return NULL;
725      case NVPTXISD::LoadV2:
726        switch (EltVT.getSimpleVT().SimpleTy) {
727        default:
728          return NULL;
729        case MVT::i8:
730          Opcode = NVPTX::LDV_i8_v2_areg;
731          break;
732        case MVT::i16:
733          Opcode = NVPTX::LDV_i16_v2_areg;
734          break;
735        case MVT::i32:
736          Opcode = NVPTX::LDV_i32_v2_areg;
737          break;
738        case MVT::i64:
739          Opcode = NVPTX::LDV_i64_v2_areg;
740          break;
741        case MVT::f32:
742          Opcode = NVPTX::LDV_f32_v2_areg;
743          break;
744        case MVT::f64:
745          Opcode = NVPTX::LDV_f64_v2_areg;
746          break;
747        }
748        break;
749      case NVPTXISD::LoadV4:
750        switch (EltVT.getSimpleVT().SimpleTy) {
751        default:
752          return NULL;
753        case MVT::i8:
754          Opcode = NVPTX::LDV_i8_v4_areg;
755          break;
756        case MVT::i16:
757          Opcode = NVPTX::LDV_i16_v4_areg;
758          break;
759        case MVT::i32:
760          Opcode = NVPTX::LDV_i32_v4_areg;
761          break;
762        case MVT::f32:
763          Opcode = NVPTX::LDV_f32_v4_areg;
764          break;
765        }
766        break;
767      }
768    }
769
770    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
771                      getI32Imm(VecType), getI32Imm(FromType),
772                      getI32Imm(FromTypeWidth), Op1, Chain };
773    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
774  }
775
776  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
777  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
778  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
779
780  return LD;
781}
782
783SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
784
785  SDValue Chain = N->getOperand(0);
786  SDValue Op1 = N->getOperand(1);
787  unsigned Opcode;
788  SDLoc DL(N);
789  SDNode *LD;
790  MemSDNode *Mem = cast<MemSDNode>(N);
791  SDValue Base, Offset, Addr;
792
793  EVT EltVT = Mem->getMemoryVT().getVectorElementType();
794
795  if (SelectDirectAddr(Op1, Addr)) {
796    switch (N->getOpcode()) {
797    default:
798      return NULL;
799    case NVPTXISD::LDGV2:
800      switch (EltVT.getSimpleVT().SimpleTy) {
801      default:
802        return NULL;
803      case MVT::i8:
804        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
805        break;
806      case MVT::i16:
807        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
808        break;
809      case MVT::i32:
810        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
811        break;
812      case MVT::i64:
813        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
814        break;
815      case MVT::f32:
816        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
817        break;
818      case MVT::f64:
819        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
820        break;
821      }
822      break;
823    case NVPTXISD::LDUV2:
824      switch (EltVT.getSimpleVT().SimpleTy) {
825      default:
826        return NULL;
827      case MVT::i8:
828        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
829        break;
830      case MVT::i16:
831        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
832        break;
833      case MVT::i32:
834        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
835        break;
836      case MVT::i64:
837        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
838        break;
839      case MVT::f32:
840        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
841        break;
842      case MVT::f64:
843        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
844        break;
845      }
846      break;
847    case NVPTXISD::LDGV4:
848      switch (EltVT.getSimpleVT().SimpleTy) {
849      default:
850        return NULL;
851      case MVT::i8:
852        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
853        break;
854      case MVT::i16:
855        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
856        break;
857      case MVT::i32:
858        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
859        break;
860      case MVT::f32:
861        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
862        break;
863      }
864      break;
865    case NVPTXISD::LDUV4:
866      switch (EltVT.getSimpleVT().SimpleTy) {
867      default:
868        return NULL;
869      case MVT::i8:
870        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
871        break;
872      case MVT::i16:
873        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
874        break;
875      case MVT::i32:
876        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
877        break;
878      case MVT::f32:
879        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
880        break;
881      }
882      break;
883    }
884
885    SDValue Ops[] = { Addr, Chain };
886    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
887                                ArrayRef<SDValue>(Ops, 2));
888  } else if (Subtarget.is64Bit()
889                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
890                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
891    if (Subtarget.is64Bit()) {
892      switch (N->getOpcode()) {
893      default:
894        return NULL;
895      case NVPTXISD::LDGV2:
896        switch (EltVT.getSimpleVT().SimpleTy) {
897        default:
898          return NULL;
899        case MVT::i8:
900          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
901          break;
902        case MVT::i16:
903          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
904          break;
905        case MVT::i32:
906          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
907          break;
908        case MVT::i64:
909          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
910          break;
911        case MVT::f32:
912          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
913          break;
914        case MVT::f64:
915          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
916          break;
917        }
918        break;
919      case NVPTXISD::LDUV2:
920        switch (EltVT.getSimpleVT().SimpleTy) {
921        default:
922          return NULL;
923        case MVT::i8:
924          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
925          break;
926        case MVT::i16:
927          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
928          break;
929        case MVT::i32:
930          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
931          break;
932        case MVT::i64:
933          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
934          break;
935        case MVT::f32:
936          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
937          break;
938        case MVT::f64:
939          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
940          break;
941        }
942        break;
943      case NVPTXISD::LDGV4:
944        switch (EltVT.getSimpleVT().SimpleTy) {
945        default:
946          return NULL;
947        case MVT::i8:
948          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
949          break;
950        case MVT::i16:
951          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
952          break;
953        case MVT::i32:
954          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
955          break;
956        case MVT::f32:
957          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
958          break;
959        }
960        break;
961      case NVPTXISD::LDUV4:
962        switch (EltVT.getSimpleVT().SimpleTy) {
963        default:
964          return NULL;
965        case MVT::i8:
966          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
967          break;
968        case MVT::i16:
969          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
970          break;
971        case MVT::i32:
972          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
973          break;
974        case MVT::f32:
975          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
976          break;
977        }
978        break;
979      }
980    } else {
981      switch (N->getOpcode()) {
982      default:
983        return NULL;
984      case NVPTXISD::LDGV2:
985        switch (EltVT.getSimpleVT().SimpleTy) {
986        default:
987          return NULL;
988        case MVT::i8:
989          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
990          break;
991        case MVT::i16:
992          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
993          break;
994        case MVT::i32:
995          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
996          break;
997        case MVT::i64:
998          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
999          break;
1000        case MVT::f32:
1001          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1002          break;
1003        case MVT::f64:
1004          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1005          break;
1006        }
1007        break;
1008      case NVPTXISD::LDUV2:
1009        switch (EltVT.getSimpleVT().SimpleTy) {
1010        default:
1011          return NULL;
1012        case MVT::i8:
1013          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1014          break;
1015        case MVT::i16:
1016          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1017          break;
1018        case MVT::i32:
1019          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1020          break;
1021        case MVT::i64:
1022          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1023          break;
1024        case MVT::f32:
1025          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1026          break;
1027        case MVT::f64:
1028          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1029          break;
1030        }
1031        break;
1032      case NVPTXISD::LDGV4:
1033        switch (EltVT.getSimpleVT().SimpleTy) {
1034        default:
1035          return NULL;
1036        case MVT::i8:
1037          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1038          break;
1039        case MVT::i16:
1040          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1041          break;
1042        case MVT::i32:
1043          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1044          break;
1045        case MVT::f32:
1046          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1047          break;
1048        }
1049        break;
1050      case NVPTXISD::LDUV4:
1051        switch (EltVT.getSimpleVT().SimpleTy) {
1052        default:
1053          return NULL;
1054        case MVT::i8:
1055          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1056          break;
1057        case MVT::i16:
1058          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1059          break;
1060        case MVT::i32:
1061          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1062          break;
1063        case MVT::f32:
1064          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1065          break;
1066        }
1067        break;
1068      }
1069    }
1070
1071    SDValue Ops[] = { Base, Offset, Chain };
1072
1073    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
1074                                ArrayRef<SDValue>(Ops, 3));
1075  } else {
1076    if (Subtarget.is64Bit()) {
1077      switch (N->getOpcode()) {
1078      default:
1079        return NULL;
1080      case NVPTXISD::LDGV2:
1081        switch (EltVT.getSimpleVT().SimpleTy) {
1082        default:
1083          return NULL;
1084        case MVT::i8:
1085          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1086          break;
1087        case MVT::i16:
1088          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1089          break;
1090        case MVT::i32:
1091          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1092          break;
1093        case MVT::i64:
1094          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1095          break;
1096        case MVT::f32:
1097          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1098          break;
1099        case MVT::f64:
1100          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1101          break;
1102        }
1103        break;
1104      case NVPTXISD::LDUV2:
1105        switch (EltVT.getSimpleVT().SimpleTy) {
1106        default:
1107          return NULL;
1108        case MVT::i8:
1109          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1110          break;
1111        case MVT::i16:
1112          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1113          break;
1114        case MVT::i32:
1115          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1116          break;
1117        case MVT::i64:
1118          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1119          break;
1120        case MVT::f32:
1121          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1122          break;
1123        case MVT::f64:
1124          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1125          break;
1126        }
1127        break;
1128      case NVPTXISD::LDGV4:
1129        switch (EltVT.getSimpleVT().SimpleTy) {
1130        default:
1131          return NULL;
1132        case MVT::i8:
1133          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1134          break;
1135        case MVT::i16:
1136          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1137          break;
1138        case MVT::i32:
1139          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1140          break;
1141        case MVT::f32:
1142          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1143          break;
1144        }
1145        break;
1146      case NVPTXISD::LDUV4:
1147        switch (EltVT.getSimpleVT().SimpleTy) {
1148        default:
1149          return NULL;
1150        case MVT::i8:
1151          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1152          break;
1153        case MVT::i16:
1154          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1155          break;
1156        case MVT::i32:
1157          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1158          break;
1159        case MVT::f32:
1160          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1161          break;
1162        }
1163        break;
1164      }
1165    } else {
1166      switch (N->getOpcode()) {
1167      default:
1168        return NULL;
1169      case NVPTXISD::LDGV2:
1170        switch (EltVT.getSimpleVT().SimpleTy) {
1171        default:
1172          return NULL;
1173        case MVT::i8:
1174          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1175          break;
1176        case MVT::i16:
1177          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1178          break;
1179        case MVT::i32:
1180          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1181          break;
1182        case MVT::i64:
1183          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1184          break;
1185        case MVT::f32:
1186          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1187          break;
1188        case MVT::f64:
1189          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1190          break;
1191        }
1192        break;
1193      case NVPTXISD::LDUV2:
1194        switch (EltVT.getSimpleVT().SimpleTy) {
1195        default:
1196          return NULL;
1197        case MVT::i8:
1198          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1199          break;
1200        case MVT::i16:
1201          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1202          break;
1203        case MVT::i32:
1204          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1205          break;
1206        case MVT::i64:
1207          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1208          break;
1209        case MVT::f32:
1210          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1211          break;
1212        case MVT::f64:
1213          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1214          break;
1215        }
1216        break;
1217      case NVPTXISD::LDGV4:
1218        switch (EltVT.getSimpleVT().SimpleTy) {
1219        default:
1220          return NULL;
1221        case MVT::i8:
1222          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1223          break;
1224        case MVT::i16:
1225          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1226          break;
1227        case MVT::i32:
1228          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1229          break;
1230        case MVT::f32:
1231          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1232          break;
1233        }
1234        break;
1235      case NVPTXISD::LDUV4:
1236        switch (EltVT.getSimpleVT().SimpleTy) {
1237        default:
1238          return NULL;
1239        case MVT::i8:
1240          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1241          break;
1242        case MVT::i16:
1243          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1244          break;
1245        case MVT::i32:
1246          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1247          break;
1248        case MVT::f32:
1249          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1250          break;
1251        }
1252        break;
1253      }
1254    }
1255
1256    SDValue Ops[] = { Op1, Chain };
1257    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
1258                                ArrayRef<SDValue>(Ops, 2));
1259  }
1260
1261  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1262  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1263  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1264
1265  return LD;
1266}
1267
1268SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
1269  SDLoc dl(N);
1270  StoreSDNode *ST = cast<StoreSDNode>(N);
1271  EVT StoreVT = ST->getMemoryVT();
1272  SDNode *NVPTXST = NULL;
1273
1274  // do not support pre/post inc/dec
1275  if (ST->isIndexed())
1276    return NULL;
1277
1278  if (!StoreVT.isSimple())
1279    return NULL;
1280
1281  // Address Space Setting
1282  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
1283
1284  // Volatile Setting
1285  // - .volatile is only availalble for .global and .shared
1286  bool isVolatile = ST->isVolatile();
1287  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1288      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1289      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1290    isVolatile = false;
1291
1292  // Vector Setting
1293  MVT SimpleVT = StoreVT.getSimpleVT();
1294  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1295  if (SimpleVT.isVector()) {
1296    unsigned num = SimpleVT.getVectorNumElements();
1297    if (num == 2)
1298      vecType = NVPTX::PTXLdStInstCode::V2;
1299    else if (num == 4)
1300      vecType = NVPTX::PTXLdStInstCode::V4;
1301    else
1302      return NULL;
1303  }
1304
1305  // Type Setting: toType + toTypeWidth
1306  // - for integer type, always use 'u'
1307  //
1308  MVT ScalarVT = SimpleVT.getScalarType();
1309  unsigned toTypeWidth = ScalarVT.getSizeInBits();
1310  unsigned int toType;
1311  if (ScalarVT.isFloatingPoint())
1312    toType = NVPTX::PTXLdStInstCode::Float;
1313  else
1314    toType = NVPTX::PTXLdStInstCode::Unsigned;
1315
1316  // Create the machine instruction DAG
1317  SDValue Chain = N->getOperand(0);
1318  SDValue N1 = N->getOperand(1);
1319  SDValue N2 = N->getOperand(2);
1320  SDValue Addr;
1321  SDValue Offset, Base;
1322  unsigned Opcode;
1323  MVT::SimpleValueType SourceVT =
1324      N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
1325
1326  if (SelectDirectAddr(N2, Addr)) {
1327    switch (SourceVT) {
1328    case MVT::i8:
1329      Opcode = NVPTX::ST_i8_avar;
1330      break;
1331    case MVT::i16:
1332      Opcode = NVPTX::ST_i16_avar;
1333      break;
1334    case MVT::i32:
1335      Opcode = NVPTX::ST_i32_avar;
1336      break;
1337    case MVT::i64:
1338      Opcode = NVPTX::ST_i64_avar;
1339      break;
1340    case MVT::f32:
1341      Opcode = NVPTX::ST_f32_avar;
1342      break;
1343    case MVT::f64:
1344      Opcode = NVPTX::ST_f64_avar;
1345      break;
1346    default:
1347      return NULL;
1348    }
1349    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1350                      getI32Imm(vecType), getI32Imm(toType),
1351                      getI32Imm(toTypeWidth), Addr, Chain };
1352    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1353  } else if (Subtarget.is64Bit()
1354                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1355                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1356    switch (SourceVT) {
1357    case MVT::i8:
1358      Opcode = NVPTX::ST_i8_asi;
1359      break;
1360    case MVT::i16:
1361      Opcode = NVPTX::ST_i16_asi;
1362      break;
1363    case MVT::i32:
1364      Opcode = NVPTX::ST_i32_asi;
1365      break;
1366    case MVT::i64:
1367      Opcode = NVPTX::ST_i64_asi;
1368      break;
1369    case MVT::f32:
1370      Opcode = NVPTX::ST_f32_asi;
1371      break;
1372    case MVT::f64:
1373      Opcode = NVPTX::ST_f64_asi;
1374      break;
1375    default:
1376      return NULL;
1377    }
1378    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1379                      getI32Imm(vecType), getI32Imm(toType),
1380                      getI32Imm(toTypeWidth), Base, Offset, Chain };
1381    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1382  } else if (Subtarget.is64Bit()
1383                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1384                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1385    if (Subtarget.is64Bit()) {
1386      switch (SourceVT) {
1387      case MVT::i8:
1388        Opcode = NVPTX::ST_i8_ari_64;
1389        break;
1390      case MVT::i16:
1391        Opcode = NVPTX::ST_i16_ari_64;
1392        break;
1393      case MVT::i32:
1394        Opcode = NVPTX::ST_i32_ari_64;
1395        break;
1396      case MVT::i64:
1397        Opcode = NVPTX::ST_i64_ari_64;
1398        break;
1399      case MVT::f32:
1400        Opcode = NVPTX::ST_f32_ari_64;
1401        break;
1402      case MVT::f64:
1403        Opcode = NVPTX::ST_f64_ari_64;
1404        break;
1405      default:
1406        return NULL;
1407      }
1408    } else {
1409      switch (SourceVT) {
1410      case MVT::i8:
1411        Opcode = NVPTX::ST_i8_ari;
1412        break;
1413      case MVT::i16:
1414        Opcode = NVPTX::ST_i16_ari;
1415        break;
1416      case MVT::i32:
1417        Opcode = NVPTX::ST_i32_ari;
1418        break;
1419      case MVT::i64:
1420        Opcode = NVPTX::ST_i64_ari;
1421        break;
1422      case MVT::f32:
1423        Opcode = NVPTX::ST_f32_ari;
1424        break;
1425      case MVT::f64:
1426        Opcode = NVPTX::ST_f64_ari;
1427        break;
1428      default:
1429        return NULL;
1430      }
1431    }
1432    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1433                      getI32Imm(vecType), getI32Imm(toType),
1434                      getI32Imm(toTypeWidth), Base, Offset, Chain };
1435    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1436  } else {
1437    if (Subtarget.is64Bit()) {
1438      switch (SourceVT) {
1439      case MVT::i8:
1440        Opcode = NVPTX::ST_i8_areg_64;
1441        break;
1442      case MVT::i16:
1443        Opcode = NVPTX::ST_i16_areg_64;
1444        break;
1445      case MVT::i32:
1446        Opcode = NVPTX::ST_i32_areg_64;
1447        break;
1448      case MVT::i64:
1449        Opcode = NVPTX::ST_i64_areg_64;
1450        break;
1451      case MVT::f32:
1452        Opcode = NVPTX::ST_f32_areg_64;
1453        break;
1454      case MVT::f64:
1455        Opcode = NVPTX::ST_f64_areg_64;
1456        break;
1457      default:
1458        return NULL;
1459      }
1460    } else {
1461      switch (SourceVT) {
1462      case MVT::i8:
1463        Opcode = NVPTX::ST_i8_areg;
1464        break;
1465      case MVT::i16:
1466        Opcode = NVPTX::ST_i16_areg;
1467        break;
1468      case MVT::i32:
1469        Opcode = NVPTX::ST_i32_areg;
1470        break;
1471      case MVT::i64:
1472        Opcode = NVPTX::ST_i64_areg;
1473        break;
1474      case MVT::f32:
1475        Opcode = NVPTX::ST_f32_areg;
1476        break;
1477      case MVT::f64:
1478        Opcode = NVPTX::ST_f64_areg;
1479        break;
1480      default:
1481        return NULL;
1482      }
1483    }
1484    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1485                      getI32Imm(vecType), getI32Imm(toType),
1486                      getI32Imm(toTypeWidth), N2, Chain };
1487    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1488  }
1489
1490  if (NVPTXST != NULL) {
1491    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1492    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1493    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1494  }
1495
1496  return NVPTXST;
1497}
1498
1499SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1500  SDValue Chain = N->getOperand(0);
1501  SDValue Op1 = N->getOperand(1);
1502  SDValue Addr, Offset, Base;
1503  unsigned Opcode;
1504  SDLoc DL(N);
1505  SDNode *ST;
1506  EVT EltVT = Op1.getValueType();
1507  MemSDNode *MemSD = cast<MemSDNode>(N);
1508  EVT StoreVT = MemSD->getMemoryVT();
1509
1510  // Address Space Setting
1511  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
1512
1513  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1514    report_fatal_error("Cannot store to pointer that points to constant "
1515                       "memory space");
1516  }
1517
1518  // Volatile Setting
1519  // - .volatile is only availalble for .global and .shared
1520  bool IsVolatile = MemSD->isVolatile();
1521  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1522      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1523      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1524    IsVolatile = false;
1525
1526  // Type Setting: toType + toTypeWidth
1527  // - for integer type, always use 'u'
1528  assert(StoreVT.isSimple() && "Store value is not simple");
1529  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1530  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1531  unsigned ToType;
1532  if (ScalarVT.isFloatingPoint())
1533    ToType = NVPTX::PTXLdStInstCode::Float;
1534  else
1535    ToType = NVPTX::PTXLdStInstCode::Unsigned;
1536
1537  SmallVector<SDValue, 12> StOps;
1538  SDValue N2;
1539  unsigned VecType;
1540
1541  switch (N->getOpcode()) {
1542  case NVPTXISD::StoreV2:
1543    VecType = NVPTX::PTXLdStInstCode::V2;
1544    StOps.push_back(N->getOperand(1));
1545    StOps.push_back(N->getOperand(2));
1546    N2 = N->getOperand(3);
1547    break;
1548  case NVPTXISD::StoreV4:
1549    VecType = NVPTX::PTXLdStInstCode::V4;
1550    StOps.push_back(N->getOperand(1));
1551    StOps.push_back(N->getOperand(2));
1552    StOps.push_back(N->getOperand(3));
1553    StOps.push_back(N->getOperand(4));
1554    N2 = N->getOperand(5);
1555    break;
1556  default:
1557    return NULL;
1558  }
1559
1560  StOps.push_back(getI32Imm(IsVolatile));
1561  StOps.push_back(getI32Imm(CodeAddrSpace));
1562  StOps.push_back(getI32Imm(VecType));
1563  StOps.push_back(getI32Imm(ToType));
1564  StOps.push_back(getI32Imm(ToTypeWidth));
1565
1566  if (SelectDirectAddr(N2, Addr)) {
1567    switch (N->getOpcode()) {
1568    default:
1569      return NULL;
1570    case NVPTXISD::StoreV2:
1571      switch (EltVT.getSimpleVT().SimpleTy) {
1572      default:
1573        return NULL;
1574      case MVT::i8:
1575        Opcode = NVPTX::STV_i8_v2_avar;
1576        break;
1577      case MVT::i16:
1578        Opcode = NVPTX::STV_i16_v2_avar;
1579        break;
1580      case MVT::i32:
1581        Opcode = NVPTX::STV_i32_v2_avar;
1582        break;
1583      case MVT::i64:
1584        Opcode = NVPTX::STV_i64_v2_avar;
1585        break;
1586      case MVT::f32:
1587        Opcode = NVPTX::STV_f32_v2_avar;
1588        break;
1589      case MVT::f64:
1590        Opcode = NVPTX::STV_f64_v2_avar;
1591        break;
1592      }
1593      break;
1594    case NVPTXISD::StoreV4:
1595      switch (EltVT.getSimpleVT().SimpleTy) {
1596      default:
1597        return NULL;
1598      case MVT::i8:
1599        Opcode = NVPTX::STV_i8_v4_avar;
1600        break;
1601      case MVT::i16:
1602        Opcode = NVPTX::STV_i16_v4_avar;
1603        break;
1604      case MVT::i32:
1605        Opcode = NVPTX::STV_i32_v4_avar;
1606        break;
1607      case MVT::f32:
1608        Opcode = NVPTX::STV_f32_v4_avar;
1609        break;
1610      }
1611      break;
1612    }
1613    StOps.push_back(Addr);
1614  } else if (Subtarget.is64Bit()
1615                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1616                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1617    switch (N->getOpcode()) {
1618    default:
1619      return NULL;
1620    case NVPTXISD::StoreV2:
1621      switch (EltVT.getSimpleVT().SimpleTy) {
1622      default:
1623        return NULL;
1624      case MVT::i8:
1625        Opcode = NVPTX::STV_i8_v2_asi;
1626        break;
1627      case MVT::i16:
1628        Opcode = NVPTX::STV_i16_v2_asi;
1629        break;
1630      case MVT::i32:
1631        Opcode = NVPTX::STV_i32_v2_asi;
1632        break;
1633      case MVT::i64:
1634        Opcode = NVPTX::STV_i64_v2_asi;
1635        break;
1636      case MVT::f32:
1637        Opcode = NVPTX::STV_f32_v2_asi;
1638        break;
1639      case MVT::f64:
1640        Opcode = NVPTX::STV_f64_v2_asi;
1641        break;
1642      }
1643      break;
1644    case NVPTXISD::StoreV4:
1645      switch (EltVT.getSimpleVT().SimpleTy) {
1646      default:
1647        return NULL;
1648      case MVT::i8:
1649        Opcode = NVPTX::STV_i8_v4_asi;
1650        break;
1651      case MVT::i16:
1652        Opcode = NVPTX::STV_i16_v4_asi;
1653        break;
1654      case MVT::i32:
1655        Opcode = NVPTX::STV_i32_v4_asi;
1656        break;
1657      case MVT::f32:
1658        Opcode = NVPTX::STV_f32_v4_asi;
1659        break;
1660      }
1661      break;
1662    }
1663    StOps.push_back(Base);
1664    StOps.push_back(Offset);
1665  } else if (Subtarget.is64Bit()
1666                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1667                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1668    if (Subtarget.is64Bit()) {
1669      switch (N->getOpcode()) {
1670      default:
1671        return NULL;
1672      case NVPTXISD::StoreV2:
1673        switch (EltVT.getSimpleVT().SimpleTy) {
1674        default:
1675          return NULL;
1676        case MVT::i8:
1677          Opcode = NVPTX::STV_i8_v2_ari_64;
1678          break;
1679        case MVT::i16:
1680          Opcode = NVPTX::STV_i16_v2_ari_64;
1681          break;
1682        case MVT::i32:
1683          Opcode = NVPTX::STV_i32_v2_ari_64;
1684          break;
1685        case MVT::i64:
1686          Opcode = NVPTX::STV_i64_v2_ari_64;
1687          break;
1688        case MVT::f32:
1689          Opcode = NVPTX::STV_f32_v2_ari_64;
1690          break;
1691        case MVT::f64:
1692          Opcode = NVPTX::STV_f64_v2_ari_64;
1693          break;
1694        }
1695        break;
1696      case NVPTXISD::StoreV4:
1697        switch (EltVT.getSimpleVT().SimpleTy) {
1698        default:
1699          return NULL;
1700        case MVT::i8:
1701          Opcode = NVPTX::STV_i8_v4_ari_64;
1702          break;
1703        case MVT::i16:
1704          Opcode = NVPTX::STV_i16_v4_ari_64;
1705          break;
1706        case MVT::i32:
1707          Opcode = NVPTX::STV_i32_v4_ari_64;
1708          break;
1709        case MVT::f32:
1710          Opcode = NVPTX::STV_f32_v4_ari_64;
1711          break;
1712        }
1713        break;
1714      }
1715    } else {
1716      switch (N->getOpcode()) {
1717      default:
1718        return NULL;
1719      case NVPTXISD::StoreV2:
1720        switch (EltVT.getSimpleVT().SimpleTy) {
1721        default:
1722          return NULL;
1723        case MVT::i8:
1724          Opcode = NVPTX::STV_i8_v2_ari;
1725          break;
1726        case MVT::i16:
1727          Opcode = NVPTX::STV_i16_v2_ari;
1728          break;
1729        case MVT::i32:
1730          Opcode = NVPTX::STV_i32_v2_ari;
1731          break;
1732        case MVT::i64:
1733          Opcode = NVPTX::STV_i64_v2_ari;
1734          break;
1735        case MVT::f32:
1736          Opcode = NVPTX::STV_f32_v2_ari;
1737          break;
1738        case MVT::f64:
1739          Opcode = NVPTX::STV_f64_v2_ari;
1740          break;
1741        }
1742        break;
1743      case NVPTXISD::StoreV4:
1744        switch (EltVT.getSimpleVT().SimpleTy) {
1745        default:
1746          return NULL;
1747        case MVT::i8:
1748          Opcode = NVPTX::STV_i8_v4_ari;
1749          break;
1750        case MVT::i16:
1751          Opcode = NVPTX::STV_i16_v4_ari;
1752          break;
1753        case MVT::i32:
1754          Opcode = NVPTX::STV_i32_v4_ari;
1755          break;
1756        case MVT::f32:
1757          Opcode = NVPTX::STV_f32_v4_ari;
1758          break;
1759        }
1760        break;
1761      }
1762    }
1763    StOps.push_back(Base);
1764    StOps.push_back(Offset);
1765  } else {
1766    if (Subtarget.is64Bit()) {
1767      switch (N->getOpcode()) {
1768      default:
1769        return NULL;
1770      case NVPTXISD::StoreV2:
1771        switch (EltVT.getSimpleVT().SimpleTy) {
1772        default:
1773          return NULL;
1774        case MVT::i8:
1775          Opcode = NVPTX::STV_i8_v2_areg_64;
1776          break;
1777        case MVT::i16:
1778          Opcode = NVPTX::STV_i16_v2_areg_64;
1779          break;
1780        case MVT::i32:
1781          Opcode = NVPTX::STV_i32_v2_areg_64;
1782          break;
1783        case MVT::i64:
1784          Opcode = NVPTX::STV_i64_v2_areg_64;
1785          break;
1786        case MVT::f32:
1787          Opcode = NVPTX::STV_f32_v2_areg_64;
1788          break;
1789        case MVT::f64:
1790          Opcode = NVPTX::STV_f64_v2_areg_64;
1791          break;
1792        }
1793        break;
1794      case NVPTXISD::StoreV4:
1795        switch (EltVT.getSimpleVT().SimpleTy) {
1796        default:
1797          return NULL;
1798        case MVT::i8:
1799          Opcode = NVPTX::STV_i8_v4_areg_64;
1800          break;
1801        case MVT::i16:
1802          Opcode = NVPTX::STV_i16_v4_areg_64;
1803          break;
1804        case MVT::i32:
1805          Opcode = NVPTX::STV_i32_v4_areg_64;
1806          break;
1807        case MVT::f32:
1808          Opcode = NVPTX::STV_f32_v4_areg_64;
1809          break;
1810        }
1811        break;
1812      }
1813    } else {
1814      switch (N->getOpcode()) {
1815      default:
1816        return NULL;
1817      case NVPTXISD::StoreV2:
1818        switch (EltVT.getSimpleVT().SimpleTy) {
1819        default:
1820          return NULL;
1821        case MVT::i8:
1822          Opcode = NVPTX::STV_i8_v2_areg;
1823          break;
1824        case MVT::i16:
1825          Opcode = NVPTX::STV_i16_v2_areg;
1826          break;
1827        case MVT::i32:
1828          Opcode = NVPTX::STV_i32_v2_areg;
1829          break;
1830        case MVT::i64:
1831          Opcode = NVPTX::STV_i64_v2_areg;
1832          break;
1833        case MVT::f32:
1834          Opcode = NVPTX::STV_f32_v2_areg;
1835          break;
1836        case MVT::f64:
1837          Opcode = NVPTX::STV_f64_v2_areg;
1838          break;
1839        }
1840        break;
1841      case NVPTXISD::StoreV4:
1842        switch (EltVT.getSimpleVT().SimpleTy) {
1843        default:
1844          return NULL;
1845        case MVT::i8:
1846          Opcode = NVPTX::STV_i8_v4_areg;
1847          break;
1848        case MVT::i16:
1849          Opcode = NVPTX::STV_i16_v4_areg;
1850          break;
1851        case MVT::i32:
1852          Opcode = NVPTX::STV_i32_v4_areg;
1853          break;
1854        case MVT::f32:
1855          Opcode = NVPTX::STV_f32_v4_areg;
1856          break;
1857        }
1858        break;
1859      }
1860    }
1861    StOps.push_back(N2);
1862  }
1863
1864  StOps.push_back(Chain);
1865
1866  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
1867
1868  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1869  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1870  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1871
1872  return ST;
1873}
1874
1875SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
1876  SDValue Chain = Node->getOperand(0);
1877  SDValue Offset = Node->getOperand(2);
1878  SDValue Flag = Node->getOperand(3);
1879  SDLoc DL(Node);
1880  MemSDNode *Mem = cast<MemSDNode>(Node);
1881
1882  unsigned VecSize;
1883  switch (Node->getOpcode()) {
1884  default:
1885    return NULL;
1886  case NVPTXISD::LoadParam:
1887    VecSize = 1;
1888    break;
1889  case NVPTXISD::LoadParamV2:
1890    VecSize = 2;
1891    break;
1892  case NVPTXISD::LoadParamV4:
1893    VecSize = 4;
1894    break;
1895  }
1896
1897  EVT EltVT = Node->getValueType(0);
1898  EVT MemVT = Mem->getMemoryVT();
1899
1900  unsigned Opc = 0;
1901
1902  switch (VecSize) {
1903  default:
1904    return NULL;
1905  case 1:
1906    switch (MemVT.getSimpleVT().SimpleTy) {
1907    default:
1908      return NULL;
1909    case MVT::i1:
1910      Opc = NVPTX::LoadParamMemI8;
1911      break;
1912    case MVT::i8:
1913      Opc = NVPTX::LoadParamMemI8;
1914      break;
1915    case MVT::i16:
1916      Opc = NVPTX::LoadParamMemI16;
1917      break;
1918    case MVT::i32:
1919      Opc = NVPTX::LoadParamMemI32;
1920      break;
1921    case MVT::i64:
1922      Opc = NVPTX::LoadParamMemI64;
1923      break;
1924    case MVT::f32:
1925      Opc = NVPTX::LoadParamMemF32;
1926      break;
1927    case MVT::f64:
1928      Opc = NVPTX::LoadParamMemF64;
1929      break;
1930    }
1931    break;
1932  case 2:
1933    switch (MemVT.getSimpleVT().SimpleTy) {
1934    default:
1935      return NULL;
1936    case MVT::i1:
1937      Opc = NVPTX::LoadParamMemV2I8;
1938      break;
1939    case MVT::i8:
1940      Opc = NVPTX::LoadParamMemV2I8;
1941      break;
1942    case MVT::i16:
1943      Opc = NVPTX::LoadParamMemV2I16;
1944      break;
1945    case MVT::i32:
1946      Opc = NVPTX::LoadParamMemV2I32;
1947      break;
1948    case MVT::i64:
1949      Opc = NVPTX::LoadParamMemV2I64;
1950      break;
1951    case MVT::f32:
1952      Opc = NVPTX::LoadParamMemV2F32;
1953      break;
1954    case MVT::f64:
1955      Opc = NVPTX::LoadParamMemV2F64;
1956      break;
1957    }
1958    break;
1959  case 4:
1960    switch (MemVT.getSimpleVT().SimpleTy) {
1961    default:
1962      return NULL;
1963    case MVT::i1:
1964      Opc = NVPTX::LoadParamMemV4I8;
1965      break;
1966    case MVT::i8:
1967      Opc = NVPTX::LoadParamMemV4I8;
1968      break;
1969    case MVT::i16:
1970      Opc = NVPTX::LoadParamMemV4I16;
1971      break;
1972    case MVT::i32:
1973      Opc = NVPTX::LoadParamMemV4I32;
1974      break;
1975    case MVT::f32:
1976      Opc = NVPTX::LoadParamMemV4F32;
1977      break;
1978    }
1979    break;
1980  }
1981
1982  SDVTList VTs;
1983  if (VecSize == 1) {
1984    VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
1985  } else if (VecSize == 2) {
1986    VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
1987  } else {
1988    EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
1989    VTs = CurDAG->getVTList(&EVTs[0], 5);
1990  }
1991
1992  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
1993
1994  SmallVector<SDValue, 2> Ops;
1995  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
1996  Ops.push_back(Chain);
1997  Ops.push_back(Flag);
1998
1999  SDNode *Ret =
2000      CurDAG->getMachineNode(Opc, DL, Node->getVTList(), Ops);
2001  return Ret;
2002}
2003
2004SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2005  SDLoc DL(N);
2006  SDValue Chain = N->getOperand(0);
2007  SDValue Offset = N->getOperand(1);
2008  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2009  MemSDNode *Mem = cast<MemSDNode>(N);
2010
2011  // How many elements do we have?
2012  unsigned NumElts = 1;
2013  switch (N->getOpcode()) {
2014  default:
2015    return NULL;
2016  case NVPTXISD::StoreRetval:
2017    NumElts = 1;
2018    break;
2019  case NVPTXISD::StoreRetvalV2:
2020    NumElts = 2;
2021    break;
2022  case NVPTXISD::StoreRetvalV4:
2023    NumElts = 4;
2024    break;
2025  }
2026
2027  // Build vector of operands
2028  SmallVector<SDValue, 6> Ops;
2029  for (unsigned i = 0; i < NumElts; ++i)
2030    Ops.push_back(N->getOperand(i + 2));
2031  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2032  Ops.push_back(Chain);
2033
2034  // Determine target opcode
2035  // If we have an i1, use an 8-bit store. The lowering code in
2036  // NVPTXISelLowering will have already emitted an upcast.
2037  unsigned Opcode = 0;
2038  switch (NumElts) {
2039  default:
2040    return NULL;
2041  case 1:
2042    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2043    default:
2044      return NULL;
2045    case MVT::i1:
2046      Opcode = NVPTX::StoreRetvalI8;
2047      break;
2048    case MVT::i8:
2049      Opcode = NVPTX::StoreRetvalI8;
2050      break;
2051    case MVT::i16:
2052      Opcode = NVPTX::StoreRetvalI16;
2053      break;
2054    case MVT::i32:
2055      Opcode = NVPTX::StoreRetvalI32;
2056      break;
2057    case MVT::i64:
2058      Opcode = NVPTX::StoreRetvalI64;
2059      break;
2060    case MVT::f32:
2061      Opcode = NVPTX::StoreRetvalF32;
2062      break;
2063    case MVT::f64:
2064      Opcode = NVPTX::StoreRetvalF64;
2065      break;
2066    }
2067    break;
2068  case 2:
2069    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2070    default:
2071      return NULL;
2072    case MVT::i1:
2073      Opcode = NVPTX::StoreRetvalV2I8;
2074      break;
2075    case MVT::i8:
2076      Opcode = NVPTX::StoreRetvalV2I8;
2077      break;
2078    case MVT::i16:
2079      Opcode = NVPTX::StoreRetvalV2I16;
2080      break;
2081    case MVT::i32:
2082      Opcode = NVPTX::StoreRetvalV2I32;
2083      break;
2084    case MVT::i64:
2085      Opcode = NVPTX::StoreRetvalV2I64;
2086      break;
2087    case MVT::f32:
2088      Opcode = NVPTX::StoreRetvalV2F32;
2089      break;
2090    case MVT::f64:
2091      Opcode = NVPTX::StoreRetvalV2F64;
2092      break;
2093    }
2094    break;
2095  case 4:
2096    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2097    default:
2098      return NULL;
2099    case MVT::i1:
2100      Opcode = NVPTX::StoreRetvalV4I8;
2101      break;
2102    case MVT::i8:
2103      Opcode = NVPTX::StoreRetvalV4I8;
2104      break;
2105    case MVT::i16:
2106      Opcode = NVPTX::StoreRetvalV4I16;
2107      break;
2108    case MVT::i32:
2109      Opcode = NVPTX::StoreRetvalV4I32;
2110      break;
2111    case MVT::f32:
2112      Opcode = NVPTX::StoreRetvalV4F32;
2113      break;
2114    }
2115    break;
2116  }
2117
2118  SDNode *Ret =
2119      CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2120  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2121  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2122  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2123
2124  return Ret;
2125}
2126
2127SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2128  SDLoc DL(N);
2129  SDValue Chain = N->getOperand(0);
2130  SDValue Param = N->getOperand(1);
2131  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2132  SDValue Offset = N->getOperand(2);
2133  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2134  MemSDNode *Mem = cast<MemSDNode>(N);
2135  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2136
2137  // How many elements do we have?
2138  unsigned NumElts = 1;
2139  switch (N->getOpcode()) {
2140  default:
2141    return NULL;
2142  case NVPTXISD::StoreParamU32:
2143  case NVPTXISD::StoreParamS32:
2144  case NVPTXISD::StoreParam:
2145    NumElts = 1;
2146    break;
2147  case NVPTXISD::StoreParamV2:
2148    NumElts = 2;
2149    break;
2150  case NVPTXISD::StoreParamV4:
2151    NumElts = 4;
2152    break;
2153  }
2154
2155  // Build vector of operands
2156  SmallVector<SDValue, 8> Ops;
2157  for (unsigned i = 0; i < NumElts; ++i)
2158    Ops.push_back(N->getOperand(i + 3));
2159  Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2160  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2161  Ops.push_back(Chain);
2162  Ops.push_back(Flag);
2163
2164  // Determine target opcode
2165  // If we have an i1, use an 8-bit store. The lowering code in
2166  // NVPTXISelLowering will have already emitted an upcast.
2167  unsigned Opcode = 0;
2168  switch (N->getOpcode()) {
2169  default:
2170    switch (NumElts) {
2171    default:
2172      return NULL;
2173    case 1:
2174      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2175      default:
2176        return NULL;
2177      case MVT::i1:
2178        Opcode = NVPTX::StoreParamI8;
2179        break;
2180      case MVT::i8:
2181        Opcode = NVPTX::StoreParamI8;
2182        break;
2183      case MVT::i16:
2184        Opcode = NVPTX::StoreParamI16;
2185        break;
2186      case MVT::i32:
2187        Opcode = NVPTX::StoreParamI32;
2188        break;
2189      case MVT::i64:
2190        Opcode = NVPTX::StoreParamI64;
2191        break;
2192      case MVT::f32:
2193        Opcode = NVPTX::StoreParamF32;
2194        break;
2195      case MVT::f64:
2196        Opcode = NVPTX::StoreParamF64;
2197        break;
2198      }
2199      break;
2200    case 2:
2201      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2202      default:
2203        return NULL;
2204      case MVT::i1:
2205        Opcode = NVPTX::StoreParamV2I8;
2206        break;
2207      case MVT::i8:
2208        Opcode = NVPTX::StoreParamV2I8;
2209        break;
2210      case MVT::i16:
2211        Opcode = NVPTX::StoreParamV2I16;
2212        break;
2213      case MVT::i32:
2214        Opcode = NVPTX::StoreParamV2I32;
2215        break;
2216      case MVT::i64:
2217        Opcode = NVPTX::StoreParamV2I64;
2218        break;
2219      case MVT::f32:
2220        Opcode = NVPTX::StoreParamV2F32;
2221        break;
2222      case MVT::f64:
2223        Opcode = NVPTX::StoreParamV2F64;
2224        break;
2225      }
2226      break;
2227    case 4:
2228      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2229      default:
2230        return NULL;
2231      case MVT::i1:
2232        Opcode = NVPTX::StoreParamV4I8;
2233        break;
2234      case MVT::i8:
2235        Opcode = NVPTX::StoreParamV4I8;
2236        break;
2237      case MVT::i16:
2238        Opcode = NVPTX::StoreParamV4I16;
2239        break;
2240      case MVT::i32:
2241        Opcode = NVPTX::StoreParamV4I32;
2242        break;
2243      case MVT::f32:
2244        Opcode = NVPTX::StoreParamV4F32;
2245        break;
2246      }
2247      break;
2248    }
2249    break;
2250  // Special case: if we have a sign-extend/zero-extend node, insert the
2251  // conversion instruction first, and use that as the value operand to
2252  // the selected StoreParam node.
2253  case NVPTXISD::StoreParamU32: {
2254    Opcode = NVPTX::StoreParamI32;
2255    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2256                                                MVT::i32);
2257    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2258                                         MVT::i32, Ops[0], CvtNone);
2259    Ops[0] = SDValue(Cvt, 0);
2260    break;
2261  }
2262  case NVPTXISD::StoreParamS32: {
2263    Opcode = NVPTX::StoreParamI32;
2264    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2265                                                MVT::i32);
2266    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2267                                         MVT::i32, Ops[0], CvtNone);
2268    Ops[0] = SDValue(Cvt, 0);
2269    break;
2270  }
2271  }
2272
2273  SDNode *Ret =
2274      CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
2275  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2276  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2277  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2278
2279  return Ret;
2280}
2281
2282// SelectDirectAddr - Match a direct address for DAG.
2283// A direct address could be a globaladdress or externalsymbol.
2284bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
2285  // Return true if TGA or ES.
2286  if (N.getOpcode() == ISD::TargetGlobalAddress ||
2287      N.getOpcode() == ISD::TargetExternalSymbol) {
2288    Address = N;
2289    return true;
2290  }
2291  if (N.getOpcode() == NVPTXISD::Wrapper) {
2292    Address = N.getOperand(0);
2293    return true;
2294  }
2295  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2296    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
2297    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
2298      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
2299        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
2300  }
2301  return false;
2302}
2303
2304// symbol+offset
2305bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
2306    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
2307  if (Addr.getOpcode() == ISD::ADD) {
2308    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2309      SDValue base = Addr.getOperand(0);
2310      if (SelectDirectAddr(base, Base)) {
2311        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
2312        return true;
2313      }
2314    }
2315  }
2316  return false;
2317}
2318
2319// symbol+offset
2320bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
2321                                     SDValue &Base, SDValue &Offset) {
2322  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
2323}
2324
2325// symbol+offset
2326bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
2327                                       SDValue &Base, SDValue &Offset) {
2328  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
2329}
2330
2331// register+offset
2332bool NVPTXDAGToDAGISel::SelectADDRri_imp(
2333    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
2334  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2335    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
2336    Offset = CurDAG->getTargetConstant(0, mvt);
2337    return true;
2338  }
2339  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
2340      Addr.getOpcode() == ISD::TargetGlobalAddress)
2341    return false; // direct calls.
2342
2343  if (Addr.getOpcode() == ISD::ADD) {
2344    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
2345      return false;
2346    }
2347    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2348      if (FrameIndexSDNode *FIN =
2349              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
2350        // Constant offset from frame ref.
2351        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
2352      else
2353        Base = Addr.getOperand(0);
2354      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
2355      return true;
2356    }
2357  }
2358  return false;
2359}
2360
2361// register+offset
2362bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
2363                                     SDValue &Base, SDValue &Offset) {
2364  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
2365}
2366
2367// register+offset
2368bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
2369                                       SDValue &Base, SDValue &Offset) {
2370  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
2371}
2372
2373bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
2374                                                 unsigned int spN) const {
2375  const Value *Src = NULL;
2376  // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
2377  // the classof() for MemSDNode does not include MemIntrinsicSDNode
2378  // (See SelectionDAGNodes.h). So we need to check for both.
2379  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
2380    Src = mN->getSrcValue();
2381  } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
2382    Src = mN->getSrcValue();
2383  }
2384  if (!Src)
2385    return false;
2386  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
2387    return (PT->getAddressSpace() == spN);
2388  return false;
2389}
2390
2391/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
2392/// inline asm expressions.
2393bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
2394    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
2395  SDValue Op0, Op1;
2396  switch (ConstraintCode) {
2397  default:
2398    return true;
2399  case 'm': // memory
2400    if (SelectDirectAddr(Op, Op0)) {
2401      OutOps.push_back(Op0);
2402      OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
2403      return false;
2404    }
2405    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
2406      OutOps.push_back(Op0);
2407      OutOps.push_back(Op1);
2408      return false;
2409    }
2410    break;
2411  }
2412  return true;
2413}
2414
2415// Return true if N is a undef or a constant.
2416// If N was undef, return a (i8imm 0) in Retval
2417// If N was imm, convert it to i8imm and return in Retval
2418// Note: The convert to i8imm is required, otherwise the
2419// pattern matcher inserts a bunch of IMOVi8rr to convert
2420// the imm to i8imm, and this causes instruction selection
2421// to fail.
2422bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
2423  if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
2424    return false;
2425
2426  if (N.getOpcode() == ISD::UNDEF)
2427    Retval = CurDAG->getTargetConstant(0, MVT::i8);
2428  else {
2429    ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
2430    unsigned retval = cn->getZExtValue();
2431    Retval = CurDAG->getTargetConstant(retval, MVT::i8);
2432  }
2433  return true;
2434}
2435