NVPTXISelDAGToDAG.cpp revision 3a8ee4ffd783bd0cf2d83089edb43ec546b49d0d
1//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXISelDAGToDAG.h"
15#include "llvm/IR/GlobalValue.h"
16#include "llvm/IR/Instructions.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Support/Debug.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/raw_ostream.h"
21#include "llvm/Target/TargetIntrinsicInfo.h"
22
23#undef DEBUG_TYPE
24#define DEBUG_TYPE "nvptx-isel"
25
26using namespace llvm;
27
28static cl::opt<int>
29FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
30                 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
31                          " 1: do it  2: do it aggressively"),
32                 cl::init(2));
33
34static cl::opt<int> UsePrecDivF32(
35    "nvptx-prec-divf32", cl::ZeroOrMore,
36    cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
37             " IEEE Compliant F32 div.rnd if avaiable."),
38    cl::init(2));
39
40static cl::opt<bool>
41UsePrecSqrtF32("nvptx-prec-sqrtf32",
42          cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
43          cl::init(true));
44
45static cl::opt<bool>
46FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore,
47           cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
48           cl::init(false));
49
50
51/// createNVPTXISelDag - This pass converts a legalized DAG into a
52/// NVPTX-specific DAG, ready for instruction scheduling.
53FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
54                                       llvm::CodeGenOpt::Level OptLevel) {
55  return new NVPTXDAGToDAGISel(TM, OptLevel);
56}
57
58NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
59                                     CodeGenOpt::Level OptLevel)
60    : SelectionDAGISel(tm, OptLevel),
61      Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
62
63  doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
64  doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
65  doFMAF32AGG =
66      (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
67  doFMAF64AGG =
68      (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
69
70  allowFMA = (FMAContractLevel >= 1);
71
72  doMulWide = (OptLevel > 0);
73}
74
75int NVPTXDAGToDAGISel::getDivF32Level() const {
76  if (UsePrecDivF32.getNumOccurrences() > 0) {
77    // If nvptx-prec-div32=N is used on the command-line, always honor it
78    return UsePrecDivF32;
79  } else {
80    // Otherwise, use div.approx if fast math is enabled
81    if (TM.Options.UnsafeFPMath)
82      return 0;
83    else
84      return 2;
85  }
86}
87
88bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
89  if (UsePrecSqrtF32.getNumOccurrences() > 0) {
90    // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
91    return UsePrecSqrtF32;
92  } else {
93    // Otherwise, use sqrt.approx if fast math is enabled
94    if (TM.Options.UnsafeFPMath)
95      return false;
96    else
97      return true;
98  }
99}
100
101bool NVPTXDAGToDAGISel::useF32FTZ() const {
102  if (FtzEnabled.getNumOccurrences() > 0) {
103    // If nvptx-f32ftz is used on the command-line, always honor it
104    return FtzEnabled;
105  } else {
106    const Function *F = MF->getFunction();
107    // Otherwise, check for an nvptx-f32ftz attribute on the function
108    if (F->hasFnAttribute("nvptx-f32ftz"))
109      return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
110                                              "nvptx-f32ftz")
111                                              .getValueAsString() == "true");
112    else
113      return false;
114  }
115}
116
117/// Select - Select instructions not customized! Used for
118/// expanded, promoted and normal instructions.
119SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
120
121  if (N->isMachineOpcode())
122    return NULL; // Already selected.
123
124  SDNode *ResNode = NULL;
125  switch (N->getOpcode()) {
126  case ISD::LOAD:
127    ResNode = SelectLoad(N);
128    break;
129  case ISD::STORE:
130    ResNode = SelectStore(N);
131    break;
132  case NVPTXISD::LoadV2:
133  case NVPTXISD::LoadV4:
134    ResNode = SelectLoadVector(N);
135    break;
136  case NVPTXISD::LDGV2:
137  case NVPTXISD::LDGV4:
138  case NVPTXISD::LDUV2:
139  case NVPTXISD::LDUV4:
140    ResNode = SelectLDGLDUVector(N);
141    break;
142  case NVPTXISD::StoreV2:
143  case NVPTXISD::StoreV4:
144    ResNode = SelectStoreVector(N);
145    break;
146  case NVPTXISD::LoadParam:
147  case NVPTXISD::LoadParamV2:
148  case NVPTXISD::LoadParamV4:
149    ResNode = SelectLoadParam(N);
150    break;
151  case NVPTXISD::StoreRetval:
152  case NVPTXISD::StoreRetvalV2:
153  case NVPTXISD::StoreRetvalV4:
154    ResNode = SelectStoreRetval(N);
155    break;
156  case NVPTXISD::StoreParam:
157  case NVPTXISD::StoreParamV2:
158  case NVPTXISD::StoreParamV4:
159  case NVPTXISD::StoreParamS32:
160  case NVPTXISD::StoreParamU32:
161    ResNode = SelectStoreParam(N);
162    break;
163  default:
164    break;
165  }
166  if (ResNode)
167    return ResNode;
168  return SelectCode(N);
169}
170
171static unsigned int getCodeAddrSpace(MemSDNode *N,
172                                     const NVPTXSubtarget &Subtarget) {
173  const Value *Src = N->getSrcValue();
174
175  if (!Src)
176    return NVPTX::PTXLdStInstCode::GENERIC;
177
178  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
179    switch (PT->getAddressSpace()) {
180    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
181    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
182    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
183    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
184    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
185    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
186    default: break;
187    }
188  }
189  return NVPTX::PTXLdStInstCode::GENERIC;
190}
191
192SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
193  SDLoc dl(N);
194  LoadSDNode *LD = cast<LoadSDNode>(N);
195  EVT LoadedVT = LD->getMemoryVT();
196  SDNode *NVPTXLD = NULL;
197
198  // do not support pre/post inc/dec
199  if (LD->isIndexed())
200    return NULL;
201
202  if (!LoadedVT.isSimple())
203    return NULL;
204
205  // Address Space Setting
206  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
207
208  // Volatile Setting
209  // - .volatile is only availalble for .global and .shared
210  bool isVolatile = LD->isVolatile();
211  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
212      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
213      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
214    isVolatile = false;
215
216  // Vector Setting
217  MVT SimpleVT = LoadedVT.getSimpleVT();
218  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
219  if (SimpleVT.isVector()) {
220    unsigned num = SimpleVT.getVectorNumElements();
221    if (num == 2)
222      vecType = NVPTX::PTXLdStInstCode::V2;
223    else if (num == 4)
224      vecType = NVPTX::PTXLdStInstCode::V4;
225    else
226      return NULL;
227  }
228
229  // Type Setting: fromType + fromTypeWidth
230  //
231  // Sign   : ISD::SEXTLOAD
232  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
233  //          type is integer
234  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
235  MVT ScalarVT = SimpleVT.getScalarType();
236  // Read at least 8 bits (predicates are stored as 8-bit values)
237  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
238  unsigned int fromType;
239  if ((LD->getExtensionType() == ISD::SEXTLOAD))
240    fromType = NVPTX::PTXLdStInstCode::Signed;
241  else if (ScalarVT.isFloatingPoint())
242    fromType = NVPTX::PTXLdStInstCode::Float;
243  else
244    fromType = NVPTX::PTXLdStInstCode::Unsigned;
245
246  // Create the machine instruction DAG
247  SDValue Chain = N->getOperand(0);
248  SDValue N1 = N->getOperand(1);
249  SDValue Addr;
250  SDValue Offset, Base;
251  unsigned Opcode;
252  MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
253
254  if (SelectDirectAddr(N1, Addr)) {
255    switch (TargetVT) {
256    case MVT::i8:
257      Opcode = NVPTX::LD_i8_avar;
258      break;
259    case MVT::i16:
260      Opcode = NVPTX::LD_i16_avar;
261      break;
262    case MVT::i32:
263      Opcode = NVPTX::LD_i32_avar;
264      break;
265    case MVT::i64:
266      Opcode = NVPTX::LD_i64_avar;
267      break;
268    case MVT::f32:
269      Opcode = NVPTX::LD_f32_avar;
270      break;
271    case MVT::f64:
272      Opcode = NVPTX::LD_f64_avar;
273      break;
274    default:
275      return NULL;
276    }
277    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
278                      getI32Imm(vecType), getI32Imm(fromType),
279                      getI32Imm(fromTypeWidth), Addr, Chain };
280    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
281  } else if (Subtarget.is64Bit()
282                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
283                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
284    switch (TargetVT) {
285    case MVT::i8:
286      Opcode = NVPTX::LD_i8_asi;
287      break;
288    case MVT::i16:
289      Opcode = NVPTX::LD_i16_asi;
290      break;
291    case MVT::i32:
292      Opcode = NVPTX::LD_i32_asi;
293      break;
294    case MVT::i64:
295      Opcode = NVPTX::LD_i64_asi;
296      break;
297    case MVT::f32:
298      Opcode = NVPTX::LD_f32_asi;
299      break;
300    case MVT::f64:
301      Opcode = NVPTX::LD_f64_asi;
302      break;
303    default:
304      return NULL;
305    }
306    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
307                      getI32Imm(vecType), getI32Imm(fromType),
308                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
309    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
310  } else if (Subtarget.is64Bit()
311                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
312                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
313    if (Subtarget.is64Bit()) {
314      switch (TargetVT) {
315      case MVT::i8:
316        Opcode = NVPTX::LD_i8_ari_64;
317        break;
318      case MVT::i16:
319        Opcode = NVPTX::LD_i16_ari_64;
320        break;
321      case MVT::i32:
322        Opcode = NVPTX::LD_i32_ari_64;
323        break;
324      case MVT::i64:
325        Opcode = NVPTX::LD_i64_ari_64;
326        break;
327      case MVT::f32:
328        Opcode = NVPTX::LD_f32_ari_64;
329        break;
330      case MVT::f64:
331        Opcode = NVPTX::LD_f64_ari_64;
332        break;
333      default:
334        return NULL;
335      }
336    } else {
337      switch (TargetVT) {
338      case MVT::i8:
339        Opcode = NVPTX::LD_i8_ari;
340        break;
341      case MVT::i16:
342        Opcode = NVPTX::LD_i16_ari;
343        break;
344      case MVT::i32:
345        Opcode = NVPTX::LD_i32_ari;
346        break;
347      case MVT::i64:
348        Opcode = NVPTX::LD_i64_ari;
349        break;
350      case MVT::f32:
351        Opcode = NVPTX::LD_f32_ari;
352        break;
353      case MVT::f64:
354        Opcode = NVPTX::LD_f64_ari;
355        break;
356      default:
357        return NULL;
358      }
359    }
360    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
361                      getI32Imm(vecType), getI32Imm(fromType),
362                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
363    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
364  } else {
365    if (Subtarget.is64Bit()) {
366      switch (TargetVT) {
367      case MVT::i8:
368        Opcode = NVPTX::LD_i8_areg_64;
369        break;
370      case MVT::i16:
371        Opcode = NVPTX::LD_i16_areg_64;
372        break;
373      case MVT::i32:
374        Opcode = NVPTX::LD_i32_areg_64;
375        break;
376      case MVT::i64:
377        Opcode = NVPTX::LD_i64_areg_64;
378        break;
379      case MVT::f32:
380        Opcode = NVPTX::LD_f32_areg_64;
381        break;
382      case MVT::f64:
383        Opcode = NVPTX::LD_f64_areg_64;
384        break;
385      default:
386        return NULL;
387      }
388    } else {
389      switch (TargetVT) {
390      case MVT::i8:
391        Opcode = NVPTX::LD_i8_areg;
392        break;
393      case MVT::i16:
394        Opcode = NVPTX::LD_i16_areg;
395        break;
396      case MVT::i32:
397        Opcode = NVPTX::LD_i32_areg;
398        break;
399      case MVT::i64:
400        Opcode = NVPTX::LD_i64_areg;
401        break;
402      case MVT::f32:
403        Opcode = NVPTX::LD_f32_areg;
404        break;
405      case MVT::f64:
406        Opcode = NVPTX::LD_f64_areg;
407        break;
408      default:
409        return NULL;
410      }
411    }
412    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
413                      getI32Imm(vecType), getI32Imm(fromType),
414                      getI32Imm(fromTypeWidth), N1, Chain };
415    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
416  }
417
418  if (NVPTXLD != NULL) {
419    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
420    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
421    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
422  }
423
424  return NVPTXLD;
425}
426
427SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
428
429  SDValue Chain = N->getOperand(0);
430  SDValue Op1 = N->getOperand(1);
431  SDValue Addr, Offset, Base;
432  unsigned Opcode;
433  SDLoc DL(N);
434  SDNode *LD;
435  MemSDNode *MemSD = cast<MemSDNode>(N);
436  EVT LoadedVT = MemSD->getMemoryVT();
437
438  if (!LoadedVT.isSimple())
439    return NULL;
440
441  // Address Space Setting
442  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
443
444  // Volatile Setting
445  // - .volatile is only availalble for .global and .shared
446  bool IsVolatile = MemSD->isVolatile();
447  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
448      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
449      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
450    IsVolatile = false;
451
452  // Vector Setting
453  MVT SimpleVT = LoadedVT.getSimpleVT();
454
455  // Type Setting: fromType + fromTypeWidth
456  //
457  // Sign   : ISD::SEXTLOAD
458  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
459  //          type is integer
460  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
461  MVT ScalarVT = SimpleVT.getScalarType();
462  // Read at least 8 bits (predicates are stored as 8-bit values)
463  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
464  unsigned int FromType;
465  // The last operand holds the original LoadSDNode::getExtensionType() value
466  unsigned ExtensionType = cast<ConstantSDNode>(
467      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
468  if (ExtensionType == ISD::SEXTLOAD)
469    FromType = NVPTX::PTXLdStInstCode::Signed;
470  else if (ScalarVT.isFloatingPoint())
471    FromType = NVPTX::PTXLdStInstCode::Float;
472  else
473    FromType = NVPTX::PTXLdStInstCode::Unsigned;
474
475  unsigned VecType;
476
477  switch (N->getOpcode()) {
478  case NVPTXISD::LoadV2:
479    VecType = NVPTX::PTXLdStInstCode::V2;
480    break;
481  case NVPTXISD::LoadV4:
482    VecType = NVPTX::PTXLdStInstCode::V4;
483    break;
484  default:
485    return NULL;
486  }
487
488  EVT EltVT = N->getValueType(0);
489
490  if (SelectDirectAddr(Op1, Addr)) {
491    switch (N->getOpcode()) {
492    default:
493      return NULL;
494    case NVPTXISD::LoadV2:
495      switch (EltVT.getSimpleVT().SimpleTy) {
496      default:
497        return NULL;
498      case MVT::i8:
499        Opcode = NVPTX::LDV_i8_v2_avar;
500        break;
501      case MVT::i16:
502        Opcode = NVPTX::LDV_i16_v2_avar;
503        break;
504      case MVT::i32:
505        Opcode = NVPTX::LDV_i32_v2_avar;
506        break;
507      case MVT::i64:
508        Opcode = NVPTX::LDV_i64_v2_avar;
509        break;
510      case MVT::f32:
511        Opcode = NVPTX::LDV_f32_v2_avar;
512        break;
513      case MVT::f64:
514        Opcode = NVPTX::LDV_f64_v2_avar;
515        break;
516      }
517      break;
518    case NVPTXISD::LoadV4:
519      switch (EltVT.getSimpleVT().SimpleTy) {
520      default:
521        return NULL;
522      case MVT::i8:
523        Opcode = NVPTX::LDV_i8_v4_avar;
524        break;
525      case MVT::i16:
526        Opcode = NVPTX::LDV_i16_v4_avar;
527        break;
528      case MVT::i32:
529        Opcode = NVPTX::LDV_i32_v4_avar;
530        break;
531      case MVT::f32:
532        Opcode = NVPTX::LDV_f32_v4_avar;
533        break;
534      }
535      break;
536    }
537
538    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
539                      getI32Imm(VecType), getI32Imm(FromType),
540                      getI32Imm(FromTypeWidth), Addr, Chain };
541    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
542  } else if (Subtarget.is64Bit()
543                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
544                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
545    switch (N->getOpcode()) {
546    default:
547      return NULL;
548    case NVPTXISD::LoadV2:
549      switch (EltVT.getSimpleVT().SimpleTy) {
550      default:
551        return NULL;
552      case MVT::i8:
553        Opcode = NVPTX::LDV_i8_v2_asi;
554        break;
555      case MVT::i16:
556        Opcode = NVPTX::LDV_i16_v2_asi;
557        break;
558      case MVT::i32:
559        Opcode = NVPTX::LDV_i32_v2_asi;
560        break;
561      case MVT::i64:
562        Opcode = NVPTX::LDV_i64_v2_asi;
563        break;
564      case MVT::f32:
565        Opcode = NVPTX::LDV_f32_v2_asi;
566        break;
567      case MVT::f64:
568        Opcode = NVPTX::LDV_f64_v2_asi;
569        break;
570      }
571      break;
572    case NVPTXISD::LoadV4:
573      switch (EltVT.getSimpleVT().SimpleTy) {
574      default:
575        return NULL;
576      case MVT::i8:
577        Opcode = NVPTX::LDV_i8_v4_asi;
578        break;
579      case MVT::i16:
580        Opcode = NVPTX::LDV_i16_v4_asi;
581        break;
582      case MVT::i32:
583        Opcode = NVPTX::LDV_i32_v4_asi;
584        break;
585      case MVT::f32:
586        Opcode = NVPTX::LDV_f32_v4_asi;
587        break;
588      }
589      break;
590    }
591
592    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
593                      getI32Imm(VecType), getI32Imm(FromType),
594                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
595    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
596  } else if (Subtarget.is64Bit()
597                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
598                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
599    if (Subtarget.is64Bit()) {
600      switch (N->getOpcode()) {
601      default:
602        return NULL;
603      case NVPTXISD::LoadV2:
604        switch (EltVT.getSimpleVT().SimpleTy) {
605        default:
606          return NULL;
607        case MVT::i8:
608          Opcode = NVPTX::LDV_i8_v2_ari_64;
609          break;
610        case MVT::i16:
611          Opcode = NVPTX::LDV_i16_v2_ari_64;
612          break;
613        case MVT::i32:
614          Opcode = NVPTX::LDV_i32_v2_ari_64;
615          break;
616        case MVT::i64:
617          Opcode = NVPTX::LDV_i64_v2_ari_64;
618          break;
619        case MVT::f32:
620          Opcode = NVPTX::LDV_f32_v2_ari_64;
621          break;
622        case MVT::f64:
623          Opcode = NVPTX::LDV_f64_v2_ari_64;
624          break;
625        }
626        break;
627      case NVPTXISD::LoadV4:
628        switch (EltVT.getSimpleVT().SimpleTy) {
629        default:
630          return NULL;
631        case MVT::i8:
632          Opcode = NVPTX::LDV_i8_v4_ari_64;
633          break;
634        case MVT::i16:
635          Opcode = NVPTX::LDV_i16_v4_ari_64;
636          break;
637        case MVT::i32:
638          Opcode = NVPTX::LDV_i32_v4_ari_64;
639          break;
640        case MVT::f32:
641          Opcode = NVPTX::LDV_f32_v4_ari_64;
642          break;
643        }
644        break;
645      }
646    } else {
647      switch (N->getOpcode()) {
648      default:
649        return NULL;
650      case NVPTXISD::LoadV2:
651        switch (EltVT.getSimpleVT().SimpleTy) {
652        default:
653          return NULL;
654        case MVT::i8:
655          Opcode = NVPTX::LDV_i8_v2_ari;
656          break;
657        case MVT::i16:
658          Opcode = NVPTX::LDV_i16_v2_ari;
659          break;
660        case MVT::i32:
661          Opcode = NVPTX::LDV_i32_v2_ari;
662          break;
663        case MVT::i64:
664          Opcode = NVPTX::LDV_i64_v2_ari;
665          break;
666        case MVT::f32:
667          Opcode = NVPTX::LDV_f32_v2_ari;
668          break;
669        case MVT::f64:
670          Opcode = NVPTX::LDV_f64_v2_ari;
671          break;
672        }
673        break;
674      case NVPTXISD::LoadV4:
675        switch (EltVT.getSimpleVT().SimpleTy) {
676        default:
677          return NULL;
678        case MVT::i8:
679          Opcode = NVPTX::LDV_i8_v4_ari;
680          break;
681        case MVT::i16:
682          Opcode = NVPTX::LDV_i16_v4_ari;
683          break;
684        case MVT::i32:
685          Opcode = NVPTX::LDV_i32_v4_ari;
686          break;
687        case MVT::f32:
688          Opcode = NVPTX::LDV_f32_v4_ari;
689          break;
690        }
691        break;
692      }
693    }
694
695    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
696                      getI32Imm(VecType), getI32Imm(FromType),
697                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
698
699    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
700  } else {
701    if (Subtarget.is64Bit()) {
702      switch (N->getOpcode()) {
703      default:
704        return NULL;
705      case NVPTXISD::LoadV2:
706        switch (EltVT.getSimpleVT().SimpleTy) {
707        default:
708          return NULL;
709        case MVT::i8:
710          Opcode = NVPTX::LDV_i8_v2_areg_64;
711          break;
712        case MVT::i16:
713          Opcode = NVPTX::LDV_i16_v2_areg_64;
714          break;
715        case MVT::i32:
716          Opcode = NVPTX::LDV_i32_v2_areg_64;
717          break;
718        case MVT::i64:
719          Opcode = NVPTX::LDV_i64_v2_areg_64;
720          break;
721        case MVT::f32:
722          Opcode = NVPTX::LDV_f32_v2_areg_64;
723          break;
724        case MVT::f64:
725          Opcode = NVPTX::LDV_f64_v2_areg_64;
726          break;
727        }
728        break;
729      case NVPTXISD::LoadV4:
730        switch (EltVT.getSimpleVT().SimpleTy) {
731        default:
732          return NULL;
733        case MVT::i8:
734          Opcode = NVPTX::LDV_i8_v4_areg_64;
735          break;
736        case MVT::i16:
737          Opcode = NVPTX::LDV_i16_v4_areg_64;
738          break;
739        case MVT::i32:
740          Opcode = NVPTX::LDV_i32_v4_areg_64;
741          break;
742        case MVT::f32:
743          Opcode = NVPTX::LDV_f32_v4_areg_64;
744          break;
745        }
746        break;
747      }
748    } else {
749      switch (N->getOpcode()) {
750      default:
751        return NULL;
752      case NVPTXISD::LoadV2:
753        switch (EltVT.getSimpleVT().SimpleTy) {
754        default:
755          return NULL;
756        case MVT::i8:
757          Opcode = NVPTX::LDV_i8_v2_areg;
758          break;
759        case MVT::i16:
760          Opcode = NVPTX::LDV_i16_v2_areg;
761          break;
762        case MVT::i32:
763          Opcode = NVPTX::LDV_i32_v2_areg;
764          break;
765        case MVT::i64:
766          Opcode = NVPTX::LDV_i64_v2_areg;
767          break;
768        case MVT::f32:
769          Opcode = NVPTX::LDV_f32_v2_areg;
770          break;
771        case MVT::f64:
772          Opcode = NVPTX::LDV_f64_v2_areg;
773          break;
774        }
775        break;
776      case NVPTXISD::LoadV4:
777        switch (EltVT.getSimpleVT().SimpleTy) {
778        default:
779          return NULL;
780        case MVT::i8:
781          Opcode = NVPTX::LDV_i8_v4_areg;
782          break;
783        case MVT::i16:
784          Opcode = NVPTX::LDV_i16_v4_areg;
785          break;
786        case MVT::i32:
787          Opcode = NVPTX::LDV_i32_v4_areg;
788          break;
789        case MVT::f32:
790          Opcode = NVPTX::LDV_f32_v4_areg;
791          break;
792        }
793        break;
794      }
795    }
796
797    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
798                      getI32Imm(VecType), getI32Imm(FromType),
799                      getI32Imm(FromTypeWidth), Op1, Chain };
800    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
801  }
802
803  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
804  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
805  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
806
807  return LD;
808}
809
810SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
811
812  SDValue Chain = N->getOperand(0);
813  SDValue Op1 = N->getOperand(1);
814  unsigned Opcode;
815  SDLoc DL(N);
816  SDNode *LD;
817  MemSDNode *Mem = cast<MemSDNode>(N);
818  SDValue Base, Offset, Addr;
819
820  EVT EltVT = Mem->getMemoryVT().getVectorElementType();
821
822  if (SelectDirectAddr(Op1, Addr)) {
823    switch (N->getOpcode()) {
824    default:
825      return NULL;
826    case NVPTXISD::LDGV2:
827      switch (EltVT.getSimpleVT().SimpleTy) {
828      default:
829        return NULL;
830      case MVT::i8:
831        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
832        break;
833      case MVT::i16:
834        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
835        break;
836      case MVT::i32:
837        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
838        break;
839      case MVT::i64:
840        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
841        break;
842      case MVT::f32:
843        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
844        break;
845      case MVT::f64:
846        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
847        break;
848      }
849      break;
850    case NVPTXISD::LDUV2:
851      switch (EltVT.getSimpleVT().SimpleTy) {
852      default:
853        return NULL;
854      case MVT::i8:
855        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
856        break;
857      case MVT::i16:
858        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
859        break;
860      case MVT::i32:
861        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
862        break;
863      case MVT::i64:
864        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
865        break;
866      case MVT::f32:
867        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
868        break;
869      case MVT::f64:
870        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
871        break;
872      }
873      break;
874    case NVPTXISD::LDGV4:
875      switch (EltVT.getSimpleVT().SimpleTy) {
876      default:
877        return NULL;
878      case MVT::i8:
879        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
880        break;
881      case MVT::i16:
882        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
883        break;
884      case MVT::i32:
885        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
886        break;
887      case MVT::f32:
888        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
889        break;
890      }
891      break;
892    case NVPTXISD::LDUV4:
893      switch (EltVT.getSimpleVT().SimpleTy) {
894      default:
895        return NULL;
896      case MVT::i8:
897        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
898        break;
899      case MVT::i16:
900        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
901        break;
902      case MVT::i32:
903        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
904        break;
905      case MVT::f32:
906        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
907        break;
908      }
909      break;
910    }
911
912    SDValue Ops[] = { Addr, Chain };
913    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
914                                ArrayRef<SDValue>(Ops, 2));
915  } else if (Subtarget.is64Bit()
916                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
917                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
918    if (Subtarget.is64Bit()) {
919      switch (N->getOpcode()) {
920      default:
921        return NULL;
922      case NVPTXISD::LDGV2:
923        switch (EltVT.getSimpleVT().SimpleTy) {
924        default:
925          return NULL;
926        case MVT::i8:
927          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
928          break;
929        case MVT::i16:
930          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
931          break;
932        case MVT::i32:
933          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
934          break;
935        case MVT::i64:
936          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
937          break;
938        case MVT::f32:
939          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
940          break;
941        case MVT::f64:
942          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
943          break;
944        }
945        break;
946      case NVPTXISD::LDUV2:
947        switch (EltVT.getSimpleVT().SimpleTy) {
948        default:
949          return NULL;
950        case MVT::i8:
951          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
952          break;
953        case MVT::i16:
954          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
955          break;
956        case MVT::i32:
957          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
958          break;
959        case MVT::i64:
960          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
961          break;
962        case MVT::f32:
963          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
964          break;
965        case MVT::f64:
966          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
967          break;
968        }
969        break;
970      case NVPTXISD::LDGV4:
971        switch (EltVT.getSimpleVT().SimpleTy) {
972        default:
973          return NULL;
974        case MVT::i8:
975          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
976          break;
977        case MVT::i16:
978          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
979          break;
980        case MVT::i32:
981          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
982          break;
983        case MVT::f32:
984          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
985          break;
986        }
987        break;
988      case NVPTXISD::LDUV4:
989        switch (EltVT.getSimpleVT().SimpleTy) {
990        default:
991          return NULL;
992        case MVT::i8:
993          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
994          break;
995        case MVT::i16:
996          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
997          break;
998        case MVT::i32:
999          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1000          break;
1001        case MVT::f32:
1002          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1003          break;
1004        }
1005        break;
1006      }
1007    } else {
1008      switch (N->getOpcode()) {
1009      default:
1010        return NULL;
1011      case NVPTXISD::LDGV2:
1012        switch (EltVT.getSimpleVT().SimpleTy) {
1013        default:
1014          return NULL;
1015        case MVT::i8:
1016          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1017          break;
1018        case MVT::i16:
1019          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1020          break;
1021        case MVT::i32:
1022          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1023          break;
1024        case MVT::i64:
1025          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1026          break;
1027        case MVT::f32:
1028          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1029          break;
1030        case MVT::f64:
1031          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1032          break;
1033        }
1034        break;
1035      case NVPTXISD::LDUV2:
1036        switch (EltVT.getSimpleVT().SimpleTy) {
1037        default:
1038          return NULL;
1039        case MVT::i8:
1040          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1041          break;
1042        case MVT::i16:
1043          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1044          break;
1045        case MVT::i32:
1046          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1047          break;
1048        case MVT::i64:
1049          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1050          break;
1051        case MVT::f32:
1052          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1053          break;
1054        case MVT::f64:
1055          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1056          break;
1057        }
1058        break;
1059      case NVPTXISD::LDGV4:
1060        switch (EltVT.getSimpleVT().SimpleTy) {
1061        default:
1062          return NULL;
1063        case MVT::i8:
1064          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1065          break;
1066        case MVT::i16:
1067          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1068          break;
1069        case MVT::i32:
1070          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1071          break;
1072        case MVT::f32:
1073          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1074          break;
1075        }
1076        break;
1077      case NVPTXISD::LDUV4:
1078        switch (EltVT.getSimpleVT().SimpleTy) {
1079        default:
1080          return NULL;
1081        case MVT::i8:
1082          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1083          break;
1084        case MVT::i16:
1085          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1086          break;
1087        case MVT::i32:
1088          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1089          break;
1090        case MVT::f32:
1091          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1092          break;
1093        }
1094        break;
1095      }
1096    }
1097
1098    SDValue Ops[] = { Base, Offset, Chain };
1099
1100    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
1101                                ArrayRef<SDValue>(Ops, 3));
1102  } else {
1103    if (Subtarget.is64Bit()) {
1104      switch (N->getOpcode()) {
1105      default:
1106        return NULL;
1107      case NVPTXISD::LDGV2:
1108        switch (EltVT.getSimpleVT().SimpleTy) {
1109        default:
1110          return NULL;
1111        case MVT::i8:
1112          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1113          break;
1114        case MVT::i16:
1115          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1116          break;
1117        case MVT::i32:
1118          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1119          break;
1120        case MVT::i64:
1121          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1122          break;
1123        case MVT::f32:
1124          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1125          break;
1126        case MVT::f64:
1127          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1128          break;
1129        }
1130        break;
1131      case NVPTXISD::LDUV2:
1132        switch (EltVT.getSimpleVT().SimpleTy) {
1133        default:
1134          return NULL;
1135        case MVT::i8:
1136          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1137          break;
1138        case MVT::i16:
1139          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1140          break;
1141        case MVT::i32:
1142          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1143          break;
1144        case MVT::i64:
1145          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1146          break;
1147        case MVT::f32:
1148          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1149          break;
1150        case MVT::f64:
1151          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1152          break;
1153        }
1154        break;
1155      case NVPTXISD::LDGV4:
1156        switch (EltVT.getSimpleVT().SimpleTy) {
1157        default:
1158          return NULL;
1159        case MVT::i8:
1160          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1161          break;
1162        case MVT::i16:
1163          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1164          break;
1165        case MVT::i32:
1166          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1167          break;
1168        case MVT::f32:
1169          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1170          break;
1171        }
1172        break;
1173      case NVPTXISD::LDUV4:
1174        switch (EltVT.getSimpleVT().SimpleTy) {
1175        default:
1176          return NULL;
1177        case MVT::i8:
1178          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1179          break;
1180        case MVT::i16:
1181          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1182          break;
1183        case MVT::i32:
1184          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1185          break;
1186        case MVT::f32:
1187          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1188          break;
1189        }
1190        break;
1191      }
1192    } else {
1193      switch (N->getOpcode()) {
1194      default:
1195        return NULL;
1196      case NVPTXISD::LDGV2:
1197        switch (EltVT.getSimpleVT().SimpleTy) {
1198        default:
1199          return NULL;
1200        case MVT::i8:
1201          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1202          break;
1203        case MVT::i16:
1204          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1205          break;
1206        case MVT::i32:
1207          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1208          break;
1209        case MVT::i64:
1210          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1211          break;
1212        case MVT::f32:
1213          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1214          break;
1215        case MVT::f64:
1216          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1217          break;
1218        }
1219        break;
1220      case NVPTXISD::LDUV2:
1221        switch (EltVT.getSimpleVT().SimpleTy) {
1222        default:
1223          return NULL;
1224        case MVT::i8:
1225          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1226          break;
1227        case MVT::i16:
1228          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1229          break;
1230        case MVT::i32:
1231          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1232          break;
1233        case MVT::i64:
1234          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1235          break;
1236        case MVT::f32:
1237          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1238          break;
1239        case MVT::f64:
1240          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1241          break;
1242        }
1243        break;
1244      case NVPTXISD::LDGV4:
1245        switch (EltVT.getSimpleVT().SimpleTy) {
1246        default:
1247          return NULL;
1248        case MVT::i8:
1249          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1250          break;
1251        case MVT::i16:
1252          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1253          break;
1254        case MVT::i32:
1255          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1256          break;
1257        case MVT::f32:
1258          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1259          break;
1260        }
1261        break;
1262      case NVPTXISD::LDUV4:
1263        switch (EltVT.getSimpleVT().SimpleTy) {
1264        default:
1265          return NULL;
1266        case MVT::i8:
1267          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1268          break;
1269        case MVT::i16:
1270          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1271          break;
1272        case MVT::i32:
1273          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1274          break;
1275        case MVT::f32:
1276          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1277          break;
1278        }
1279        break;
1280      }
1281    }
1282
1283    SDValue Ops[] = { Op1, Chain };
1284    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
1285                                ArrayRef<SDValue>(Ops, 2));
1286  }
1287
1288  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1289  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1290  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1291
1292  return LD;
1293}
1294
1295SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
1296  SDLoc dl(N);
1297  StoreSDNode *ST = cast<StoreSDNode>(N);
1298  EVT StoreVT = ST->getMemoryVT();
1299  SDNode *NVPTXST = NULL;
1300
1301  // do not support pre/post inc/dec
1302  if (ST->isIndexed())
1303    return NULL;
1304
1305  if (!StoreVT.isSimple())
1306    return NULL;
1307
1308  // Address Space Setting
1309  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
1310
1311  // Volatile Setting
1312  // - .volatile is only availalble for .global and .shared
1313  bool isVolatile = ST->isVolatile();
1314  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1315      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1316      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1317    isVolatile = false;
1318
1319  // Vector Setting
1320  MVT SimpleVT = StoreVT.getSimpleVT();
1321  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1322  if (SimpleVT.isVector()) {
1323    unsigned num = SimpleVT.getVectorNumElements();
1324    if (num == 2)
1325      vecType = NVPTX::PTXLdStInstCode::V2;
1326    else if (num == 4)
1327      vecType = NVPTX::PTXLdStInstCode::V4;
1328    else
1329      return NULL;
1330  }
1331
1332  // Type Setting: toType + toTypeWidth
1333  // - for integer type, always use 'u'
1334  //
1335  MVT ScalarVT = SimpleVT.getScalarType();
1336  unsigned toTypeWidth = ScalarVT.getSizeInBits();
1337  unsigned int toType;
1338  if (ScalarVT.isFloatingPoint())
1339    toType = NVPTX::PTXLdStInstCode::Float;
1340  else
1341    toType = NVPTX::PTXLdStInstCode::Unsigned;
1342
1343  // Create the machine instruction DAG
1344  SDValue Chain = N->getOperand(0);
1345  SDValue N1 = N->getOperand(1);
1346  SDValue N2 = N->getOperand(2);
1347  SDValue Addr;
1348  SDValue Offset, Base;
1349  unsigned Opcode;
1350  MVT::SimpleValueType SourceVT =
1351      N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
1352
1353  if (SelectDirectAddr(N2, Addr)) {
1354    switch (SourceVT) {
1355    case MVT::i8:
1356      Opcode = NVPTX::ST_i8_avar;
1357      break;
1358    case MVT::i16:
1359      Opcode = NVPTX::ST_i16_avar;
1360      break;
1361    case MVT::i32:
1362      Opcode = NVPTX::ST_i32_avar;
1363      break;
1364    case MVT::i64:
1365      Opcode = NVPTX::ST_i64_avar;
1366      break;
1367    case MVT::f32:
1368      Opcode = NVPTX::ST_f32_avar;
1369      break;
1370    case MVT::f64:
1371      Opcode = NVPTX::ST_f64_avar;
1372      break;
1373    default:
1374      return NULL;
1375    }
1376    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1377                      getI32Imm(vecType), getI32Imm(toType),
1378                      getI32Imm(toTypeWidth), Addr, Chain };
1379    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1380  } else if (Subtarget.is64Bit()
1381                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1382                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1383    switch (SourceVT) {
1384    case MVT::i8:
1385      Opcode = NVPTX::ST_i8_asi;
1386      break;
1387    case MVT::i16:
1388      Opcode = NVPTX::ST_i16_asi;
1389      break;
1390    case MVT::i32:
1391      Opcode = NVPTX::ST_i32_asi;
1392      break;
1393    case MVT::i64:
1394      Opcode = NVPTX::ST_i64_asi;
1395      break;
1396    case MVT::f32:
1397      Opcode = NVPTX::ST_f32_asi;
1398      break;
1399    case MVT::f64:
1400      Opcode = NVPTX::ST_f64_asi;
1401      break;
1402    default:
1403      return NULL;
1404    }
1405    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1406                      getI32Imm(vecType), getI32Imm(toType),
1407                      getI32Imm(toTypeWidth), Base, Offset, Chain };
1408    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1409  } else if (Subtarget.is64Bit()
1410                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1411                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1412    if (Subtarget.is64Bit()) {
1413      switch (SourceVT) {
1414      case MVT::i8:
1415        Opcode = NVPTX::ST_i8_ari_64;
1416        break;
1417      case MVT::i16:
1418        Opcode = NVPTX::ST_i16_ari_64;
1419        break;
1420      case MVT::i32:
1421        Opcode = NVPTX::ST_i32_ari_64;
1422        break;
1423      case MVT::i64:
1424        Opcode = NVPTX::ST_i64_ari_64;
1425        break;
1426      case MVT::f32:
1427        Opcode = NVPTX::ST_f32_ari_64;
1428        break;
1429      case MVT::f64:
1430        Opcode = NVPTX::ST_f64_ari_64;
1431        break;
1432      default:
1433        return NULL;
1434      }
1435    } else {
1436      switch (SourceVT) {
1437      case MVT::i8:
1438        Opcode = NVPTX::ST_i8_ari;
1439        break;
1440      case MVT::i16:
1441        Opcode = NVPTX::ST_i16_ari;
1442        break;
1443      case MVT::i32:
1444        Opcode = NVPTX::ST_i32_ari;
1445        break;
1446      case MVT::i64:
1447        Opcode = NVPTX::ST_i64_ari;
1448        break;
1449      case MVT::f32:
1450        Opcode = NVPTX::ST_f32_ari;
1451        break;
1452      case MVT::f64:
1453        Opcode = NVPTX::ST_f64_ari;
1454        break;
1455      default:
1456        return NULL;
1457      }
1458    }
1459    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1460                      getI32Imm(vecType), getI32Imm(toType),
1461                      getI32Imm(toTypeWidth), Base, Offset, Chain };
1462    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1463  } else {
1464    if (Subtarget.is64Bit()) {
1465      switch (SourceVT) {
1466      case MVT::i8:
1467        Opcode = NVPTX::ST_i8_areg_64;
1468        break;
1469      case MVT::i16:
1470        Opcode = NVPTX::ST_i16_areg_64;
1471        break;
1472      case MVT::i32:
1473        Opcode = NVPTX::ST_i32_areg_64;
1474        break;
1475      case MVT::i64:
1476        Opcode = NVPTX::ST_i64_areg_64;
1477        break;
1478      case MVT::f32:
1479        Opcode = NVPTX::ST_f32_areg_64;
1480        break;
1481      case MVT::f64:
1482        Opcode = NVPTX::ST_f64_areg_64;
1483        break;
1484      default:
1485        return NULL;
1486      }
1487    } else {
1488      switch (SourceVT) {
1489      case MVT::i8:
1490        Opcode = NVPTX::ST_i8_areg;
1491        break;
1492      case MVT::i16:
1493        Opcode = NVPTX::ST_i16_areg;
1494        break;
1495      case MVT::i32:
1496        Opcode = NVPTX::ST_i32_areg;
1497        break;
1498      case MVT::i64:
1499        Opcode = NVPTX::ST_i64_areg;
1500        break;
1501      case MVT::f32:
1502        Opcode = NVPTX::ST_f32_areg;
1503        break;
1504      case MVT::f64:
1505        Opcode = NVPTX::ST_f64_areg;
1506        break;
1507      default:
1508        return NULL;
1509      }
1510    }
1511    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1512                      getI32Imm(vecType), getI32Imm(toType),
1513                      getI32Imm(toTypeWidth), N2, Chain };
1514    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1515  }
1516
1517  if (NVPTXST != NULL) {
1518    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1519    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1520    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1521  }
1522
1523  return NVPTXST;
1524}
1525
1526SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1527  SDValue Chain = N->getOperand(0);
1528  SDValue Op1 = N->getOperand(1);
1529  SDValue Addr, Offset, Base;
1530  unsigned Opcode;
1531  SDLoc DL(N);
1532  SDNode *ST;
1533  EVT EltVT = Op1.getValueType();
1534  MemSDNode *MemSD = cast<MemSDNode>(N);
1535  EVT StoreVT = MemSD->getMemoryVT();
1536
1537  // Address Space Setting
1538  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
1539
1540  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1541    report_fatal_error("Cannot store to pointer that points to constant "
1542                       "memory space");
1543  }
1544
1545  // Volatile Setting
1546  // - .volatile is only availalble for .global and .shared
1547  bool IsVolatile = MemSD->isVolatile();
1548  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1549      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1550      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1551    IsVolatile = false;
1552
1553  // Type Setting: toType + toTypeWidth
1554  // - for integer type, always use 'u'
1555  assert(StoreVT.isSimple() && "Store value is not simple");
1556  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1557  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1558  unsigned ToType;
1559  if (ScalarVT.isFloatingPoint())
1560    ToType = NVPTX::PTXLdStInstCode::Float;
1561  else
1562    ToType = NVPTX::PTXLdStInstCode::Unsigned;
1563
1564  SmallVector<SDValue, 12> StOps;
1565  SDValue N2;
1566  unsigned VecType;
1567
1568  switch (N->getOpcode()) {
1569  case NVPTXISD::StoreV2:
1570    VecType = NVPTX::PTXLdStInstCode::V2;
1571    StOps.push_back(N->getOperand(1));
1572    StOps.push_back(N->getOperand(2));
1573    N2 = N->getOperand(3);
1574    break;
1575  case NVPTXISD::StoreV4:
1576    VecType = NVPTX::PTXLdStInstCode::V4;
1577    StOps.push_back(N->getOperand(1));
1578    StOps.push_back(N->getOperand(2));
1579    StOps.push_back(N->getOperand(3));
1580    StOps.push_back(N->getOperand(4));
1581    N2 = N->getOperand(5);
1582    break;
1583  default:
1584    return NULL;
1585  }
1586
1587  StOps.push_back(getI32Imm(IsVolatile));
1588  StOps.push_back(getI32Imm(CodeAddrSpace));
1589  StOps.push_back(getI32Imm(VecType));
1590  StOps.push_back(getI32Imm(ToType));
1591  StOps.push_back(getI32Imm(ToTypeWidth));
1592
1593  if (SelectDirectAddr(N2, Addr)) {
1594    switch (N->getOpcode()) {
1595    default:
1596      return NULL;
1597    case NVPTXISD::StoreV2:
1598      switch (EltVT.getSimpleVT().SimpleTy) {
1599      default:
1600        return NULL;
1601      case MVT::i8:
1602        Opcode = NVPTX::STV_i8_v2_avar;
1603        break;
1604      case MVT::i16:
1605        Opcode = NVPTX::STV_i16_v2_avar;
1606        break;
1607      case MVT::i32:
1608        Opcode = NVPTX::STV_i32_v2_avar;
1609        break;
1610      case MVT::i64:
1611        Opcode = NVPTX::STV_i64_v2_avar;
1612        break;
1613      case MVT::f32:
1614        Opcode = NVPTX::STV_f32_v2_avar;
1615        break;
1616      case MVT::f64:
1617        Opcode = NVPTX::STV_f64_v2_avar;
1618        break;
1619      }
1620      break;
1621    case NVPTXISD::StoreV4:
1622      switch (EltVT.getSimpleVT().SimpleTy) {
1623      default:
1624        return NULL;
1625      case MVT::i8:
1626        Opcode = NVPTX::STV_i8_v4_avar;
1627        break;
1628      case MVT::i16:
1629        Opcode = NVPTX::STV_i16_v4_avar;
1630        break;
1631      case MVT::i32:
1632        Opcode = NVPTX::STV_i32_v4_avar;
1633        break;
1634      case MVT::f32:
1635        Opcode = NVPTX::STV_f32_v4_avar;
1636        break;
1637      }
1638      break;
1639    }
1640    StOps.push_back(Addr);
1641  } else if (Subtarget.is64Bit()
1642                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1643                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1644    switch (N->getOpcode()) {
1645    default:
1646      return NULL;
1647    case NVPTXISD::StoreV2:
1648      switch (EltVT.getSimpleVT().SimpleTy) {
1649      default:
1650        return NULL;
1651      case MVT::i8:
1652        Opcode = NVPTX::STV_i8_v2_asi;
1653        break;
1654      case MVT::i16:
1655        Opcode = NVPTX::STV_i16_v2_asi;
1656        break;
1657      case MVT::i32:
1658        Opcode = NVPTX::STV_i32_v2_asi;
1659        break;
1660      case MVT::i64:
1661        Opcode = NVPTX::STV_i64_v2_asi;
1662        break;
1663      case MVT::f32:
1664        Opcode = NVPTX::STV_f32_v2_asi;
1665        break;
1666      case MVT::f64:
1667        Opcode = NVPTX::STV_f64_v2_asi;
1668        break;
1669      }
1670      break;
1671    case NVPTXISD::StoreV4:
1672      switch (EltVT.getSimpleVT().SimpleTy) {
1673      default:
1674        return NULL;
1675      case MVT::i8:
1676        Opcode = NVPTX::STV_i8_v4_asi;
1677        break;
1678      case MVT::i16:
1679        Opcode = NVPTX::STV_i16_v4_asi;
1680        break;
1681      case MVT::i32:
1682        Opcode = NVPTX::STV_i32_v4_asi;
1683        break;
1684      case MVT::f32:
1685        Opcode = NVPTX::STV_f32_v4_asi;
1686        break;
1687      }
1688      break;
1689    }
1690    StOps.push_back(Base);
1691    StOps.push_back(Offset);
1692  } else if (Subtarget.is64Bit()
1693                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1694                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1695    if (Subtarget.is64Bit()) {
1696      switch (N->getOpcode()) {
1697      default:
1698        return NULL;
1699      case NVPTXISD::StoreV2:
1700        switch (EltVT.getSimpleVT().SimpleTy) {
1701        default:
1702          return NULL;
1703        case MVT::i8:
1704          Opcode = NVPTX::STV_i8_v2_ari_64;
1705          break;
1706        case MVT::i16:
1707          Opcode = NVPTX::STV_i16_v2_ari_64;
1708          break;
1709        case MVT::i32:
1710          Opcode = NVPTX::STV_i32_v2_ari_64;
1711          break;
1712        case MVT::i64:
1713          Opcode = NVPTX::STV_i64_v2_ari_64;
1714          break;
1715        case MVT::f32:
1716          Opcode = NVPTX::STV_f32_v2_ari_64;
1717          break;
1718        case MVT::f64:
1719          Opcode = NVPTX::STV_f64_v2_ari_64;
1720          break;
1721        }
1722        break;
1723      case NVPTXISD::StoreV4:
1724        switch (EltVT.getSimpleVT().SimpleTy) {
1725        default:
1726          return NULL;
1727        case MVT::i8:
1728          Opcode = NVPTX::STV_i8_v4_ari_64;
1729          break;
1730        case MVT::i16:
1731          Opcode = NVPTX::STV_i16_v4_ari_64;
1732          break;
1733        case MVT::i32:
1734          Opcode = NVPTX::STV_i32_v4_ari_64;
1735          break;
1736        case MVT::f32:
1737          Opcode = NVPTX::STV_f32_v4_ari_64;
1738          break;
1739        }
1740        break;
1741      }
1742    } else {
1743      switch (N->getOpcode()) {
1744      default:
1745        return NULL;
1746      case NVPTXISD::StoreV2:
1747        switch (EltVT.getSimpleVT().SimpleTy) {
1748        default:
1749          return NULL;
1750        case MVT::i8:
1751          Opcode = NVPTX::STV_i8_v2_ari;
1752          break;
1753        case MVT::i16:
1754          Opcode = NVPTX::STV_i16_v2_ari;
1755          break;
1756        case MVT::i32:
1757          Opcode = NVPTX::STV_i32_v2_ari;
1758          break;
1759        case MVT::i64:
1760          Opcode = NVPTX::STV_i64_v2_ari;
1761          break;
1762        case MVT::f32:
1763          Opcode = NVPTX::STV_f32_v2_ari;
1764          break;
1765        case MVT::f64:
1766          Opcode = NVPTX::STV_f64_v2_ari;
1767          break;
1768        }
1769        break;
1770      case NVPTXISD::StoreV4:
1771        switch (EltVT.getSimpleVT().SimpleTy) {
1772        default:
1773          return NULL;
1774        case MVT::i8:
1775          Opcode = NVPTX::STV_i8_v4_ari;
1776          break;
1777        case MVT::i16:
1778          Opcode = NVPTX::STV_i16_v4_ari;
1779          break;
1780        case MVT::i32:
1781          Opcode = NVPTX::STV_i32_v4_ari;
1782          break;
1783        case MVT::f32:
1784          Opcode = NVPTX::STV_f32_v4_ari;
1785          break;
1786        }
1787        break;
1788      }
1789    }
1790    StOps.push_back(Base);
1791    StOps.push_back(Offset);
1792  } else {
1793    if (Subtarget.is64Bit()) {
1794      switch (N->getOpcode()) {
1795      default:
1796        return NULL;
1797      case NVPTXISD::StoreV2:
1798        switch (EltVT.getSimpleVT().SimpleTy) {
1799        default:
1800          return NULL;
1801        case MVT::i8:
1802          Opcode = NVPTX::STV_i8_v2_areg_64;
1803          break;
1804        case MVT::i16:
1805          Opcode = NVPTX::STV_i16_v2_areg_64;
1806          break;
1807        case MVT::i32:
1808          Opcode = NVPTX::STV_i32_v2_areg_64;
1809          break;
1810        case MVT::i64:
1811          Opcode = NVPTX::STV_i64_v2_areg_64;
1812          break;
1813        case MVT::f32:
1814          Opcode = NVPTX::STV_f32_v2_areg_64;
1815          break;
1816        case MVT::f64:
1817          Opcode = NVPTX::STV_f64_v2_areg_64;
1818          break;
1819        }
1820        break;
1821      case NVPTXISD::StoreV4:
1822        switch (EltVT.getSimpleVT().SimpleTy) {
1823        default:
1824          return NULL;
1825        case MVT::i8:
1826          Opcode = NVPTX::STV_i8_v4_areg_64;
1827          break;
1828        case MVT::i16:
1829          Opcode = NVPTX::STV_i16_v4_areg_64;
1830          break;
1831        case MVT::i32:
1832          Opcode = NVPTX::STV_i32_v4_areg_64;
1833          break;
1834        case MVT::f32:
1835          Opcode = NVPTX::STV_f32_v4_areg_64;
1836          break;
1837        }
1838        break;
1839      }
1840    } else {
1841      switch (N->getOpcode()) {
1842      default:
1843        return NULL;
1844      case NVPTXISD::StoreV2:
1845        switch (EltVT.getSimpleVT().SimpleTy) {
1846        default:
1847          return NULL;
1848        case MVT::i8:
1849          Opcode = NVPTX::STV_i8_v2_areg;
1850          break;
1851        case MVT::i16:
1852          Opcode = NVPTX::STV_i16_v2_areg;
1853          break;
1854        case MVT::i32:
1855          Opcode = NVPTX::STV_i32_v2_areg;
1856          break;
1857        case MVT::i64:
1858          Opcode = NVPTX::STV_i64_v2_areg;
1859          break;
1860        case MVT::f32:
1861          Opcode = NVPTX::STV_f32_v2_areg;
1862          break;
1863        case MVT::f64:
1864          Opcode = NVPTX::STV_f64_v2_areg;
1865          break;
1866        }
1867        break;
1868      case NVPTXISD::StoreV4:
1869        switch (EltVT.getSimpleVT().SimpleTy) {
1870        default:
1871          return NULL;
1872        case MVT::i8:
1873          Opcode = NVPTX::STV_i8_v4_areg;
1874          break;
1875        case MVT::i16:
1876          Opcode = NVPTX::STV_i16_v4_areg;
1877          break;
1878        case MVT::i32:
1879          Opcode = NVPTX::STV_i32_v4_areg;
1880          break;
1881        case MVT::f32:
1882          Opcode = NVPTX::STV_f32_v4_areg;
1883          break;
1884        }
1885        break;
1886      }
1887    }
1888    StOps.push_back(N2);
1889  }
1890
1891  StOps.push_back(Chain);
1892
1893  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
1894
1895  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1896  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1897  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1898
1899  return ST;
1900}
1901
1902SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
1903  SDValue Chain = Node->getOperand(0);
1904  SDValue Offset = Node->getOperand(2);
1905  SDValue Flag = Node->getOperand(3);
1906  SDLoc DL(Node);
1907  MemSDNode *Mem = cast<MemSDNode>(Node);
1908
1909  unsigned VecSize;
1910  switch (Node->getOpcode()) {
1911  default:
1912    return NULL;
1913  case NVPTXISD::LoadParam:
1914    VecSize = 1;
1915    break;
1916  case NVPTXISD::LoadParamV2:
1917    VecSize = 2;
1918    break;
1919  case NVPTXISD::LoadParamV4:
1920    VecSize = 4;
1921    break;
1922  }
1923
1924  EVT EltVT = Node->getValueType(0);
1925  EVT MemVT = Mem->getMemoryVT();
1926
1927  unsigned Opc = 0;
1928
1929  switch (VecSize) {
1930  default:
1931    return NULL;
1932  case 1:
1933    switch (MemVT.getSimpleVT().SimpleTy) {
1934    default:
1935      return NULL;
1936    case MVT::i1:
1937      Opc = NVPTX::LoadParamMemI8;
1938      break;
1939    case MVT::i8:
1940      Opc = NVPTX::LoadParamMemI8;
1941      break;
1942    case MVT::i16:
1943      Opc = NVPTX::LoadParamMemI16;
1944      break;
1945    case MVT::i32:
1946      Opc = NVPTX::LoadParamMemI32;
1947      break;
1948    case MVT::i64:
1949      Opc = NVPTX::LoadParamMemI64;
1950      break;
1951    case MVT::f32:
1952      Opc = NVPTX::LoadParamMemF32;
1953      break;
1954    case MVT::f64:
1955      Opc = NVPTX::LoadParamMemF64;
1956      break;
1957    }
1958    break;
1959  case 2:
1960    switch (MemVT.getSimpleVT().SimpleTy) {
1961    default:
1962      return NULL;
1963    case MVT::i1:
1964      Opc = NVPTX::LoadParamMemV2I8;
1965      break;
1966    case MVT::i8:
1967      Opc = NVPTX::LoadParamMemV2I8;
1968      break;
1969    case MVT::i16:
1970      Opc = NVPTX::LoadParamMemV2I16;
1971      break;
1972    case MVT::i32:
1973      Opc = NVPTX::LoadParamMemV2I32;
1974      break;
1975    case MVT::i64:
1976      Opc = NVPTX::LoadParamMemV2I64;
1977      break;
1978    case MVT::f32:
1979      Opc = NVPTX::LoadParamMemV2F32;
1980      break;
1981    case MVT::f64:
1982      Opc = NVPTX::LoadParamMemV2F64;
1983      break;
1984    }
1985    break;
1986  case 4:
1987    switch (MemVT.getSimpleVT().SimpleTy) {
1988    default:
1989      return NULL;
1990    case MVT::i1:
1991      Opc = NVPTX::LoadParamMemV4I8;
1992      break;
1993    case MVT::i8:
1994      Opc = NVPTX::LoadParamMemV4I8;
1995      break;
1996    case MVT::i16:
1997      Opc = NVPTX::LoadParamMemV4I16;
1998      break;
1999    case MVT::i32:
2000      Opc = NVPTX::LoadParamMemV4I32;
2001      break;
2002    case MVT::f32:
2003      Opc = NVPTX::LoadParamMemV4F32;
2004      break;
2005    }
2006    break;
2007  }
2008
2009  SDVTList VTs;
2010  if (VecSize == 1) {
2011    VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2012  } else if (VecSize == 2) {
2013    VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2014  } else {
2015    EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2016    VTs = CurDAG->getVTList(&EVTs[0], 5);
2017  }
2018
2019  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2020
2021  SmallVector<SDValue, 2> Ops;
2022  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2023  Ops.push_back(Chain);
2024  Ops.push_back(Flag);
2025
2026  SDNode *Ret =
2027      CurDAG->getMachineNode(Opc, DL, VTs, Ops);
2028  return Ret;
2029}
2030
2031SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2032  SDLoc DL(N);
2033  SDValue Chain = N->getOperand(0);
2034  SDValue Offset = N->getOperand(1);
2035  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2036  MemSDNode *Mem = cast<MemSDNode>(N);
2037
2038  // How many elements do we have?
2039  unsigned NumElts = 1;
2040  switch (N->getOpcode()) {
2041  default:
2042    return NULL;
2043  case NVPTXISD::StoreRetval:
2044    NumElts = 1;
2045    break;
2046  case NVPTXISD::StoreRetvalV2:
2047    NumElts = 2;
2048    break;
2049  case NVPTXISD::StoreRetvalV4:
2050    NumElts = 4;
2051    break;
2052  }
2053
2054  // Build vector of operands
2055  SmallVector<SDValue, 6> Ops;
2056  for (unsigned i = 0; i < NumElts; ++i)
2057    Ops.push_back(N->getOperand(i + 2));
2058  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2059  Ops.push_back(Chain);
2060
2061  // Determine target opcode
2062  // If we have an i1, use an 8-bit store. The lowering code in
2063  // NVPTXISelLowering will have already emitted an upcast.
2064  unsigned Opcode = 0;
2065  switch (NumElts) {
2066  default:
2067    return NULL;
2068  case 1:
2069    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2070    default:
2071      return NULL;
2072    case MVT::i1:
2073      Opcode = NVPTX::StoreRetvalI8;
2074      break;
2075    case MVT::i8:
2076      Opcode = NVPTX::StoreRetvalI8;
2077      break;
2078    case MVT::i16:
2079      Opcode = NVPTX::StoreRetvalI16;
2080      break;
2081    case MVT::i32:
2082      Opcode = NVPTX::StoreRetvalI32;
2083      break;
2084    case MVT::i64:
2085      Opcode = NVPTX::StoreRetvalI64;
2086      break;
2087    case MVT::f32:
2088      Opcode = NVPTX::StoreRetvalF32;
2089      break;
2090    case MVT::f64:
2091      Opcode = NVPTX::StoreRetvalF64;
2092      break;
2093    }
2094    break;
2095  case 2:
2096    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2097    default:
2098      return NULL;
2099    case MVT::i1:
2100      Opcode = NVPTX::StoreRetvalV2I8;
2101      break;
2102    case MVT::i8:
2103      Opcode = NVPTX::StoreRetvalV2I8;
2104      break;
2105    case MVT::i16:
2106      Opcode = NVPTX::StoreRetvalV2I16;
2107      break;
2108    case MVT::i32:
2109      Opcode = NVPTX::StoreRetvalV2I32;
2110      break;
2111    case MVT::i64:
2112      Opcode = NVPTX::StoreRetvalV2I64;
2113      break;
2114    case MVT::f32:
2115      Opcode = NVPTX::StoreRetvalV2F32;
2116      break;
2117    case MVT::f64:
2118      Opcode = NVPTX::StoreRetvalV2F64;
2119      break;
2120    }
2121    break;
2122  case 4:
2123    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2124    default:
2125      return NULL;
2126    case MVT::i1:
2127      Opcode = NVPTX::StoreRetvalV4I8;
2128      break;
2129    case MVT::i8:
2130      Opcode = NVPTX::StoreRetvalV4I8;
2131      break;
2132    case MVT::i16:
2133      Opcode = NVPTX::StoreRetvalV4I16;
2134      break;
2135    case MVT::i32:
2136      Opcode = NVPTX::StoreRetvalV4I32;
2137      break;
2138    case MVT::f32:
2139      Opcode = NVPTX::StoreRetvalV4F32;
2140      break;
2141    }
2142    break;
2143  }
2144
2145  SDNode *Ret =
2146      CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2147  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2148  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2149  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2150
2151  return Ret;
2152}
2153
2154SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2155  SDLoc DL(N);
2156  SDValue Chain = N->getOperand(0);
2157  SDValue Param = N->getOperand(1);
2158  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2159  SDValue Offset = N->getOperand(2);
2160  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2161  MemSDNode *Mem = cast<MemSDNode>(N);
2162  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2163
2164  // How many elements do we have?
2165  unsigned NumElts = 1;
2166  switch (N->getOpcode()) {
2167  default:
2168    return NULL;
2169  case NVPTXISD::StoreParamU32:
2170  case NVPTXISD::StoreParamS32:
2171  case NVPTXISD::StoreParam:
2172    NumElts = 1;
2173    break;
2174  case NVPTXISD::StoreParamV2:
2175    NumElts = 2;
2176    break;
2177  case NVPTXISD::StoreParamV4:
2178    NumElts = 4;
2179    break;
2180  }
2181
2182  // Build vector of operands
2183  SmallVector<SDValue, 8> Ops;
2184  for (unsigned i = 0; i < NumElts; ++i)
2185    Ops.push_back(N->getOperand(i + 3));
2186  Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2187  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2188  Ops.push_back(Chain);
2189  Ops.push_back(Flag);
2190
2191  // Determine target opcode
2192  // If we have an i1, use an 8-bit store. The lowering code in
2193  // NVPTXISelLowering will have already emitted an upcast.
2194  unsigned Opcode = 0;
2195  switch (N->getOpcode()) {
2196  default:
2197    switch (NumElts) {
2198    default:
2199      return NULL;
2200    case 1:
2201      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2202      default:
2203        return NULL;
2204      case MVT::i1:
2205        Opcode = NVPTX::StoreParamI8;
2206        break;
2207      case MVT::i8:
2208        Opcode = NVPTX::StoreParamI8;
2209        break;
2210      case MVT::i16:
2211        Opcode = NVPTX::StoreParamI16;
2212        break;
2213      case MVT::i32:
2214        Opcode = NVPTX::StoreParamI32;
2215        break;
2216      case MVT::i64:
2217        Opcode = NVPTX::StoreParamI64;
2218        break;
2219      case MVT::f32:
2220        Opcode = NVPTX::StoreParamF32;
2221        break;
2222      case MVT::f64:
2223        Opcode = NVPTX::StoreParamF64;
2224        break;
2225      }
2226      break;
2227    case 2:
2228      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2229      default:
2230        return NULL;
2231      case MVT::i1:
2232        Opcode = NVPTX::StoreParamV2I8;
2233        break;
2234      case MVT::i8:
2235        Opcode = NVPTX::StoreParamV2I8;
2236        break;
2237      case MVT::i16:
2238        Opcode = NVPTX::StoreParamV2I16;
2239        break;
2240      case MVT::i32:
2241        Opcode = NVPTX::StoreParamV2I32;
2242        break;
2243      case MVT::i64:
2244        Opcode = NVPTX::StoreParamV2I64;
2245        break;
2246      case MVT::f32:
2247        Opcode = NVPTX::StoreParamV2F32;
2248        break;
2249      case MVT::f64:
2250        Opcode = NVPTX::StoreParamV2F64;
2251        break;
2252      }
2253      break;
2254    case 4:
2255      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2256      default:
2257        return NULL;
2258      case MVT::i1:
2259        Opcode = NVPTX::StoreParamV4I8;
2260        break;
2261      case MVT::i8:
2262        Opcode = NVPTX::StoreParamV4I8;
2263        break;
2264      case MVT::i16:
2265        Opcode = NVPTX::StoreParamV4I16;
2266        break;
2267      case MVT::i32:
2268        Opcode = NVPTX::StoreParamV4I32;
2269        break;
2270      case MVT::f32:
2271        Opcode = NVPTX::StoreParamV4F32;
2272        break;
2273      }
2274      break;
2275    }
2276    break;
2277  // Special case: if we have a sign-extend/zero-extend node, insert the
2278  // conversion instruction first, and use that as the value operand to
2279  // the selected StoreParam node.
2280  case NVPTXISD::StoreParamU32: {
2281    Opcode = NVPTX::StoreParamI32;
2282    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2283                                                MVT::i32);
2284    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2285                                         MVT::i32, Ops[0], CvtNone);
2286    Ops[0] = SDValue(Cvt, 0);
2287    break;
2288  }
2289  case NVPTXISD::StoreParamS32: {
2290    Opcode = NVPTX::StoreParamI32;
2291    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2292                                                MVT::i32);
2293    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2294                                         MVT::i32, Ops[0], CvtNone);
2295    Ops[0] = SDValue(Cvt, 0);
2296    break;
2297  }
2298  }
2299
2300  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2301  SDNode *Ret =
2302      CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
2303  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2304  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2305  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2306
2307  return Ret;
2308}
2309
2310// SelectDirectAddr - Match a direct address for DAG.
2311// A direct address could be a globaladdress or externalsymbol.
2312bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
2313  // Return true if TGA or ES.
2314  if (N.getOpcode() == ISD::TargetGlobalAddress ||
2315      N.getOpcode() == ISD::TargetExternalSymbol) {
2316    Address = N;
2317    return true;
2318  }
2319  if (N.getOpcode() == NVPTXISD::Wrapper) {
2320    Address = N.getOperand(0);
2321    return true;
2322  }
2323  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2324    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
2325    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
2326      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
2327        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
2328  }
2329  return false;
2330}
2331
2332// symbol+offset
2333bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
2334    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
2335  if (Addr.getOpcode() == ISD::ADD) {
2336    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2337      SDValue base = Addr.getOperand(0);
2338      if (SelectDirectAddr(base, Base)) {
2339        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
2340        return true;
2341      }
2342    }
2343  }
2344  return false;
2345}
2346
2347// symbol+offset
2348bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
2349                                     SDValue &Base, SDValue &Offset) {
2350  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
2351}
2352
2353// symbol+offset
2354bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
2355                                       SDValue &Base, SDValue &Offset) {
2356  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
2357}
2358
2359// register+offset
2360bool NVPTXDAGToDAGISel::SelectADDRri_imp(
2361    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
2362  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2363    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
2364    Offset = CurDAG->getTargetConstant(0, mvt);
2365    return true;
2366  }
2367  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
2368      Addr.getOpcode() == ISD::TargetGlobalAddress)
2369    return false; // direct calls.
2370
2371  if (Addr.getOpcode() == ISD::ADD) {
2372    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
2373      return false;
2374    }
2375    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2376      if (FrameIndexSDNode *FIN =
2377              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
2378        // Constant offset from frame ref.
2379        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
2380      else
2381        Base = Addr.getOperand(0);
2382      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
2383      return true;
2384    }
2385  }
2386  return false;
2387}
2388
2389// register+offset
2390bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
2391                                     SDValue &Base, SDValue &Offset) {
2392  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
2393}
2394
2395// register+offset
2396bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
2397                                       SDValue &Base, SDValue &Offset) {
2398  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
2399}
2400
2401bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
2402                                                 unsigned int spN) const {
2403  const Value *Src = NULL;
2404  // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
2405  // the classof() for MemSDNode does not include MemIntrinsicSDNode
2406  // (See SelectionDAGNodes.h). So we need to check for both.
2407  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
2408    Src = mN->getSrcValue();
2409  } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
2410    Src = mN->getSrcValue();
2411  }
2412  if (!Src)
2413    return false;
2414  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
2415    return (PT->getAddressSpace() == spN);
2416  return false;
2417}
2418
2419/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
2420/// inline asm expressions.
2421bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
2422    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
2423  SDValue Op0, Op1;
2424  switch (ConstraintCode) {
2425  default:
2426    return true;
2427  case 'm': // memory
2428    if (SelectDirectAddr(Op, Op0)) {
2429      OutOps.push_back(Op0);
2430      OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
2431      return false;
2432    }
2433    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
2434      OutOps.push_back(Op0);
2435      OutOps.push_back(Op1);
2436      return false;
2437    }
2438    break;
2439  }
2440  return true;
2441}
2442
2443// Return true if N is a undef or a constant.
2444// If N was undef, return a (i8imm 0) in Retval
2445// If N was imm, convert it to i8imm and return in Retval
2446// Note: The convert to i8imm is required, otherwise the
2447// pattern matcher inserts a bunch of IMOVi8rr to convert
2448// the imm to i8imm, and this causes instruction selection
2449// to fail.
2450bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
2451  if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
2452    return false;
2453
2454  if (N.getOpcode() == ISD::UNDEF)
2455    Retval = CurDAG->getTargetConstant(0, MVT::i8);
2456  else {
2457    ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
2458    unsigned retval = cn->getZExtValue();
2459    Retval = CurDAG->getTargetConstant(retval, MVT::i8);
2460  }
2461  return true;
2462}
2463