NVPTXISelDAGToDAG.cpp revision fe16848601bdde6e3a5e0860199169dd171222a4
1//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXISelDAGToDAG.h"
15#include "llvm/IR/GlobalValue.h"
16#include "llvm/IR/Instructions.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Support/Debug.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/raw_ostream.h"
21#include "llvm/Target/TargetIntrinsicInfo.h"
22
23#undef DEBUG_TYPE
24#define DEBUG_TYPE "nvptx-isel"
25
26using namespace llvm;
27
28static cl::opt<int>
29FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
30                 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
31                          " 1: do it  2: do it aggressively"),
32                 cl::init(2));
33
34static cl::opt<int> UsePrecDivF32(
35    "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
36    cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
37             " IEEE Compliant F32 div.rnd if avaiable."),
38    cl::init(2));
39
40static cl::opt<bool>
41UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
42          cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
43          cl::init(true));
44
45static cl::opt<bool>
46FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
47           cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
48           cl::init(false));
49
50
51/// createNVPTXISelDag - This pass converts a legalized DAG into a
52/// NVPTX-specific DAG, ready for instruction scheduling.
53FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
54                                       llvm::CodeGenOpt::Level OptLevel) {
55  return new NVPTXDAGToDAGISel(TM, OptLevel);
56}
57
58NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
59                                     CodeGenOpt::Level OptLevel)
60    : SelectionDAGISel(tm, OptLevel),
61      Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
62
63  doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
64  doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
65  doFMAF32AGG =
66      (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
67  doFMAF64AGG =
68      (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
69
70  allowFMA = (FMAContractLevel >= 1);
71
72  doMulWide = (OptLevel > 0);
73}
74
75int NVPTXDAGToDAGISel::getDivF32Level() const {
76  if (UsePrecDivF32.getNumOccurrences() > 0) {
77    // If nvptx-prec-div32=N is used on the command-line, always honor it
78    return UsePrecDivF32;
79  } else {
80    // Otherwise, use div.approx if fast math is enabled
81    if (TM.Options.UnsafeFPMath)
82      return 0;
83    else
84      return 2;
85  }
86}
87
88bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
89  if (UsePrecSqrtF32.getNumOccurrences() > 0) {
90    // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
91    return UsePrecSqrtF32;
92  } else {
93    // Otherwise, use sqrt.approx if fast math is enabled
94    if (TM.Options.UnsafeFPMath)
95      return false;
96    else
97      return true;
98  }
99}
100
101bool NVPTXDAGToDAGISel::useF32FTZ() const {
102  if (FtzEnabled.getNumOccurrences() > 0) {
103    // If nvptx-f32ftz is used on the command-line, always honor it
104    return FtzEnabled;
105  } else {
106    const Function *F = MF->getFunction();
107    // Otherwise, check for an nvptx-f32ftz attribute on the function
108    if (F->hasFnAttribute("nvptx-f32ftz"))
109      return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
110                                              "nvptx-f32ftz")
111                                              .getValueAsString() == "true");
112    else
113      return false;
114  }
115}
116
117/// Select - Select instructions not customized! Used for
118/// expanded, promoted and normal instructions.
119SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
120
121  if (N->isMachineOpcode()) {
122    N->setNodeId(-1);
123    return NULL; // Already selected.
124  }
125
126  SDNode *ResNode = NULL;
127  switch (N->getOpcode()) {
128  case ISD::LOAD:
129    ResNode = SelectLoad(N);
130    break;
131  case ISD::STORE:
132    ResNode = SelectStore(N);
133    break;
134  case NVPTXISD::LoadV2:
135  case NVPTXISD::LoadV4:
136    ResNode = SelectLoadVector(N);
137    break;
138  case NVPTXISD::LDGV2:
139  case NVPTXISD::LDGV4:
140  case NVPTXISD::LDUV2:
141  case NVPTXISD::LDUV4:
142    ResNode = SelectLDGLDUVector(N);
143    break;
144  case NVPTXISD::StoreV2:
145  case NVPTXISD::StoreV4:
146    ResNode = SelectStoreVector(N);
147    break;
148  case NVPTXISD::LoadParam:
149  case NVPTXISD::LoadParamV2:
150  case NVPTXISD::LoadParamV4:
151    ResNode = SelectLoadParam(N);
152    break;
153  case NVPTXISD::StoreRetval:
154  case NVPTXISD::StoreRetvalV2:
155  case NVPTXISD::StoreRetvalV4:
156    ResNode = SelectStoreRetval(N);
157    break;
158  case NVPTXISD::StoreParam:
159  case NVPTXISD::StoreParamV2:
160  case NVPTXISD::StoreParamV4:
161  case NVPTXISD::StoreParamS32:
162  case NVPTXISD::StoreParamU32:
163    ResNode = SelectStoreParam(N);
164    break;
165  default:
166    break;
167  }
168  if (ResNode)
169    return ResNode;
170  return SelectCode(N);
171}
172
173static unsigned int getCodeAddrSpace(MemSDNode *N,
174                                     const NVPTXSubtarget &Subtarget) {
175  const Value *Src = N->getSrcValue();
176
177  if (!Src)
178    return NVPTX::PTXLdStInstCode::GENERIC;
179
180  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
181    switch (PT->getAddressSpace()) {
182    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
183    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
184    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
185    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
186    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
187    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
188    default: break;
189    }
190  }
191  return NVPTX::PTXLdStInstCode::GENERIC;
192}
193
194SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
195  SDLoc dl(N);
196  LoadSDNode *LD = cast<LoadSDNode>(N);
197  EVT LoadedVT = LD->getMemoryVT();
198  SDNode *NVPTXLD = NULL;
199
200  // do not support pre/post inc/dec
201  if (LD->isIndexed())
202    return NULL;
203
204  if (!LoadedVT.isSimple())
205    return NULL;
206
207  // Address Space Setting
208  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
209
210  // Volatile Setting
211  // - .volatile is only availalble for .global and .shared
212  bool isVolatile = LD->isVolatile();
213  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
214      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
215      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
216    isVolatile = false;
217
218  // Vector Setting
219  MVT SimpleVT = LoadedVT.getSimpleVT();
220  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
221  if (SimpleVT.isVector()) {
222    unsigned num = SimpleVT.getVectorNumElements();
223    if (num == 2)
224      vecType = NVPTX::PTXLdStInstCode::V2;
225    else if (num == 4)
226      vecType = NVPTX::PTXLdStInstCode::V4;
227    else
228      return NULL;
229  }
230
231  // Type Setting: fromType + fromTypeWidth
232  //
233  // Sign   : ISD::SEXTLOAD
234  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
235  //          type is integer
236  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
237  MVT ScalarVT = SimpleVT.getScalarType();
238  // Read at least 8 bits (predicates are stored as 8-bit values)
239  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
240  unsigned int fromType;
241  if ((LD->getExtensionType() == ISD::SEXTLOAD))
242    fromType = NVPTX::PTXLdStInstCode::Signed;
243  else if (ScalarVT.isFloatingPoint())
244    fromType = NVPTX::PTXLdStInstCode::Float;
245  else
246    fromType = NVPTX::PTXLdStInstCode::Unsigned;
247
248  // Create the machine instruction DAG
249  SDValue Chain = N->getOperand(0);
250  SDValue N1 = N->getOperand(1);
251  SDValue Addr;
252  SDValue Offset, Base;
253  unsigned Opcode;
254  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
255
256  if (SelectDirectAddr(N1, Addr)) {
257    switch (TargetVT) {
258    case MVT::i8:
259      Opcode = NVPTX::LD_i8_avar;
260      break;
261    case MVT::i16:
262      Opcode = NVPTX::LD_i16_avar;
263      break;
264    case MVT::i32:
265      Opcode = NVPTX::LD_i32_avar;
266      break;
267    case MVT::i64:
268      Opcode = NVPTX::LD_i64_avar;
269      break;
270    case MVT::f32:
271      Opcode = NVPTX::LD_f32_avar;
272      break;
273    case MVT::f64:
274      Opcode = NVPTX::LD_f64_avar;
275      break;
276    default:
277      return NULL;
278    }
279    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
280                      getI32Imm(vecType), getI32Imm(fromType),
281                      getI32Imm(fromTypeWidth), Addr, Chain };
282    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
283  } else if (Subtarget.is64Bit()
284                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
285                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
286    switch (TargetVT) {
287    case MVT::i8:
288      Opcode = NVPTX::LD_i8_asi;
289      break;
290    case MVT::i16:
291      Opcode = NVPTX::LD_i16_asi;
292      break;
293    case MVT::i32:
294      Opcode = NVPTX::LD_i32_asi;
295      break;
296    case MVT::i64:
297      Opcode = NVPTX::LD_i64_asi;
298      break;
299    case MVT::f32:
300      Opcode = NVPTX::LD_f32_asi;
301      break;
302    case MVT::f64:
303      Opcode = NVPTX::LD_f64_asi;
304      break;
305    default:
306      return NULL;
307    }
308    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
309                      getI32Imm(vecType), getI32Imm(fromType),
310                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
311    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
312  } else if (Subtarget.is64Bit()
313                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
314                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
315    if (Subtarget.is64Bit()) {
316      switch (TargetVT) {
317      case MVT::i8:
318        Opcode = NVPTX::LD_i8_ari_64;
319        break;
320      case MVT::i16:
321        Opcode = NVPTX::LD_i16_ari_64;
322        break;
323      case MVT::i32:
324        Opcode = NVPTX::LD_i32_ari_64;
325        break;
326      case MVT::i64:
327        Opcode = NVPTX::LD_i64_ari_64;
328        break;
329      case MVT::f32:
330        Opcode = NVPTX::LD_f32_ari_64;
331        break;
332      case MVT::f64:
333        Opcode = NVPTX::LD_f64_ari_64;
334        break;
335      default:
336        return NULL;
337      }
338    } else {
339      switch (TargetVT) {
340      case MVT::i8:
341        Opcode = NVPTX::LD_i8_ari;
342        break;
343      case MVT::i16:
344        Opcode = NVPTX::LD_i16_ari;
345        break;
346      case MVT::i32:
347        Opcode = NVPTX::LD_i32_ari;
348        break;
349      case MVT::i64:
350        Opcode = NVPTX::LD_i64_ari;
351        break;
352      case MVT::f32:
353        Opcode = NVPTX::LD_f32_ari;
354        break;
355      case MVT::f64:
356        Opcode = NVPTX::LD_f64_ari;
357        break;
358      default:
359        return NULL;
360      }
361    }
362    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
363                      getI32Imm(vecType), getI32Imm(fromType),
364                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
365    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
366  } else {
367    if (Subtarget.is64Bit()) {
368      switch (TargetVT) {
369      case MVT::i8:
370        Opcode = NVPTX::LD_i8_areg_64;
371        break;
372      case MVT::i16:
373        Opcode = NVPTX::LD_i16_areg_64;
374        break;
375      case MVT::i32:
376        Opcode = NVPTX::LD_i32_areg_64;
377        break;
378      case MVT::i64:
379        Opcode = NVPTX::LD_i64_areg_64;
380        break;
381      case MVT::f32:
382        Opcode = NVPTX::LD_f32_areg_64;
383        break;
384      case MVT::f64:
385        Opcode = NVPTX::LD_f64_areg_64;
386        break;
387      default:
388        return NULL;
389      }
390    } else {
391      switch (TargetVT) {
392      case MVT::i8:
393        Opcode = NVPTX::LD_i8_areg;
394        break;
395      case MVT::i16:
396        Opcode = NVPTX::LD_i16_areg;
397        break;
398      case MVT::i32:
399        Opcode = NVPTX::LD_i32_areg;
400        break;
401      case MVT::i64:
402        Opcode = NVPTX::LD_i64_areg;
403        break;
404      case MVT::f32:
405        Opcode = NVPTX::LD_f32_areg;
406        break;
407      case MVT::f64:
408        Opcode = NVPTX::LD_f64_areg;
409        break;
410      default:
411        return NULL;
412      }
413    }
414    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
415                      getI32Imm(vecType), getI32Imm(fromType),
416                      getI32Imm(fromTypeWidth), N1, Chain };
417    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
418  }
419
420  if (NVPTXLD != NULL) {
421    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
422    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
423    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
424  }
425
426  return NVPTXLD;
427}
428
429SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
430
431  SDValue Chain = N->getOperand(0);
432  SDValue Op1 = N->getOperand(1);
433  SDValue Addr, Offset, Base;
434  unsigned Opcode;
435  SDLoc DL(N);
436  SDNode *LD;
437  MemSDNode *MemSD = cast<MemSDNode>(N);
438  EVT LoadedVT = MemSD->getMemoryVT();
439
440  if (!LoadedVT.isSimple())
441    return NULL;
442
443  // Address Space Setting
444  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
445
446  // Volatile Setting
447  // - .volatile is only availalble for .global and .shared
448  bool IsVolatile = MemSD->isVolatile();
449  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
450      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
451      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
452    IsVolatile = false;
453
454  // Vector Setting
455  MVT SimpleVT = LoadedVT.getSimpleVT();
456
457  // Type Setting: fromType + fromTypeWidth
458  //
459  // Sign   : ISD::SEXTLOAD
460  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
461  //          type is integer
462  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
463  MVT ScalarVT = SimpleVT.getScalarType();
464  // Read at least 8 bits (predicates are stored as 8-bit values)
465  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
466  unsigned int FromType;
467  // The last operand holds the original LoadSDNode::getExtensionType() value
468  unsigned ExtensionType = cast<ConstantSDNode>(
469      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
470  if (ExtensionType == ISD::SEXTLOAD)
471    FromType = NVPTX::PTXLdStInstCode::Signed;
472  else if (ScalarVT.isFloatingPoint())
473    FromType = NVPTX::PTXLdStInstCode::Float;
474  else
475    FromType = NVPTX::PTXLdStInstCode::Unsigned;
476
477  unsigned VecType;
478
479  switch (N->getOpcode()) {
480  case NVPTXISD::LoadV2:
481    VecType = NVPTX::PTXLdStInstCode::V2;
482    break;
483  case NVPTXISD::LoadV4:
484    VecType = NVPTX::PTXLdStInstCode::V4;
485    break;
486  default:
487    return NULL;
488  }
489
490  EVT EltVT = N->getValueType(0);
491
492  if (SelectDirectAddr(Op1, Addr)) {
493    switch (N->getOpcode()) {
494    default:
495      return NULL;
496    case NVPTXISD::LoadV2:
497      switch (EltVT.getSimpleVT().SimpleTy) {
498      default:
499        return NULL;
500      case MVT::i8:
501        Opcode = NVPTX::LDV_i8_v2_avar;
502        break;
503      case MVT::i16:
504        Opcode = NVPTX::LDV_i16_v2_avar;
505        break;
506      case MVT::i32:
507        Opcode = NVPTX::LDV_i32_v2_avar;
508        break;
509      case MVT::i64:
510        Opcode = NVPTX::LDV_i64_v2_avar;
511        break;
512      case MVT::f32:
513        Opcode = NVPTX::LDV_f32_v2_avar;
514        break;
515      case MVT::f64:
516        Opcode = NVPTX::LDV_f64_v2_avar;
517        break;
518      }
519      break;
520    case NVPTXISD::LoadV4:
521      switch (EltVT.getSimpleVT().SimpleTy) {
522      default:
523        return NULL;
524      case MVT::i8:
525        Opcode = NVPTX::LDV_i8_v4_avar;
526        break;
527      case MVT::i16:
528        Opcode = NVPTX::LDV_i16_v4_avar;
529        break;
530      case MVT::i32:
531        Opcode = NVPTX::LDV_i32_v4_avar;
532        break;
533      case MVT::f32:
534        Opcode = NVPTX::LDV_f32_v4_avar;
535        break;
536      }
537      break;
538    }
539
540    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
541                      getI32Imm(VecType), getI32Imm(FromType),
542                      getI32Imm(FromTypeWidth), Addr, Chain };
543    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
544  } else if (Subtarget.is64Bit()
545                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
546                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
547    switch (N->getOpcode()) {
548    default:
549      return NULL;
550    case NVPTXISD::LoadV2:
551      switch (EltVT.getSimpleVT().SimpleTy) {
552      default:
553        return NULL;
554      case MVT::i8:
555        Opcode = NVPTX::LDV_i8_v2_asi;
556        break;
557      case MVT::i16:
558        Opcode = NVPTX::LDV_i16_v2_asi;
559        break;
560      case MVT::i32:
561        Opcode = NVPTX::LDV_i32_v2_asi;
562        break;
563      case MVT::i64:
564        Opcode = NVPTX::LDV_i64_v2_asi;
565        break;
566      case MVT::f32:
567        Opcode = NVPTX::LDV_f32_v2_asi;
568        break;
569      case MVT::f64:
570        Opcode = NVPTX::LDV_f64_v2_asi;
571        break;
572      }
573      break;
574    case NVPTXISD::LoadV4:
575      switch (EltVT.getSimpleVT().SimpleTy) {
576      default:
577        return NULL;
578      case MVT::i8:
579        Opcode = NVPTX::LDV_i8_v4_asi;
580        break;
581      case MVT::i16:
582        Opcode = NVPTX::LDV_i16_v4_asi;
583        break;
584      case MVT::i32:
585        Opcode = NVPTX::LDV_i32_v4_asi;
586        break;
587      case MVT::f32:
588        Opcode = NVPTX::LDV_f32_v4_asi;
589        break;
590      }
591      break;
592    }
593
594    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
595                      getI32Imm(VecType), getI32Imm(FromType),
596                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
597    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
598  } else if (Subtarget.is64Bit()
599                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
600                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
601    if (Subtarget.is64Bit()) {
602      switch (N->getOpcode()) {
603      default:
604        return NULL;
605      case NVPTXISD::LoadV2:
606        switch (EltVT.getSimpleVT().SimpleTy) {
607        default:
608          return NULL;
609        case MVT::i8:
610          Opcode = NVPTX::LDV_i8_v2_ari_64;
611          break;
612        case MVT::i16:
613          Opcode = NVPTX::LDV_i16_v2_ari_64;
614          break;
615        case MVT::i32:
616          Opcode = NVPTX::LDV_i32_v2_ari_64;
617          break;
618        case MVT::i64:
619          Opcode = NVPTX::LDV_i64_v2_ari_64;
620          break;
621        case MVT::f32:
622          Opcode = NVPTX::LDV_f32_v2_ari_64;
623          break;
624        case MVT::f64:
625          Opcode = NVPTX::LDV_f64_v2_ari_64;
626          break;
627        }
628        break;
629      case NVPTXISD::LoadV4:
630        switch (EltVT.getSimpleVT().SimpleTy) {
631        default:
632          return NULL;
633        case MVT::i8:
634          Opcode = NVPTX::LDV_i8_v4_ari_64;
635          break;
636        case MVT::i16:
637          Opcode = NVPTX::LDV_i16_v4_ari_64;
638          break;
639        case MVT::i32:
640          Opcode = NVPTX::LDV_i32_v4_ari_64;
641          break;
642        case MVT::f32:
643          Opcode = NVPTX::LDV_f32_v4_ari_64;
644          break;
645        }
646        break;
647      }
648    } else {
649      switch (N->getOpcode()) {
650      default:
651        return NULL;
652      case NVPTXISD::LoadV2:
653        switch (EltVT.getSimpleVT().SimpleTy) {
654        default:
655          return NULL;
656        case MVT::i8:
657          Opcode = NVPTX::LDV_i8_v2_ari;
658          break;
659        case MVT::i16:
660          Opcode = NVPTX::LDV_i16_v2_ari;
661          break;
662        case MVT::i32:
663          Opcode = NVPTX::LDV_i32_v2_ari;
664          break;
665        case MVT::i64:
666          Opcode = NVPTX::LDV_i64_v2_ari;
667          break;
668        case MVT::f32:
669          Opcode = NVPTX::LDV_f32_v2_ari;
670          break;
671        case MVT::f64:
672          Opcode = NVPTX::LDV_f64_v2_ari;
673          break;
674        }
675        break;
676      case NVPTXISD::LoadV4:
677        switch (EltVT.getSimpleVT().SimpleTy) {
678        default:
679          return NULL;
680        case MVT::i8:
681          Opcode = NVPTX::LDV_i8_v4_ari;
682          break;
683        case MVT::i16:
684          Opcode = NVPTX::LDV_i16_v4_ari;
685          break;
686        case MVT::i32:
687          Opcode = NVPTX::LDV_i32_v4_ari;
688          break;
689        case MVT::f32:
690          Opcode = NVPTX::LDV_f32_v4_ari;
691          break;
692        }
693        break;
694      }
695    }
696
697    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
698                      getI32Imm(VecType), getI32Imm(FromType),
699                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
700
701    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
702  } else {
703    if (Subtarget.is64Bit()) {
704      switch (N->getOpcode()) {
705      default:
706        return NULL;
707      case NVPTXISD::LoadV2:
708        switch (EltVT.getSimpleVT().SimpleTy) {
709        default:
710          return NULL;
711        case MVT::i8:
712          Opcode = NVPTX::LDV_i8_v2_areg_64;
713          break;
714        case MVT::i16:
715          Opcode = NVPTX::LDV_i16_v2_areg_64;
716          break;
717        case MVT::i32:
718          Opcode = NVPTX::LDV_i32_v2_areg_64;
719          break;
720        case MVT::i64:
721          Opcode = NVPTX::LDV_i64_v2_areg_64;
722          break;
723        case MVT::f32:
724          Opcode = NVPTX::LDV_f32_v2_areg_64;
725          break;
726        case MVT::f64:
727          Opcode = NVPTX::LDV_f64_v2_areg_64;
728          break;
729        }
730        break;
731      case NVPTXISD::LoadV4:
732        switch (EltVT.getSimpleVT().SimpleTy) {
733        default:
734          return NULL;
735        case MVT::i8:
736          Opcode = NVPTX::LDV_i8_v4_areg_64;
737          break;
738        case MVT::i16:
739          Opcode = NVPTX::LDV_i16_v4_areg_64;
740          break;
741        case MVT::i32:
742          Opcode = NVPTX::LDV_i32_v4_areg_64;
743          break;
744        case MVT::f32:
745          Opcode = NVPTX::LDV_f32_v4_areg_64;
746          break;
747        }
748        break;
749      }
750    } else {
751      switch (N->getOpcode()) {
752      default:
753        return NULL;
754      case NVPTXISD::LoadV2:
755        switch (EltVT.getSimpleVT().SimpleTy) {
756        default:
757          return NULL;
758        case MVT::i8:
759          Opcode = NVPTX::LDV_i8_v2_areg;
760          break;
761        case MVT::i16:
762          Opcode = NVPTX::LDV_i16_v2_areg;
763          break;
764        case MVT::i32:
765          Opcode = NVPTX::LDV_i32_v2_areg;
766          break;
767        case MVT::i64:
768          Opcode = NVPTX::LDV_i64_v2_areg;
769          break;
770        case MVT::f32:
771          Opcode = NVPTX::LDV_f32_v2_areg;
772          break;
773        case MVT::f64:
774          Opcode = NVPTX::LDV_f64_v2_areg;
775          break;
776        }
777        break;
778      case NVPTXISD::LoadV4:
779        switch (EltVT.getSimpleVT().SimpleTy) {
780        default:
781          return NULL;
782        case MVT::i8:
783          Opcode = NVPTX::LDV_i8_v4_areg;
784          break;
785        case MVT::i16:
786          Opcode = NVPTX::LDV_i16_v4_areg;
787          break;
788        case MVT::i32:
789          Opcode = NVPTX::LDV_i32_v4_areg;
790          break;
791        case MVT::f32:
792          Opcode = NVPTX::LDV_f32_v4_areg;
793          break;
794        }
795        break;
796      }
797    }
798
799    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
800                      getI32Imm(VecType), getI32Imm(FromType),
801                      getI32Imm(FromTypeWidth), Op1, Chain };
802    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
803  }
804
805  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
806  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
807  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
808
809  return LD;
810}
811
812SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
813
814  SDValue Chain = N->getOperand(0);
815  SDValue Op1 = N->getOperand(1);
816  unsigned Opcode;
817  SDLoc DL(N);
818  SDNode *LD;
819  MemSDNode *Mem = cast<MemSDNode>(N);
820  SDValue Base, Offset, Addr;
821
822  EVT EltVT = Mem->getMemoryVT().getVectorElementType();
823
824  if (SelectDirectAddr(Op1, Addr)) {
825    switch (N->getOpcode()) {
826    default:
827      return NULL;
828    case NVPTXISD::LDGV2:
829      switch (EltVT.getSimpleVT().SimpleTy) {
830      default:
831        return NULL;
832      case MVT::i8:
833        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
834        break;
835      case MVT::i16:
836        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
837        break;
838      case MVT::i32:
839        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
840        break;
841      case MVT::i64:
842        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
843        break;
844      case MVT::f32:
845        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
846        break;
847      case MVT::f64:
848        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
849        break;
850      }
851      break;
852    case NVPTXISD::LDUV2:
853      switch (EltVT.getSimpleVT().SimpleTy) {
854      default:
855        return NULL;
856      case MVT::i8:
857        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
858        break;
859      case MVT::i16:
860        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
861        break;
862      case MVT::i32:
863        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
864        break;
865      case MVT::i64:
866        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
867        break;
868      case MVT::f32:
869        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
870        break;
871      case MVT::f64:
872        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
873        break;
874      }
875      break;
876    case NVPTXISD::LDGV4:
877      switch (EltVT.getSimpleVT().SimpleTy) {
878      default:
879        return NULL;
880      case MVT::i8:
881        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
882        break;
883      case MVT::i16:
884        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
885        break;
886      case MVT::i32:
887        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
888        break;
889      case MVT::f32:
890        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
891        break;
892      }
893      break;
894    case NVPTXISD::LDUV4:
895      switch (EltVT.getSimpleVT().SimpleTy) {
896      default:
897        return NULL;
898      case MVT::i8:
899        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
900        break;
901      case MVT::i16:
902        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
903        break;
904      case MVT::i32:
905        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
906        break;
907      case MVT::f32:
908        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
909        break;
910      }
911      break;
912    }
913
914    SDValue Ops[] = { Addr, Chain };
915    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
916                                ArrayRef<SDValue>(Ops, 2));
917  } else if (Subtarget.is64Bit()
918                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
919                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
920    if (Subtarget.is64Bit()) {
921      switch (N->getOpcode()) {
922      default:
923        return NULL;
924      case NVPTXISD::LDGV2:
925        switch (EltVT.getSimpleVT().SimpleTy) {
926        default:
927          return NULL;
928        case MVT::i8:
929          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
930          break;
931        case MVT::i16:
932          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
933          break;
934        case MVT::i32:
935          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
936          break;
937        case MVT::i64:
938          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
939          break;
940        case MVT::f32:
941          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
942          break;
943        case MVT::f64:
944          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
945          break;
946        }
947        break;
948      case NVPTXISD::LDUV2:
949        switch (EltVT.getSimpleVT().SimpleTy) {
950        default:
951          return NULL;
952        case MVT::i8:
953          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
954          break;
955        case MVT::i16:
956          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
957          break;
958        case MVT::i32:
959          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
960          break;
961        case MVT::i64:
962          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
963          break;
964        case MVT::f32:
965          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
966          break;
967        case MVT::f64:
968          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
969          break;
970        }
971        break;
972      case NVPTXISD::LDGV4:
973        switch (EltVT.getSimpleVT().SimpleTy) {
974        default:
975          return NULL;
976        case MVT::i8:
977          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
978          break;
979        case MVT::i16:
980          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
981          break;
982        case MVT::i32:
983          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
984          break;
985        case MVT::f32:
986          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
987          break;
988        }
989        break;
990      case NVPTXISD::LDUV4:
991        switch (EltVT.getSimpleVT().SimpleTy) {
992        default:
993          return NULL;
994        case MVT::i8:
995          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
996          break;
997        case MVT::i16:
998          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
999          break;
1000        case MVT::i32:
1001          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1002          break;
1003        case MVT::f32:
1004          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1005          break;
1006        }
1007        break;
1008      }
1009    } else {
1010      switch (N->getOpcode()) {
1011      default:
1012        return NULL;
1013      case NVPTXISD::LDGV2:
1014        switch (EltVT.getSimpleVT().SimpleTy) {
1015        default:
1016          return NULL;
1017        case MVT::i8:
1018          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1019          break;
1020        case MVT::i16:
1021          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1022          break;
1023        case MVT::i32:
1024          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1025          break;
1026        case MVT::i64:
1027          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1028          break;
1029        case MVT::f32:
1030          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1031          break;
1032        case MVT::f64:
1033          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1034          break;
1035        }
1036        break;
1037      case NVPTXISD::LDUV2:
1038        switch (EltVT.getSimpleVT().SimpleTy) {
1039        default:
1040          return NULL;
1041        case MVT::i8:
1042          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1043          break;
1044        case MVT::i16:
1045          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1046          break;
1047        case MVT::i32:
1048          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1049          break;
1050        case MVT::i64:
1051          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1052          break;
1053        case MVT::f32:
1054          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1055          break;
1056        case MVT::f64:
1057          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1058          break;
1059        }
1060        break;
1061      case NVPTXISD::LDGV4:
1062        switch (EltVT.getSimpleVT().SimpleTy) {
1063        default:
1064          return NULL;
1065        case MVT::i8:
1066          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1067          break;
1068        case MVT::i16:
1069          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1070          break;
1071        case MVT::i32:
1072          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1073          break;
1074        case MVT::f32:
1075          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1076          break;
1077        }
1078        break;
1079      case NVPTXISD::LDUV4:
1080        switch (EltVT.getSimpleVT().SimpleTy) {
1081        default:
1082          return NULL;
1083        case MVT::i8:
1084          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1085          break;
1086        case MVT::i16:
1087          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1088          break;
1089        case MVT::i32:
1090          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1091          break;
1092        case MVT::f32:
1093          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1094          break;
1095        }
1096        break;
1097      }
1098    }
1099
1100    SDValue Ops[] = { Base, Offset, Chain };
1101
1102    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
1103                                ArrayRef<SDValue>(Ops, 3));
1104  } else {
1105    if (Subtarget.is64Bit()) {
1106      switch (N->getOpcode()) {
1107      default:
1108        return NULL;
1109      case NVPTXISD::LDGV2:
1110        switch (EltVT.getSimpleVT().SimpleTy) {
1111        default:
1112          return NULL;
1113        case MVT::i8:
1114          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1115          break;
1116        case MVT::i16:
1117          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1118          break;
1119        case MVT::i32:
1120          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1121          break;
1122        case MVT::i64:
1123          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1124          break;
1125        case MVT::f32:
1126          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1127          break;
1128        case MVT::f64:
1129          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1130          break;
1131        }
1132        break;
1133      case NVPTXISD::LDUV2:
1134        switch (EltVT.getSimpleVT().SimpleTy) {
1135        default:
1136          return NULL;
1137        case MVT::i8:
1138          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1139          break;
1140        case MVT::i16:
1141          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1142          break;
1143        case MVT::i32:
1144          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1145          break;
1146        case MVT::i64:
1147          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1148          break;
1149        case MVT::f32:
1150          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1151          break;
1152        case MVT::f64:
1153          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1154          break;
1155        }
1156        break;
1157      case NVPTXISD::LDGV4:
1158        switch (EltVT.getSimpleVT().SimpleTy) {
1159        default:
1160          return NULL;
1161        case MVT::i8:
1162          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1163          break;
1164        case MVT::i16:
1165          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1166          break;
1167        case MVT::i32:
1168          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1169          break;
1170        case MVT::f32:
1171          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1172          break;
1173        }
1174        break;
1175      case NVPTXISD::LDUV4:
1176        switch (EltVT.getSimpleVT().SimpleTy) {
1177        default:
1178          return NULL;
1179        case MVT::i8:
1180          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1181          break;
1182        case MVT::i16:
1183          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1184          break;
1185        case MVT::i32:
1186          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1187          break;
1188        case MVT::f32:
1189          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1190          break;
1191        }
1192        break;
1193      }
1194    } else {
1195      switch (N->getOpcode()) {
1196      default:
1197        return NULL;
1198      case NVPTXISD::LDGV2:
1199        switch (EltVT.getSimpleVT().SimpleTy) {
1200        default:
1201          return NULL;
1202        case MVT::i8:
1203          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1204          break;
1205        case MVT::i16:
1206          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1207          break;
1208        case MVT::i32:
1209          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1210          break;
1211        case MVT::i64:
1212          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1213          break;
1214        case MVT::f32:
1215          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1216          break;
1217        case MVT::f64:
1218          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1219          break;
1220        }
1221        break;
1222      case NVPTXISD::LDUV2:
1223        switch (EltVT.getSimpleVT().SimpleTy) {
1224        default:
1225          return NULL;
1226        case MVT::i8:
1227          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1228          break;
1229        case MVT::i16:
1230          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1231          break;
1232        case MVT::i32:
1233          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1234          break;
1235        case MVT::i64:
1236          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1237          break;
1238        case MVT::f32:
1239          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1240          break;
1241        case MVT::f64:
1242          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1243          break;
1244        }
1245        break;
1246      case NVPTXISD::LDGV4:
1247        switch (EltVT.getSimpleVT().SimpleTy) {
1248        default:
1249          return NULL;
1250        case MVT::i8:
1251          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1252          break;
1253        case MVT::i16:
1254          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1255          break;
1256        case MVT::i32:
1257          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1258          break;
1259        case MVT::f32:
1260          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1261          break;
1262        }
1263        break;
1264      case NVPTXISD::LDUV4:
1265        switch (EltVT.getSimpleVT().SimpleTy) {
1266        default:
1267          return NULL;
1268        case MVT::i8:
1269          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1270          break;
1271        case MVT::i16:
1272          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1273          break;
1274        case MVT::i32:
1275          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1276          break;
1277        case MVT::f32:
1278          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1279          break;
1280        }
1281        break;
1282      }
1283    }
1284
1285    SDValue Ops[] = { Op1, Chain };
1286    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
1287                                ArrayRef<SDValue>(Ops, 2));
1288  }
1289
1290  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1291  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1292  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1293
1294  return LD;
1295}
1296
1297SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
1298  SDLoc dl(N);
1299  StoreSDNode *ST = cast<StoreSDNode>(N);
1300  EVT StoreVT = ST->getMemoryVT();
1301  SDNode *NVPTXST = NULL;
1302
1303  // do not support pre/post inc/dec
1304  if (ST->isIndexed())
1305    return NULL;
1306
1307  if (!StoreVT.isSimple())
1308    return NULL;
1309
1310  // Address Space Setting
1311  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
1312
1313  // Volatile Setting
1314  // - .volatile is only availalble for .global and .shared
1315  bool isVolatile = ST->isVolatile();
1316  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1317      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1318      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1319    isVolatile = false;
1320
1321  // Vector Setting
1322  MVT SimpleVT = StoreVT.getSimpleVT();
1323  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1324  if (SimpleVT.isVector()) {
1325    unsigned num = SimpleVT.getVectorNumElements();
1326    if (num == 2)
1327      vecType = NVPTX::PTXLdStInstCode::V2;
1328    else if (num == 4)
1329      vecType = NVPTX::PTXLdStInstCode::V4;
1330    else
1331      return NULL;
1332  }
1333
1334  // Type Setting: toType + toTypeWidth
1335  // - for integer type, always use 'u'
1336  //
1337  MVT ScalarVT = SimpleVT.getScalarType();
1338  unsigned toTypeWidth = ScalarVT.getSizeInBits();
1339  unsigned int toType;
1340  if (ScalarVT.isFloatingPoint())
1341    toType = NVPTX::PTXLdStInstCode::Float;
1342  else
1343    toType = NVPTX::PTXLdStInstCode::Unsigned;
1344
1345  // Create the machine instruction DAG
1346  SDValue Chain = N->getOperand(0);
1347  SDValue N1 = N->getOperand(1);
1348  SDValue N2 = N->getOperand(2);
1349  SDValue Addr;
1350  SDValue Offset, Base;
1351  unsigned Opcode;
1352  MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
1353
1354  if (SelectDirectAddr(N2, Addr)) {
1355    switch (SourceVT) {
1356    case MVT::i8:
1357      Opcode = NVPTX::ST_i8_avar;
1358      break;
1359    case MVT::i16:
1360      Opcode = NVPTX::ST_i16_avar;
1361      break;
1362    case MVT::i32:
1363      Opcode = NVPTX::ST_i32_avar;
1364      break;
1365    case MVT::i64:
1366      Opcode = NVPTX::ST_i64_avar;
1367      break;
1368    case MVT::f32:
1369      Opcode = NVPTX::ST_f32_avar;
1370      break;
1371    case MVT::f64:
1372      Opcode = NVPTX::ST_f64_avar;
1373      break;
1374    default:
1375      return NULL;
1376    }
1377    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1378                      getI32Imm(vecType), getI32Imm(toType),
1379                      getI32Imm(toTypeWidth), Addr, Chain };
1380    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1381  } else if (Subtarget.is64Bit()
1382                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1383                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1384    switch (SourceVT) {
1385    case MVT::i8:
1386      Opcode = NVPTX::ST_i8_asi;
1387      break;
1388    case MVT::i16:
1389      Opcode = NVPTX::ST_i16_asi;
1390      break;
1391    case MVT::i32:
1392      Opcode = NVPTX::ST_i32_asi;
1393      break;
1394    case MVT::i64:
1395      Opcode = NVPTX::ST_i64_asi;
1396      break;
1397    case MVT::f32:
1398      Opcode = NVPTX::ST_f32_asi;
1399      break;
1400    case MVT::f64:
1401      Opcode = NVPTX::ST_f64_asi;
1402      break;
1403    default:
1404      return NULL;
1405    }
1406    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1407                      getI32Imm(vecType), getI32Imm(toType),
1408                      getI32Imm(toTypeWidth), Base, Offset, Chain };
1409    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1410  } else if (Subtarget.is64Bit()
1411                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1412                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1413    if (Subtarget.is64Bit()) {
1414      switch (SourceVT) {
1415      case MVT::i8:
1416        Opcode = NVPTX::ST_i8_ari_64;
1417        break;
1418      case MVT::i16:
1419        Opcode = NVPTX::ST_i16_ari_64;
1420        break;
1421      case MVT::i32:
1422        Opcode = NVPTX::ST_i32_ari_64;
1423        break;
1424      case MVT::i64:
1425        Opcode = NVPTX::ST_i64_ari_64;
1426        break;
1427      case MVT::f32:
1428        Opcode = NVPTX::ST_f32_ari_64;
1429        break;
1430      case MVT::f64:
1431        Opcode = NVPTX::ST_f64_ari_64;
1432        break;
1433      default:
1434        return NULL;
1435      }
1436    } else {
1437      switch (SourceVT) {
1438      case MVT::i8:
1439        Opcode = NVPTX::ST_i8_ari;
1440        break;
1441      case MVT::i16:
1442        Opcode = NVPTX::ST_i16_ari;
1443        break;
1444      case MVT::i32:
1445        Opcode = NVPTX::ST_i32_ari;
1446        break;
1447      case MVT::i64:
1448        Opcode = NVPTX::ST_i64_ari;
1449        break;
1450      case MVT::f32:
1451        Opcode = NVPTX::ST_f32_ari;
1452        break;
1453      case MVT::f64:
1454        Opcode = NVPTX::ST_f64_ari;
1455        break;
1456      default:
1457        return NULL;
1458      }
1459    }
1460    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1461                      getI32Imm(vecType), getI32Imm(toType),
1462                      getI32Imm(toTypeWidth), Base, Offset, Chain };
1463    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1464  } else {
1465    if (Subtarget.is64Bit()) {
1466      switch (SourceVT) {
1467      case MVT::i8:
1468        Opcode = NVPTX::ST_i8_areg_64;
1469        break;
1470      case MVT::i16:
1471        Opcode = NVPTX::ST_i16_areg_64;
1472        break;
1473      case MVT::i32:
1474        Opcode = NVPTX::ST_i32_areg_64;
1475        break;
1476      case MVT::i64:
1477        Opcode = NVPTX::ST_i64_areg_64;
1478        break;
1479      case MVT::f32:
1480        Opcode = NVPTX::ST_f32_areg_64;
1481        break;
1482      case MVT::f64:
1483        Opcode = NVPTX::ST_f64_areg_64;
1484        break;
1485      default:
1486        return NULL;
1487      }
1488    } else {
1489      switch (SourceVT) {
1490      case MVT::i8:
1491        Opcode = NVPTX::ST_i8_areg;
1492        break;
1493      case MVT::i16:
1494        Opcode = NVPTX::ST_i16_areg;
1495        break;
1496      case MVT::i32:
1497        Opcode = NVPTX::ST_i32_areg;
1498        break;
1499      case MVT::i64:
1500        Opcode = NVPTX::ST_i64_areg;
1501        break;
1502      case MVT::f32:
1503        Opcode = NVPTX::ST_f32_areg;
1504        break;
1505      case MVT::f64:
1506        Opcode = NVPTX::ST_f64_areg;
1507        break;
1508      default:
1509        return NULL;
1510      }
1511    }
1512    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1513                      getI32Imm(vecType), getI32Imm(toType),
1514                      getI32Imm(toTypeWidth), N2, Chain };
1515    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1516  }
1517
1518  if (NVPTXST != NULL) {
1519    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1520    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1521    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1522  }
1523
1524  return NVPTXST;
1525}
1526
1527SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1528  SDValue Chain = N->getOperand(0);
1529  SDValue Op1 = N->getOperand(1);
1530  SDValue Addr, Offset, Base;
1531  unsigned Opcode;
1532  SDLoc DL(N);
1533  SDNode *ST;
1534  EVT EltVT = Op1.getValueType();
1535  MemSDNode *MemSD = cast<MemSDNode>(N);
1536  EVT StoreVT = MemSD->getMemoryVT();
1537
1538  // Address Space Setting
1539  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
1540
1541  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1542    report_fatal_error("Cannot store to pointer that points to constant "
1543                       "memory space");
1544  }
1545
1546  // Volatile Setting
1547  // - .volatile is only availalble for .global and .shared
1548  bool IsVolatile = MemSD->isVolatile();
1549  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1550      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1551      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1552    IsVolatile = false;
1553
1554  // Type Setting: toType + toTypeWidth
1555  // - for integer type, always use 'u'
1556  assert(StoreVT.isSimple() && "Store value is not simple");
1557  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1558  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1559  unsigned ToType;
1560  if (ScalarVT.isFloatingPoint())
1561    ToType = NVPTX::PTXLdStInstCode::Float;
1562  else
1563    ToType = NVPTX::PTXLdStInstCode::Unsigned;
1564
1565  SmallVector<SDValue, 12> StOps;
1566  SDValue N2;
1567  unsigned VecType;
1568
1569  switch (N->getOpcode()) {
1570  case NVPTXISD::StoreV2:
1571    VecType = NVPTX::PTXLdStInstCode::V2;
1572    StOps.push_back(N->getOperand(1));
1573    StOps.push_back(N->getOperand(2));
1574    N2 = N->getOperand(3);
1575    break;
1576  case NVPTXISD::StoreV4:
1577    VecType = NVPTX::PTXLdStInstCode::V4;
1578    StOps.push_back(N->getOperand(1));
1579    StOps.push_back(N->getOperand(2));
1580    StOps.push_back(N->getOperand(3));
1581    StOps.push_back(N->getOperand(4));
1582    N2 = N->getOperand(5);
1583    break;
1584  default:
1585    return NULL;
1586  }
1587
1588  StOps.push_back(getI32Imm(IsVolatile));
1589  StOps.push_back(getI32Imm(CodeAddrSpace));
1590  StOps.push_back(getI32Imm(VecType));
1591  StOps.push_back(getI32Imm(ToType));
1592  StOps.push_back(getI32Imm(ToTypeWidth));
1593
1594  if (SelectDirectAddr(N2, Addr)) {
1595    switch (N->getOpcode()) {
1596    default:
1597      return NULL;
1598    case NVPTXISD::StoreV2:
1599      switch (EltVT.getSimpleVT().SimpleTy) {
1600      default:
1601        return NULL;
1602      case MVT::i8:
1603        Opcode = NVPTX::STV_i8_v2_avar;
1604        break;
1605      case MVT::i16:
1606        Opcode = NVPTX::STV_i16_v2_avar;
1607        break;
1608      case MVT::i32:
1609        Opcode = NVPTX::STV_i32_v2_avar;
1610        break;
1611      case MVT::i64:
1612        Opcode = NVPTX::STV_i64_v2_avar;
1613        break;
1614      case MVT::f32:
1615        Opcode = NVPTX::STV_f32_v2_avar;
1616        break;
1617      case MVT::f64:
1618        Opcode = NVPTX::STV_f64_v2_avar;
1619        break;
1620      }
1621      break;
1622    case NVPTXISD::StoreV4:
1623      switch (EltVT.getSimpleVT().SimpleTy) {
1624      default:
1625        return NULL;
1626      case MVT::i8:
1627        Opcode = NVPTX::STV_i8_v4_avar;
1628        break;
1629      case MVT::i16:
1630        Opcode = NVPTX::STV_i16_v4_avar;
1631        break;
1632      case MVT::i32:
1633        Opcode = NVPTX::STV_i32_v4_avar;
1634        break;
1635      case MVT::f32:
1636        Opcode = NVPTX::STV_f32_v4_avar;
1637        break;
1638      }
1639      break;
1640    }
1641    StOps.push_back(Addr);
1642  } else if (Subtarget.is64Bit()
1643                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1644                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1645    switch (N->getOpcode()) {
1646    default:
1647      return NULL;
1648    case NVPTXISD::StoreV2:
1649      switch (EltVT.getSimpleVT().SimpleTy) {
1650      default:
1651        return NULL;
1652      case MVT::i8:
1653        Opcode = NVPTX::STV_i8_v2_asi;
1654        break;
1655      case MVT::i16:
1656        Opcode = NVPTX::STV_i16_v2_asi;
1657        break;
1658      case MVT::i32:
1659        Opcode = NVPTX::STV_i32_v2_asi;
1660        break;
1661      case MVT::i64:
1662        Opcode = NVPTX::STV_i64_v2_asi;
1663        break;
1664      case MVT::f32:
1665        Opcode = NVPTX::STV_f32_v2_asi;
1666        break;
1667      case MVT::f64:
1668        Opcode = NVPTX::STV_f64_v2_asi;
1669        break;
1670      }
1671      break;
1672    case NVPTXISD::StoreV4:
1673      switch (EltVT.getSimpleVT().SimpleTy) {
1674      default:
1675        return NULL;
1676      case MVT::i8:
1677        Opcode = NVPTX::STV_i8_v4_asi;
1678        break;
1679      case MVT::i16:
1680        Opcode = NVPTX::STV_i16_v4_asi;
1681        break;
1682      case MVT::i32:
1683        Opcode = NVPTX::STV_i32_v4_asi;
1684        break;
1685      case MVT::f32:
1686        Opcode = NVPTX::STV_f32_v4_asi;
1687        break;
1688      }
1689      break;
1690    }
1691    StOps.push_back(Base);
1692    StOps.push_back(Offset);
1693  } else if (Subtarget.is64Bit()
1694                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1695                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1696    if (Subtarget.is64Bit()) {
1697      switch (N->getOpcode()) {
1698      default:
1699        return NULL;
1700      case NVPTXISD::StoreV2:
1701        switch (EltVT.getSimpleVT().SimpleTy) {
1702        default:
1703          return NULL;
1704        case MVT::i8:
1705          Opcode = NVPTX::STV_i8_v2_ari_64;
1706          break;
1707        case MVT::i16:
1708          Opcode = NVPTX::STV_i16_v2_ari_64;
1709          break;
1710        case MVT::i32:
1711          Opcode = NVPTX::STV_i32_v2_ari_64;
1712          break;
1713        case MVT::i64:
1714          Opcode = NVPTX::STV_i64_v2_ari_64;
1715          break;
1716        case MVT::f32:
1717          Opcode = NVPTX::STV_f32_v2_ari_64;
1718          break;
1719        case MVT::f64:
1720          Opcode = NVPTX::STV_f64_v2_ari_64;
1721          break;
1722        }
1723        break;
1724      case NVPTXISD::StoreV4:
1725        switch (EltVT.getSimpleVT().SimpleTy) {
1726        default:
1727          return NULL;
1728        case MVT::i8:
1729          Opcode = NVPTX::STV_i8_v4_ari_64;
1730          break;
1731        case MVT::i16:
1732          Opcode = NVPTX::STV_i16_v4_ari_64;
1733          break;
1734        case MVT::i32:
1735          Opcode = NVPTX::STV_i32_v4_ari_64;
1736          break;
1737        case MVT::f32:
1738          Opcode = NVPTX::STV_f32_v4_ari_64;
1739          break;
1740        }
1741        break;
1742      }
1743    } else {
1744      switch (N->getOpcode()) {
1745      default:
1746        return NULL;
1747      case NVPTXISD::StoreV2:
1748        switch (EltVT.getSimpleVT().SimpleTy) {
1749        default:
1750          return NULL;
1751        case MVT::i8:
1752          Opcode = NVPTX::STV_i8_v2_ari;
1753          break;
1754        case MVT::i16:
1755          Opcode = NVPTX::STV_i16_v2_ari;
1756          break;
1757        case MVT::i32:
1758          Opcode = NVPTX::STV_i32_v2_ari;
1759          break;
1760        case MVT::i64:
1761          Opcode = NVPTX::STV_i64_v2_ari;
1762          break;
1763        case MVT::f32:
1764          Opcode = NVPTX::STV_f32_v2_ari;
1765          break;
1766        case MVT::f64:
1767          Opcode = NVPTX::STV_f64_v2_ari;
1768          break;
1769        }
1770        break;
1771      case NVPTXISD::StoreV4:
1772        switch (EltVT.getSimpleVT().SimpleTy) {
1773        default:
1774          return NULL;
1775        case MVT::i8:
1776          Opcode = NVPTX::STV_i8_v4_ari;
1777          break;
1778        case MVT::i16:
1779          Opcode = NVPTX::STV_i16_v4_ari;
1780          break;
1781        case MVT::i32:
1782          Opcode = NVPTX::STV_i32_v4_ari;
1783          break;
1784        case MVT::f32:
1785          Opcode = NVPTX::STV_f32_v4_ari;
1786          break;
1787        }
1788        break;
1789      }
1790    }
1791    StOps.push_back(Base);
1792    StOps.push_back(Offset);
1793  } else {
1794    if (Subtarget.is64Bit()) {
1795      switch (N->getOpcode()) {
1796      default:
1797        return NULL;
1798      case NVPTXISD::StoreV2:
1799        switch (EltVT.getSimpleVT().SimpleTy) {
1800        default:
1801          return NULL;
1802        case MVT::i8:
1803          Opcode = NVPTX::STV_i8_v2_areg_64;
1804          break;
1805        case MVT::i16:
1806          Opcode = NVPTX::STV_i16_v2_areg_64;
1807          break;
1808        case MVT::i32:
1809          Opcode = NVPTX::STV_i32_v2_areg_64;
1810          break;
1811        case MVT::i64:
1812          Opcode = NVPTX::STV_i64_v2_areg_64;
1813          break;
1814        case MVT::f32:
1815          Opcode = NVPTX::STV_f32_v2_areg_64;
1816          break;
1817        case MVT::f64:
1818          Opcode = NVPTX::STV_f64_v2_areg_64;
1819          break;
1820        }
1821        break;
1822      case NVPTXISD::StoreV4:
1823        switch (EltVT.getSimpleVT().SimpleTy) {
1824        default:
1825          return NULL;
1826        case MVT::i8:
1827          Opcode = NVPTX::STV_i8_v4_areg_64;
1828          break;
1829        case MVT::i16:
1830          Opcode = NVPTX::STV_i16_v4_areg_64;
1831          break;
1832        case MVT::i32:
1833          Opcode = NVPTX::STV_i32_v4_areg_64;
1834          break;
1835        case MVT::f32:
1836          Opcode = NVPTX::STV_f32_v4_areg_64;
1837          break;
1838        }
1839        break;
1840      }
1841    } else {
1842      switch (N->getOpcode()) {
1843      default:
1844        return NULL;
1845      case NVPTXISD::StoreV2:
1846        switch (EltVT.getSimpleVT().SimpleTy) {
1847        default:
1848          return NULL;
1849        case MVT::i8:
1850          Opcode = NVPTX::STV_i8_v2_areg;
1851          break;
1852        case MVT::i16:
1853          Opcode = NVPTX::STV_i16_v2_areg;
1854          break;
1855        case MVT::i32:
1856          Opcode = NVPTX::STV_i32_v2_areg;
1857          break;
1858        case MVT::i64:
1859          Opcode = NVPTX::STV_i64_v2_areg;
1860          break;
1861        case MVT::f32:
1862          Opcode = NVPTX::STV_f32_v2_areg;
1863          break;
1864        case MVT::f64:
1865          Opcode = NVPTX::STV_f64_v2_areg;
1866          break;
1867        }
1868        break;
1869      case NVPTXISD::StoreV4:
1870        switch (EltVT.getSimpleVT().SimpleTy) {
1871        default:
1872          return NULL;
1873        case MVT::i8:
1874          Opcode = NVPTX::STV_i8_v4_areg;
1875          break;
1876        case MVT::i16:
1877          Opcode = NVPTX::STV_i16_v4_areg;
1878          break;
1879        case MVT::i32:
1880          Opcode = NVPTX::STV_i32_v4_areg;
1881          break;
1882        case MVT::f32:
1883          Opcode = NVPTX::STV_f32_v4_areg;
1884          break;
1885        }
1886        break;
1887      }
1888    }
1889    StOps.push_back(N2);
1890  }
1891
1892  StOps.push_back(Chain);
1893
1894  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
1895
1896  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1897  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1898  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1899
1900  return ST;
1901}
1902
1903SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
1904  SDValue Chain = Node->getOperand(0);
1905  SDValue Offset = Node->getOperand(2);
1906  SDValue Flag = Node->getOperand(3);
1907  SDLoc DL(Node);
1908  MemSDNode *Mem = cast<MemSDNode>(Node);
1909
1910  unsigned VecSize;
1911  switch (Node->getOpcode()) {
1912  default:
1913    return NULL;
1914  case NVPTXISD::LoadParam:
1915    VecSize = 1;
1916    break;
1917  case NVPTXISD::LoadParamV2:
1918    VecSize = 2;
1919    break;
1920  case NVPTXISD::LoadParamV4:
1921    VecSize = 4;
1922    break;
1923  }
1924
1925  EVT EltVT = Node->getValueType(0);
1926  EVT MemVT = Mem->getMemoryVT();
1927
1928  unsigned Opc = 0;
1929
1930  switch (VecSize) {
1931  default:
1932    return NULL;
1933  case 1:
1934    switch (MemVT.getSimpleVT().SimpleTy) {
1935    default:
1936      return NULL;
1937    case MVT::i1:
1938      Opc = NVPTX::LoadParamMemI8;
1939      break;
1940    case MVT::i8:
1941      Opc = NVPTX::LoadParamMemI8;
1942      break;
1943    case MVT::i16:
1944      Opc = NVPTX::LoadParamMemI16;
1945      break;
1946    case MVT::i32:
1947      Opc = NVPTX::LoadParamMemI32;
1948      break;
1949    case MVT::i64:
1950      Opc = NVPTX::LoadParamMemI64;
1951      break;
1952    case MVT::f32:
1953      Opc = NVPTX::LoadParamMemF32;
1954      break;
1955    case MVT::f64:
1956      Opc = NVPTX::LoadParamMemF64;
1957      break;
1958    }
1959    break;
1960  case 2:
1961    switch (MemVT.getSimpleVT().SimpleTy) {
1962    default:
1963      return NULL;
1964    case MVT::i1:
1965      Opc = NVPTX::LoadParamMemV2I8;
1966      break;
1967    case MVT::i8:
1968      Opc = NVPTX::LoadParamMemV2I8;
1969      break;
1970    case MVT::i16:
1971      Opc = NVPTX::LoadParamMemV2I16;
1972      break;
1973    case MVT::i32:
1974      Opc = NVPTX::LoadParamMemV2I32;
1975      break;
1976    case MVT::i64:
1977      Opc = NVPTX::LoadParamMemV2I64;
1978      break;
1979    case MVT::f32:
1980      Opc = NVPTX::LoadParamMemV2F32;
1981      break;
1982    case MVT::f64:
1983      Opc = NVPTX::LoadParamMemV2F64;
1984      break;
1985    }
1986    break;
1987  case 4:
1988    switch (MemVT.getSimpleVT().SimpleTy) {
1989    default:
1990      return NULL;
1991    case MVT::i1:
1992      Opc = NVPTX::LoadParamMemV4I8;
1993      break;
1994    case MVT::i8:
1995      Opc = NVPTX::LoadParamMemV4I8;
1996      break;
1997    case MVT::i16:
1998      Opc = NVPTX::LoadParamMemV4I16;
1999      break;
2000    case MVT::i32:
2001      Opc = NVPTX::LoadParamMemV4I32;
2002      break;
2003    case MVT::f32:
2004      Opc = NVPTX::LoadParamMemV4F32;
2005      break;
2006    }
2007    break;
2008  }
2009
2010  SDVTList VTs;
2011  if (VecSize == 1) {
2012    VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2013  } else if (VecSize == 2) {
2014    VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2015  } else {
2016    EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2017    VTs = CurDAG->getVTList(&EVTs[0], 5);
2018  }
2019
2020  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2021
2022  SmallVector<SDValue, 2> Ops;
2023  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2024  Ops.push_back(Chain);
2025  Ops.push_back(Flag);
2026
2027  SDNode *Ret =
2028      CurDAG->getMachineNode(Opc, DL, VTs, Ops);
2029  return Ret;
2030}
2031
2032SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2033  SDLoc DL(N);
2034  SDValue Chain = N->getOperand(0);
2035  SDValue Offset = N->getOperand(1);
2036  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2037  MemSDNode *Mem = cast<MemSDNode>(N);
2038
2039  // How many elements do we have?
2040  unsigned NumElts = 1;
2041  switch (N->getOpcode()) {
2042  default:
2043    return NULL;
2044  case NVPTXISD::StoreRetval:
2045    NumElts = 1;
2046    break;
2047  case NVPTXISD::StoreRetvalV2:
2048    NumElts = 2;
2049    break;
2050  case NVPTXISD::StoreRetvalV4:
2051    NumElts = 4;
2052    break;
2053  }
2054
2055  // Build vector of operands
2056  SmallVector<SDValue, 6> Ops;
2057  for (unsigned i = 0; i < NumElts; ++i)
2058    Ops.push_back(N->getOperand(i + 2));
2059  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2060  Ops.push_back(Chain);
2061
2062  // Determine target opcode
2063  // If we have an i1, use an 8-bit store. The lowering code in
2064  // NVPTXISelLowering will have already emitted an upcast.
2065  unsigned Opcode = 0;
2066  switch (NumElts) {
2067  default:
2068    return NULL;
2069  case 1:
2070    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2071    default:
2072      return NULL;
2073    case MVT::i1:
2074      Opcode = NVPTX::StoreRetvalI8;
2075      break;
2076    case MVT::i8:
2077      Opcode = NVPTX::StoreRetvalI8;
2078      break;
2079    case MVT::i16:
2080      Opcode = NVPTX::StoreRetvalI16;
2081      break;
2082    case MVT::i32:
2083      Opcode = NVPTX::StoreRetvalI32;
2084      break;
2085    case MVT::i64:
2086      Opcode = NVPTX::StoreRetvalI64;
2087      break;
2088    case MVT::f32:
2089      Opcode = NVPTX::StoreRetvalF32;
2090      break;
2091    case MVT::f64:
2092      Opcode = NVPTX::StoreRetvalF64;
2093      break;
2094    }
2095    break;
2096  case 2:
2097    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2098    default:
2099      return NULL;
2100    case MVT::i1:
2101      Opcode = NVPTX::StoreRetvalV2I8;
2102      break;
2103    case MVT::i8:
2104      Opcode = NVPTX::StoreRetvalV2I8;
2105      break;
2106    case MVT::i16:
2107      Opcode = NVPTX::StoreRetvalV2I16;
2108      break;
2109    case MVT::i32:
2110      Opcode = NVPTX::StoreRetvalV2I32;
2111      break;
2112    case MVT::i64:
2113      Opcode = NVPTX::StoreRetvalV2I64;
2114      break;
2115    case MVT::f32:
2116      Opcode = NVPTX::StoreRetvalV2F32;
2117      break;
2118    case MVT::f64:
2119      Opcode = NVPTX::StoreRetvalV2F64;
2120      break;
2121    }
2122    break;
2123  case 4:
2124    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2125    default:
2126      return NULL;
2127    case MVT::i1:
2128      Opcode = NVPTX::StoreRetvalV4I8;
2129      break;
2130    case MVT::i8:
2131      Opcode = NVPTX::StoreRetvalV4I8;
2132      break;
2133    case MVT::i16:
2134      Opcode = NVPTX::StoreRetvalV4I16;
2135      break;
2136    case MVT::i32:
2137      Opcode = NVPTX::StoreRetvalV4I32;
2138      break;
2139    case MVT::f32:
2140      Opcode = NVPTX::StoreRetvalV4F32;
2141      break;
2142    }
2143    break;
2144  }
2145
2146  SDNode *Ret =
2147      CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2148  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2149  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2150  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2151
2152  return Ret;
2153}
2154
2155SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2156  SDLoc DL(N);
2157  SDValue Chain = N->getOperand(0);
2158  SDValue Param = N->getOperand(1);
2159  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2160  SDValue Offset = N->getOperand(2);
2161  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2162  MemSDNode *Mem = cast<MemSDNode>(N);
2163  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2164
2165  // How many elements do we have?
2166  unsigned NumElts = 1;
2167  switch (N->getOpcode()) {
2168  default:
2169    return NULL;
2170  case NVPTXISD::StoreParamU32:
2171  case NVPTXISD::StoreParamS32:
2172  case NVPTXISD::StoreParam:
2173    NumElts = 1;
2174    break;
2175  case NVPTXISD::StoreParamV2:
2176    NumElts = 2;
2177    break;
2178  case NVPTXISD::StoreParamV4:
2179    NumElts = 4;
2180    break;
2181  }
2182
2183  // Build vector of operands
2184  SmallVector<SDValue, 8> Ops;
2185  for (unsigned i = 0; i < NumElts; ++i)
2186    Ops.push_back(N->getOperand(i + 3));
2187  Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2188  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2189  Ops.push_back(Chain);
2190  Ops.push_back(Flag);
2191
2192  // Determine target opcode
2193  // If we have an i1, use an 8-bit store. The lowering code in
2194  // NVPTXISelLowering will have already emitted an upcast.
2195  unsigned Opcode = 0;
2196  switch (N->getOpcode()) {
2197  default:
2198    switch (NumElts) {
2199    default:
2200      return NULL;
2201    case 1:
2202      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2203      default:
2204        return NULL;
2205      case MVT::i1:
2206        Opcode = NVPTX::StoreParamI8;
2207        break;
2208      case MVT::i8:
2209        Opcode = NVPTX::StoreParamI8;
2210        break;
2211      case MVT::i16:
2212        Opcode = NVPTX::StoreParamI16;
2213        break;
2214      case MVT::i32:
2215        Opcode = NVPTX::StoreParamI32;
2216        break;
2217      case MVT::i64:
2218        Opcode = NVPTX::StoreParamI64;
2219        break;
2220      case MVT::f32:
2221        Opcode = NVPTX::StoreParamF32;
2222        break;
2223      case MVT::f64:
2224        Opcode = NVPTX::StoreParamF64;
2225        break;
2226      }
2227      break;
2228    case 2:
2229      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2230      default:
2231        return NULL;
2232      case MVT::i1:
2233        Opcode = NVPTX::StoreParamV2I8;
2234        break;
2235      case MVT::i8:
2236        Opcode = NVPTX::StoreParamV2I8;
2237        break;
2238      case MVT::i16:
2239        Opcode = NVPTX::StoreParamV2I16;
2240        break;
2241      case MVT::i32:
2242        Opcode = NVPTX::StoreParamV2I32;
2243        break;
2244      case MVT::i64:
2245        Opcode = NVPTX::StoreParamV2I64;
2246        break;
2247      case MVT::f32:
2248        Opcode = NVPTX::StoreParamV2F32;
2249        break;
2250      case MVT::f64:
2251        Opcode = NVPTX::StoreParamV2F64;
2252        break;
2253      }
2254      break;
2255    case 4:
2256      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2257      default:
2258        return NULL;
2259      case MVT::i1:
2260        Opcode = NVPTX::StoreParamV4I8;
2261        break;
2262      case MVT::i8:
2263        Opcode = NVPTX::StoreParamV4I8;
2264        break;
2265      case MVT::i16:
2266        Opcode = NVPTX::StoreParamV4I16;
2267        break;
2268      case MVT::i32:
2269        Opcode = NVPTX::StoreParamV4I32;
2270        break;
2271      case MVT::f32:
2272        Opcode = NVPTX::StoreParamV4F32;
2273        break;
2274      }
2275      break;
2276    }
2277    break;
2278  // Special case: if we have a sign-extend/zero-extend node, insert the
2279  // conversion instruction first, and use that as the value operand to
2280  // the selected StoreParam node.
2281  case NVPTXISD::StoreParamU32: {
2282    Opcode = NVPTX::StoreParamI32;
2283    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2284                                                MVT::i32);
2285    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2286                                         MVT::i32, Ops[0], CvtNone);
2287    Ops[0] = SDValue(Cvt, 0);
2288    break;
2289  }
2290  case NVPTXISD::StoreParamS32: {
2291    Opcode = NVPTX::StoreParamI32;
2292    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2293                                                MVT::i32);
2294    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2295                                         MVT::i32, Ops[0], CvtNone);
2296    Ops[0] = SDValue(Cvt, 0);
2297    break;
2298  }
2299  }
2300
2301  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2302  SDNode *Ret =
2303      CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
2304  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2305  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2306  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2307
2308  return Ret;
2309}
2310
2311// SelectDirectAddr - Match a direct address for DAG.
2312// A direct address could be a globaladdress or externalsymbol.
2313bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
2314  // Return true if TGA or ES.
2315  if (N.getOpcode() == ISD::TargetGlobalAddress ||
2316      N.getOpcode() == ISD::TargetExternalSymbol) {
2317    Address = N;
2318    return true;
2319  }
2320  if (N.getOpcode() == NVPTXISD::Wrapper) {
2321    Address = N.getOperand(0);
2322    return true;
2323  }
2324  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2325    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
2326    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
2327      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
2328        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
2329  }
2330  return false;
2331}
2332
2333// symbol+offset
2334bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
2335    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
2336  if (Addr.getOpcode() == ISD::ADD) {
2337    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2338      SDValue base = Addr.getOperand(0);
2339      if (SelectDirectAddr(base, Base)) {
2340        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
2341        return true;
2342      }
2343    }
2344  }
2345  return false;
2346}
2347
2348// symbol+offset
2349bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
2350                                     SDValue &Base, SDValue &Offset) {
2351  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
2352}
2353
2354// symbol+offset
2355bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
2356                                       SDValue &Base, SDValue &Offset) {
2357  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
2358}
2359
2360// register+offset
2361bool NVPTXDAGToDAGISel::SelectADDRri_imp(
2362    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
2363  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2364    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
2365    Offset = CurDAG->getTargetConstant(0, mvt);
2366    return true;
2367  }
2368  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
2369      Addr.getOpcode() == ISD::TargetGlobalAddress)
2370    return false; // direct calls.
2371
2372  if (Addr.getOpcode() == ISD::ADD) {
2373    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
2374      return false;
2375    }
2376    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2377      if (FrameIndexSDNode *FIN =
2378              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
2379        // Constant offset from frame ref.
2380        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
2381      else
2382        Base = Addr.getOperand(0);
2383      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
2384      return true;
2385    }
2386  }
2387  return false;
2388}
2389
2390// register+offset
2391bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
2392                                     SDValue &Base, SDValue &Offset) {
2393  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
2394}
2395
2396// register+offset
2397bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
2398                                       SDValue &Base, SDValue &Offset) {
2399  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
2400}
2401
2402bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
2403                                                 unsigned int spN) const {
2404  const Value *Src = NULL;
2405  // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
2406  // the classof() for MemSDNode does not include MemIntrinsicSDNode
2407  // (See SelectionDAGNodes.h). So we need to check for both.
2408  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
2409    Src = mN->getSrcValue();
2410  } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
2411    Src = mN->getSrcValue();
2412  }
2413  if (!Src)
2414    return false;
2415  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
2416    return (PT->getAddressSpace() == spN);
2417  return false;
2418}
2419
2420/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
2421/// inline asm expressions.
2422bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
2423    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
2424  SDValue Op0, Op1;
2425  switch (ConstraintCode) {
2426  default:
2427    return true;
2428  case 'm': // memory
2429    if (SelectDirectAddr(Op, Op0)) {
2430      OutOps.push_back(Op0);
2431      OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
2432      return false;
2433    }
2434    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
2435      OutOps.push_back(Op0);
2436      OutOps.push_back(Op1);
2437      return false;
2438    }
2439    break;
2440  }
2441  return true;
2442}
2443
2444// Return true if N is a undef or a constant.
2445// If N was undef, return a (i8imm 0) in Retval
2446// If N was imm, convert it to i8imm and return in Retval
2447// Note: The convert to i8imm is required, otherwise the
2448// pattern matcher inserts a bunch of IMOVi8rr to convert
2449// the imm to i8imm, and this causes instruction selection
2450// to fail.
2451bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
2452  if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
2453    return false;
2454
2455  if (N.getOpcode() == ISD::UNDEF)
2456    Retval = CurDAG->getTargetConstant(0, MVT::i8);
2457  else {
2458    ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
2459    unsigned retval = cn->getZExtValue();
2460    Retval = CurDAG->getTargetConstant(retval, MVT::i8);
2461  }
2462  return true;
2463}
2464