NVPTXISelDAGToDAG.cpp revision b9c26dcb2438266567ce94570bf294d00d10cc87
1//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXISelDAGToDAG.h"
15#include "llvm/IR/GlobalValue.h"
16#include "llvm/IR/Instructions.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Support/Debug.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/raw_ostream.h"
21#include "llvm/Target/TargetIntrinsicInfo.h"
22
23#undef DEBUG_TYPE
24#define DEBUG_TYPE "nvptx-isel"
25
26using namespace llvm;
27
28static cl::opt<bool> UseFMADInstruction(
29    "nvptx-mad-enable", cl::ZeroOrMore,
30    cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
31    cl::init(false));
32
33static cl::opt<int>
34FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
35                 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
36                          " 1: do it  2: do it aggressively"),
37                 cl::init(2));
38
39static cl::opt<int> UsePrecDivF32(
40    "nvptx-prec-divf32", cl::ZeroOrMore,
41    cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
42             " IEEE Compliant F32 div.rnd if avaiable."),
43    cl::init(2));
44
45static cl::opt<bool>
46UsePrecSqrtF32("nvptx-prec-sqrtf32",
47          cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
48          cl::init(true));
49
50/// createNVPTXISelDag - This pass converts a legalized DAG into a
51/// NVPTX-specific DAG, ready for instruction scheduling.
52FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
53                                       llvm::CodeGenOpt::Level OptLevel) {
54  return new NVPTXDAGToDAGISel(TM, OptLevel);
55}
56
57NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
58                                     CodeGenOpt::Level OptLevel)
59    : SelectionDAGISel(tm, OptLevel),
60      Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
61  // Always do fma.f32 fpcontract if the target supports the instruction.
62  // Always do fma.f64 fpcontract if the target supports the instruction.
63  // Do mad.f32 is nvptx-mad-enable is specified and the target does not
64  // support fma.f32.
65
66  doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
67  doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
68  doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
69  doFMAF32AGG =
70      (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
71  doFMAF64AGG =
72      (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
73
74  allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
75
76  UseF32FTZ = false;
77
78  doMulWide = (OptLevel > 0);
79
80  // Decide how to translate f32 div
81  do_DIVF32_PREC = UsePrecDivF32;
82  // Decide how to translate f32 sqrt
83  do_SQRTF32_PREC = UsePrecSqrtF32;
84  // sm less than sm_20 does not support div.rnd. Use div.full.
85  if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
86    do_DIVF32_PREC = 1;
87
88}
89
90/// Select - Select instructions not customized! Used for
91/// expanded, promoted and normal instructions.
92SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
93
94  if (N->isMachineOpcode())
95    return NULL; // Already selected.
96
97  SDNode *ResNode = NULL;
98  switch (N->getOpcode()) {
99  case ISD::LOAD:
100    ResNode = SelectLoad(N);
101    break;
102  case ISD::STORE:
103    ResNode = SelectStore(N);
104    break;
105  case NVPTXISD::LoadV2:
106  case NVPTXISD::LoadV4:
107    ResNode = SelectLoadVector(N);
108    break;
109  case NVPTXISD::LDGV2:
110  case NVPTXISD::LDGV4:
111  case NVPTXISD::LDUV2:
112  case NVPTXISD::LDUV4:
113    ResNode = SelectLDGLDUVector(N);
114    break;
115  case NVPTXISD::StoreV2:
116  case NVPTXISD::StoreV4:
117    ResNode = SelectStoreVector(N);
118    break;
119  default:
120    break;
121  }
122  if (ResNode)
123    return ResNode;
124  return SelectCode(N);
125}
126
127static unsigned int getCodeAddrSpace(MemSDNode *N,
128                                     const NVPTXSubtarget &Subtarget) {
129  const Value *Src = N->getSrcValue();
130  if (!Src)
131    return NVPTX::PTXLdStInstCode::LOCAL;
132
133  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
134    switch (PT->getAddressSpace()) {
135    case llvm::ADDRESS_SPACE_LOCAL:
136      return NVPTX::PTXLdStInstCode::LOCAL;
137    case llvm::ADDRESS_SPACE_GLOBAL:
138      return NVPTX::PTXLdStInstCode::GLOBAL;
139    case llvm::ADDRESS_SPACE_SHARED:
140      return NVPTX::PTXLdStInstCode::SHARED;
141    case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
142      return NVPTX::PTXLdStInstCode::CONSTANT;
143    case llvm::ADDRESS_SPACE_GENERIC:
144      return NVPTX::PTXLdStInstCode::GENERIC;
145    case llvm::ADDRESS_SPACE_PARAM:
146      return NVPTX::PTXLdStInstCode::PARAM;
147    case llvm::ADDRESS_SPACE_CONST:
148      // If the arch supports generic address space, translate it to GLOBAL
149      // for correctness.
150      // If the arch does not support generic address space, then the arch
151      // does not really support ADDRESS_SPACE_CONST, translate it to
152      // to CONSTANT for better performance.
153      if (Subtarget.hasGenericLdSt())
154        return NVPTX::PTXLdStInstCode::GLOBAL;
155      else
156        return NVPTX::PTXLdStInstCode::CONSTANT;
157    default:
158      break;
159    }
160  }
161  return NVPTX::PTXLdStInstCode::LOCAL;
162}
163
164SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
165  DebugLoc dl = N->getDebugLoc();
166  LoadSDNode *LD = cast<LoadSDNode>(N);
167  EVT LoadedVT = LD->getMemoryVT();
168  SDNode *NVPTXLD = NULL;
169
170  // do not support pre/post inc/dec
171  if (LD->isIndexed())
172    return NULL;
173
174  if (!LoadedVT.isSimple())
175    return NULL;
176
177  // Address Space Setting
178  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
179
180  // Volatile Setting
181  // - .volatile is only availalble for .global and .shared
182  bool isVolatile = LD->isVolatile();
183  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
184      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
185      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
186    isVolatile = false;
187
188  // Vector Setting
189  MVT SimpleVT = LoadedVT.getSimpleVT();
190  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
191  if (SimpleVT.isVector()) {
192    unsigned num = SimpleVT.getVectorNumElements();
193    if (num == 2)
194      vecType = NVPTX::PTXLdStInstCode::V2;
195    else if (num == 4)
196      vecType = NVPTX::PTXLdStInstCode::V4;
197    else
198      return NULL;
199  }
200
201  // Type Setting: fromType + fromTypeWidth
202  //
203  // Sign   : ISD::SEXTLOAD
204  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
205  //          type is integer
206  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
207  MVT ScalarVT = SimpleVT.getScalarType();
208  unsigned fromTypeWidth = ScalarVT.getSizeInBits();
209  unsigned int fromType;
210  if ((LD->getExtensionType() == ISD::SEXTLOAD))
211    fromType = NVPTX::PTXLdStInstCode::Signed;
212  else if (ScalarVT.isFloatingPoint())
213    fromType = NVPTX::PTXLdStInstCode::Float;
214  else
215    fromType = NVPTX::PTXLdStInstCode::Unsigned;
216
217  // Create the machine instruction DAG
218  SDValue Chain = N->getOperand(0);
219  SDValue N1 = N->getOperand(1);
220  SDValue Addr;
221  SDValue Offset, Base;
222  unsigned Opcode;
223  MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
224
225  if (SelectDirectAddr(N1, Addr)) {
226    switch (TargetVT) {
227    case MVT::i8:
228      Opcode = NVPTX::LD_i8_avar;
229      break;
230    case MVT::i16:
231      Opcode = NVPTX::LD_i16_avar;
232      break;
233    case MVT::i32:
234      Opcode = NVPTX::LD_i32_avar;
235      break;
236    case MVT::i64:
237      Opcode = NVPTX::LD_i64_avar;
238      break;
239    case MVT::f32:
240      Opcode = NVPTX::LD_f32_avar;
241      break;
242    case MVT::f64:
243      Opcode = NVPTX::LD_f64_avar;
244      break;
245    default:
246      return NULL;
247    }
248    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
249                      getI32Imm(vecType), getI32Imm(fromType),
250                      getI32Imm(fromTypeWidth), Addr, Chain };
251    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
252  } else if (Subtarget.is64Bit()
253                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
254                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
255    switch (TargetVT) {
256    case MVT::i8:
257      Opcode = NVPTX::LD_i8_asi;
258      break;
259    case MVT::i16:
260      Opcode = NVPTX::LD_i16_asi;
261      break;
262    case MVT::i32:
263      Opcode = NVPTX::LD_i32_asi;
264      break;
265    case MVT::i64:
266      Opcode = NVPTX::LD_i64_asi;
267      break;
268    case MVT::f32:
269      Opcode = NVPTX::LD_f32_asi;
270      break;
271    case MVT::f64:
272      Opcode = NVPTX::LD_f64_asi;
273      break;
274    default:
275      return NULL;
276    }
277    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
278                      getI32Imm(vecType), getI32Imm(fromType),
279                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
280    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
281  } else if (Subtarget.is64Bit()
282                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
283                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
284    if (Subtarget.is64Bit()) {
285      switch (TargetVT) {
286      case MVT::i8:
287        Opcode = NVPTX::LD_i8_ari_64;
288        break;
289      case MVT::i16:
290        Opcode = NVPTX::LD_i16_ari_64;
291        break;
292      case MVT::i32:
293        Opcode = NVPTX::LD_i32_ari_64;
294        break;
295      case MVT::i64:
296        Opcode = NVPTX::LD_i64_ari_64;
297        break;
298      case MVT::f32:
299        Opcode = NVPTX::LD_f32_ari_64;
300        break;
301      case MVT::f64:
302        Opcode = NVPTX::LD_f64_ari_64;
303        break;
304      default:
305        return NULL;
306      }
307    } else {
308      switch (TargetVT) {
309      case MVT::i8:
310        Opcode = NVPTX::LD_i8_ari;
311        break;
312      case MVT::i16:
313        Opcode = NVPTX::LD_i16_ari;
314        break;
315      case MVT::i32:
316        Opcode = NVPTX::LD_i32_ari;
317        break;
318      case MVT::i64:
319        Opcode = NVPTX::LD_i64_ari;
320        break;
321      case MVT::f32:
322        Opcode = NVPTX::LD_f32_ari;
323        break;
324      case MVT::f64:
325        Opcode = NVPTX::LD_f64_ari;
326        break;
327      default:
328        return NULL;
329      }
330    }
331    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
332                      getI32Imm(vecType), getI32Imm(fromType),
333                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
334    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
335  } else {
336    if (Subtarget.is64Bit()) {
337      switch (TargetVT) {
338      case MVT::i8:
339        Opcode = NVPTX::LD_i8_areg_64;
340        break;
341      case MVT::i16:
342        Opcode = NVPTX::LD_i16_areg_64;
343        break;
344      case MVT::i32:
345        Opcode = NVPTX::LD_i32_areg_64;
346        break;
347      case MVT::i64:
348        Opcode = NVPTX::LD_i64_areg_64;
349        break;
350      case MVT::f32:
351        Opcode = NVPTX::LD_f32_areg_64;
352        break;
353      case MVT::f64:
354        Opcode = NVPTX::LD_f64_areg_64;
355        break;
356      default:
357        return NULL;
358      }
359    } else {
360      switch (TargetVT) {
361      case MVT::i8:
362        Opcode = NVPTX::LD_i8_areg;
363        break;
364      case MVT::i16:
365        Opcode = NVPTX::LD_i16_areg;
366        break;
367      case MVT::i32:
368        Opcode = NVPTX::LD_i32_areg;
369        break;
370      case MVT::i64:
371        Opcode = NVPTX::LD_i64_areg;
372        break;
373      case MVT::f32:
374        Opcode = NVPTX::LD_f32_areg;
375        break;
376      case MVT::f64:
377        Opcode = NVPTX::LD_f64_areg;
378        break;
379      default:
380        return NULL;
381      }
382    }
383    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
384                      getI32Imm(vecType), getI32Imm(fromType),
385                      getI32Imm(fromTypeWidth), N1, Chain };
386    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
387  }
388
389  if (NVPTXLD != NULL) {
390    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
391    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
392    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
393  }
394
395  return NVPTXLD;
396}
397
398SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
399
400  SDValue Chain = N->getOperand(0);
401  SDValue Op1 = N->getOperand(1);
402  SDValue Addr, Offset, Base;
403  unsigned Opcode;
404  DebugLoc DL = N->getDebugLoc();
405  SDNode *LD;
406  MemSDNode *MemSD = cast<MemSDNode>(N);
407  EVT LoadedVT = MemSD->getMemoryVT();
408
409  if (!LoadedVT.isSimple())
410    return NULL;
411
412  // Address Space Setting
413  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
414
415  // Volatile Setting
416  // - .volatile is only availalble for .global and .shared
417  bool IsVolatile = MemSD->isVolatile();
418  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
419      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
420      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
421    IsVolatile = false;
422
423  // Vector Setting
424  MVT SimpleVT = LoadedVT.getSimpleVT();
425
426  // Type Setting: fromType + fromTypeWidth
427  //
428  // Sign   : ISD::SEXTLOAD
429  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
430  //          type is integer
431  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
432  MVT ScalarVT = SimpleVT.getScalarType();
433  unsigned FromTypeWidth = ScalarVT.getSizeInBits();
434  unsigned int FromType;
435  // The last operand holds the original LoadSDNode::getExtensionType() value
436  unsigned ExtensionType = cast<ConstantSDNode>(
437      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
438  if (ExtensionType == ISD::SEXTLOAD)
439    FromType = NVPTX::PTXLdStInstCode::Signed;
440  else if (ScalarVT.isFloatingPoint())
441    FromType = NVPTX::PTXLdStInstCode::Float;
442  else
443    FromType = NVPTX::PTXLdStInstCode::Unsigned;
444
445  unsigned VecType;
446
447  switch (N->getOpcode()) {
448  case NVPTXISD::LoadV2:
449    VecType = NVPTX::PTXLdStInstCode::V2;
450    break;
451  case NVPTXISD::LoadV4:
452    VecType = NVPTX::PTXLdStInstCode::V4;
453    break;
454  default:
455    return NULL;
456  }
457
458  EVT EltVT = N->getValueType(0);
459
460  if (SelectDirectAddr(Op1, Addr)) {
461    switch (N->getOpcode()) {
462    default:
463      return NULL;
464    case NVPTXISD::LoadV2:
465      switch (EltVT.getSimpleVT().SimpleTy) {
466      default:
467        return NULL;
468      case MVT::i8:
469        Opcode = NVPTX::LDV_i8_v2_avar;
470        break;
471      case MVT::i16:
472        Opcode = NVPTX::LDV_i16_v2_avar;
473        break;
474      case MVT::i32:
475        Opcode = NVPTX::LDV_i32_v2_avar;
476        break;
477      case MVT::i64:
478        Opcode = NVPTX::LDV_i64_v2_avar;
479        break;
480      case MVT::f32:
481        Opcode = NVPTX::LDV_f32_v2_avar;
482        break;
483      case MVT::f64:
484        Opcode = NVPTX::LDV_f64_v2_avar;
485        break;
486      }
487      break;
488    case NVPTXISD::LoadV4:
489      switch (EltVT.getSimpleVT().SimpleTy) {
490      default:
491        return NULL;
492      case MVT::i8:
493        Opcode = NVPTX::LDV_i8_v4_avar;
494        break;
495      case MVT::i16:
496        Opcode = NVPTX::LDV_i16_v4_avar;
497        break;
498      case MVT::i32:
499        Opcode = NVPTX::LDV_i32_v4_avar;
500        break;
501      case MVT::f32:
502        Opcode = NVPTX::LDV_f32_v4_avar;
503        break;
504      }
505      break;
506    }
507
508    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
509                      getI32Imm(VecType), getI32Imm(FromType),
510                      getI32Imm(FromTypeWidth), Addr, Chain };
511    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
512  } else if (Subtarget.is64Bit()
513                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
514                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
515    switch (N->getOpcode()) {
516    default:
517      return NULL;
518    case NVPTXISD::LoadV2:
519      switch (EltVT.getSimpleVT().SimpleTy) {
520      default:
521        return NULL;
522      case MVT::i8:
523        Opcode = NVPTX::LDV_i8_v2_asi;
524        break;
525      case MVT::i16:
526        Opcode = NVPTX::LDV_i16_v2_asi;
527        break;
528      case MVT::i32:
529        Opcode = NVPTX::LDV_i32_v2_asi;
530        break;
531      case MVT::i64:
532        Opcode = NVPTX::LDV_i64_v2_asi;
533        break;
534      case MVT::f32:
535        Opcode = NVPTX::LDV_f32_v2_asi;
536        break;
537      case MVT::f64:
538        Opcode = NVPTX::LDV_f64_v2_asi;
539        break;
540      }
541      break;
542    case NVPTXISD::LoadV4:
543      switch (EltVT.getSimpleVT().SimpleTy) {
544      default:
545        return NULL;
546      case MVT::i8:
547        Opcode = NVPTX::LDV_i8_v4_asi;
548        break;
549      case MVT::i16:
550        Opcode = NVPTX::LDV_i16_v4_asi;
551        break;
552      case MVT::i32:
553        Opcode = NVPTX::LDV_i32_v4_asi;
554        break;
555      case MVT::f32:
556        Opcode = NVPTX::LDV_f32_v4_asi;
557        break;
558      }
559      break;
560    }
561
562    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
563                      getI32Imm(VecType), getI32Imm(FromType),
564                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
565    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
566  } else if (Subtarget.is64Bit()
567                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
568                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
569    if (Subtarget.is64Bit()) {
570      switch (N->getOpcode()) {
571      default:
572        return NULL;
573      case NVPTXISD::LoadV2:
574        switch (EltVT.getSimpleVT().SimpleTy) {
575        default:
576          return NULL;
577        case MVT::i8:
578          Opcode = NVPTX::LDV_i8_v2_ari_64;
579          break;
580        case MVT::i16:
581          Opcode = NVPTX::LDV_i16_v2_ari_64;
582          break;
583        case MVT::i32:
584          Opcode = NVPTX::LDV_i32_v2_ari_64;
585          break;
586        case MVT::i64:
587          Opcode = NVPTX::LDV_i64_v2_ari_64;
588          break;
589        case MVT::f32:
590          Opcode = NVPTX::LDV_f32_v2_ari_64;
591          break;
592        case MVT::f64:
593          Opcode = NVPTX::LDV_f64_v2_ari_64;
594          break;
595        }
596        break;
597      case NVPTXISD::LoadV4:
598        switch (EltVT.getSimpleVT().SimpleTy) {
599        default:
600          return NULL;
601        case MVT::i8:
602          Opcode = NVPTX::LDV_i8_v4_ari_64;
603          break;
604        case MVT::i16:
605          Opcode = NVPTX::LDV_i16_v4_ari_64;
606          break;
607        case MVT::i32:
608          Opcode = NVPTX::LDV_i32_v4_ari_64;
609          break;
610        case MVT::f32:
611          Opcode = NVPTX::LDV_f32_v4_ari_64;
612          break;
613        }
614        break;
615      }
616    } else {
617      switch (N->getOpcode()) {
618      default:
619        return NULL;
620      case NVPTXISD::LoadV2:
621        switch (EltVT.getSimpleVT().SimpleTy) {
622        default:
623          return NULL;
624        case MVT::i8:
625          Opcode = NVPTX::LDV_i8_v2_ari;
626          break;
627        case MVT::i16:
628          Opcode = NVPTX::LDV_i16_v2_ari;
629          break;
630        case MVT::i32:
631          Opcode = NVPTX::LDV_i32_v2_ari;
632          break;
633        case MVT::i64:
634          Opcode = NVPTX::LDV_i64_v2_ari;
635          break;
636        case MVT::f32:
637          Opcode = NVPTX::LDV_f32_v2_ari;
638          break;
639        case MVT::f64:
640          Opcode = NVPTX::LDV_f64_v2_ari;
641          break;
642        }
643        break;
644      case NVPTXISD::LoadV4:
645        switch (EltVT.getSimpleVT().SimpleTy) {
646        default:
647          return NULL;
648        case MVT::i8:
649          Opcode = NVPTX::LDV_i8_v4_ari;
650          break;
651        case MVT::i16:
652          Opcode = NVPTX::LDV_i16_v4_ari;
653          break;
654        case MVT::i32:
655          Opcode = NVPTX::LDV_i32_v4_ari;
656          break;
657        case MVT::f32:
658          Opcode = NVPTX::LDV_f32_v4_ari;
659          break;
660        }
661        break;
662      }
663    }
664
665    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
666                      getI32Imm(VecType), getI32Imm(FromType),
667                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
668
669    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
670  } else {
671    if (Subtarget.is64Bit()) {
672      switch (N->getOpcode()) {
673      default:
674        return NULL;
675      case NVPTXISD::LoadV2:
676        switch (EltVT.getSimpleVT().SimpleTy) {
677        default:
678          return NULL;
679        case MVT::i8:
680          Opcode = NVPTX::LDV_i8_v2_areg_64;
681          break;
682        case MVT::i16:
683          Opcode = NVPTX::LDV_i16_v2_areg_64;
684          break;
685        case MVT::i32:
686          Opcode = NVPTX::LDV_i32_v2_areg_64;
687          break;
688        case MVT::i64:
689          Opcode = NVPTX::LDV_i64_v2_areg_64;
690          break;
691        case MVT::f32:
692          Opcode = NVPTX::LDV_f32_v2_areg_64;
693          break;
694        case MVT::f64:
695          Opcode = NVPTX::LDV_f64_v2_areg_64;
696          break;
697        }
698        break;
699      case NVPTXISD::LoadV4:
700        switch (EltVT.getSimpleVT().SimpleTy) {
701        default:
702          return NULL;
703        case MVT::i8:
704          Opcode = NVPTX::LDV_i8_v4_areg_64;
705          break;
706        case MVT::i16:
707          Opcode = NVPTX::LDV_i16_v4_areg_64;
708          break;
709        case MVT::i32:
710          Opcode = NVPTX::LDV_i32_v4_areg_64;
711          break;
712        case MVT::f32:
713          Opcode = NVPTX::LDV_f32_v4_areg_64;
714          break;
715        }
716        break;
717      }
718    } else {
719      switch (N->getOpcode()) {
720      default:
721        return NULL;
722      case NVPTXISD::LoadV2:
723        switch (EltVT.getSimpleVT().SimpleTy) {
724        default:
725          return NULL;
726        case MVT::i8:
727          Opcode = NVPTX::LDV_i8_v2_areg;
728          break;
729        case MVT::i16:
730          Opcode = NVPTX::LDV_i16_v2_areg;
731          break;
732        case MVT::i32:
733          Opcode = NVPTX::LDV_i32_v2_areg;
734          break;
735        case MVT::i64:
736          Opcode = NVPTX::LDV_i64_v2_areg;
737          break;
738        case MVT::f32:
739          Opcode = NVPTX::LDV_f32_v2_areg;
740          break;
741        case MVT::f64:
742          Opcode = NVPTX::LDV_f64_v2_areg;
743          break;
744        }
745        break;
746      case NVPTXISD::LoadV4:
747        switch (EltVT.getSimpleVT().SimpleTy) {
748        default:
749          return NULL;
750        case MVT::i8:
751          Opcode = NVPTX::LDV_i8_v4_areg;
752          break;
753        case MVT::i16:
754          Opcode = NVPTX::LDV_i16_v4_areg;
755          break;
756        case MVT::i32:
757          Opcode = NVPTX::LDV_i32_v4_areg;
758          break;
759        case MVT::f32:
760          Opcode = NVPTX::LDV_f32_v4_areg;
761          break;
762        }
763        break;
764      }
765    }
766
767    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
768                      getI32Imm(VecType), getI32Imm(FromType),
769                      getI32Imm(FromTypeWidth), Op1, Chain };
770    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
771  }
772
773  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
774  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
775  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
776
777  return LD;
778}
779
780SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
781
782  SDValue Chain = N->getOperand(0);
783  SDValue Op1 = N->getOperand(1);
784  unsigned Opcode;
785  DebugLoc DL = N->getDebugLoc();
786  SDNode *LD;
787
788  EVT RetVT = N->getValueType(0);
789
790  // Select opcode
791  if (Subtarget.is64Bit()) {
792    switch (N->getOpcode()) {
793    default:
794      return NULL;
795    case NVPTXISD::LDGV2:
796      switch (RetVT.getSimpleVT().SimpleTy) {
797      default:
798        return NULL;
799      case MVT::i8:
800        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64;
801        break;
802      case MVT::i16:
803        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64;
804        break;
805      case MVT::i32:
806        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64;
807        break;
808      case MVT::i64:
809        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64;
810        break;
811      case MVT::f32:
812        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64;
813        break;
814      case MVT::f64:
815        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64;
816        break;
817      }
818      break;
819    case NVPTXISD::LDGV4:
820      switch (RetVT.getSimpleVT().SimpleTy) {
821      default:
822        return NULL;
823      case MVT::i8:
824        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64;
825        break;
826      case MVT::i16:
827        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64;
828        break;
829      case MVT::i32:
830        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64;
831        break;
832      case MVT::f32:
833        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64;
834        break;
835      }
836      break;
837    case NVPTXISD::LDUV2:
838      switch (RetVT.getSimpleVT().SimpleTy) {
839      default:
840        return NULL;
841      case MVT::i8:
842        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
843        break;
844      case MVT::i16:
845        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
846        break;
847      case MVT::i32:
848        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
849        break;
850      case MVT::i64:
851        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
852        break;
853      case MVT::f32:
854        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
855        break;
856      case MVT::f64:
857        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
858        break;
859      }
860      break;
861    case NVPTXISD::LDUV4:
862      switch (RetVT.getSimpleVT().SimpleTy) {
863      default:
864        return NULL;
865      case MVT::i8:
866        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
867        break;
868      case MVT::i16:
869        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
870        break;
871      case MVT::i32:
872        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
873        break;
874      case MVT::f32:
875        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
876        break;
877      }
878      break;
879    }
880  } else {
881    switch (N->getOpcode()) {
882    default:
883      return NULL;
884    case NVPTXISD::LDGV2:
885      switch (RetVT.getSimpleVT().SimpleTy) {
886      default:
887        return NULL;
888      case MVT::i8:
889        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32;
890        break;
891      case MVT::i16:
892        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32;
893        break;
894      case MVT::i32:
895        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32;
896        break;
897      case MVT::i64:
898        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32;
899        break;
900      case MVT::f32:
901        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32;
902        break;
903      case MVT::f64:
904        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32;
905        break;
906      }
907      break;
908    case NVPTXISD::LDGV4:
909      switch (RetVT.getSimpleVT().SimpleTy) {
910      default:
911        return NULL;
912      case MVT::i8:
913        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32;
914        break;
915      case MVT::i16:
916        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32;
917        break;
918      case MVT::i32:
919        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32;
920        break;
921      case MVT::f32:
922        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32;
923        break;
924      }
925      break;
926    case NVPTXISD::LDUV2:
927      switch (RetVT.getSimpleVT().SimpleTy) {
928      default:
929        return NULL;
930      case MVT::i8:
931        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
932        break;
933      case MVT::i16:
934        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
935        break;
936      case MVT::i32:
937        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
938        break;
939      case MVT::i64:
940        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
941        break;
942      case MVT::f32:
943        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
944        break;
945      case MVT::f64:
946        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
947        break;
948      }
949      break;
950    case NVPTXISD::LDUV4:
951      switch (RetVT.getSimpleVT().SimpleTy) {
952      default:
953        return NULL;
954      case MVT::i8:
955        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
956        break;
957      case MVT::i16:
958        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
959        break;
960      case MVT::i32:
961        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
962        break;
963      case MVT::f32:
964        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
965        break;
966      }
967      break;
968    }
969  }
970
971  SDValue Ops[] = { Op1, Chain };
972  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
973
974  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
975  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
976  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
977
978  return LD;
979}
980
981SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
982  DebugLoc dl = N->getDebugLoc();
983  StoreSDNode *ST = cast<StoreSDNode>(N);
984  EVT StoreVT = ST->getMemoryVT();
985  SDNode *NVPTXST = NULL;
986
987  // do not support pre/post inc/dec
988  if (ST->isIndexed())
989    return NULL;
990
991  if (!StoreVT.isSimple())
992    return NULL;
993
994  // Address Space Setting
995  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
996
997  // Volatile Setting
998  // - .volatile is only availalble for .global and .shared
999  bool isVolatile = ST->isVolatile();
1000  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1001      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1002      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1003    isVolatile = false;
1004
1005  // Vector Setting
1006  MVT SimpleVT = StoreVT.getSimpleVT();
1007  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1008  if (SimpleVT.isVector()) {
1009    unsigned num = SimpleVT.getVectorNumElements();
1010    if (num == 2)
1011      vecType = NVPTX::PTXLdStInstCode::V2;
1012    else if (num == 4)
1013      vecType = NVPTX::PTXLdStInstCode::V4;
1014    else
1015      return NULL;
1016  }
1017
1018  // Type Setting: toType + toTypeWidth
1019  // - for integer type, always use 'u'
1020  //
1021  MVT ScalarVT = SimpleVT.getScalarType();
1022  unsigned toTypeWidth = ScalarVT.getSizeInBits();
1023  unsigned int toType;
1024  if (ScalarVT.isFloatingPoint())
1025    toType = NVPTX::PTXLdStInstCode::Float;
1026  else
1027    toType = NVPTX::PTXLdStInstCode::Unsigned;
1028
1029  // Create the machine instruction DAG
1030  SDValue Chain = N->getOperand(0);
1031  SDValue N1 = N->getOperand(1);
1032  SDValue N2 = N->getOperand(2);
1033  SDValue Addr;
1034  SDValue Offset, Base;
1035  unsigned Opcode;
1036  MVT::SimpleValueType SourceVT =
1037      N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
1038
1039  if (SelectDirectAddr(N2, Addr)) {
1040    switch (SourceVT) {
1041    case MVT::i8:
1042      Opcode = NVPTX::ST_i8_avar;
1043      break;
1044    case MVT::i16:
1045      Opcode = NVPTX::ST_i16_avar;
1046      break;
1047    case MVT::i32:
1048      Opcode = NVPTX::ST_i32_avar;
1049      break;
1050    case MVT::i64:
1051      Opcode = NVPTX::ST_i64_avar;
1052      break;
1053    case MVT::f32:
1054      Opcode = NVPTX::ST_f32_avar;
1055      break;
1056    case MVT::f64:
1057      Opcode = NVPTX::ST_f64_avar;
1058      break;
1059    default:
1060      return NULL;
1061    }
1062    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1063                      getI32Imm(vecType), getI32Imm(toType),
1064                      getI32Imm(toTypeWidth), Addr, Chain };
1065    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1066  } else if (Subtarget.is64Bit()
1067                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1068                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1069    switch (SourceVT) {
1070    case MVT::i8:
1071      Opcode = NVPTX::ST_i8_asi;
1072      break;
1073    case MVT::i16:
1074      Opcode = NVPTX::ST_i16_asi;
1075      break;
1076    case MVT::i32:
1077      Opcode = NVPTX::ST_i32_asi;
1078      break;
1079    case MVT::i64:
1080      Opcode = NVPTX::ST_i64_asi;
1081      break;
1082    case MVT::f32:
1083      Opcode = NVPTX::ST_f32_asi;
1084      break;
1085    case MVT::f64:
1086      Opcode = NVPTX::ST_f64_asi;
1087      break;
1088    default:
1089      return NULL;
1090    }
1091    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1092                      getI32Imm(vecType), getI32Imm(toType),
1093                      getI32Imm(toTypeWidth), Base, Offset, Chain };
1094    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1095  } else if (Subtarget.is64Bit()
1096                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1097                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1098    if (Subtarget.is64Bit()) {
1099      switch (SourceVT) {
1100      case MVT::i8:
1101        Opcode = NVPTX::ST_i8_ari_64;
1102        break;
1103      case MVT::i16:
1104        Opcode = NVPTX::ST_i16_ari_64;
1105        break;
1106      case MVT::i32:
1107        Opcode = NVPTX::ST_i32_ari_64;
1108        break;
1109      case MVT::i64:
1110        Opcode = NVPTX::ST_i64_ari_64;
1111        break;
1112      case MVT::f32:
1113        Opcode = NVPTX::ST_f32_ari_64;
1114        break;
1115      case MVT::f64:
1116        Opcode = NVPTX::ST_f64_ari_64;
1117        break;
1118      default:
1119        return NULL;
1120      }
1121    } else {
1122      switch (SourceVT) {
1123      case MVT::i8:
1124        Opcode = NVPTX::ST_i8_ari;
1125        break;
1126      case MVT::i16:
1127        Opcode = NVPTX::ST_i16_ari;
1128        break;
1129      case MVT::i32:
1130        Opcode = NVPTX::ST_i32_ari;
1131        break;
1132      case MVT::i64:
1133        Opcode = NVPTX::ST_i64_ari;
1134        break;
1135      case MVT::f32:
1136        Opcode = NVPTX::ST_f32_ari;
1137        break;
1138      case MVT::f64:
1139        Opcode = NVPTX::ST_f64_ari;
1140        break;
1141      default:
1142        return NULL;
1143      }
1144    }
1145    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1146                      getI32Imm(vecType), getI32Imm(toType),
1147                      getI32Imm(toTypeWidth), Base, Offset, Chain };
1148    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1149  } else {
1150    if (Subtarget.is64Bit()) {
1151      switch (SourceVT) {
1152      case MVT::i8:
1153        Opcode = NVPTX::ST_i8_areg_64;
1154        break;
1155      case MVT::i16:
1156        Opcode = NVPTX::ST_i16_areg_64;
1157        break;
1158      case MVT::i32:
1159        Opcode = NVPTX::ST_i32_areg_64;
1160        break;
1161      case MVT::i64:
1162        Opcode = NVPTX::ST_i64_areg_64;
1163        break;
1164      case MVT::f32:
1165        Opcode = NVPTX::ST_f32_areg_64;
1166        break;
1167      case MVT::f64:
1168        Opcode = NVPTX::ST_f64_areg_64;
1169        break;
1170      default:
1171        return NULL;
1172      }
1173    } else {
1174      switch (SourceVT) {
1175      case MVT::i8:
1176        Opcode = NVPTX::ST_i8_areg;
1177        break;
1178      case MVT::i16:
1179        Opcode = NVPTX::ST_i16_areg;
1180        break;
1181      case MVT::i32:
1182        Opcode = NVPTX::ST_i32_areg;
1183        break;
1184      case MVT::i64:
1185        Opcode = NVPTX::ST_i64_areg;
1186        break;
1187      case MVT::f32:
1188        Opcode = NVPTX::ST_f32_areg;
1189        break;
1190      case MVT::f64:
1191        Opcode = NVPTX::ST_f64_areg;
1192        break;
1193      default:
1194        return NULL;
1195      }
1196    }
1197    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1198                      getI32Imm(vecType), getI32Imm(toType),
1199                      getI32Imm(toTypeWidth), N2, Chain };
1200    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1201  }
1202
1203  if (NVPTXST != NULL) {
1204    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1205    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1206    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1207  }
1208
1209  return NVPTXST;
1210}
1211
1212SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1213  SDValue Chain = N->getOperand(0);
1214  SDValue Op1 = N->getOperand(1);
1215  SDValue Addr, Offset, Base;
1216  unsigned Opcode;
1217  DebugLoc DL = N->getDebugLoc();
1218  SDNode *ST;
1219  EVT EltVT = Op1.getValueType();
1220  MemSDNode *MemSD = cast<MemSDNode>(N);
1221  EVT StoreVT = MemSD->getMemoryVT();
1222
1223  // Address Space Setting
1224  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
1225
1226  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1227    report_fatal_error("Cannot store to pointer that points to constant "
1228                       "memory space");
1229  }
1230
1231  // Volatile Setting
1232  // - .volatile is only availalble for .global and .shared
1233  bool IsVolatile = MemSD->isVolatile();
1234  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1235      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1236      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1237    IsVolatile = false;
1238
1239  // Type Setting: toType + toTypeWidth
1240  // - for integer type, always use 'u'
1241  assert(StoreVT.isSimple() && "Store value is not simple");
1242  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
1243  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
1244  unsigned ToType;
1245  if (ScalarVT.isFloatingPoint())
1246    ToType = NVPTX::PTXLdStInstCode::Float;
1247  else
1248    ToType = NVPTX::PTXLdStInstCode::Unsigned;
1249
1250  SmallVector<SDValue, 12> StOps;
1251  SDValue N2;
1252  unsigned VecType;
1253
1254  switch (N->getOpcode()) {
1255  case NVPTXISD::StoreV2:
1256    VecType = NVPTX::PTXLdStInstCode::V2;
1257    StOps.push_back(N->getOperand(1));
1258    StOps.push_back(N->getOperand(2));
1259    N2 = N->getOperand(3);
1260    break;
1261  case NVPTXISD::StoreV4:
1262    VecType = NVPTX::PTXLdStInstCode::V4;
1263    StOps.push_back(N->getOperand(1));
1264    StOps.push_back(N->getOperand(2));
1265    StOps.push_back(N->getOperand(3));
1266    StOps.push_back(N->getOperand(4));
1267    N2 = N->getOperand(5);
1268    break;
1269  default:
1270    return NULL;
1271  }
1272
1273  StOps.push_back(getI32Imm(IsVolatile));
1274  StOps.push_back(getI32Imm(CodeAddrSpace));
1275  StOps.push_back(getI32Imm(VecType));
1276  StOps.push_back(getI32Imm(ToType));
1277  StOps.push_back(getI32Imm(ToTypeWidth));
1278
1279  if (SelectDirectAddr(N2, Addr)) {
1280    switch (N->getOpcode()) {
1281    default:
1282      return NULL;
1283    case NVPTXISD::StoreV2:
1284      switch (EltVT.getSimpleVT().SimpleTy) {
1285      default:
1286        return NULL;
1287      case MVT::i8:
1288        Opcode = NVPTX::STV_i8_v2_avar;
1289        break;
1290      case MVT::i16:
1291        Opcode = NVPTX::STV_i16_v2_avar;
1292        break;
1293      case MVT::i32:
1294        Opcode = NVPTX::STV_i32_v2_avar;
1295        break;
1296      case MVT::i64:
1297        Opcode = NVPTX::STV_i64_v2_avar;
1298        break;
1299      case MVT::f32:
1300        Opcode = NVPTX::STV_f32_v2_avar;
1301        break;
1302      case MVT::f64:
1303        Opcode = NVPTX::STV_f64_v2_avar;
1304        break;
1305      }
1306      break;
1307    case NVPTXISD::StoreV4:
1308      switch (EltVT.getSimpleVT().SimpleTy) {
1309      default:
1310        return NULL;
1311      case MVT::i8:
1312        Opcode = NVPTX::STV_i8_v4_avar;
1313        break;
1314      case MVT::i16:
1315        Opcode = NVPTX::STV_i16_v4_avar;
1316        break;
1317      case MVT::i32:
1318        Opcode = NVPTX::STV_i32_v4_avar;
1319        break;
1320      case MVT::f32:
1321        Opcode = NVPTX::STV_f32_v4_avar;
1322        break;
1323      }
1324      break;
1325    }
1326    StOps.push_back(Addr);
1327  } else if (Subtarget.is64Bit()
1328                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1329                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1330    switch (N->getOpcode()) {
1331    default:
1332      return NULL;
1333    case NVPTXISD::StoreV2:
1334      switch (EltVT.getSimpleVT().SimpleTy) {
1335      default:
1336        return NULL;
1337      case MVT::i8:
1338        Opcode = NVPTX::STV_i8_v2_asi;
1339        break;
1340      case MVT::i16:
1341        Opcode = NVPTX::STV_i16_v2_asi;
1342        break;
1343      case MVT::i32:
1344        Opcode = NVPTX::STV_i32_v2_asi;
1345        break;
1346      case MVT::i64:
1347        Opcode = NVPTX::STV_i64_v2_asi;
1348        break;
1349      case MVT::f32:
1350        Opcode = NVPTX::STV_f32_v2_asi;
1351        break;
1352      case MVT::f64:
1353        Opcode = NVPTX::STV_f64_v2_asi;
1354        break;
1355      }
1356      break;
1357    case NVPTXISD::StoreV4:
1358      switch (EltVT.getSimpleVT().SimpleTy) {
1359      default:
1360        return NULL;
1361      case MVT::i8:
1362        Opcode = NVPTX::STV_i8_v4_asi;
1363        break;
1364      case MVT::i16:
1365        Opcode = NVPTX::STV_i16_v4_asi;
1366        break;
1367      case MVT::i32:
1368        Opcode = NVPTX::STV_i32_v4_asi;
1369        break;
1370      case MVT::f32:
1371        Opcode = NVPTX::STV_f32_v4_asi;
1372        break;
1373      }
1374      break;
1375    }
1376    StOps.push_back(Base);
1377    StOps.push_back(Offset);
1378  } else if (Subtarget.is64Bit()
1379                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1380                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1381    if (Subtarget.is64Bit()) {
1382      switch (N->getOpcode()) {
1383      default:
1384        return NULL;
1385      case NVPTXISD::StoreV2:
1386        switch (EltVT.getSimpleVT().SimpleTy) {
1387        default:
1388          return NULL;
1389        case MVT::i8:
1390          Opcode = NVPTX::STV_i8_v2_ari_64;
1391          break;
1392        case MVT::i16:
1393          Opcode = NVPTX::STV_i16_v2_ari_64;
1394          break;
1395        case MVT::i32:
1396          Opcode = NVPTX::STV_i32_v2_ari_64;
1397          break;
1398        case MVT::i64:
1399          Opcode = NVPTX::STV_i64_v2_ari_64;
1400          break;
1401        case MVT::f32:
1402          Opcode = NVPTX::STV_f32_v2_ari_64;
1403          break;
1404        case MVT::f64:
1405          Opcode = NVPTX::STV_f64_v2_ari_64;
1406          break;
1407        }
1408        break;
1409      case NVPTXISD::StoreV4:
1410        switch (EltVT.getSimpleVT().SimpleTy) {
1411        default:
1412          return NULL;
1413        case MVT::i8:
1414          Opcode = NVPTX::STV_i8_v4_ari_64;
1415          break;
1416        case MVT::i16:
1417          Opcode = NVPTX::STV_i16_v4_ari_64;
1418          break;
1419        case MVT::i32:
1420          Opcode = NVPTX::STV_i32_v4_ari_64;
1421          break;
1422        case MVT::f32:
1423          Opcode = NVPTX::STV_f32_v4_ari_64;
1424          break;
1425        }
1426        break;
1427      }
1428    } else {
1429      switch (N->getOpcode()) {
1430      default:
1431        return NULL;
1432      case NVPTXISD::StoreV2:
1433        switch (EltVT.getSimpleVT().SimpleTy) {
1434        default:
1435          return NULL;
1436        case MVT::i8:
1437          Opcode = NVPTX::STV_i8_v2_ari;
1438          break;
1439        case MVT::i16:
1440          Opcode = NVPTX::STV_i16_v2_ari;
1441          break;
1442        case MVT::i32:
1443          Opcode = NVPTX::STV_i32_v2_ari;
1444          break;
1445        case MVT::i64:
1446          Opcode = NVPTX::STV_i64_v2_ari;
1447          break;
1448        case MVT::f32:
1449          Opcode = NVPTX::STV_f32_v2_ari;
1450          break;
1451        case MVT::f64:
1452          Opcode = NVPTX::STV_f64_v2_ari;
1453          break;
1454        }
1455        break;
1456      case NVPTXISD::StoreV4:
1457        switch (EltVT.getSimpleVT().SimpleTy) {
1458        default:
1459          return NULL;
1460        case MVT::i8:
1461          Opcode = NVPTX::STV_i8_v4_ari;
1462          break;
1463        case MVT::i16:
1464          Opcode = NVPTX::STV_i16_v4_ari;
1465          break;
1466        case MVT::i32:
1467          Opcode = NVPTX::STV_i32_v4_ari;
1468          break;
1469        case MVT::f32:
1470          Opcode = NVPTX::STV_f32_v4_ari;
1471          break;
1472        }
1473        break;
1474      }
1475    }
1476    StOps.push_back(Base);
1477    StOps.push_back(Offset);
1478  } else {
1479    if (Subtarget.is64Bit()) {
1480      switch (N->getOpcode()) {
1481      default:
1482        return NULL;
1483      case NVPTXISD::StoreV2:
1484        switch (EltVT.getSimpleVT().SimpleTy) {
1485        default:
1486          return NULL;
1487        case MVT::i8:
1488          Opcode = NVPTX::STV_i8_v2_areg_64;
1489          break;
1490        case MVT::i16:
1491          Opcode = NVPTX::STV_i16_v2_areg_64;
1492          break;
1493        case MVT::i32:
1494          Opcode = NVPTX::STV_i32_v2_areg_64;
1495          break;
1496        case MVT::i64:
1497          Opcode = NVPTX::STV_i64_v2_areg_64;
1498          break;
1499        case MVT::f32:
1500          Opcode = NVPTX::STV_f32_v2_areg_64;
1501          break;
1502        case MVT::f64:
1503          Opcode = NVPTX::STV_f64_v2_areg_64;
1504          break;
1505        }
1506        break;
1507      case NVPTXISD::StoreV4:
1508        switch (EltVT.getSimpleVT().SimpleTy) {
1509        default:
1510          return NULL;
1511        case MVT::i8:
1512          Opcode = NVPTX::STV_i8_v4_areg_64;
1513          break;
1514        case MVT::i16:
1515          Opcode = NVPTX::STV_i16_v4_areg_64;
1516          break;
1517        case MVT::i32:
1518          Opcode = NVPTX::STV_i32_v4_areg_64;
1519          break;
1520        case MVT::f32:
1521          Opcode = NVPTX::STV_f32_v4_areg_64;
1522          break;
1523        }
1524        break;
1525      }
1526    } else {
1527      switch (N->getOpcode()) {
1528      default:
1529        return NULL;
1530      case NVPTXISD::StoreV2:
1531        switch (EltVT.getSimpleVT().SimpleTy) {
1532        default:
1533          return NULL;
1534        case MVT::i8:
1535          Opcode = NVPTX::STV_i8_v2_areg;
1536          break;
1537        case MVT::i16:
1538          Opcode = NVPTX::STV_i16_v2_areg;
1539          break;
1540        case MVT::i32:
1541          Opcode = NVPTX::STV_i32_v2_areg;
1542          break;
1543        case MVT::i64:
1544          Opcode = NVPTX::STV_i64_v2_areg;
1545          break;
1546        case MVT::f32:
1547          Opcode = NVPTX::STV_f32_v2_areg;
1548          break;
1549        case MVT::f64:
1550          Opcode = NVPTX::STV_f64_v2_areg;
1551          break;
1552        }
1553        break;
1554      case NVPTXISD::StoreV4:
1555        switch (EltVT.getSimpleVT().SimpleTy) {
1556        default:
1557          return NULL;
1558        case MVT::i8:
1559          Opcode = NVPTX::STV_i8_v4_areg;
1560          break;
1561        case MVT::i16:
1562          Opcode = NVPTX::STV_i16_v4_areg;
1563          break;
1564        case MVT::i32:
1565          Opcode = NVPTX::STV_i32_v4_areg;
1566          break;
1567        case MVT::f32:
1568          Opcode = NVPTX::STV_f32_v4_areg;
1569          break;
1570        }
1571        break;
1572      }
1573    }
1574    StOps.push_back(N2);
1575  }
1576
1577  StOps.push_back(Chain);
1578
1579  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
1580
1581  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1582  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1583  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1584
1585  return ST;
1586}
1587
1588// SelectDirectAddr - Match a direct address for DAG.
1589// A direct address could be a globaladdress or externalsymbol.
1590bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
1591  // Return true if TGA or ES.
1592  if (N.getOpcode() == ISD::TargetGlobalAddress ||
1593      N.getOpcode() == ISD::TargetExternalSymbol) {
1594    Address = N;
1595    return true;
1596  }
1597  if (N.getOpcode() == NVPTXISD::Wrapper) {
1598    Address = N.getOperand(0);
1599    return true;
1600  }
1601  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
1602    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
1603    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
1604      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
1605        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
1606  }
1607  return false;
1608}
1609
1610// symbol+offset
1611bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
1612    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
1613  if (Addr.getOpcode() == ISD::ADD) {
1614    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
1615      SDValue base = Addr.getOperand(0);
1616      if (SelectDirectAddr(base, Base)) {
1617        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
1618        return true;
1619      }
1620    }
1621  }
1622  return false;
1623}
1624
1625// symbol+offset
1626bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
1627                                     SDValue &Base, SDValue &Offset) {
1628  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
1629}
1630
1631// symbol+offset
1632bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
1633                                       SDValue &Base, SDValue &Offset) {
1634  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
1635}
1636
1637// register+offset
1638bool NVPTXDAGToDAGISel::SelectADDRri_imp(
1639    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
1640  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
1641    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
1642    Offset = CurDAG->getTargetConstant(0, mvt);
1643    return true;
1644  }
1645  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
1646      Addr.getOpcode() == ISD::TargetGlobalAddress)
1647    return false; // direct calls.
1648
1649  if (Addr.getOpcode() == ISD::ADD) {
1650    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
1651      return false;
1652    }
1653    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
1654      if (FrameIndexSDNode *FIN =
1655              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
1656        // Constant offset from frame ref.
1657        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
1658      else
1659        Base = Addr.getOperand(0);
1660      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
1661      return true;
1662    }
1663  }
1664  return false;
1665}
1666
1667// register+offset
1668bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
1669                                     SDValue &Base, SDValue &Offset) {
1670  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
1671}
1672
1673// register+offset
1674bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
1675                                       SDValue &Base, SDValue &Offset) {
1676  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
1677}
1678
1679bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
1680                                                 unsigned int spN) const {
1681  const Value *Src = NULL;
1682  // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
1683  // the classof() for MemSDNode does not include MemIntrinsicSDNode
1684  // (See SelectionDAGNodes.h). So we need to check for both.
1685  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
1686    Src = mN->getSrcValue();
1687  } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
1688    Src = mN->getSrcValue();
1689  }
1690  if (!Src)
1691    return false;
1692  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
1693    return (PT->getAddressSpace() == spN);
1694  return false;
1695}
1696
1697/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
1698/// inline asm expressions.
1699bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
1700    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
1701  SDValue Op0, Op1;
1702  switch (ConstraintCode) {
1703  default:
1704    return true;
1705  case 'm': // memory
1706    if (SelectDirectAddr(Op, Op0)) {
1707      OutOps.push_back(Op0);
1708      OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
1709      return false;
1710    }
1711    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
1712      OutOps.push_back(Op0);
1713      OutOps.push_back(Op1);
1714      return false;
1715    }
1716    break;
1717  }
1718  return true;
1719}
1720
1721// Return true if N is a undef or a constant.
1722// If N was undef, return a (i8imm 0) in Retval
1723// If N was imm, convert it to i8imm and return in Retval
1724// Note: The convert to i8imm is required, otherwise the
1725// pattern matcher inserts a bunch of IMOVi8rr to convert
1726// the imm to i8imm, and this causes instruction selection
1727// to fail.
1728bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
1729  if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
1730    return false;
1731
1732  if (N.getOpcode() == ISD::UNDEF)
1733    Retval = CurDAG->getTargetConstant(0, MVT::i8);
1734  else {
1735    ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
1736    unsigned retval = cn->getZExtValue();
1737    Retval = CurDAG->getTargetConstant(retval, MVT::i8);
1738  }
1739  return true;
1740}
1741