NVPTXISelDAGToDAG.cpp revision 7eacad03efda36e09ebd96e95d7891cadaaa9087
1//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
14
15#include "NVPTXISelDAGToDAG.h"
16#include "llvm/IR/GlobalValue.h"
17#include "llvm/IR/Instructions.h"
18#include "llvm/Support/CommandLine.h"
19#include "llvm/Support/Debug.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/raw_ostream.h"
22#include "llvm/Target/TargetIntrinsicInfo.h"
23
24#undef DEBUG_TYPE
25#define DEBUG_TYPE "nvptx-isel"
26
27using namespace llvm;
28
29
30static cl::opt<bool>
31UseFMADInstruction("nvptx-mad-enable",
32                   cl::ZeroOrMore,
33                cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
34                   cl::init(false));
35
36static cl::opt<int>
37FMAContractLevel("nvptx-fma-level",
38                 cl::ZeroOrMore,
39                 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
40                     " 1: do it  2: do it aggressively"),
41                     cl::init(2));
42
43
44static cl::opt<int>
45UsePrecDivF32("nvptx-prec-divf32",
46              cl::ZeroOrMore,
47             cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
48                  " IEEE Compliant F32 div.rnd if avaiable."),
49                  cl::init(2));
50
51/// createNVPTXISelDag - This pass converts a legalized DAG into a
52/// NVPTX-specific DAG, ready for instruction scheduling.
53FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
54                                       llvm::CodeGenOpt::Level OptLevel) {
55  return new NVPTXDAGToDAGISel(TM, OptLevel);
56}
57
58
59NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
60                                     CodeGenOpt::Level OptLevel)
61: SelectionDAGISel(tm, OptLevel),
62  Subtarget(tm.getSubtarget<NVPTXSubtarget>())
63{
64  // Always do fma.f32 fpcontract if the target supports the instruction.
65  // Always do fma.f64 fpcontract if the target supports the instruction.
66  // Do mad.f32 is nvptx-mad-enable is specified and the target does not
67  // support fma.f32.
68
69  doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
70  doFMAF32 =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
71      (FMAContractLevel>=1);
72  doFMAF64 =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
73      (FMAContractLevel>=1);
74  doFMAF32AGG =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
75      (FMAContractLevel==2);
76  doFMAF64AGG =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
77      (FMAContractLevel==2);
78
79  allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
80
81  UseF32FTZ = false;
82
83  doMulWide = (OptLevel > 0);
84
85  // Decide how to translate f32 div
86  do_DIVF32_PREC = UsePrecDivF32;
87  // sm less than sm_20 does not support div.rnd. Use div.full.
88  if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
89    do_DIVF32_PREC = 1;
90
91}
92
93/// Select - Select instructions not customized! Used for
94/// expanded, promoted and normal instructions.
95SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
96
97  if (N->isMachineOpcode())
98    return NULL;   // Already selected.
99
100  SDNode *ResNode = NULL;
101  switch (N->getOpcode()) {
102  case ISD::LOAD:
103    ResNode = SelectLoad(N);
104    break;
105  case ISD::STORE:
106    ResNode = SelectStore(N);
107    break;
108  case NVPTXISD::LoadV2:
109  case NVPTXISD::LoadV4:
110    ResNode = SelectLoadVector(N);
111    break;
112  case NVPTXISD::LDGV2:
113  case NVPTXISD::LDGV4:
114  case NVPTXISD::LDUV2:
115  case NVPTXISD::LDUV4:
116    ResNode = SelectLDGLDUVector(N);
117    break;
118  case NVPTXISD::StoreV2:
119  case NVPTXISD::StoreV4:
120    ResNode = SelectStoreVector(N);
121    break;
122  default: break;
123  }
124  if (ResNode)
125    return ResNode;
126  return SelectCode(N);
127}
128
129
130static unsigned int
131getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
132{
133  const Value *Src = N->getSrcValue();
134  if (!Src)
135    return NVPTX::PTXLdStInstCode::LOCAL;
136
137  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
138    switch (PT->getAddressSpace()) {
139    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
140    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
141    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
142    case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
143      return NVPTX::PTXLdStInstCode::CONSTANT;
144    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
145    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
146    case llvm::ADDRESS_SPACE_CONST:
147      // If the arch supports generic address space, translate it to GLOBAL
148      // for correctness.
149      // If the arch does not support generic address space, then the arch
150      // does not really support ADDRESS_SPACE_CONST, translate it to
151      // to CONSTANT for better performance.
152      if (Subtarget.hasGenericLdSt())
153        return NVPTX::PTXLdStInstCode::GLOBAL;
154      else
155        return NVPTX::PTXLdStInstCode::CONSTANT;
156    default: break;
157    }
158  }
159  return NVPTX::PTXLdStInstCode::LOCAL;
160}
161
162
163SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
164  DebugLoc dl = N->getDebugLoc();
165  LoadSDNode *LD = cast<LoadSDNode>(N);
166  EVT LoadedVT = LD->getMemoryVT();
167  SDNode *NVPTXLD= NULL;
168
169  // do not support pre/post inc/dec
170  if (LD->isIndexed())
171    return NULL;
172
173  if (!LoadedVT.isSimple())
174    return NULL;
175
176  // Address Space Setting
177  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
178
179  // Volatile Setting
180  // - .volatile is only availalble for .global and .shared
181  bool isVolatile = LD->isVolatile();
182  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
183      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
184      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
185    isVolatile = false;
186
187  // Vector Setting
188  MVT SimpleVT = LoadedVT.getSimpleVT();
189  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
190  if (SimpleVT.isVector()) {
191    unsigned num = SimpleVT.getVectorNumElements();
192    if (num == 2)
193      vecType = NVPTX::PTXLdStInstCode::V2;
194    else if (num == 4)
195      vecType = NVPTX::PTXLdStInstCode::V4;
196    else
197      return NULL;
198  }
199
200  // Type Setting: fromType + fromTypeWidth
201  //
202  // Sign   : ISD::SEXTLOAD
203  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
204  //          type is integer
205  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
206  MVT ScalarVT = SimpleVT.getScalarType();
207  unsigned fromTypeWidth =  ScalarVT.getSizeInBits();
208  unsigned int fromType;
209  if ((LD->getExtensionType() == ISD::SEXTLOAD))
210    fromType = NVPTX::PTXLdStInstCode::Signed;
211  else if (ScalarVT.isFloatingPoint())
212    fromType = NVPTX::PTXLdStInstCode::Float;
213  else
214    fromType = NVPTX::PTXLdStInstCode::Unsigned;
215
216  // Create the machine instruction DAG
217  SDValue Chain = N->getOperand(0);
218  SDValue N1 = N->getOperand(1);
219  SDValue Addr;
220  SDValue Offset, Base;
221  unsigned Opcode;
222  MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
223
224  if (SelectDirectAddr(N1, Addr)) {
225    switch (TargetVT) {
226    case MVT::i8:    Opcode = NVPTX::LD_i8_avar; break;
227    case MVT::i16:   Opcode = NVPTX::LD_i16_avar; break;
228    case MVT::i32:   Opcode = NVPTX::LD_i32_avar; break;
229    case MVT::i64:   Opcode = NVPTX::LD_i64_avar; break;
230    case MVT::f32:   Opcode = NVPTX::LD_f32_avar; break;
231    case MVT::f64:   Opcode = NVPTX::LD_f64_avar; break;
232    default: return NULL;
233    }
234    SDValue Ops[] = { getI32Imm(isVolatile),
235                      getI32Imm(codeAddrSpace),
236                      getI32Imm(vecType),
237                      getI32Imm(fromType),
238                      getI32Imm(fromTypeWidth),
239                      Addr, Chain };
240    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
241                                     MVT::Other, Ops, 7);
242  } else if (Subtarget.is64Bit()?
243      SelectADDRsi64(N1.getNode(), N1, Base, Offset):
244      SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
245    switch (TargetVT) {
246    case MVT::i8:    Opcode = NVPTX::LD_i8_asi; break;
247    case MVT::i16:   Opcode = NVPTX::LD_i16_asi; break;
248    case MVT::i32:   Opcode = NVPTX::LD_i32_asi; break;
249    case MVT::i64:   Opcode = NVPTX::LD_i64_asi; break;
250    case MVT::f32:   Opcode = NVPTX::LD_f32_asi; break;
251    case MVT::f64:   Opcode = NVPTX::LD_f64_asi; break;
252    default: return NULL;
253    }
254    SDValue Ops[] = { getI32Imm(isVolatile),
255                      getI32Imm(codeAddrSpace),
256                      getI32Imm(vecType),
257                      getI32Imm(fromType),
258                      getI32Imm(fromTypeWidth),
259                      Base, Offset, Chain };
260    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
261                                     MVT::Other, Ops, 8);
262  } else if (Subtarget.is64Bit()?
263      SelectADDRri64(N1.getNode(), N1, Base, Offset):
264      SelectADDRri(N1.getNode(), N1, Base, Offset)) {
265    if (Subtarget.is64Bit()) {
266      switch (TargetVT) {
267      case MVT::i8:    Opcode = NVPTX::LD_i8_ari_64; break;
268      case MVT::i16:   Opcode = NVPTX::LD_i16_ari_64; break;
269      case MVT::i32:   Opcode = NVPTX::LD_i32_ari_64; break;
270      case MVT::i64:   Opcode = NVPTX::LD_i64_ari_64; break;
271      case MVT::f32:   Opcode = NVPTX::LD_f32_ari_64; break;
272      case MVT::f64:   Opcode = NVPTX::LD_f64_ari_64; break;
273      default: return NULL;
274      }
275    } else {
276      switch (TargetVT) {
277      case MVT::i8:    Opcode = NVPTX::LD_i8_ari; break;
278      case MVT::i16:   Opcode = NVPTX::LD_i16_ari; break;
279      case MVT::i32:   Opcode = NVPTX::LD_i32_ari; break;
280      case MVT::i64:   Opcode = NVPTX::LD_i64_ari; break;
281      case MVT::f32:   Opcode = NVPTX::LD_f32_ari; break;
282      case MVT::f64:   Opcode = NVPTX::LD_f64_ari; break;
283      default: return NULL;
284      }
285    }
286    SDValue Ops[] = { getI32Imm(isVolatile),
287                      getI32Imm(codeAddrSpace),
288                      getI32Imm(vecType),
289                      getI32Imm(fromType),
290                      getI32Imm(fromTypeWidth),
291                      Base, Offset, Chain };
292    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
293                                     MVT::Other, Ops, 8);
294  }
295  else {
296    if (Subtarget.is64Bit()) {
297      switch (TargetVT) {
298      case MVT::i8:    Opcode = NVPTX::LD_i8_areg_64; break;
299      case MVT::i16:   Opcode = NVPTX::LD_i16_areg_64; break;
300      case MVT::i32:   Opcode = NVPTX::LD_i32_areg_64; break;
301      case MVT::i64:   Opcode = NVPTX::LD_i64_areg_64; break;
302      case MVT::f32:   Opcode = NVPTX::LD_f32_areg_64; break;
303      case MVT::f64:   Opcode = NVPTX::LD_f64_areg_64; break;
304      default: return NULL;
305      }
306    } else {
307      switch (TargetVT) {
308      case MVT::i8:    Opcode = NVPTX::LD_i8_areg; break;
309      case MVT::i16:   Opcode = NVPTX::LD_i16_areg; break;
310      case MVT::i32:   Opcode = NVPTX::LD_i32_areg; break;
311      case MVT::i64:   Opcode = NVPTX::LD_i64_areg; break;
312      case MVT::f32:   Opcode = NVPTX::LD_f32_areg; break;
313      case MVT::f64:   Opcode = NVPTX::LD_f64_areg; break;
314      default: return NULL;
315      }
316    }
317    SDValue Ops[] = { getI32Imm(isVolatile),
318                      getI32Imm(codeAddrSpace),
319                      getI32Imm(vecType),
320                      getI32Imm(fromType),
321                      getI32Imm(fromTypeWidth),
322                      N1, Chain };
323    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
324                                     MVT::Other, Ops, 7);
325  }
326
327  if (NVPTXLD != NULL) {
328    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
329    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
330    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
331  }
332
333  return NVPTXLD;
334}
335
336SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
337
338  SDValue Chain = N->getOperand(0);
339  SDValue Op1 = N->getOperand(1);
340  SDValue Addr, Offset, Base;
341  unsigned Opcode;
342  DebugLoc DL = N->getDebugLoc();
343  SDNode *LD;
344  MemSDNode *MemSD = cast<MemSDNode>(N);
345  EVT LoadedVT = MemSD->getMemoryVT();
346
347
348  if (!LoadedVT.isSimple())
349     return NULL;
350
351  // Address Space Setting
352  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
353
354  // Volatile Setting
355  // - .volatile is only availalble for .global and .shared
356  bool IsVolatile = MemSD->isVolatile();
357  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
358      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
359      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
360    IsVolatile = false;
361
362  // Vector Setting
363  MVT SimpleVT = LoadedVT.getSimpleVT();
364
365  // Type Setting: fromType + fromTypeWidth
366  //
367  // Sign   : ISD::SEXTLOAD
368  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
369  //          type is integer
370  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
371  MVT ScalarVT = SimpleVT.getScalarType();
372  unsigned FromTypeWidth =  ScalarVT.getSizeInBits();
373  unsigned int FromType;
374  // The last operand holds the original LoadSDNode::getExtensionType() value
375  unsigned ExtensionType =
376    cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue();
377  if (ExtensionType == ISD::SEXTLOAD)
378    FromType = NVPTX::PTXLdStInstCode::Signed;
379  else if (ScalarVT.isFloatingPoint())
380    FromType = NVPTX::PTXLdStInstCode::Float;
381  else
382    FromType = NVPTX::PTXLdStInstCode::Unsigned;
383
384  unsigned VecType;
385
386  switch (N->getOpcode()) {
387  case NVPTXISD::LoadV2:  VecType = NVPTX::PTXLdStInstCode::V2; break;
388  case NVPTXISD::LoadV4:  VecType = NVPTX::PTXLdStInstCode::V4; break;
389  default: return NULL;
390  }
391
392  EVT EltVT = N->getValueType(0);
393
394  if (SelectDirectAddr(Op1, Addr)) {
395    switch (N->getOpcode()) {
396    default: return NULL;
397    case NVPTXISD::LoadV2:
398      switch (EltVT.getSimpleVT().SimpleTy) {
399      default: return NULL;
400      case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_avar; break;
401      case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_avar; break;
402      case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_avar; break;
403      case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_avar; break;
404      case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_avar; break;
405      case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_avar; break;
406      }
407      break;
408    case NVPTXISD::LoadV4:
409      switch (EltVT.getSimpleVT().SimpleTy) {
410      default: return NULL;
411      case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_avar; break;
412      case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_avar; break;
413      case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_avar; break;
414      case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_avar; break;
415      }
416      break;
417    }
418
419    SDValue Ops[] = { getI32Imm(IsVolatile),
420                      getI32Imm(CodeAddrSpace),
421                      getI32Imm(VecType),
422                      getI32Imm(FromType),
423                      getI32Imm(FromTypeWidth),
424                      Addr, Chain };
425    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
426  } else if (Subtarget.is64Bit()?
427             SelectADDRsi64(Op1.getNode(), Op1, Base, Offset):
428             SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
429    switch (N->getOpcode()) {
430    default: return NULL;
431    case NVPTXISD::LoadV2:
432      switch (EltVT.getSimpleVT().SimpleTy) {
433      default: return NULL;
434      case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_asi; break;
435      case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_asi; break;
436      case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_asi; break;
437      case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_asi; break;
438      case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_asi; break;
439      case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_asi; break;
440      }
441      break;
442    case NVPTXISD::LoadV4:
443      switch (EltVT.getSimpleVT().SimpleTy) {
444      default: return NULL;
445      case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_asi; break;
446      case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_asi; break;
447      case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_asi; break;
448      case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_asi; break;
449      }
450      break;
451    }
452
453    SDValue Ops[] = { getI32Imm(IsVolatile),
454                      getI32Imm(CodeAddrSpace),
455                      getI32Imm(VecType),
456                      getI32Imm(FromType),
457                      getI32Imm(FromTypeWidth),
458                      Base, Offset, Chain };
459    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
460  } else if (Subtarget.is64Bit()?
461             SelectADDRri64(Op1.getNode(), Op1, Base, Offset):
462             SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
463    if (Subtarget.is64Bit()) {
464      switch (N->getOpcode()) {
465      default: return NULL;
466      case NVPTXISD::LoadV2:
467        switch (EltVT.getSimpleVT().SimpleTy) {
468        default: return NULL;
469        case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_ari_64; break;
470        case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_ari_64; break;
471        case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_ari_64; break;
472        case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_ari_64; break;
473        case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_ari_64; break;
474        case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_ari_64; break;
475        }
476        break;
477      case NVPTXISD::LoadV4:
478        switch (EltVT.getSimpleVT().SimpleTy) {
479        default: return NULL;
480        case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_ari_64; break;
481        case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_ari_64; break;
482        case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_ari_64; break;
483        case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_ari_64; break;
484        }
485        break;
486      }
487    } else {
488      switch (N->getOpcode()) {
489      default: return NULL;
490      case NVPTXISD::LoadV2:
491        switch (EltVT.getSimpleVT().SimpleTy) {
492        default: return NULL;
493        case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_ari; break;
494        case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_ari; break;
495        case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_ari; break;
496        case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_ari; break;
497        case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_ari; break;
498        case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_ari; break;
499        }
500        break;
501      case NVPTXISD::LoadV4:
502        switch (EltVT.getSimpleVT().SimpleTy) {
503        default: return NULL;
504        case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_ari; break;
505        case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_ari; break;
506        case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_ari; break;
507        case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_ari; break;
508        }
509        break;
510      }
511    }
512
513    SDValue Ops[] = { getI32Imm(IsVolatile),
514                      getI32Imm(CodeAddrSpace),
515                      getI32Imm(VecType),
516                      getI32Imm(FromType),
517                      getI32Imm(FromTypeWidth),
518                      Base, Offset, Chain };
519
520    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
521  } else {
522    if (Subtarget.is64Bit()) {
523      switch (N->getOpcode()) {
524      default: return NULL;
525      case NVPTXISD::LoadV2:
526        switch (EltVT.getSimpleVT().SimpleTy) {
527        default: return NULL;
528        case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_areg_64; break;
529        case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_areg_64; break;
530        case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_areg_64; break;
531        case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_areg_64; break;
532        case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_areg_64; break;
533        case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_areg_64; break;
534        }
535        break;
536      case NVPTXISD::LoadV4:
537        switch (EltVT.getSimpleVT().SimpleTy) {
538        default: return NULL;
539        case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_areg_64; break;
540        case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_areg_64; break;
541        case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_areg_64; break;
542        case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_areg_64; break;
543        }
544        break;
545      }
546    } else {
547      switch (N->getOpcode()) {
548      default: return NULL;
549      case NVPTXISD::LoadV2:
550        switch (EltVT.getSimpleVT().SimpleTy) {
551        default: return NULL;
552        case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_areg; break;
553        case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_areg; break;
554        case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_areg; break;
555        case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_areg; break;
556        case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_areg; break;
557        case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_areg; break;
558        }
559        break;
560      case NVPTXISD::LoadV4:
561        switch (EltVT.getSimpleVT().SimpleTy) {
562        default: return NULL;
563        case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_areg; break;
564        case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_areg; break;
565        case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_areg; break;
566        case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_areg; break;
567        }
568        break;
569      }
570    }
571
572    SDValue Ops[] = { getI32Imm(IsVolatile),
573                      getI32Imm(CodeAddrSpace),
574                      getI32Imm(VecType),
575                      getI32Imm(FromType),
576                      getI32Imm(FromTypeWidth),
577                      Op1, Chain };
578    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
579  }
580
581  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
582  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
583  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
584
585  return LD;
586}
587
588SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
589
590  SDValue Chain = N->getOperand(0);
591  SDValue Op1 = N->getOperand(1);
592  unsigned Opcode;
593  DebugLoc DL = N->getDebugLoc();
594  SDNode *LD;
595
596  EVT RetVT = N->getValueType(0);
597
598  // Select opcode
599  if (Subtarget.is64Bit()) {
600    switch (N->getOpcode()) {
601    default: return NULL;
602    case NVPTXISD::LDGV2:
603      switch (RetVT.getSimpleVT().SimpleTy) {
604      default: return NULL;
605      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break;
606      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break;
607      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break;
608      case MVT::i64:  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break;
609      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break;
610      case MVT::f64:  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break;
611      }
612      break;
613    case NVPTXISD::LDGV4:
614      switch (RetVT.getSimpleVT().SimpleTy) {
615      default: return NULL;
616      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break;
617      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break;
618      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break;
619      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break;
620      }
621      break;
622    case NVPTXISD::LDUV2:
623      switch (RetVT.getSimpleVT().SimpleTy) {
624      default: return NULL;
625      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break;
626      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break;
627      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break;
628      case MVT::i64:  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break;
629      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break;
630      case MVT::f64:  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break;
631      }
632      break;
633    case NVPTXISD::LDUV4:
634      switch (RetVT.getSimpleVT().SimpleTy) {
635      default: return NULL;
636      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break;
637      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break;
638      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break;
639      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break;
640      }
641      break;
642    }
643  } else {
644    switch (N->getOpcode()) {
645    default: return NULL;
646    case NVPTXISD::LDGV2:
647      switch (RetVT.getSimpleVT().SimpleTy) {
648      default: return NULL;
649      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break;
650      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break;
651      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break;
652      case MVT::i64:  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break;
653      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break;
654      case MVT::f64:  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break;
655      }
656      break;
657    case NVPTXISD::LDGV4:
658      switch (RetVT.getSimpleVT().SimpleTy) {
659      default: return NULL;
660      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break;
661      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break;
662      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break;
663      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break;
664      }
665      break;
666    case NVPTXISD::LDUV2:
667      switch (RetVT.getSimpleVT().SimpleTy) {
668      default: return NULL;
669      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break;
670      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break;
671      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break;
672      case MVT::i64:  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break;
673      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break;
674      case MVT::f64:  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break;
675      }
676      break;
677    case NVPTXISD::LDUV4:
678      switch (RetVT.getSimpleVT().SimpleTy) {
679      default: return NULL;
680      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break;
681      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break;
682      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break;
683      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break;
684      }
685      break;
686    }
687  }
688
689  SDValue Ops[] = { Op1, Chain };
690  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);
691
692  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
693  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
694  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
695
696  return LD;
697}
698
699
700SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
701  DebugLoc dl = N->getDebugLoc();
702  StoreSDNode *ST = cast<StoreSDNode>(N);
703  EVT StoreVT = ST->getMemoryVT();
704  SDNode *NVPTXST = NULL;
705
706  // do not support pre/post inc/dec
707  if (ST->isIndexed())
708    return NULL;
709
710  if (!StoreVT.isSimple())
711    return NULL;
712
713  // Address Space Setting
714  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
715
716  // Volatile Setting
717  // - .volatile is only availalble for .global and .shared
718  bool isVolatile = ST->isVolatile();
719  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
720      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
721      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
722    isVolatile = false;
723
724  // Vector Setting
725  MVT SimpleVT = StoreVT.getSimpleVT();
726  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
727  if (SimpleVT.isVector()) {
728    unsigned num = SimpleVT.getVectorNumElements();
729    if (num == 2)
730      vecType = NVPTX::PTXLdStInstCode::V2;
731    else if (num == 4)
732      vecType = NVPTX::PTXLdStInstCode::V4;
733    else
734      return NULL;
735  }
736
737  // Type Setting: toType + toTypeWidth
738  // - for integer type, always use 'u'
739  //
740  MVT ScalarVT = SimpleVT.getScalarType();
741  unsigned toTypeWidth =  ScalarVT.getSizeInBits();
742  unsigned int toType;
743  if (ScalarVT.isFloatingPoint())
744    toType = NVPTX::PTXLdStInstCode::Float;
745  else
746    toType = NVPTX::PTXLdStInstCode::Unsigned;
747
748  // Create the machine instruction DAG
749  SDValue Chain = N->getOperand(0);
750  SDValue N1 = N->getOperand(1);
751  SDValue N2 = N->getOperand(2);
752  SDValue Addr;
753  SDValue Offset, Base;
754  unsigned Opcode;
755  MVT::SimpleValueType SourceVT =
756      N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
757
758  if (SelectDirectAddr(N2, Addr)) {
759    switch (SourceVT) {
760    case MVT::i8:    Opcode = NVPTX::ST_i8_avar; break;
761    case MVT::i16:   Opcode = NVPTX::ST_i16_avar; break;
762    case MVT::i32:   Opcode = NVPTX::ST_i32_avar; break;
763    case MVT::i64:   Opcode = NVPTX::ST_i64_avar; break;
764    case MVT::f32:   Opcode = NVPTX::ST_f32_avar; break;
765    case MVT::f64:   Opcode = NVPTX::ST_f64_avar; break;
766    default: return NULL;
767    }
768    SDValue Ops[] = { N1,
769                      getI32Imm(isVolatile),
770                      getI32Imm(codeAddrSpace),
771                      getI32Imm(vecType),
772                      getI32Imm(toType),
773                      getI32Imm(toTypeWidth),
774                      Addr, Chain };
775    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
776                                     MVT::Other, Ops, 8);
777  } else if (Subtarget.is64Bit()?
778      SelectADDRsi64(N2.getNode(), N2, Base, Offset):
779      SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
780    switch (SourceVT) {
781    case MVT::i8:    Opcode = NVPTX::ST_i8_asi; break;
782    case MVT::i16:   Opcode = NVPTX::ST_i16_asi; break;
783    case MVT::i32:   Opcode = NVPTX::ST_i32_asi; break;
784    case MVT::i64:   Opcode = NVPTX::ST_i64_asi; break;
785    case MVT::f32:   Opcode = NVPTX::ST_f32_asi; break;
786    case MVT::f64:   Opcode = NVPTX::ST_f64_asi; break;
787    default: return NULL;
788    }
789    SDValue Ops[] = { N1,
790                      getI32Imm(isVolatile),
791                      getI32Imm(codeAddrSpace),
792                      getI32Imm(vecType),
793                      getI32Imm(toType),
794                      getI32Imm(toTypeWidth),
795                      Base, Offset, Chain };
796    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
797                                     MVT::Other, Ops, 9);
798  } else if (Subtarget.is64Bit()?
799      SelectADDRri64(N2.getNode(), N2, Base, Offset):
800      SelectADDRri(N2.getNode(), N2, Base, Offset)) {
801    if (Subtarget.is64Bit()) {
802      switch (SourceVT) {
803      case MVT::i8:    Opcode = NVPTX::ST_i8_ari_64; break;
804      case MVT::i16:   Opcode = NVPTX::ST_i16_ari_64; break;
805      case MVT::i32:   Opcode = NVPTX::ST_i32_ari_64; break;
806      case MVT::i64:   Opcode = NVPTX::ST_i64_ari_64; break;
807      case MVT::f32:   Opcode = NVPTX::ST_f32_ari_64; break;
808      case MVT::f64:   Opcode = NVPTX::ST_f64_ari_64; break;
809      default: return NULL;
810      }
811    } else {
812      switch (SourceVT) {
813      case MVT::i8:    Opcode = NVPTX::ST_i8_ari; break;
814      case MVT::i16:   Opcode = NVPTX::ST_i16_ari; break;
815      case MVT::i32:   Opcode = NVPTX::ST_i32_ari; break;
816      case MVT::i64:   Opcode = NVPTX::ST_i64_ari; break;
817      case MVT::f32:   Opcode = NVPTX::ST_f32_ari; break;
818      case MVT::f64:   Opcode = NVPTX::ST_f64_ari; break;
819      default: return NULL;
820      }
821    }
822    SDValue Ops[] = { N1,
823                      getI32Imm(isVolatile),
824                      getI32Imm(codeAddrSpace),
825                      getI32Imm(vecType),
826                      getI32Imm(toType),
827                      getI32Imm(toTypeWidth),
828                      Base, Offset, Chain };
829    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
830                                     MVT::Other, Ops, 9);
831  } else {
832    if (Subtarget.is64Bit()) {
833      switch (SourceVT) {
834      case MVT::i8:    Opcode = NVPTX::ST_i8_areg_64; break;
835      case MVT::i16:   Opcode = NVPTX::ST_i16_areg_64; break;
836      case MVT::i32:   Opcode = NVPTX::ST_i32_areg_64; break;
837      case MVT::i64:   Opcode = NVPTX::ST_i64_areg_64; break;
838      case MVT::f32:   Opcode = NVPTX::ST_f32_areg_64; break;
839      case MVT::f64:   Opcode = NVPTX::ST_f64_areg_64; break;
840      default: return NULL;
841      }
842    } else {
843      switch (SourceVT) {
844      case MVT::i8:    Opcode = NVPTX::ST_i8_areg; break;
845      case MVT::i16:   Opcode = NVPTX::ST_i16_areg; break;
846      case MVT::i32:   Opcode = NVPTX::ST_i32_areg; break;
847      case MVT::i64:   Opcode = NVPTX::ST_i64_areg; break;
848      case MVT::f32:   Opcode = NVPTX::ST_f32_areg; break;
849      case MVT::f64:   Opcode = NVPTX::ST_f64_areg; break;
850      default: return NULL;
851      }
852    }
853    SDValue Ops[] = { N1,
854                      getI32Imm(isVolatile),
855                      getI32Imm(codeAddrSpace),
856                      getI32Imm(vecType),
857                      getI32Imm(toType),
858                      getI32Imm(toTypeWidth),
859                      N2, Chain };
860    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
861                                     MVT::Other, Ops, 8);
862  }
863
864  if (NVPTXST != NULL) {
865    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
866    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
867    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
868  }
869
870  return NVPTXST;
871}
872
873SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
874  SDValue Chain = N->getOperand(0);
875  SDValue Op1 = N->getOperand(1);
876  SDValue Addr, Offset, Base;
877  unsigned Opcode;
878  DebugLoc DL = N->getDebugLoc();
879  SDNode *ST;
880  EVT EltVT = Op1.getValueType();
881  MemSDNode *MemSD = cast<MemSDNode>(N);
882  EVT StoreVT = MemSD->getMemoryVT();
883
884  // Address Space Setting
885  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
886
887  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
888    report_fatal_error("Cannot store to pointer that points to constant "
889                       "memory space");
890  }
891
892  // Volatile Setting
893  // - .volatile is only availalble for .global and .shared
894  bool IsVolatile = MemSD->isVolatile();
895  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
896      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
897      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
898    IsVolatile = false;
899
900  // Type Setting: toType + toTypeWidth
901  // - for integer type, always use 'u'
902  assert(StoreVT.isSimple() && "Store value is not simple");
903  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
904  unsigned ToTypeWidth =  ScalarVT.getSizeInBits();
905  unsigned ToType;
906  if (ScalarVT.isFloatingPoint())
907    ToType = NVPTX::PTXLdStInstCode::Float;
908  else
909    ToType = NVPTX::PTXLdStInstCode::Unsigned;
910
911
912  SmallVector<SDValue, 12> StOps;
913  SDValue N2;
914  unsigned VecType;
915
916  switch (N->getOpcode()) {
917  case NVPTXISD::StoreV2:
918    VecType = NVPTX::PTXLdStInstCode::V2;
919    StOps.push_back(N->getOperand(1));
920    StOps.push_back(N->getOperand(2));
921    N2 = N->getOperand(3);
922    break;
923  case NVPTXISD::StoreV4:
924    VecType = NVPTX::PTXLdStInstCode::V4;
925    StOps.push_back(N->getOperand(1));
926    StOps.push_back(N->getOperand(2));
927    StOps.push_back(N->getOperand(3));
928    StOps.push_back(N->getOperand(4));
929    N2 = N->getOperand(5);
930    break;
931  default: return NULL;
932  }
933
934  StOps.push_back(getI32Imm(IsVolatile));
935  StOps.push_back(getI32Imm(CodeAddrSpace));
936  StOps.push_back(getI32Imm(VecType));
937  StOps.push_back(getI32Imm(ToType));
938  StOps.push_back(getI32Imm(ToTypeWidth));
939
940  if (SelectDirectAddr(N2, Addr)) {
941    switch (N->getOpcode()) {
942    default: return NULL;
943    case NVPTXISD::StoreV2:
944      switch (EltVT.getSimpleVT().SimpleTy) {
945      default: return NULL;
946      case MVT::i8:   Opcode = NVPTX::STV_i8_v2_avar; break;
947      case MVT::i16:  Opcode = NVPTX::STV_i16_v2_avar; break;
948      case MVT::i32:  Opcode = NVPTX::STV_i32_v2_avar; break;
949      case MVT::i64:  Opcode = NVPTX::STV_i64_v2_avar; break;
950      case MVT::f32:  Opcode = NVPTX::STV_f32_v2_avar; break;
951      case MVT::f64:  Opcode = NVPTX::STV_f64_v2_avar; break;
952      }
953      break;
954    case NVPTXISD::StoreV4:
955      switch (EltVT.getSimpleVT().SimpleTy) {
956      default: return NULL;
957      case MVT::i8:   Opcode = NVPTX::STV_i8_v4_avar; break;
958      case MVT::i16:  Opcode = NVPTX::STV_i16_v4_avar; break;
959      case MVT::i32:  Opcode = NVPTX::STV_i32_v4_avar; break;
960      case MVT::f32:  Opcode = NVPTX::STV_f32_v4_avar; break;
961      }
962      break;
963    }
964    StOps.push_back(Addr);
965  } else if (Subtarget.is64Bit()?
966             SelectADDRsi64(N2.getNode(), N2, Base, Offset):
967             SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
968    switch (N->getOpcode()) {
969    default: return NULL;
970    case NVPTXISD::StoreV2:
971      switch (EltVT.getSimpleVT().SimpleTy) {
972      default: return NULL;
973      case MVT::i8:   Opcode = NVPTX::STV_i8_v2_asi; break;
974      case MVT::i16:  Opcode = NVPTX::STV_i16_v2_asi; break;
975      case MVT::i32:  Opcode = NVPTX::STV_i32_v2_asi; break;
976      case MVT::i64:  Opcode = NVPTX::STV_i64_v2_asi; break;
977      case MVT::f32:  Opcode = NVPTX::STV_f32_v2_asi; break;
978      case MVT::f64:  Opcode = NVPTX::STV_f64_v2_asi; break;
979      }
980      break;
981    case NVPTXISD::StoreV4:
982      switch (EltVT.getSimpleVT().SimpleTy) {
983      default: return NULL;
984      case MVT::i8:   Opcode = NVPTX::STV_i8_v4_asi; break;
985      case MVT::i16:  Opcode = NVPTX::STV_i16_v4_asi; break;
986      case MVT::i32:  Opcode = NVPTX::STV_i32_v4_asi; break;
987      case MVT::f32:  Opcode = NVPTX::STV_f32_v4_asi; break;
988      }
989      break;
990    }
991    StOps.push_back(Base);
992    StOps.push_back(Offset);
993  } else if (Subtarget.is64Bit()?
994             SelectADDRri64(N2.getNode(), N2, Base, Offset):
995             SelectADDRri(N2.getNode(), N2, Base, Offset)) {
996    if (Subtarget.is64Bit()) {
997      switch (N->getOpcode()) {
998      default: return NULL;
999      case NVPTXISD::StoreV2:
1000        switch (EltVT.getSimpleVT().SimpleTy) {
1001        default: return NULL;
1002        case MVT::i8:   Opcode = NVPTX::STV_i8_v2_ari_64; break;
1003        case MVT::i16:  Opcode = NVPTX::STV_i16_v2_ari_64; break;
1004        case MVT::i32:  Opcode = NVPTX::STV_i32_v2_ari_64; break;
1005        case MVT::i64:  Opcode = NVPTX::STV_i64_v2_ari_64; break;
1006        case MVT::f32:  Opcode = NVPTX::STV_f32_v2_ari_64; break;
1007        case MVT::f64:  Opcode = NVPTX::STV_f64_v2_ari_64; break;
1008        }
1009        break;
1010      case NVPTXISD::StoreV4:
1011        switch (EltVT.getSimpleVT().SimpleTy) {
1012        default: return NULL;
1013        case MVT::i8:   Opcode = NVPTX::STV_i8_v4_ari_64; break;
1014        case MVT::i16:  Opcode = NVPTX::STV_i16_v4_ari_64; break;
1015        case MVT::i32:  Opcode = NVPTX::STV_i32_v4_ari_64; break;
1016        case MVT::f32:  Opcode = NVPTX::STV_f32_v4_ari_64; break;
1017        }
1018        break;
1019      }
1020    } else {
1021      switch (N->getOpcode()) {
1022      default: return NULL;
1023      case NVPTXISD::StoreV2:
1024        switch (EltVT.getSimpleVT().SimpleTy) {
1025        default: return NULL;
1026        case MVT::i8:   Opcode = NVPTX::STV_i8_v2_ari; break;
1027        case MVT::i16:  Opcode = NVPTX::STV_i16_v2_ari; break;
1028        case MVT::i32:  Opcode = NVPTX::STV_i32_v2_ari; break;
1029        case MVT::i64:  Opcode = NVPTX::STV_i64_v2_ari; break;
1030        case MVT::f32:  Opcode = NVPTX::STV_f32_v2_ari; break;
1031        case MVT::f64:  Opcode = NVPTX::STV_f64_v2_ari; break;
1032        }
1033        break;
1034      case NVPTXISD::StoreV4:
1035        switch (EltVT.getSimpleVT().SimpleTy) {
1036        default: return NULL;
1037        case MVT::i8:   Opcode = NVPTX::STV_i8_v4_ari; break;
1038        case MVT::i16:  Opcode = NVPTX::STV_i16_v4_ari; break;
1039        case MVT::i32:  Opcode = NVPTX::STV_i32_v4_ari; break;
1040        case MVT::f32:  Opcode = NVPTX::STV_f32_v4_ari; break;
1041        }
1042        break;
1043      }
1044    }
1045    StOps.push_back(Base);
1046    StOps.push_back(Offset);
1047  } else {
1048    if (Subtarget.is64Bit()) {
1049      switch (N->getOpcode()) {
1050      default: return NULL;
1051      case NVPTXISD::StoreV2:
1052        switch (EltVT.getSimpleVT().SimpleTy) {
1053        default: return NULL;
1054        case MVT::i8:   Opcode = NVPTX::STV_i8_v2_areg_64; break;
1055        case MVT::i16:  Opcode = NVPTX::STV_i16_v2_areg_64; break;
1056        case MVT::i32:  Opcode = NVPTX::STV_i32_v2_areg_64; break;
1057        case MVT::i64:  Opcode = NVPTX::STV_i64_v2_areg_64; break;
1058        case MVT::f32:  Opcode = NVPTX::STV_f32_v2_areg_64; break;
1059        case MVT::f64:  Opcode = NVPTX::STV_f64_v2_areg_64; break;
1060        }
1061        break;
1062      case NVPTXISD::StoreV4:
1063        switch (EltVT.getSimpleVT().SimpleTy) {
1064        default: return NULL;
1065        case MVT::i8:   Opcode = NVPTX::STV_i8_v4_areg_64; break;
1066        case MVT::i16:  Opcode = NVPTX::STV_i16_v4_areg_64; break;
1067        case MVT::i32:  Opcode = NVPTX::STV_i32_v4_areg_64; break;
1068        case MVT::f32:  Opcode = NVPTX::STV_f32_v4_areg_64; break;
1069        }
1070        break;
1071      }
1072    } else {
1073      switch (N->getOpcode()) {
1074      default: return NULL;
1075      case NVPTXISD::StoreV2:
1076        switch (EltVT.getSimpleVT().SimpleTy) {
1077        default: return NULL;
1078        case MVT::i8:   Opcode = NVPTX::STV_i8_v2_areg; break;
1079        case MVT::i16:  Opcode = NVPTX::STV_i16_v2_areg; break;
1080        case MVT::i32:  Opcode = NVPTX::STV_i32_v2_areg; break;
1081        case MVT::i64:  Opcode = NVPTX::STV_i64_v2_areg; break;
1082        case MVT::f32:  Opcode = NVPTX::STV_f32_v2_areg; break;
1083        case MVT::f64:  Opcode = NVPTX::STV_f64_v2_areg; break;
1084        }
1085        break;
1086      case NVPTXISD::StoreV4:
1087        switch (EltVT.getSimpleVT().SimpleTy) {
1088        default: return NULL;
1089        case MVT::i8:   Opcode = NVPTX::STV_i8_v4_areg; break;
1090        case MVT::i16:  Opcode = NVPTX::STV_i16_v4_areg; break;
1091        case MVT::i32:  Opcode = NVPTX::STV_i32_v4_areg; break;
1092        case MVT::f32:  Opcode = NVPTX::STV_f32_v4_areg; break;
1093        }
1094        break;
1095      }
1096    }
1097    StOps.push_back(N2);
1098  }
1099
1100  StOps.push_back(Chain);
1101
1102  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size());
1103
1104  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1105  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1106  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1107
1108  return ST;
1109}
1110
1111// SelectDirectAddr - Match a direct address for DAG.
1112// A direct address could be a globaladdress or externalsymbol.
1113bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
1114  // Return true if TGA or ES.
1115  if (N.getOpcode() == ISD::TargetGlobalAddress
1116      || N.getOpcode() == ISD::TargetExternalSymbol) {
1117    Address = N;
1118    return true;
1119  }
1120  if (N.getOpcode() == NVPTXISD::Wrapper) {
1121    Address = N.getOperand(0);
1122    return true;
1123  }
1124  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
1125    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
1126    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
1127      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
1128        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
1129  }
1130  return false;
1131}
1132
1133// symbol+offset
1134bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
1135                                         SDValue &Base, SDValue &Offset,
1136                                         MVT mvt) {
1137  if (Addr.getOpcode() == ISD::ADD) {
1138    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
1139      SDValue base=Addr.getOperand(0);
1140      if (SelectDirectAddr(base, Base)) {
1141        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
1142        return true;
1143      }
1144    }
1145  }
1146  return false;
1147}
1148
1149// symbol+offset
1150bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
1151                                     SDValue &Base, SDValue &Offset) {
1152  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
1153}
1154
1155// symbol+offset
1156bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
1157                                       SDValue &Base, SDValue &Offset) {
1158  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
1159}
1160
1161// register+offset
1162bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
1163                                         SDValue &Base, SDValue &Offset,
1164                                         MVT mvt) {
1165  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
1166    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
1167    Offset = CurDAG->getTargetConstant(0, mvt);
1168    return true;
1169  }
1170  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
1171      Addr.getOpcode() == ISD::TargetGlobalAddress)
1172    return false;  // direct calls.
1173
1174  if (Addr.getOpcode() == ISD::ADD) {
1175    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
1176      return false;
1177    }
1178    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
1179      if (FrameIndexSDNode *FIN =
1180          dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
1181        // Constant offset from frame ref.
1182        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
1183      else
1184        Base = Addr.getOperand(0);
1185      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
1186      return true;
1187    }
1188  }
1189  return false;
1190}
1191
1192// register+offset
1193bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
1194                                     SDValue &Base, SDValue &Offset) {
1195  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
1196}
1197
1198// register+offset
1199bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
1200                                       SDValue &Base, SDValue &Offset) {
1201  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
1202}
1203
1204bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
1205                                                 unsigned int spN) const {
1206  const Value *Src = NULL;
1207  // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
1208  // the classof() for MemSDNode does not include MemIntrinsicSDNode
1209  // (See SelectionDAGNodes.h). So we need to check for both.
1210  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
1211    Src = mN->getSrcValue();
1212  }
1213  else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
1214    Src = mN->getSrcValue();
1215  }
1216  if (!Src)
1217    return false;
1218  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
1219    return (PT->getAddressSpace() == spN);
1220  return false;
1221}
1222
1223/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
1224/// inline asm expressions.
1225bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
1226                                                     char ConstraintCode,
1227                                                 std::vector<SDValue> &OutOps) {
1228  SDValue Op0, Op1;
1229  switch (ConstraintCode) {
1230  default: return true;
1231  case 'm':   // memory
1232    if (SelectDirectAddr(Op, Op0)) {
1233      OutOps.push_back(Op0);
1234      OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
1235      return false;
1236    }
1237    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
1238      OutOps.push_back(Op0);
1239      OutOps.push_back(Op1);
1240      return false;
1241    }
1242    break;
1243  }
1244  return true;
1245}
1246
1247// Return true if N is a undef or a constant.
1248// If N was undef, return a (i8imm 0) in Retval
1249// If N was imm, convert it to i8imm and return in Retval
1250// Note: The convert to i8imm is required, otherwise the
1251// pattern matcher inserts a bunch of IMOVi8rr to convert
1252// the imm to i8imm, and this causes instruction selection
1253// to fail.
1254bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
1255                                   SDValue &Retval) {
1256  if (!(N.getOpcode() == ISD::UNDEF) &&
1257      !(N.getOpcode() == ISD::Constant))
1258    return false;
1259
1260  if (N.getOpcode() == ISD::UNDEF)
1261    Retval = CurDAG->getTargetConstant(0, MVT::i8);
1262  else {
1263    ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
1264    unsigned retval = cn->getZExtValue();
1265    Retval = CurDAG->getTargetConstant(retval, MVT::i8);
1266  }
1267  return true;
1268}
1269