NVPTXISelDAGToDAG.cpp revision af878315192a9fa5b534364e327c24aeb8d73b5a
1//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
14
15#include "llvm/Instructions.h"
16#include "llvm/Support/raw_ostream.h"
17#include "NVPTXISelDAGToDAG.h"
18#include "llvm/Support/Debug.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/CommandLine.h"
21#include "llvm/Target/TargetIntrinsicInfo.h"
22#include "llvm/GlobalValue.h"
23
24#undef DEBUG_TYPE
25#define DEBUG_TYPE "nvptx-isel"
26
27using namespace llvm;
28
29
30static cl::opt<bool>
31UseFMADInstruction("nvptx-mad-enable",
32                   cl::ZeroOrMore,
33                cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
34                   cl::init(false));
35
36static cl::opt<int>
37FMAContractLevel("nvptx-fma-level",
38                 cl::ZeroOrMore,
39                 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
40                     " 1: do it  2: do it aggressively"),
41                     cl::init(2));
42
43
44static cl::opt<int>
45UsePrecDivF32("nvptx-prec-divf32",
46              cl::ZeroOrMore,
47             cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
48                  " IEEE Compliant F32 div.rnd if avaiable."),
49                  cl::init(2));
50
51/// createNVPTXISelDag - This pass converts a legalized DAG into a
52/// NVPTX-specific DAG, ready for instruction scheduling.
53FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
54                                       llvm::CodeGenOpt::Level OptLevel) {
55  return new NVPTXDAGToDAGISel(TM, OptLevel);
56}
57
58
59NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
60                                     CodeGenOpt::Level OptLevel)
61: SelectionDAGISel(tm, OptLevel),
62  Subtarget(tm.getSubtarget<NVPTXSubtarget>())
63{
64  // Always do fma.f32 fpcontract if the target supports the instruction.
65  // Always do fma.f64 fpcontract if the target supports the instruction.
66  // Do mad.f32 is nvptx-mad-enable is specified and the target does not
67  // support fma.f32.
68
69  doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
70  doFMAF32 =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
71      (FMAContractLevel>=1);
72  doFMAF64 =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
73      (FMAContractLevel>=1);
74  doFMAF32AGG =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
75      (FMAContractLevel==2);
76  doFMAF64AGG =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
77      (FMAContractLevel==2);
78
79  allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
80
81  UseF32FTZ = false;
82
83  doMulWide = (OptLevel > 0);
84
85  // Decide how to translate f32 div
86  do_DIVF32_PREC = UsePrecDivF32;
87  // sm less than sm_20 does not support div.rnd. Use div.full.
88  if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
89    do_DIVF32_PREC = 1;
90
91}
92
93/// Select - Select instructions not customized! Used for
94/// expanded, promoted and normal instructions.
95SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
96
97  if (N->isMachineOpcode())
98    return NULL;   // Already selected.
99
100  SDNode *ResNode = NULL;
101  switch (N->getOpcode()) {
102  case ISD::LOAD:
103    ResNode = SelectLoad(N);
104    break;
105  case ISD::STORE:
106    ResNode = SelectStore(N);
107    break;
108  }
109  if (ResNode)
110    return ResNode;
111  return SelectCode(N);
112}
113
114
115static unsigned int
116getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
117{
118  const Value *Src = N->getSrcValue();
119  if (!Src)
120    return NVPTX::PTXLdStInstCode::LOCAL;
121
122  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
123    switch (PT->getAddressSpace()) {
124    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
125    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
126    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
127    case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
128      return NVPTX::PTXLdStInstCode::CONSTANT;
129    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
130    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
131    case llvm::ADDRESS_SPACE_CONST:
132      // If the arch supports generic address space, translate it to GLOBAL
133      // for correctness.
134      // If the arch does not support generic address space, then the arch
135      // does not really support ADDRESS_SPACE_CONST, translate it to
136      // to CONSTANT for better performance.
137      if (Subtarget.hasGenericLdSt())
138        return NVPTX::PTXLdStInstCode::GLOBAL;
139      else
140        return NVPTX::PTXLdStInstCode::CONSTANT;
141    default: break;
142    }
143  }
144  return NVPTX::PTXLdStInstCode::LOCAL;
145}
146
147
148SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
149  DebugLoc dl = N->getDebugLoc();
150  LoadSDNode *LD = cast<LoadSDNode>(N);
151  EVT LoadedVT = LD->getMemoryVT();
152  SDNode *NVPTXLD= NULL;
153
154  // do not support pre/post inc/dec
155  if (LD->isIndexed())
156    return NULL;
157
158  if (!LoadedVT.isSimple())
159    return NULL;
160
161  // Address Space Setting
162  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
163
164  // Volatile Setting
165  // - .volatile is only availalble for .global and .shared
166  bool isVolatile = LD->isVolatile();
167  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
168      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
169      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
170    isVolatile = false;
171
172  // Vector Setting
173  MVT SimpleVT = LoadedVT.getSimpleVT();
174  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
175  if (SimpleVT.isVector()) {
176    unsigned num = SimpleVT.getVectorNumElements();
177    if (num == 2)
178      vecType = NVPTX::PTXLdStInstCode::V2;
179    else if (num == 4)
180      vecType = NVPTX::PTXLdStInstCode::V4;
181    else
182      return NULL;
183  }
184
185  // Type Setting: fromType + fromTypeWidth
186  //
187  // Sign   : ISD::SEXTLOAD
188  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
189  //          type is integer
190  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
191  MVT ScalarVT = SimpleVT.getScalarType();
192  unsigned fromTypeWidth =  ScalarVT.getSizeInBits();
193  unsigned int fromType;
194  if ((LD->getExtensionType() == ISD::SEXTLOAD))
195    fromType = NVPTX::PTXLdStInstCode::Signed;
196  else if (ScalarVT.isFloatingPoint())
197    fromType = NVPTX::PTXLdStInstCode::Float;
198  else
199    fromType = NVPTX::PTXLdStInstCode::Unsigned;
200
201  // Create the machine instruction DAG
202  SDValue Chain = N->getOperand(0);
203  SDValue N1 = N->getOperand(1);
204  SDValue Addr;
205  SDValue Offset, Base;
206  unsigned Opcode;
207  MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
208
209  if (SelectDirectAddr(N1, Addr)) {
210    switch (TargetVT) {
211    case MVT::i8:    Opcode = NVPTX::LD_i8_avar; break;
212    case MVT::i16:   Opcode = NVPTX::LD_i16_avar; break;
213    case MVT::i32:   Opcode = NVPTX::LD_i32_avar; break;
214    case MVT::i64:   Opcode = NVPTX::LD_i64_avar; break;
215    case MVT::f32:   Opcode = NVPTX::LD_f32_avar; break;
216    case MVT::f64:   Opcode = NVPTX::LD_f64_avar; break;
217    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_avar; break;
218    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
219    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
220    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
221    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
222    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
223    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_avar; break;
224    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
225    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
226    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
227    default: return NULL;
228    }
229    SDValue Ops[] = { getI32Imm(isVolatile),
230                      getI32Imm(codeAddrSpace),
231                      getI32Imm(vecType),
232                      getI32Imm(fromType),
233                      getI32Imm(fromTypeWidth),
234                      Addr, Chain };
235    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
236                                     MVT::Other, Ops, 7);
237  } else if (Subtarget.is64Bit()?
238      SelectADDRsi64(N1.getNode(), N1, Base, Offset):
239      SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
240    switch (TargetVT) {
241    case MVT::i8:    Opcode = NVPTX::LD_i8_asi; break;
242    case MVT::i16:   Opcode = NVPTX::LD_i16_asi; break;
243    case MVT::i32:   Opcode = NVPTX::LD_i32_asi; break;
244    case MVT::i64:   Opcode = NVPTX::LD_i64_asi; break;
245    case MVT::f32:   Opcode = NVPTX::LD_f32_asi; break;
246    case MVT::f64:   Opcode = NVPTX::LD_f64_asi; break;
247    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_asi; break;
248    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
249    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
250    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
251    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
252    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
253    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_asi; break;
254    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
255    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
256    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
257    default: return NULL;
258    }
259    SDValue Ops[] = { getI32Imm(isVolatile),
260                      getI32Imm(codeAddrSpace),
261                      getI32Imm(vecType),
262                      getI32Imm(fromType),
263                      getI32Imm(fromTypeWidth),
264                      Base, Offset, Chain };
265    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
266                                     MVT::Other, Ops, 8);
267  } else if (Subtarget.is64Bit()?
268      SelectADDRri64(N1.getNode(), N1, Base, Offset):
269      SelectADDRri(N1.getNode(), N1, Base, Offset)) {
270    switch (TargetVT) {
271    case MVT::i8:    Opcode = NVPTX::LD_i8_ari; break;
272    case MVT::i16:   Opcode = NVPTX::LD_i16_ari; break;
273    case MVT::i32:   Opcode = NVPTX::LD_i32_ari; break;
274    case MVT::i64:   Opcode = NVPTX::LD_i64_ari; break;
275    case MVT::f32:   Opcode = NVPTX::LD_f32_ari; break;
276    case MVT::f64:   Opcode = NVPTX::LD_f64_ari; break;
277    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_ari; break;
278    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
279    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
280    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
281    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
282    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
283    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_ari; break;
284    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
285    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
286    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
287    default: return NULL;
288    }
289    SDValue Ops[] = { getI32Imm(isVolatile),
290                      getI32Imm(codeAddrSpace),
291                      getI32Imm(vecType),
292                      getI32Imm(fromType),
293                      getI32Imm(fromTypeWidth),
294                      Base, Offset, Chain };
295    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
296                                     MVT::Other, Ops, 8);
297  }
298  else {
299    switch (TargetVT) {
300    case MVT::i8:    Opcode = NVPTX::LD_i8_areg; break;
301    case MVT::i16:   Opcode = NVPTX::LD_i16_areg; break;
302    case MVT::i32:   Opcode = NVPTX::LD_i32_areg; break;
303    case MVT::i64:   Opcode = NVPTX::LD_i64_areg; break;
304    case MVT::f32:   Opcode = NVPTX::LD_f32_areg; break;
305    case MVT::f64:   Opcode = NVPTX::LD_f64_areg; break;
306    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_areg; break;
307    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
308    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
309    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
310    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
311    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
312    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_areg; break;
313    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
314    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
315    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
316    default: return NULL;
317    }
318    SDValue Ops[] = { getI32Imm(isVolatile),
319                      getI32Imm(codeAddrSpace),
320                      getI32Imm(vecType),
321                      getI32Imm(fromType),
322                      getI32Imm(fromTypeWidth),
323                      N1, Chain };
324    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
325                                     MVT::Other, Ops, 7);
326  }
327
328  if (NVPTXLD != NULL) {
329    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
330    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
331    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
332  }
333
334  return NVPTXLD;
335}
336
337SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
338  DebugLoc dl = N->getDebugLoc();
339  StoreSDNode *ST = cast<StoreSDNode>(N);
340  EVT StoreVT = ST->getMemoryVT();
341  SDNode *NVPTXST = NULL;
342
343  // do not support pre/post inc/dec
344  if (ST->isIndexed())
345    return NULL;
346
347  if (!StoreVT.isSimple())
348    return NULL;
349
350  // Address Space Setting
351  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
352
353  // Volatile Setting
354  // - .volatile is only availalble for .global and .shared
355  bool isVolatile = ST->isVolatile();
356  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
357      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
358      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
359    isVolatile = false;
360
361  // Vector Setting
362  MVT SimpleVT = StoreVT.getSimpleVT();
363  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
364  if (SimpleVT.isVector()) {
365    unsigned num = SimpleVT.getVectorNumElements();
366    if (num == 2)
367      vecType = NVPTX::PTXLdStInstCode::V2;
368    else if (num == 4)
369      vecType = NVPTX::PTXLdStInstCode::V4;
370    else
371      return NULL;
372  }
373
374  // Type Setting: toType + toTypeWidth
375  // - for integer type, always use 'u'
376  //
377  MVT ScalarVT = SimpleVT.getScalarType();
378  unsigned toTypeWidth =  ScalarVT.getSizeInBits();
379  unsigned int toType;
380  if (ScalarVT.isFloatingPoint())
381    toType = NVPTX::PTXLdStInstCode::Float;
382  else
383    toType = NVPTX::PTXLdStInstCode::Unsigned;
384
385  // Create the machine instruction DAG
386  SDValue Chain = N->getOperand(0);
387  SDValue N1 = N->getOperand(1);
388  SDValue N2 = N->getOperand(2);
389  SDValue Addr;
390  SDValue Offset, Base;
391  unsigned Opcode;
392  MVT::SimpleValueType SourceVT =
393      N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
394
395  if (SelectDirectAddr(N2, Addr)) {
396    switch (SourceVT) {
397    case MVT::i8:    Opcode = NVPTX::ST_i8_avar; break;
398    case MVT::i16:   Opcode = NVPTX::ST_i16_avar; break;
399    case MVT::i32:   Opcode = NVPTX::ST_i32_avar; break;
400    case MVT::i64:   Opcode = NVPTX::ST_i64_avar; break;
401    case MVT::f32:   Opcode = NVPTX::ST_f32_avar; break;
402    case MVT::f64:   Opcode = NVPTX::ST_f64_avar; break;
403    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_avar; break;
404    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
405    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
406    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
407    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
408    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
409    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_avar; break;
410    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
411    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
412    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
413    default: return NULL;
414    }
415    SDValue Ops[] = { N1,
416                      getI32Imm(isVolatile),
417                      getI32Imm(codeAddrSpace),
418                      getI32Imm(vecType),
419                      getI32Imm(toType),
420                      getI32Imm(toTypeWidth),
421                      Addr, Chain };
422    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
423                                     MVT::Other, Ops, 8);
424  } else if (Subtarget.is64Bit()?
425      SelectADDRsi64(N2.getNode(), N2, Base, Offset):
426      SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
427    switch (SourceVT) {
428    case MVT::i8:    Opcode = NVPTX::ST_i8_asi; break;
429    case MVT::i16:   Opcode = NVPTX::ST_i16_asi; break;
430    case MVT::i32:   Opcode = NVPTX::ST_i32_asi; break;
431    case MVT::i64:   Opcode = NVPTX::ST_i64_asi; break;
432    case MVT::f32:   Opcode = NVPTX::ST_f32_asi; break;
433    case MVT::f64:   Opcode = NVPTX::ST_f64_asi; break;
434    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_asi; break;
435    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
436    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
437    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
438    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
439    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
440    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_asi; break;
441    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
442    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
443    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
444    default: return NULL;
445    }
446    SDValue Ops[] = { N1,
447                      getI32Imm(isVolatile),
448                      getI32Imm(codeAddrSpace),
449                      getI32Imm(vecType),
450                      getI32Imm(toType),
451                      getI32Imm(toTypeWidth),
452                      Base, Offset, Chain };
453    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
454                                     MVT::Other, Ops, 9);
455  } else if (Subtarget.is64Bit()?
456      SelectADDRri64(N2.getNode(), N2, Base, Offset):
457      SelectADDRri(N2.getNode(), N2, Base, Offset)) {
458    switch (SourceVT) {
459    case MVT::i8:    Opcode = NVPTX::ST_i8_ari; break;
460    case MVT::i16:   Opcode = NVPTX::ST_i16_ari; break;
461    case MVT::i32:   Opcode = NVPTX::ST_i32_ari; break;
462    case MVT::i64:   Opcode = NVPTX::ST_i64_ari; break;
463    case MVT::f32:   Opcode = NVPTX::ST_f32_ari; break;
464    case MVT::f64:   Opcode = NVPTX::ST_f64_ari; break;
465    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_ari; break;
466    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
467    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
468    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
469    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
470    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
471    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_ari; break;
472    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
473    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
474    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
475    default: return NULL;
476    }
477    SDValue Ops[] = { N1,
478                      getI32Imm(isVolatile),
479                      getI32Imm(codeAddrSpace),
480                      getI32Imm(vecType),
481                      getI32Imm(toType),
482                      getI32Imm(toTypeWidth),
483                      Base, Offset, Chain };
484    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
485                                     MVT::Other, Ops, 9);
486  } else {
487    switch (SourceVT) {
488    case MVT::i8:    Opcode = NVPTX::ST_i8_areg; break;
489    case MVT::i16:   Opcode = NVPTX::ST_i16_areg; break;
490    case MVT::i32:   Opcode = NVPTX::ST_i32_areg; break;
491    case MVT::i64:   Opcode = NVPTX::ST_i64_areg; break;
492    case MVT::f32:   Opcode = NVPTX::ST_f32_areg; break;
493    case MVT::f64:   Opcode = NVPTX::ST_f64_areg; break;
494    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_areg; break;
495    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
496    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
497    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
498    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
499    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
500    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_areg; break;
501    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
502    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
503    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
504    default: return NULL;
505    }
506    SDValue Ops[] = { N1,
507                      getI32Imm(isVolatile),
508                      getI32Imm(codeAddrSpace),
509                      getI32Imm(vecType),
510                      getI32Imm(toType),
511                      getI32Imm(toTypeWidth),
512                      N2, Chain };
513    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
514                                     MVT::Other, Ops, 8);
515  }
516
517  if (NVPTXST != NULL) {
518    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
519    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
520    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
521  }
522
523  return NVPTXST;
524}
525
526// SelectDirectAddr - Match a direct address for DAG.
527// A direct address could be a globaladdress or externalsymbol.
528bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
529  // Return true if TGA or ES.
530  if (N.getOpcode() == ISD::TargetGlobalAddress
531      || N.getOpcode() == ISD::TargetExternalSymbol) {
532    Address = N;
533    return true;
534  }
535  if (N.getOpcode() == NVPTXISD::Wrapper) {
536    Address = N.getOperand(0);
537    return true;
538  }
539  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
540    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
541    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
542      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
543        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
544  }
545  return false;
546}
547
548// symbol+offset
549bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
550                                         SDValue &Base, SDValue &Offset,
551                                         MVT mvt) {
552  if (Addr.getOpcode() == ISD::ADD) {
553    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
554      SDValue base=Addr.getOperand(0);
555      if (SelectDirectAddr(base, Base)) {
556        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
557        return true;
558      }
559    }
560  }
561  return false;
562}
563
564// symbol+offset
565bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
566                                     SDValue &Base, SDValue &Offset) {
567  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
568}
569
570// symbol+offset
571bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
572                                       SDValue &Base, SDValue &Offset) {
573  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
574}
575
576// register+offset
577bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
578                                         SDValue &Base, SDValue &Offset,
579                                         MVT mvt) {
580  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
581    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
582    Offset = CurDAG->getTargetConstant(0, mvt);
583    return true;
584  }
585  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
586      Addr.getOpcode() == ISD::TargetGlobalAddress)
587    return false;  // direct calls.
588
589  if (Addr.getOpcode() == ISD::ADD) {
590    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
591      return false;
592    }
593    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
594      if (FrameIndexSDNode *FIN =
595          dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
596        // Constant offset from frame ref.
597        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
598      else
599        Base = Addr.getOperand(0);
600      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
601      return true;
602    }
603  }
604  return false;
605}
606
607// register+offset
608bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
609                                     SDValue &Base, SDValue &Offset) {
610  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
611}
612
613// register+offset
614bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
615                                       SDValue &Base, SDValue &Offset) {
616  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
617}
618
619bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
620                                                 unsigned int spN) const {
621  const Value *Src = NULL;
622  // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
623  // the classof() for MemSDNode does not include MemIntrinsicSDNode
624  // (See SelectionDAGNodes.h). So we need to check for both.
625  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
626    Src = mN->getSrcValue();
627  }
628  else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
629    Src = mN->getSrcValue();
630  }
631  if (!Src)
632    return false;
633  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
634    return (PT->getAddressSpace() == spN);
635  return false;
636}
637
638/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
639/// inline asm expressions.
640bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
641                                                     char ConstraintCode,
642                                                 std::vector<SDValue> &OutOps) {
643  SDValue Op0, Op1;
644  switch (ConstraintCode) {
645  default: return true;
646  case 'm':   // memory
647    if (SelectDirectAddr(Op, Op0)) {
648      OutOps.push_back(Op0);
649      OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
650      return false;
651    }
652    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
653      OutOps.push_back(Op0);
654      OutOps.push_back(Op1);
655      return false;
656    }
657    break;
658  }
659  return true;
660}
661
662// Return true if N is a undef or a constant.
663// If N was undef, return a (i8imm 0) in Retval
664// If N was imm, convert it to i8imm and return in Retval
665// Note: The convert to i8imm is required, otherwise the
666// pattern matcher inserts a bunch of IMOVi8rr to convert
667// the imm to i8imm, and this causes instruction selection
668// to fail.
669bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
670                                   SDValue &Retval) {
671  if (!(N.getOpcode() == ISD::UNDEF) &&
672      !(N.getOpcode() == ISD::Constant))
673    return false;
674
675  if (N.getOpcode() == ISD::UNDEF)
676    Retval = CurDAG->getTargetConstant(0, MVT::i8);
677  else {
678    ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
679    unsigned retval = cn->getZExtValue();
680    Retval = CurDAG->getTargetConstant(retval, MVT::i8);
681  }
682  return true;
683}
684