1//
2//                     The LLVM Compiler Infrastructure
3//
4// This file is distributed under the University of Illinois Open Source
5// License. See LICENSE.TXT for details.
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that NVPTX uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXISelLowering.h"
15#include "NVPTX.h"
16#include "NVPTXTargetMachine.h"
17#include "NVPTXTargetObjectFile.h"
18#include "NVPTXUtilities.h"
19#include "llvm/CodeGen/Analysis.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
25#include "llvm/IR/CallSite.h"
26#include "llvm/IR/DerivedTypes.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/GlobalValue.h"
29#include "llvm/IR/IntrinsicInst.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/Module.h"
32#include "llvm/MC/MCSectionELF.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Support/MathExtras.h"
37#include "llvm/Support/raw_ostream.h"
38#include <sstream>
39
40#undef DEBUG_TYPE
41#define DEBUG_TYPE "nvptx-lower"
42
43using namespace llvm;
44
45static unsigned int uniqueCallSite = 0;
46
47static cl::opt<bool> sched4reg(
48    "nvptx-sched4reg",
49    cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
50
51static cl::opt<unsigned>
52FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
53                    cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
54                             " 1: do it  2: do it aggressively"),
55                    cl::init(2));
56
57static bool IsPTXVectorType(MVT VT) {
58  switch (VT.SimpleTy) {
59  default:
60    return false;
61  case MVT::v2i1:
62  case MVT::v4i1:
63  case MVT::v2i8:
64  case MVT::v4i8:
65  case MVT::v2i16:
66  case MVT::v4i16:
67  case MVT::v2i32:
68  case MVT::v4i32:
69  case MVT::v2i64:
70  case MVT::v2f32:
71  case MVT::v4f32:
72  case MVT::v2f64:
73    return true;
74  }
75}
76
77/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
78/// EVTs that compose it.  Unlike ComputeValueVTs, this will break apart vectors
79/// into their primitive components.
80/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
81/// same number of types as the Ins/Outs arrays in LowerFormalArguments,
82/// LowerCall, and LowerReturn.
83static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
84                               SmallVectorImpl<EVT> &ValueVTs,
85                               SmallVectorImpl<uint64_t> *Offsets = nullptr,
86                               uint64_t StartingOffset = 0) {
87  SmallVector<EVT, 16> TempVTs;
88  SmallVector<uint64_t, 16> TempOffsets;
89
90  ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset);
91  for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
92    EVT VT = TempVTs[i];
93    uint64_t Off = TempOffsets[i];
94    if (VT.isVector())
95      for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) {
96        ValueVTs.push_back(VT.getVectorElementType());
97        if (Offsets)
98          Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize());
99      }
100    else {
101      ValueVTs.push_back(VT);
102      if (Offsets)
103        Offsets->push_back(Off);
104    }
105  }
106}
107
108// NVPTXTargetLowering Constructor.
109NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
110                                         const NVPTXSubtarget &STI)
111    : TargetLowering(TM), nvTM(&TM), STI(STI) {
112
113  // always lower memset, memcpy, and memmove intrinsics to load/store
114  // instructions, rather
115  // then generating calls to memset, mempcy or memmove.
116  MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
117  MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
118  MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
119
120  setBooleanContents(ZeroOrNegativeOneBooleanContent);
121  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
122
123  // Jump is Expensive. Don't create extra control flow for 'and', 'or'
124  // condition branches.
125  setJumpIsExpensive(true);
126
127  // By default, use the Source scheduling
128  if (sched4reg)
129    setSchedulingPreference(Sched::RegPressure);
130  else
131    setSchedulingPreference(Sched::Source);
132
133  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
134  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
135  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
136  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
137  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
138  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
139
140  // Operations not directly supported by NVPTX.
141  setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
142  setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
143  setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
144  setOperationAction(ISD::SELECT_CC, MVT::i8, Expand);
145  setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
146  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
147  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
148  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
149  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
150  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
151  setOperationAction(ISD::BR_CC, MVT::i8, Expand);
152  setOperationAction(ISD::BR_CC, MVT::i16, Expand);
153  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
154  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
155  // Some SIGN_EXTEND_INREG can be done using cvt instruction.
156  // For others we will expand to a SHL/SRA pair.
157  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
158  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
159  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
160  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
161  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
162
163  setOperationAction(ISD::SHL_PARTS, MVT::i32  , Custom);
164  setOperationAction(ISD::SRA_PARTS, MVT::i32  , Custom);
165  setOperationAction(ISD::SRL_PARTS, MVT::i32  , Custom);
166  setOperationAction(ISD::SHL_PARTS, MVT::i64  , Custom);
167  setOperationAction(ISD::SRA_PARTS, MVT::i64  , Custom);
168  setOperationAction(ISD::SRL_PARTS, MVT::i64  , Custom);
169
170  if (STI.hasROT64()) {
171    setOperationAction(ISD::ROTL, MVT::i64, Legal);
172    setOperationAction(ISD::ROTR, MVT::i64, Legal);
173  } else {
174    setOperationAction(ISD::ROTL, MVT::i64, Expand);
175    setOperationAction(ISD::ROTR, MVT::i64, Expand);
176  }
177  if (STI.hasROT32()) {
178    setOperationAction(ISD::ROTL, MVT::i32, Legal);
179    setOperationAction(ISD::ROTR, MVT::i32, Legal);
180  } else {
181    setOperationAction(ISD::ROTL, MVT::i32, Expand);
182    setOperationAction(ISD::ROTR, MVT::i32, Expand);
183  }
184
185  setOperationAction(ISD::ROTL, MVT::i16, Expand);
186  setOperationAction(ISD::ROTR, MVT::i16, Expand);
187  setOperationAction(ISD::ROTL, MVT::i8, Expand);
188  setOperationAction(ISD::ROTR, MVT::i8, Expand);
189  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
190  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
191  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
192
193  // Indirect branch is not supported.
194  // This also disables Jump Table creation.
195  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
196  setOperationAction(ISD::BRIND, MVT::Other, Expand);
197
198  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
199  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
200
201  // We want to legalize constant related memmove and memcopy
202  // intrinsics.
203  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
204
205  // Turn FP extload into load/fextend
206  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
207  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
208  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
209  // Turn FP truncstore into trunc + store.
210  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
211  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
212  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
213
214  // PTX does not support load / store predicate registers
215  setOperationAction(ISD::LOAD, MVT::i1, Custom);
216  setOperationAction(ISD::STORE, MVT::i1, Custom);
217
218  for (MVT VT : MVT::integer_valuetypes()) {
219    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
220    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
221    setTruncStoreAction(VT, MVT::i1, Expand);
222  }
223
224  // This is legal in NVPTX
225  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
226  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
227
228  // TRAP can be lowered to PTX trap
229  setOperationAction(ISD::TRAP, MVT::Other, Legal);
230
231  setOperationAction(ISD::ADDC, MVT::i64, Expand);
232  setOperationAction(ISD::ADDE, MVT::i64, Expand);
233
234  // Register custom handling for vector loads/stores
235  for (MVT VT : MVT::vector_valuetypes()) {
236    if (IsPTXVectorType(VT)) {
237      setOperationAction(ISD::LOAD, VT, Custom);
238      setOperationAction(ISD::STORE, VT, Custom);
239      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
240    }
241  }
242
243  // Custom handling for i8 intrinsics
244  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
245
246  setOperationAction(ISD::CTLZ, MVT::i16, Legal);
247  setOperationAction(ISD::CTLZ, MVT::i32, Legal);
248  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
249  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal);
250  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal);
251  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
252  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
253  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
254  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
255  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
256  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
257  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
258  setOperationAction(ISD::CTPOP, MVT::i16, Legal);
259  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
260  setOperationAction(ISD::CTPOP, MVT::i64, Legal);
261
262  // PTX does not directly support SELP of i1, so promote to i32 first
263  setOperationAction(ISD::SELECT, MVT::i1, Custom);
264
265  // We have some custom DAG combine patterns for these nodes
266  setTargetDAGCombine(ISD::ADD);
267  setTargetDAGCombine(ISD::AND);
268  setTargetDAGCombine(ISD::FADD);
269  setTargetDAGCombine(ISD::MUL);
270  setTargetDAGCombine(ISD::SHL);
271
272  // Now deduce the information based on the above mentioned
273  // actions
274  computeRegisterProperties(STI.getRegisterInfo());
275}
276
277const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
278  switch (Opcode) {
279  default:
280    return nullptr;
281  case NVPTXISD::CALL:
282    return "NVPTXISD::CALL";
283  case NVPTXISD::RET_FLAG:
284    return "NVPTXISD::RET_FLAG";
285  case NVPTXISD::Wrapper:
286    return "NVPTXISD::Wrapper";
287  case NVPTXISD::DeclareParam:
288    return "NVPTXISD::DeclareParam";
289  case NVPTXISD::DeclareScalarParam:
290    return "NVPTXISD::DeclareScalarParam";
291  case NVPTXISD::DeclareRet:
292    return "NVPTXISD::DeclareRet";
293  case NVPTXISD::DeclareRetParam:
294    return "NVPTXISD::DeclareRetParam";
295  case NVPTXISD::PrintCall:
296    return "NVPTXISD::PrintCall";
297  case NVPTXISD::LoadParam:
298    return "NVPTXISD::LoadParam";
299  case NVPTXISD::LoadParamV2:
300    return "NVPTXISD::LoadParamV2";
301  case NVPTXISD::LoadParamV4:
302    return "NVPTXISD::LoadParamV4";
303  case NVPTXISD::StoreParam:
304    return "NVPTXISD::StoreParam";
305  case NVPTXISD::StoreParamV2:
306    return "NVPTXISD::StoreParamV2";
307  case NVPTXISD::StoreParamV4:
308    return "NVPTXISD::StoreParamV4";
309  case NVPTXISD::StoreParamS32:
310    return "NVPTXISD::StoreParamS32";
311  case NVPTXISD::StoreParamU32:
312    return "NVPTXISD::StoreParamU32";
313  case NVPTXISD::CallArgBegin:
314    return "NVPTXISD::CallArgBegin";
315  case NVPTXISD::CallArg:
316    return "NVPTXISD::CallArg";
317  case NVPTXISD::LastCallArg:
318    return "NVPTXISD::LastCallArg";
319  case NVPTXISD::CallArgEnd:
320    return "NVPTXISD::CallArgEnd";
321  case NVPTXISD::CallVoid:
322    return "NVPTXISD::CallVoid";
323  case NVPTXISD::CallVal:
324    return "NVPTXISD::CallVal";
325  case NVPTXISD::CallSymbol:
326    return "NVPTXISD::CallSymbol";
327  case NVPTXISD::Prototype:
328    return "NVPTXISD::Prototype";
329  case NVPTXISD::MoveParam:
330    return "NVPTXISD::MoveParam";
331  case NVPTXISD::StoreRetval:
332    return "NVPTXISD::StoreRetval";
333  case NVPTXISD::StoreRetvalV2:
334    return "NVPTXISD::StoreRetvalV2";
335  case NVPTXISD::StoreRetvalV4:
336    return "NVPTXISD::StoreRetvalV4";
337  case NVPTXISD::PseudoUseParam:
338    return "NVPTXISD::PseudoUseParam";
339  case NVPTXISD::RETURN:
340    return "NVPTXISD::RETURN";
341  case NVPTXISD::CallSeqBegin:
342    return "NVPTXISD::CallSeqBegin";
343  case NVPTXISD::CallSeqEnd:
344    return "NVPTXISD::CallSeqEnd";
345  case NVPTXISD::CallPrototype:
346    return "NVPTXISD::CallPrototype";
347  case NVPTXISD::LoadV2:
348    return "NVPTXISD::LoadV2";
349  case NVPTXISD::LoadV4:
350    return "NVPTXISD::LoadV4";
351  case NVPTXISD::LDGV2:
352    return "NVPTXISD::LDGV2";
353  case NVPTXISD::LDGV4:
354    return "NVPTXISD::LDGV4";
355  case NVPTXISD::LDUV2:
356    return "NVPTXISD::LDUV2";
357  case NVPTXISD::LDUV4:
358    return "NVPTXISD::LDUV4";
359  case NVPTXISD::StoreV2:
360    return "NVPTXISD::StoreV2";
361  case NVPTXISD::StoreV4:
362    return "NVPTXISD::StoreV4";
363  case NVPTXISD::FUN_SHFL_CLAMP:
364    return "NVPTXISD::FUN_SHFL_CLAMP";
365  case NVPTXISD::FUN_SHFR_CLAMP:
366    return "NVPTXISD::FUN_SHFR_CLAMP";
367  case NVPTXISD::IMAD:
368    return "NVPTXISD::IMAD";
369  case NVPTXISD::MUL_WIDE_SIGNED:
370    return "NVPTXISD::MUL_WIDE_SIGNED";
371  case NVPTXISD::MUL_WIDE_UNSIGNED:
372    return "NVPTXISD::MUL_WIDE_UNSIGNED";
373  case NVPTXISD::Tex1DFloatS32:        return "NVPTXISD::Tex1DFloatS32";
374  case NVPTXISD::Tex1DFloatFloat:      return "NVPTXISD::Tex1DFloatFloat";
375  case NVPTXISD::Tex1DFloatFloatLevel:
376    return "NVPTXISD::Tex1DFloatFloatLevel";
377  case NVPTXISD::Tex1DFloatFloatGrad:
378    return "NVPTXISD::Tex1DFloatFloatGrad";
379  case NVPTXISD::Tex1DS32S32:          return "NVPTXISD::Tex1DS32S32";
380  case NVPTXISD::Tex1DS32Float:        return "NVPTXISD::Tex1DS32Float";
381  case NVPTXISD::Tex1DS32FloatLevel:
382    return "NVPTXISD::Tex1DS32FloatLevel";
383  case NVPTXISD::Tex1DS32FloatGrad:
384    return "NVPTXISD::Tex1DS32FloatGrad";
385  case NVPTXISD::Tex1DU32S32:          return "NVPTXISD::Tex1DU32S32";
386  case NVPTXISD::Tex1DU32Float:        return "NVPTXISD::Tex1DU32Float";
387  case NVPTXISD::Tex1DU32FloatLevel:
388    return "NVPTXISD::Tex1DU32FloatLevel";
389  case NVPTXISD::Tex1DU32FloatGrad:
390    return "NVPTXISD::Tex1DU32FloatGrad";
391  case NVPTXISD::Tex1DArrayFloatS32:   return "NVPTXISD::Tex1DArrayFloatS32";
392  case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
393  case NVPTXISD::Tex1DArrayFloatFloatLevel:
394    return "NVPTXISD::Tex1DArrayFloatFloatLevel";
395  case NVPTXISD::Tex1DArrayFloatFloatGrad:
396    return "NVPTXISD::Tex1DArrayFloatFloatGrad";
397  case NVPTXISD::Tex1DArrayS32S32:     return "NVPTXISD::Tex1DArrayS32S32";
398  case NVPTXISD::Tex1DArrayS32Float:   return "NVPTXISD::Tex1DArrayS32Float";
399  case NVPTXISD::Tex1DArrayS32FloatLevel:
400    return "NVPTXISD::Tex1DArrayS32FloatLevel";
401  case NVPTXISD::Tex1DArrayS32FloatGrad:
402    return "NVPTXISD::Tex1DArrayS32FloatGrad";
403  case NVPTXISD::Tex1DArrayU32S32:     return "NVPTXISD::Tex1DArrayU32S32";
404  case NVPTXISD::Tex1DArrayU32Float:   return "NVPTXISD::Tex1DArrayU32Float";
405  case NVPTXISD::Tex1DArrayU32FloatLevel:
406    return "NVPTXISD::Tex1DArrayU32FloatLevel";
407  case NVPTXISD::Tex1DArrayU32FloatGrad:
408    return "NVPTXISD::Tex1DArrayU32FloatGrad";
409  case NVPTXISD::Tex2DFloatS32:        return "NVPTXISD::Tex2DFloatS32";
410  case NVPTXISD::Tex2DFloatFloat:      return "NVPTXISD::Tex2DFloatFloat";
411  case NVPTXISD::Tex2DFloatFloatLevel:
412    return "NVPTXISD::Tex2DFloatFloatLevel";
413  case NVPTXISD::Tex2DFloatFloatGrad:
414    return "NVPTXISD::Tex2DFloatFloatGrad";
415  case NVPTXISD::Tex2DS32S32:          return "NVPTXISD::Tex2DS32S32";
416  case NVPTXISD::Tex2DS32Float:        return "NVPTXISD::Tex2DS32Float";
417  case NVPTXISD::Tex2DS32FloatLevel:
418    return "NVPTXISD::Tex2DS32FloatLevel";
419  case NVPTXISD::Tex2DS32FloatGrad:
420    return "NVPTXISD::Tex2DS32FloatGrad";
421  case NVPTXISD::Tex2DU32S32:          return "NVPTXISD::Tex2DU32S32";
422  case NVPTXISD::Tex2DU32Float:        return "NVPTXISD::Tex2DU32Float";
423  case NVPTXISD::Tex2DU32FloatLevel:
424    return "NVPTXISD::Tex2DU32FloatLevel";
425  case NVPTXISD::Tex2DU32FloatGrad:
426    return "NVPTXISD::Tex2DU32FloatGrad";
427  case NVPTXISD::Tex2DArrayFloatS32:   return "NVPTXISD::Tex2DArrayFloatS32";
428  case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
429  case NVPTXISD::Tex2DArrayFloatFloatLevel:
430    return "NVPTXISD::Tex2DArrayFloatFloatLevel";
431  case NVPTXISD::Tex2DArrayFloatFloatGrad:
432    return "NVPTXISD::Tex2DArrayFloatFloatGrad";
433  case NVPTXISD::Tex2DArrayS32S32:     return "NVPTXISD::Tex2DArrayS32S32";
434  case NVPTXISD::Tex2DArrayS32Float:   return "NVPTXISD::Tex2DArrayS32Float";
435  case NVPTXISD::Tex2DArrayS32FloatLevel:
436    return "NVPTXISD::Tex2DArrayS32FloatLevel";
437  case NVPTXISD::Tex2DArrayS32FloatGrad:
438    return "NVPTXISD::Tex2DArrayS32FloatGrad";
439  case NVPTXISD::Tex2DArrayU32S32:     return "NVPTXISD::Tex2DArrayU32S32";
440  case NVPTXISD::Tex2DArrayU32Float:   return "NVPTXISD::Tex2DArrayU32Float";
441  case NVPTXISD::Tex2DArrayU32FloatLevel:
442    return "NVPTXISD::Tex2DArrayU32FloatLevel";
443  case NVPTXISD::Tex2DArrayU32FloatGrad:
444    return "NVPTXISD::Tex2DArrayU32FloatGrad";
445  case NVPTXISD::Tex3DFloatS32:        return "NVPTXISD::Tex3DFloatS32";
446  case NVPTXISD::Tex3DFloatFloat:      return "NVPTXISD::Tex3DFloatFloat";
447  case NVPTXISD::Tex3DFloatFloatLevel:
448    return "NVPTXISD::Tex3DFloatFloatLevel";
449  case NVPTXISD::Tex3DFloatFloatGrad:
450    return "NVPTXISD::Tex3DFloatFloatGrad";
451  case NVPTXISD::Tex3DS32S32:          return "NVPTXISD::Tex3DS32S32";
452  case NVPTXISD::Tex3DS32Float:        return "NVPTXISD::Tex3DS32Float";
453  case NVPTXISD::Tex3DS32FloatLevel:
454    return "NVPTXISD::Tex3DS32FloatLevel";
455  case NVPTXISD::Tex3DS32FloatGrad:
456    return "NVPTXISD::Tex3DS32FloatGrad";
457  case NVPTXISD::Tex3DU32S32:          return "NVPTXISD::Tex3DU32S32";
458  case NVPTXISD::Tex3DU32Float:        return "NVPTXISD::Tex3DU32Float";
459  case NVPTXISD::Tex3DU32FloatLevel:
460    return "NVPTXISD::Tex3DU32FloatLevel";
461  case NVPTXISD::Tex3DU32FloatGrad:
462    return "NVPTXISD::Tex3DU32FloatGrad";
463  case NVPTXISD::TexCubeFloatFloat:      return "NVPTXISD::TexCubeFloatFloat";
464  case NVPTXISD::TexCubeFloatFloatLevel:
465    return "NVPTXISD::TexCubeFloatFloatLevel";
466  case NVPTXISD::TexCubeS32Float:        return "NVPTXISD::TexCubeS32Float";
467  case NVPTXISD::TexCubeS32FloatLevel:
468    return "NVPTXISD::TexCubeS32FloatLevel";
469  case NVPTXISD::TexCubeU32Float:        return "NVPTXISD::TexCubeU32Float";
470  case NVPTXISD::TexCubeU32FloatLevel:
471    return "NVPTXISD::TexCubeU32FloatLevel";
472  case NVPTXISD::TexCubeArrayFloatFloat:
473    return "NVPTXISD::TexCubeArrayFloatFloat";
474  case NVPTXISD::TexCubeArrayFloatFloatLevel:
475    return "NVPTXISD::TexCubeArrayFloatFloatLevel";
476  case NVPTXISD::TexCubeArrayS32Float:
477    return "NVPTXISD::TexCubeArrayS32Float";
478  case NVPTXISD::TexCubeArrayS32FloatLevel:
479    return "NVPTXISD::TexCubeArrayS32FloatLevel";
480  case NVPTXISD::TexCubeArrayU32Float:
481    return "NVPTXISD::TexCubeArrayU32Float";
482  case NVPTXISD::TexCubeArrayU32FloatLevel:
483    return "NVPTXISD::TexCubeArrayU32FloatLevel";
484  case NVPTXISD::Tld4R2DFloatFloat:
485    return "NVPTXISD::Tld4R2DFloatFloat";
486  case NVPTXISD::Tld4G2DFloatFloat:
487    return "NVPTXISD::Tld4G2DFloatFloat";
488  case NVPTXISD::Tld4B2DFloatFloat:
489    return "NVPTXISD::Tld4B2DFloatFloat";
490  case NVPTXISD::Tld4A2DFloatFloat:
491    return "NVPTXISD::Tld4A2DFloatFloat";
492  case NVPTXISD::Tld4R2DS64Float:
493    return "NVPTXISD::Tld4R2DS64Float";
494  case NVPTXISD::Tld4G2DS64Float:
495    return "NVPTXISD::Tld4G2DS64Float";
496  case NVPTXISD::Tld4B2DS64Float:
497    return "NVPTXISD::Tld4B2DS64Float";
498  case NVPTXISD::Tld4A2DS64Float:
499    return "NVPTXISD::Tld4A2DS64Float";
500  case NVPTXISD::Tld4R2DU64Float:
501    return "NVPTXISD::Tld4R2DU64Float";
502  case NVPTXISD::Tld4G2DU64Float:
503    return "NVPTXISD::Tld4G2DU64Float";
504  case NVPTXISD::Tld4B2DU64Float:
505    return "NVPTXISD::Tld4B2DU64Float";
506  case NVPTXISD::Tld4A2DU64Float:
507    return "NVPTXISD::Tld4A2DU64Float";
508
509  case NVPTXISD::TexUnified1DFloatS32:
510    return "NVPTXISD::TexUnified1DFloatS32";
511  case NVPTXISD::TexUnified1DFloatFloat:
512    return "NVPTXISD::TexUnified1DFloatFloat";
513  case NVPTXISD::TexUnified1DFloatFloatLevel:
514    return "NVPTXISD::TexUnified1DFloatFloatLevel";
515  case NVPTXISD::TexUnified1DFloatFloatGrad:
516    return "NVPTXISD::TexUnified1DFloatFloatGrad";
517  case NVPTXISD::TexUnified1DS32S32:
518    return "NVPTXISD::TexUnified1DS32S32";
519  case NVPTXISD::TexUnified1DS32Float:
520    return "NVPTXISD::TexUnified1DS32Float";
521  case NVPTXISD::TexUnified1DS32FloatLevel:
522    return "NVPTXISD::TexUnified1DS32FloatLevel";
523  case NVPTXISD::TexUnified1DS32FloatGrad:
524    return "NVPTXISD::TexUnified1DS32FloatGrad";
525  case NVPTXISD::TexUnified1DU32S32:
526    return "NVPTXISD::TexUnified1DU32S32";
527  case NVPTXISD::TexUnified1DU32Float:
528    return "NVPTXISD::TexUnified1DU32Float";
529  case NVPTXISD::TexUnified1DU32FloatLevel:
530    return "NVPTXISD::TexUnified1DU32FloatLevel";
531  case NVPTXISD::TexUnified1DU32FloatGrad:
532    return "NVPTXISD::TexUnified1DU32FloatGrad";
533  case NVPTXISD::TexUnified1DArrayFloatS32:
534    return "NVPTXISD::TexUnified1DArrayFloatS32";
535  case NVPTXISD::TexUnified1DArrayFloatFloat:
536    return "NVPTXISD::TexUnified1DArrayFloatFloat";
537  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
538    return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
539  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
540    return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
541  case NVPTXISD::TexUnified1DArrayS32S32:
542    return "NVPTXISD::TexUnified1DArrayS32S32";
543  case NVPTXISD::TexUnified1DArrayS32Float:
544    return "NVPTXISD::TexUnified1DArrayS32Float";
545  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
546    return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
547  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
548    return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
549  case NVPTXISD::TexUnified1DArrayU32S32:
550    return "NVPTXISD::TexUnified1DArrayU32S32";
551  case NVPTXISD::TexUnified1DArrayU32Float:
552    return "NVPTXISD::TexUnified1DArrayU32Float";
553  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
554    return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
555  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
556    return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
557  case NVPTXISD::TexUnified2DFloatS32:
558    return "NVPTXISD::TexUnified2DFloatS32";
559  case NVPTXISD::TexUnified2DFloatFloat:
560    return "NVPTXISD::TexUnified2DFloatFloat";
561  case NVPTXISD::TexUnified2DFloatFloatLevel:
562    return "NVPTXISD::TexUnified2DFloatFloatLevel";
563  case NVPTXISD::TexUnified2DFloatFloatGrad:
564    return "NVPTXISD::TexUnified2DFloatFloatGrad";
565  case NVPTXISD::TexUnified2DS32S32:
566    return "NVPTXISD::TexUnified2DS32S32";
567  case NVPTXISD::TexUnified2DS32Float:
568    return "NVPTXISD::TexUnified2DS32Float";
569  case NVPTXISD::TexUnified2DS32FloatLevel:
570    return "NVPTXISD::TexUnified2DS32FloatLevel";
571  case NVPTXISD::TexUnified2DS32FloatGrad:
572    return "NVPTXISD::TexUnified2DS32FloatGrad";
573  case NVPTXISD::TexUnified2DU32S32:
574    return "NVPTXISD::TexUnified2DU32S32";
575  case NVPTXISD::TexUnified2DU32Float:
576    return "NVPTXISD::TexUnified2DU32Float";
577  case NVPTXISD::TexUnified2DU32FloatLevel:
578    return "NVPTXISD::TexUnified2DU32FloatLevel";
579  case NVPTXISD::TexUnified2DU32FloatGrad:
580    return "NVPTXISD::TexUnified2DU32FloatGrad";
581  case NVPTXISD::TexUnified2DArrayFloatS32:
582    return "NVPTXISD::TexUnified2DArrayFloatS32";
583  case NVPTXISD::TexUnified2DArrayFloatFloat:
584    return "NVPTXISD::TexUnified2DArrayFloatFloat";
585  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
586    return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
587  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
588    return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
589  case NVPTXISD::TexUnified2DArrayS32S32:
590    return "NVPTXISD::TexUnified2DArrayS32S32";
591  case NVPTXISD::TexUnified2DArrayS32Float:
592    return "NVPTXISD::TexUnified2DArrayS32Float";
593  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
594    return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
595  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
596    return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
597  case NVPTXISD::TexUnified2DArrayU32S32:
598    return "NVPTXISD::TexUnified2DArrayU32S32";
599  case NVPTXISD::TexUnified2DArrayU32Float:
600    return "NVPTXISD::TexUnified2DArrayU32Float";
601  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
602    return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
603  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
604    return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
605  case NVPTXISD::TexUnified3DFloatS32:
606    return "NVPTXISD::TexUnified3DFloatS32";
607  case NVPTXISD::TexUnified3DFloatFloat:
608    return "NVPTXISD::TexUnified3DFloatFloat";
609  case NVPTXISD::TexUnified3DFloatFloatLevel:
610    return "NVPTXISD::TexUnified3DFloatFloatLevel";
611  case NVPTXISD::TexUnified3DFloatFloatGrad:
612    return "NVPTXISD::TexUnified3DFloatFloatGrad";
613  case NVPTXISD::TexUnified3DS32S32:
614    return "NVPTXISD::TexUnified3DS32S32";
615  case NVPTXISD::TexUnified3DS32Float:
616    return "NVPTXISD::TexUnified3DS32Float";
617  case NVPTXISD::TexUnified3DS32FloatLevel:
618    return "NVPTXISD::TexUnified3DS32FloatLevel";
619  case NVPTXISD::TexUnified3DS32FloatGrad:
620    return "NVPTXISD::TexUnified3DS32FloatGrad";
621  case NVPTXISD::TexUnified3DU32S32:
622    return "NVPTXISD::TexUnified3DU32S32";
623  case NVPTXISD::TexUnified3DU32Float:
624    return "NVPTXISD::TexUnified3DU32Float";
625  case NVPTXISD::TexUnified3DU32FloatLevel:
626    return "NVPTXISD::TexUnified3DU32FloatLevel";
627  case NVPTXISD::TexUnified3DU32FloatGrad:
628    return "NVPTXISD::TexUnified3DU32FloatGrad";
629  case NVPTXISD::TexUnifiedCubeFloatFloat:
630    return "NVPTXISD::TexUnifiedCubeFloatFloat";
631  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
632    return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
633  case NVPTXISD::TexUnifiedCubeS32Float:
634    return "NVPTXISD::TexUnifiedCubeS32Float";
635  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
636    return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
637  case NVPTXISD::TexUnifiedCubeU32Float:
638    return "NVPTXISD::TexUnifiedCubeU32Float";
639  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
640    return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
641  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
642    return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
643  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
644    return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
645  case NVPTXISD::TexUnifiedCubeArrayS32Float:
646    return "NVPTXISD::TexUnifiedCubeArrayS32Float";
647  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
648    return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
649  case NVPTXISD::TexUnifiedCubeArrayU32Float:
650    return "NVPTXISD::TexUnifiedCubeArrayU32Float";
651  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
652    return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
653  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
654    return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
655  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
656    return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
657  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
658    return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
659  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
660    return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
661  case NVPTXISD::Tld4UnifiedR2DS64Float:
662    return "NVPTXISD::Tld4UnifiedR2DS64Float";
663  case NVPTXISD::Tld4UnifiedG2DS64Float:
664    return "NVPTXISD::Tld4UnifiedG2DS64Float";
665  case NVPTXISD::Tld4UnifiedB2DS64Float:
666    return "NVPTXISD::Tld4UnifiedB2DS64Float";
667  case NVPTXISD::Tld4UnifiedA2DS64Float:
668    return "NVPTXISD::Tld4UnifiedA2DS64Float";
669  case NVPTXISD::Tld4UnifiedR2DU64Float:
670    return "NVPTXISD::Tld4UnifiedR2DU64Float";
671  case NVPTXISD::Tld4UnifiedG2DU64Float:
672    return "NVPTXISD::Tld4UnifiedG2DU64Float";
673  case NVPTXISD::Tld4UnifiedB2DU64Float:
674    return "NVPTXISD::Tld4UnifiedB2DU64Float";
675  case NVPTXISD::Tld4UnifiedA2DU64Float:
676    return "NVPTXISD::Tld4UnifiedA2DU64Float";
677
678  case NVPTXISD::Suld1DI8Clamp:          return "NVPTXISD::Suld1DI8Clamp";
679  case NVPTXISD::Suld1DI16Clamp:         return "NVPTXISD::Suld1DI16Clamp";
680  case NVPTXISD::Suld1DI32Clamp:         return "NVPTXISD::Suld1DI32Clamp";
681  case NVPTXISD::Suld1DI64Clamp:         return "NVPTXISD::Suld1DI64Clamp";
682  case NVPTXISD::Suld1DV2I8Clamp:        return "NVPTXISD::Suld1DV2I8Clamp";
683  case NVPTXISD::Suld1DV2I16Clamp:       return "NVPTXISD::Suld1DV2I16Clamp";
684  case NVPTXISD::Suld1DV2I32Clamp:       return "NVPTXISD::Suld1DV2I32Clamp";
685  case NVPTXISD::Suld1DV2I64Clamp:       return "NVPTXISD::Suld1DV2I64Clamp";
686  case NVPTXISD::Suld1DV4I8Clamp:        return "NVPTXISD::Suld1DV4I8Clamp";
687  case NVPTXISD::Suld1DV4I16Clamp:       return "NVPTXISD::Suld1DV4I16Clamp";
688  case NVPTXISD::Suld1DV4I32Clamp:       return "NVPTXISD::Suld1DV4I32Clamp";
689
690  case NVPTXISD::Suld1DArrayI8Clamp:   return "NVPTXISD::Suld1DArrayI8Clamp";
691  case NVPTXISD::Suld1DArrayI16Clamp:  return "NVPTXISD::Suld1DArrayI16Clamp";
692  case NVPTXISD::Suld1DArrayI32Clamp:  return "NVPTXISD::Suld1DArrayI32Clamp";
693  case NVPTXISD::Suld1DArrayI64Clamp:  return "NVPTXISD::Suld1DArrayI64Clamp";
694  case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
695  case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp";
696  case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp";
697  case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp";
698  case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
699  case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp";
700  case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp";
701
702  case NVPTXISD::Suld2DI8Clamp:          return "NVPTXISD::Suld2DI8Clamp";
703  case NVPTXISD::Suld2DI16Clamp:         return "NVPTXISD::Suld2DI16Clamp";
704  case NVPTXISD::Suld2DI32Clamp:         return "NVPTXISD::Suld2DI32Clamp";
705  case NVPTXISD::Suld2DI64Clamp:         return "NVPTXISD::Suld2DI64Clamp";
706  case NVPTXISD::Suld2DV2I8Clamp:        return "NVPTXISD::Suld2DV2I8Clamp";
707  case NVPTXISD::Suld2DV2I16Clamp:       return "NVPTXISD::Suld2DV2I16Clamp";
708  case NVPTXISD::Suld2DV2I32Clamp:       return "NVPTXISD::Suld2DV2I32Clamp";
709  case NVPTXISD::Suld2DV2I64Clamp:       return "NVPTXISD::Suld2DV2I64Clamp";
710  case NVPTXISD::Suld2DV4I8Clamp:        return "NVPTXISD::Suld2DV4I8Clamp";
711  case NVPTXISD::Suld2DV4I16Clamp:       return "NVPTXISD::Suld2DV4I16Clamp";
712  case NVPTXISD::Suld2DV4I32Clamp:       return "NVPTXISD::Suld2DV4I32Clamp";
713
714  case NVPTXISD::Suld2DArrayI8Clamp:   return "NVPTXISD::Suld2DArrayI8Clamp";
715  case NVPTXISD::Suld2DArrayI16Clamp:  return "NVPTXISD::Suld2DArrayI16Clamp";
716  case NVPTXISD::Suld2DArrayI32Clamp:  return "NVPTXISD::Suld2DArrayI32Clamp";
717  case NVPTXISD::Suld2DArrayI64Clamp:  return "NVPTXISD::Suld2DArrayI64Clamp";
718  case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
719  case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp";
720  case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp";
721  case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp";
722  case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
723  case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp";
724  case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp";
725
726  case NVPTXISD::Suld3DI8Clamp:          return "NVPTXISD::Suld3DI8Clamp";
727  case NVPTXISD::Suld3DI16Clamp:         return "NVPTXISD::Suld3DI16Clamp";
728  case NVPTXISD::Suld3DI32Clamp:         return "NVPTXISD::Suld3DI32Clamp";
729  case NVPTXISD::Suld3DI64Clamp:         return "NVPTXISD::Suld3DI64Clamp";
730  case NVPTXISD::Suld3DV2I8Clamp:        return "NVPTXISD::Suld3DV2I8Clamp";
731  case NVPTXISD::Suld3DV2I16Clamp:       return "NVPTXISD::Suld3DV2I16Clamp";
732  case NVPTXISD::Suld3DV2I32Clamp:       return "NVPTXISD::Suld3DV2I32Clamp";
733  case NVPTXISD::Suld3DV2I64Clamp:       return "NVPTXISD::Suld3DV2I64Clamp";
734  case NVPTXISD::Suld3DV4I8Clamp:        return "NVPTXISD::Suld3DV4I8Clamp";
735  case NVPTXISD::Suld3DV4I16Clamp:       return "NVPTXISD::Suld3DV4I16Clamp";
736  case NVPTXISD::Suld3DV4I32Clamp:       return "NVPTXISD::Suld3DV4I32Clamp";
737
738  case NVPTXISD::Suld1DI8Trap:          return "NVPTXISD::Suld1DI8Trap";
739  case NVPTXISD::Suld1DI16Trap:         return "NVPTXISD::Suld1DI16Trap";
740  case NVPTXISD::Suld1DI32Trap:         return "NVPTXISD::Suld1DI32Trap";
741  case NVPTXISD::Suld1DI64Trap:         return "NVPTXISD::Suld1DI64Trap";
742  case NVPTXISD::Suld1DV2I8Trap:        return "NVPTXISD::Suld1DV2I8Trap";
743  case NVPTXISD::Suld1DV2I16Trap:       return "NVPTXISD::Suld1DV2I16Trap";
744  case NVPTXISD::Suld1DV2I32Trap:       return "NVPTXISD::Suld1DV2I32Trap";
745  case NVPTXISD::Suld1DV2I64Trap:       return "NVPTXISD::Suld1DV2I64Trap";
746  case NVPTXISD::Suld1DV4I8Trap:        return "NVPTXISD::Suld1DV4I8Trap";
747  case NVPTXISD::Suld1DV4I16Trap:       return "NVPTXISD::Suld1DV4I16Trap";
748  case NVPTXISD::Suld1DV4I32Trap:       return "NVPTXISD::Suld1DV4I32Trap";
749
750  case NVPTXISD::Suld1DArrayI8Trap:     return "NVPTXISD::Suld1DArrayI8Trap";
751  case NVPTXISD::Suld1DArrayI16Trap:    return "NVPTXISD::Suld1DArrayI16Trap";
752  case NVPTXISD::Suld1DArrayI32Trap:    return "NVPTXISD::Suld1DArrayI32Trap";
753  case NVPTXISD::Suld1DArrayI64Trap:    return "NVPTXISD::Suld1DArrayI64Trap";
754  case NVPTXISD::Suld1DArrayV2I8Trap:   return "NVPTXISD::Suld1DArrayV2I8Trap";
755  case NVPTXISD::Suld1DArrayV2I16Trap:  return "NVPTXISD::Suld1DArrayV2I16Trap";
756  case NVPTXISD::Suld1DArrayV2I32Trap:  return "NVPTXISD::Suld1DArrayV2I32Trap";
757  case NVPTXISD::Suld1DArrayV2I64Trap:  return "NVPTXISD::Suld1DArrayV2I64Trap";
758  case NVPTXISD::Suld1DArrayV4I8Trap:   return "NVPTXISD::Suld1DArrayV4I8Trap";
759  case NVPTXISD::Suld1DArrayV4I16Trap:  return "NVPTXISD::Suld1DArrayV4I16Trap";
760  case NVPTXISD::Suld1DArrayV4I32Trap:  return "NVPTXISD::Suld1DArrayV4I32Trap";
761
762  case NVPTXISD::Suld2DI8Trap:          return "NVPTXISD::Suld2DI8Trap";
763  case NVPTXISD::Suld2DI16Trap:         return "NVPTXISD::Suld2DI16Trap";
764  case NVPTXISD::Suld2DI32Trap:         return "NVPTXISD::Suld2DI32Trap";
765  case NVPTXISD::Suld2DI64Trap:         return "NVPTXISD::Suld2DI64Trap";
766  case NVPTXISD::Suld2DV2I8Trap:        return "NVPTXISD::Suld2DV2I8Trap";
767  case NVPTXISD::Suld2DV2I16Trap:       return "NVPTXISD::Suld2DV2I16Trap";
768  case NVPTXISD::Suld2DV2I32Trap:       return "NVPTXISD::Suld2DV2I32Trap";
769  case NVPTXISD::Suld2DV2I64Trap:       return "NVPTXISD::Suld2DV2I64Trap";
770  case NVPTXISD::Suld2DV4I8Trap:        return "NVPTXISD::Suld2DV4I8Trap";
771  case NVPTXISD::Suld2DV4I16Trap:       return "NVPTXISD::Suld2DV4I16Trap";
772  case NVPTXISD::Suld2DV4I32Trap:       return "NVPTXISD::Suld2DV4I32Trap";
773
774  case NVPTXISD::Suld2DArrayI8Trap:     return "NVPTXISD::Suld2DArrayI8Trap";
775  case NVPTXISD::Suld2DArrayI16Trap:    return "NVPTXISD::Suld2DArrayI16Trap";
776  case NVPTXISD::Suld2DArrayI32Trap:    return "NVPTXISD::Suld2DArrayI32Trap";
777  case NVPTXISD::Suld2DArrayI64Trap:    return "NVPTXISD::Suld2DArrayI64Trap";
778  case NVPTXISD::Suld2DArrayV2I8Trap:   return "NVPTXISD::Suld2DArrayV2I8Trap";
779  case NVPTXISD::Suld2DArrayV2I16Trap:  return "NVPTXISD::Suld2DArrayV2I16Trap";
780  case NVPTXISD::Suld2DArrayV2I32Trap:  return "NVPTXISD::Suld2DArrayV2I32Trap";
781  case NVPTXISD::Suld2DArrayV2I64Trap:  return "NVPTXISD::Suld2DArrayV2I64Trap";
782  case NVPTXISD::Suld2DArrayV4I8Trap:   return "NVPTXISD::Suld2DArrayV4I8Trap";
783  case NVPTXISD::Suld2DArrayV4I16Trap:  return "NVPTXISD::Suld2DArrayV4I16Trap";
784  case NVPTXISD::Suld2DArrayV4I32Trap:  return "NVPTXISD::Suld2DArrayV4I32Trap";
785
786  case NVPTXISD::Suld3DI8Trap:          return "NVPTXISD::Suld3DI8Trap";
787  case NVPTXISD::Suld3DI16Trap:         return "NVPTXISD::Suld3DI16Trap";
788  case NVPTXISD::Suld3DI32Trap:         return "NVPTXISD::Suld3DI32Trap";
789  case NVPTXISD::Suld3DI64Trap:         return "NVPTXISD::Suld3DI64Trap";
790  case NVPTXISD::Suld3DV2I8Trap:        return "NVPTXISD::Suld3DV2I8Trap";
791  case NVPTXISD::Suld3DV2I16Trap:       return "NVPTXISD::Suld3DV2I16Trap";
792  case NVPTXISD::Suld3DV2I32Trap:       return "NVPTXISD::Suld3DV2I32Trap";
793  case NVPTXISD::Suld3DV2I64Trap:       return "NVPTXISD::Suld3DV2I64Trap";
794  case NVPTXISD::Suld3DV4I8Trap:        return "NVPTXISD::Suld3DV4I8Trap";
795  case NVPTXISD::Suld3DV4I16Trap:       return "NVPTXISD::Suld3DV4I16Trap";
796  case NVPTXISD::Suld3DV4I32Trap:       return "NVPTXISD::Suld3DV4I32Trap";
797
798  case NVPTXISD::Suld1DI8Zero:          return "NVPTXISD::Suld1DI8Zero";
799  case NVPTXISD::Suld1DI16Zero:         return "NVPTXISD::Suld1DI16Zero";
800  case NVPTXISD::Suld1DI32Zero:         return "NVPTXISD::Suld1DI32Zero";
801  case NVPTXISD::Suld1DI64Zero:         return "NVPTXISD::Suld1DI64Zero";
802  case NVPTXISD::Suld1DV2I8Zero:        return "NVPTXISD::Suld1DV2I8Zero";
803  case NVPTXISD::Suld1DV2I16Zero:       return "NVPTXISD::Suld1DV2I16Zero";
804  case NVPTXISD::Suld1DV2I32Zero:       return "NVPTXISD::Suld1DV2I32Zero";
805  case NVPTXISD::Suld1DV2I64Zero:       return "NVPTXISD::Suld1DV2I64Zero";
806  case NVPTXISD::Suld1DV4I8Zero:        return "NVPTXISD::Suld1DV4I8Zero";
807  case NVPTXISD::Suld1DV4I16Zero:       return "NVPTXISD::Suld1DV4I16Zero";
808  case NVPTXISD::Suld1DV4I32Zero:       return "NVPTXISD::Suld1DV4I32Zero";
809
810  case NVPTXISD::Suld1DArrayI8Zero:     return "NVPTXISD::Suld1DArrayI8Zero";
811  case NVPTXISD::Suld1DArrayI16Zero:    return "NVPTXISD::Suld1DArrayI16Zero";
812  case NVPTXISD::Suld1DArrayI32Zero:    return "NVPTXISD::Suld1DArrayI32Zero";
813  case NVPTXISD::Suld1DArrayI64Zero:    return "NVPTXISD::Suld1DArrayI64Zero";
814  case NVPTXISD::Suld1DArrayV2I8Zero:   return "NVPTXISD::Suld1DArrayV2I8Zero";
815  case NVPTXISD::Suld1DArrayV2I16Zero:  return "NVPTXISD::Suld1DArrayV2I16Zero";
816  case NVPTXISD::Suld1DArrayV2I32Zero:  return "NVPTXISD::Suld1DArrayV2I32Zero";
817  case NVPTXISD::Suld1DArrayV2I64Zero:  return "NVPTXISD::Suld1DArrayV2I64Zero";
818  case NVPTXISD::Suld1DArrayV4I8Zero:   return "NVPTXISD::Suld1DArrayV4I8Zero";
819  case NVPTXISD::Suld1DArrayV4I16Zero:  return "NVPTXISD::Suld1DArrayV4I16Zero";
820  case NVPTXISD::Suld1DArrayV4I32Zero:  return "NVPTXISD::Suld1DArrayV4I32Zero";
821
822  case NVPTXISD::Suld2DI8Zero:          return "NVPTXISD::Suld2DI8Zero";
823  case NVPTXISD::Suld2DI16Zero:         return "NVPTXISD::Suld2DI16Zero";
824  case NVPTXISD::Suld2DI32Zero:         return "NVPTXISD::Suld2DI32Zero";
825  case NVPTXISD::Suld2DI64Zero:         return "NVPTXISD::Suld2DI64Zero";
826  case NVPTXISD::Suld2DV2I8Zero:        return "NVPTXISD::Suld2DV2I8Zero";
827  case NVPTXISD::Suld2DV2I16Zero:       return "NVPTXISD::Suld2DV2I16Zero";
828  case NVPTXISD::Suld2DV2I32Zero:       return "NVPTXISD::Suld2DV2I32Zero";
829  case NVPTXISD::Suld2DV2I64Zero:       return "NVPTXISD::Suld2DV2I64Zero";
830  case NVPTXISD::Suld2DV4I8Zero:        return "NVPTXISD::Suld2DV4I8Zero";
831  case NVPTXISD::Suld2DV4I16Zero:       return "NVPTXISD::Suld2DV4I16Zero";
832  case NVPTXISD::Suld2DV4I32Zero:       return "NVPTXISD::Suld2DV4I32Zero";
833
834  case NVPTXISD::Suld2DArrayI8Zero:     return "NVPTXISD::Suld2DArrayI8Zero";
835  case NVPTXISD::Suld2DArrayI16Zero:    return "NVPTXISD::Suld2DArrayI16Zero";
836  case NVPTXISD::Suld2DArrayI32Zero:    return "NVPTXISD::Suld2DArrayI32Zero";
837  case NVPTXISD::Suld2DArrayI64Zero:    return "NVPTXISD::Suld2DArrayI64Zero";
838  case NVPTXISD::Suld2DArrayV2I8Zero:   return "NVPTXISD::Suld2DArrayV2I8Zero";
839  case NVPTXISD::Suld2DArrayV2I16Zero:  return "NVPTXISD::Suld2DArrayV2I16Zero";
840  case NVPTXISD::Suld2DArrayV2I32Zero:  return "NVPTXISD::Suld2DArrayV2I32Zero";
841  case NVPTXISD::Suld2DArrayV2I64Zero:  return "NVPTXISD::Suld2DArrayV2I64Zero";
842  case NVPTXISD::Suld2DArrayV4I8Zero:   return "NVPTXISD::Suld2DArrayV4I8Zero";
843  case NVPTXISD::Suld2DArrayV4I16Zero:  return "NVPTXISD::Suld2DArrayV4I16Zero";
844  case NVPTXISD::Suld2DArrayV4I32Zero:  return "NVPTXISD::Suld2DArrayV4I32Zero";
845
846  case NVPTXISD::Suld3DI8Zero:          return "NVPTXISD::Suld3DI8Zero";
847  case NVPTXISD::Suld3DI16Zero:         return "NVPTXISD::Suld3DI16Zero";
848  case NVPTXISD::Suld3DI32Zero:         return "NVPTXISD::Suld3DI32Zero";
849  case NVPTXISD::Suld3DI64Zero:         return "NVPTXISD::Suld3DI64Zero";
850  case NVPTXISD::Suld3DV2I8Zero:        return "NVPTXISD::Suld3DV2I8Zero";
851  case NVPTXISD::Suld3DV2I16Zero:       return "NVPTXISD::Suld3DV2I16Zero";
852  case NVPTXISD::Suld3DV2I32Zero:       return "NVPTXISD::Suld3DV2I32Zero";
853  case NVPTXISD::Suld3DV2I64Zero:       return "NVPTXISD::Suld3DV2I64Zero";
854  case NVPTXISD::Suld3DV4I8Zero:        return "NVPTXISD::Suld3DV4I8Zero";
855  case NVPTXISD::Suld3DV4I16Zero:       return "NVPTXISD::Suld3DV4I16Zero";
856  case NVPTXISD::Suld3DV4I32Zero:       return "NVPTXISD::Suld3DV4I32Zero";
857  }
858}
859
860TargetLoweringBase::LegalizeTypeAction
861NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
862  if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
863    return TypeSplitVector;
864
865  return TargetLoweringBase::getPreferredVectorAction(VT);
866}
867
868SDValue
869NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
870  SDLoc dl(Op);
871  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
872  Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
873  return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
874}
875
876std::string
877NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
878                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
879                                  unsigned retAlignment,
880                                  const ImmutableCallSite *CS) const {
881
882  bool isABI = (STI.getSmVersion() >= 20);
883  assert(isABI && "Non-ABI compilation is not supported");
884  if (!isABI)
885    return "";
886
887  std::stringstream O;
888  O << "prototype_" << uniqueCallSite << " : .callprototype ";
889
890  if (retTy->getTypeID() == Type::VoidTyID) {
891    O << "()";
892  } else {
893    O << "(";
894    if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
895      unsigned size = 0;
896      if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
897        size = ITy->getBitWidth();
898        if (size < 32)
899          size = 32;
900      } else {
901        assert(retTy->isFloatingPointTy() &&
902               "Floating point type expected here");
903        size = retTy->getPrimitiveSizeInBits();
904      }
905
906      O << ".param .b" << size << " _";
907    } else if (isa<PointerType>(retTy)) {
908      O << ".param .b" << getPointerTy().getSizeInBits() << " _";
909    } else if ((retTy->getTypeID() == Type::StructTyID) ||
910               isa<VectorType>(retTy)) {
911      O << ".param .align "
912        << retAlignment
913        << " .b8 _["
914        << getDataLayout()->getTypeAllocSize(retTy) << "]";
915    } else {
916      llvm_unreachable("Unknown return type");
917    }
918    O << ") ";
919  }
920  O << "_ (";
921
922  bool first = true;
923  MVT thePointerTy = getPointerTy();
924
925  unsigned OIdx = 0;
926  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
927    Type *Ty = Args[i].Ty;
928    if (!first) {
929      O << ", ";
930    }
931    first = false;
932
933    if (!Outs[OIdx].Flags.isByVal()) {
934      if (Ty->isAggregateType() || Ty->isVectorTy()) {
935        unsigned align = 0;
936        const CallInst *CallI = cast<CallInst>(CS->getInstruction());
937        const DataLayout *TD = getDataLayout();
938        // +1 because index 0 is reserved for return type alignment
939        if (!llvm::getAlign(*CallI, i + 1, align))
940          align = TD->getABITypeAlignment(Ty);
941        unsigned sz = TD->getTypeAllocSize(Ty);
942        O << ".param .align " << align << " .b8 ";
943        O << "_";
944        O << "[" << sz << "]";
945        // update the index for Outs
946        SmallVector<EVT, 16> vtparts;
947        ComputeValueVTs(*this, Ty, vtparts);
948        if (unsigned len = vtparts.size())
949          OIdx += len - 1;
950        continue;
951      }
952       // i8 types in IR will be i16 types in SDAG
953      assert((getValueType(Ty) == Outs[OIdx].VT ||
954             (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
955             "type mismatch between callee prototype and arguments");
956      // scalar type
957      unsigned sz = 0;
958      if (isa<IntegerType>(Ty)) {
959        sz = cast<IntegerType>(Ty)->getBitWidth();
960        if (sz < 32)
961          sz = 32;
962      } else if (isa<PointerType>(Ty))
963        sz = thePointerTy.getSizeInBits();
964      else
965        sz = Ty->getPrimitiveSizeInBits();
966      O << ".param .b" << sz << " ";
967      O << "_";
968      continue;
969    }
970    const PointerType *PTy = dyn_cast<PointerType>(Ty);
971    assert(PTy && "Param with byval attribute should be a pointer type");
972    Type *ETy = PTy->getElementType();
973
974    unsigned align = Outs[OIdx].Flags.getByValAlign();
975    unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
976    O << ".param .align " << align << " .b8 ";
977    O << "_";
978    O << "[" << sz << "]";
979  }
980  O << ");";
981  return O.str();
982}
983
984unsigned
985NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
986                                          const ImmutableCallSite *CS,
987                                          Type *Ty,
988                                          unsigned Idx) const {
989  const DataLayout *TD = getDataLayout();
990  unsigned Align = 0;
991  const Value *DirectCallee = CS->getCalledFunction();
992
993  if (!DirectCallee) {
994    // We don't have a direct function symbol, but that may be because of
995    // constant cast instructions in the call.
996    const Instruction *CalleeI = CS->getInstruction();
997    assert(CalleeI && "Call target is not a function or derived value?");
998
999    // With bitcast'd call targets, the instruction will be the call
1000    if (isa<CallInst>(CalleeI)) {
1001      // Check if we have call alignment metadata
1002      if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align))
1003        return Align;
1004
1005      const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
1006      // Ignore any bitcast instructions
1007      while(isa<ConstantExpr>(CalleeV)) {
1008        const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
1009        if (!CE->isCast())
1010          break;
1011        // Look through the bitcast
1012        CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
1013      }
1014
1015      // We have now looked past all of the bitcasts.  Do we finally have a
1016      // Function?
1017      if (isa<Function>(CalleeV))
1018        DirectCallee = CalleeV;
1019    }
1020  }
1021
1022  // Check for function alignment information if we found that the
1023  // ultimate target is a Function
1024  if (DirectCallee)
1025    if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align))
1026      return Align;
1027
1028  // Call is indirect or alignment information is not available, fall back to
1029  // the ABI type alignment
1030  return TD->getABITypeAlignment(Ty);
1031}
1032
1033SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1034                                       SmallVectorImpl<SDValue> &InVals) const {
1035  SelectionDAG &DAG = CLI.DAG;
1036  SDLoc dl = CLI.DL;
1037  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1038  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1039  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1040  SDValue Chain = CLI.Chain;
1041  SDValue Callee = CLI.Callee;
1042  bool &isTailCall = CLI.IsTailCall;
1043  ArgListTy &Args = CLI.getArgs();
1044  Type *retTy = CLI.RetTy;
1045  ImmutableCallSite *CS = CLI.CS;
1046
1047  bool isABI = (STI.getSmVersion() >= 20);
1048  assert(isABI && "Non-ABI compilation is not supported");
1049  if (!isABI)
1050    return Chain;
1051  const DataLayout *TD = getDataLayout();
1052  MachineFunction &MF = DAG.getMachineFunction();
1053  const Function *F = MF.getFunction();
1054
1055  SDValue tempChain = Chain;
1056  Chain =
1057      DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
1058                           dl);
1059  SDValue InFlag = Chain.getValue(1);
1060
1061  unsigned paramCount = 0;
1062  // Args.size() and Outs.size() need not match.
1063  // Outs.size() will be larger
1064  //   * if there is an aggregate argument with multiple fields (each field
1065  //     showing up separately in Outs)
1066  //   * if there is a vector argument with more than typical vector-length
1067  //     elements (generally if more than 4) where each vector element is
1068  //     individually present in Outs.
1069  // So a different index should be used for indexing into Outs/OutVals.
1070  // See similar issue in LowerFormalArguments.
1071  unsigned OIdx = 0;
1072  // Declare the .params or .reg need to pass values
1073  // to the function
1074  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
1075    EVT VT = Outs[OIdx].VT;
1076    Type *Ty = Args[i].Ty;
1077
1078    if (!Outs[OIdx].Flags.isByVal()) {
1079      if (Ty->isAggregateType()) {
1080        // aggregate
1081        SmallVector<EVT, 16> vtparts;
1082        SmallVector<uint64_t, 16> Offsets;
1083        ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0);
1084
1085        unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
1086        // declare .param .align <align> .b8 .param<n>[<size>];
1087        unsigned sz = TD->getTypeAllocSize(Ty);
1088        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1089        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32),
1090                                      DAG.getConstant(paramCount, MVT::i32),
1091                                      DAG.getConstant(sz, MVT::i32), InFlag };
1092        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
1093                            DeclareParamOps);
1094        InFlag = Chain.getValue(1);
1095        for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
1096          EVT elemtype = vtparts[j];
1097          unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
1098          if (elemtype.isInteger() && (sz < 8))
1099            sz = 8;
1100          SDValue StVal = OutVals[OIdx];
1101          if (elemtype.getSizeInBits() < 16) {
1102            StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
1103          }
1104          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1105          SDValue CopyParamOps[] = { Chain,
1106                                     DAG.getConstant(paramCount, MVT::i32),
1107                                     DAG.getConstant(Offsets[j], MVT::i32),
1108                                     StVal, InFlag };
1109          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
1110                                          CopyParamVTs, CopyParamOps,
1111                                          elemtype, MachinePointerInfo(),
1112                                          ArgAlign);
1113          InFlag = Chain.getValue(1);
1114          ++OIdx;
1115        }
1116        if (vtparts.size() > 0)
1117          --OIdx;
1118        ++paramCount;
1119        continue;
1120      }
1121      if (Ty->isVectorTy()) {
1122        EVT ObjectVT = getValueType(Ty);
1123        unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
1124        // declare .param .align <align> .b8 .param<n>[<size>];
1125        unsigned sz = TD->getTypeAllocSize(Ty);
1126        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1127        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32),
1128                                      DAG.getConstant(paramCount, MVT::i32),
1129                                      DAG.getConstant(sz, MVT::i32), InFlag };
1130        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
1131                            DeclareParamOps);
1132        InFlag = Chain.getValue(1);
1133        unsigned NumElts = ObjectVT.getVectorNumElements();
1134        EVT EltVT = ObjectVT.getVectorElementType();
1135        EVT MemVT = EltVT;
1136        bool NeedExtend = false;
1137        if (EltVT.getSizeInBits() < 16) {
1138          NeedExtend = true;
1139          EltVT = MVT::i16;
1140        }
1141
1142        // V1 store
1143        if (NumElts == 1) {
1144          SDValue Elt = OutVals[OIdx++];
1145          if (NeedExtend)
1146            Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt);
1147
1148          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1149          SDValue CopyParamOps[] = { Chain,
1150                                     DAG.getConstant(paramCount, MVT::i32),
1151                                     DAG.getConstant(0, MVT::i32), Elt,
1152                                     InFlag };
1153          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
1154                                          CopyParamVTs, CopyParamOps,
1155                                          MemVT, MachinePointerInfo());
1156          InFlag = Chain.getValue(1);
1157        } else if (NumElts == 2) {
1158          SDValue Elt0 = OutVals[OIdx++];
1159          SDValue Elt1 = OutVals[OIdx++];
1160          if (NeedExtend) {
1161            Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0);
1162            Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1);
1163          }
1164
1165          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1166          SDValue CopyParamOps[] = { Chain,
1167                                     DAG.getConstant(paramCount, MVT::i32),
1168                                     DAG.getConstant(0, MVT::i32), Elt0, Elt1,
1169                                     InFlag };
1170          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
1171                                          CopyParamVTs, CopyParamOps,
1172                                          MemVT, MachinePointerInfo());
1173          InFlag = Chain.getValue(1);
1174        } else {
1175          unsigned curOffset = 0;
1176          // V4 stores
1177          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
1178          // the
1179          // vector will be expanded to a power of 2 elements, so we know we can
1180          // always round up to the next multiple of 4 when creating the vector
1181          // stores.
1182          // e.g.  4 elem => 1 st.v4
1183          //       6 elem => 2 st.v4
1184          //       8 elem => 2 st.v4
1185          //      11 elem => 3 st.v4
1186          unsigned VecSize = 4;
1187          if (EltVT.getSizeInBits() == 64)
1188            VecSize = 2;
1189
1190          // This is potentially only part of a vector, so assume all elements
1191          // are packed together.
1192          unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize;
1193
1194          for (unsigned i = 0; i < NumElts; i += VecSize) {
1195            // Get values
1196            SDValue StoreVal;
1197            SmallVector<SDValue, 8> Ops;
1198            Ops.push_back(Chain);
1199            Ops.push_back(DAG.getConstant(paramCount, MVT::i32));
1200            Ops.push_back(DAG.getConstant(curOffset, MVT::i32));
1201
1202            unsigned Opc = NVPTXISD::StoreParamV2;
1203
1204            StoreVal = OutVals[OIdx++];
1205            if (NeedExtend)
1206              StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
1207            Ops.push_back(StoreVal);
1208
1209            if (i + 1 < NumElts) {
1210              StoreVal = OutVals[OIdx++];
1211              if (NeedExtend)
1212                StoreVal =
1213                    DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
1214            } else {
1215              StoreVal = DAG.getUNDEF(EltVT);
1216            }
1217            Ops.push_back(StoreVal);
1218
1219            if (VecSize == 4) {
1220              Opc = NVPTXISD::StoreParamV4;
1221              if (i + 2 < NumElts) {
1222                StoreVal = OutVals[OIdx++];
1223                if (NeedExtend)
1224                  StoreVal =
1225                      DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
1226              } else {
1227                StoreVal = DAG.getUNDEF(EltVT);
1228              }
1229              Ops.push_back(StoreVal);
1230
1231              if (i + 3 < NumElts) {
1232                StoreVal = OutVals[OIdx++];
1233                if (NeedExtend)
1234                  StoreVal =
1235                      DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
1236              } else {
1237                StoreVal = DAG.getUNDEF(EltVT);
1238              }
1239              Ops.push_back(StoreVal);
1240            }
1241
1242            Ops.push_back(InFlag);
1243
1244            SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1245            Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops,
1246                                            MemVT, MachinePointerInfo());
1247            InFlag = Chain.getValue(1);
1248            curOffset += PerStoreOffset;
1249          }
1250        }
1251        ++paramCount;
1252        --OIdx;
1253        continue;
1254      }
1255      // Plain scalar
1256      // for ABI,    declare .param .b<size> .param<n>;
1257      unsigned sz = VT.getSizeInBits();
1258      bool needExtend = false;
1259      if (VT.isInteger()) {
1260        if (sz < 16)
1261          needExtend = true;
1262        if (sz < 32)
1263          sz = 32;
1264      }
1265      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1266      SDValue DeclareParamOps[] = { Chain,
1267                                    DAG.getConstant(paramCount, MVT::i32),
1268                                    DAG.getConstant(sz, MVT::i32),
1269                                    DAG.getConstant(0, MVT::i32), InFlag };
1270      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
1271                          DeclareParamOps);
1272      InFlag = Chain.getValue(1);
1273      SDValue OutV = OutVals[OIdx];
1274      if (needExtend) {
1275        // zext/sext i1 to i16
1276        unsigned opc = ISD::ZERO_EXTEND;
1277        if (Outs[OIdx].Flags.isSExt())
1278          opc = ISD::SIGN_EXTEND;
1279        OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
1280      }
1281      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1282      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
1283                                 DAG.getConstant(0, MVT::i32), OutV, InFlag };
1284
1285      unsigned opcode = NVPTXISD::StoreParam;
1286      if (Outs[OIdx].Flags.isZExt())
1287        opcode = NVPTXISD::StoreParamU32;
1288      else if (Outs[OIdx].Flags.isSExt())
1289        opcode = NVPTXISD::StoreParamS32;
1290      Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
1291                                      VT, MachinePointerInfo());
1292
1293      InFlag = Chain.getValue(1);
1294      ++paramCount;
1295      continue;
1296    }
1297    // struct or vector
1298    SmallVector<EVT, 16> vtparts;
1299    SmallVector<uint64_t, 16> Offsets;
1300    const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
1301    assert(PTy && "Type of a byval parameter should be pointer");
1302    ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0);
1303
1304    // declare .param .align <align> .b8 .param<n>[<size>];
1305    unsigned sz = Outs[OIdx].Flags.getByValSize();
1306    SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1307    unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
1308    // The ByValAlign in the Outs[OIdx].Flags is alway set at this point,
1309    // so we don't need to worry about natural alignment or not.
1310    // See TargetLowering::LowerCallTo().
1311    SDValue DeclareParamOps[] = {
1312      Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), MVT::i32),
1313      DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
1314      InFlag
1315    };
1316    Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
1317                        DeclareParamOps);
1318    InFlag = Chain.getValue(1);
1319    for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
1320      EVT elemtype = vtparts[j];
1321      int curOffset = Offsets[j];
1322      unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
1323      SDValue srcAddr =
1324          DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
1325                      DAG.getConstant(curOffset, getPointerTy()));
1326      SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
1327                                   MachinePointerInfo(), false, false, false,
1328                                   PartAlign);
1329      if (elemtype.getSizeInBits() < 16) {
1330        theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
1331      }
1332      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1333      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
1334                                 DAG.getConstant(curOffset, MVT::i32), theVal,
1335                                 InFlag };
1336      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
1337                                      CopyParamOps, elemtype,
1338                                      MachinePointerInfo());
1339
1340      InFlag = Chain.getValue(1);
1341    }
1342    ++paramCount;
1343  }
1344
1345  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
1346  unsigned retAlignment = 0;
1347
1348  // Handle Result
1349  if (Ins.size() > 0) {
1350    SmallVector<EVT, 16> resvtparts;
1351    ComputeValueVTs(*this, retTy, resvtparts);
1352
1353    // Declare
1354    //  .param .align 16 .b8 retval0[<size-in-bytes>], or
1355    //  .param .b<size-in-bits> retval0
1356    unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
1357    // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
1358    // these three types to match the logic in
1359    // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
1360    // Plus, this behavior is consistent with nvcc's.
1361    if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
1362        retTy->isPointerTy()) {
1363      // Scalar needs to be at least 32bit wide
1364      if (resultsz < 32)
1365        resultsz = 32;
1366      SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1367      SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
1368                                  DAG.getConstant(resultsz, MVT::i32),
1369                                  DAG.getConstant(0, MVT::i32), InFlag };
1370      Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
1371                          DeclareRetOps);
1372      InFlag = Chain.getValue(1);
1373    } else {
1374      retAlignment = getArgumentAlignment(Callee, CS, retTy, 0);
1375      SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1376      SDValue DeclareRetOps[] = { Chain,
1377                                  DAG.getConstant(retAlignment, MVT::i32),
1378                                  DAG.getConstant(resultsz / 8, MVT::i32),
1379                                  DAG.getConstant(0, MVT::i32), InFlag };
1380      Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
1381                          DeclareRetOps);
1382      InFlag = Chain.getValue(1);
1383    }
1384  }
1385
1386  if (!Func) {
1387    // This is indirect function call case : PTX requires a prototype of the
1388    // form
1389    // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
1390    // to be emitted, and the label has to used as the last arg of call
1391    // instruction.
1392    // The prototype is embedded in a string and put as the operand for a
1393    // CallPrototype SDNode which will print out to the value of the string.
1394    SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1395    std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS);
1396    const char *ProtoStr =
1397      nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
1398    SDValue ProtoOps[] = {
1399      Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
1400    };
1401    Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
1402    InFlag = Chain.getValue(1);
1403  }
1404  // Op to just print "call"
1405  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1406  SDValue PrintCallOps[] = {
1407    Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag
1408  };
1409  Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
1410                      dl, PrintCallVTs, PrintCallOps);
1411  InFlag = Chain.getValue(1);
1412
1413  // Ops to print out the function name
1414  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1415  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
1416  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
1417  InFlag = Chain.getValue(1);
1418
1419  // Ops to print out the param list
1420  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1421  SDValue CallArgBeginOps[] = { Chain, InFlag };
1422  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
1423                      CallArgBeginOps);
1424  InFlag = Chain.getValue(1);
1425
1426  for (unsigned i = 0, e = paramCount; i != e; ++i) {
1427    unsigned opcode;
1428    if (i == (e - 1))
1429      opcode = NVPTXISD::LastCallArg;
1430    else
1431      opcode = NVPTXISD::CallArg;
1432    SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1433    SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
1434                             DAG.getConstant(i, MVT::i32), InFlag };
1435    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
1436    InFlag = Chain.getValue(1);
1437  }
1438  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1439  SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
1440                              InFlag };
1441  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
1442  InFlag = Chain.getValue(1);
1443
1444  if (!Func) {
1445    SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1446    SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
1447                               InFlag };
1448    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
1449    InFlag = Chain.getValue(1);
1450  }
1451
1452  // Generate loads from param memory/moves from registers for result
1453  if (Ins.size() > 0) {
1454    if (retTy && retTy->isVectorTy()) {
1455      EVT ObjectVT = getValueType(retTy);
1456      unsigned NumElts = ObjectVT.getVectorNumElements();
1457      EVT EltVT = ObjectVT.getVectorElementType();
1458      assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),
1459                                                      ObjectVT) == NumElts &&
1460             "Vector was not scalarized");
1461      unsigned sz = EltVT.getSizeInBits();
1462      bool needTruncate = sz < 8;
1463
1464      if (NumElts == 1) {
1465        // Just a simple load
1466        SmallVector<EVT, 4> LoadRetVTs;
1467        if (EltVT == MVT::i1 || EltVT == MVT::i8) {
1468          // If loading i1/i8 result, generate
1469          //   load.b8 i16
1470          //   if i1
1471          //   trunc i16 to i1
1472          LoadRetVTs.push_back(MVT::i16);
1473        } else
1474          LoadRetVTs.push_back(EltVT);
1475        LoadRetVTs.push_back(MVT::Other);
1476        LoadRetVTs.push_back(MVT::Glue);
1477        SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32),
1478                                DAG.getConstant(0, MVT::i32), InFlag};
1479        SDValue retval = DAG.getMemIntrinsicNode(
1480            NVPTXISD::LoadParam, dl,
1481            DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
1482        Chain = retval.getValue(1);
1483        InFlag = retval.getValue(2);
1484        SDValue Ret0 = retval;
1485        if (needTruncate)
1486          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0);
1487        InVals.push_back(Ret0);
1488      } else if (NumElts == 2) {
1489        // LoadV2
1490        SmallVector<EVT, 4> LoadRetVTs;
1491        if (EltVT == MVT::i1 || EltVT == MVT::i8) {
1492          // If loading i1/i8 result, generate
1493          //   load.b8 i16
1494          //   if i1
1495          //   trunc i16 to i1
1496          LoadRetVTs.push_back(MVT::i16);
1497          LoadRetVTs.push_back(MVT::i16);
1498        } else {
1499          LoadRetVTs.push_back(EltVT);
1500          LoadRetVTs.push_back(EltVT);
1501        }
1502        LoadRetVTs.push_back(MVT::Other);
1503        LoadRetVTs.push_back(MVT::Glue);
1504        SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32),
1505                                DAG.getConstant(0, MVT::i32), InFlag};
1506        SDValue retval = DAG.getMemIntrinsicNode(
1507            NVPTXISD::LoadParamV2, dl,
1508            DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
1509        Chain = retval.getValue(2);
1510        InFlag = retval.getValue(3);
1511        SDValue Ret0 = retval.getValue(0);
1512        SDValue Ret1 = retval.getValue(1);
1513        if (needTruncate) {
1514          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0);
1515          InVals.push_back(Ret0);
1516          Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1);
1517          InVals.push_back(Ret1);
1518        } else {
1519          InVals.push_back(Ret0);
1520          InVals.push_back(Ret1);
1521        }
1522      } else {
1523        // Split into N LoadV4
1524        unsigned Ofst = 0;
1525        unsigned VecSize = 4;
1526        unsigned Opc = NVPTXISD::LoadParamV4;
1527        if (EltVT.getSizeInBits() == 64) {
1528          VecSize = 2;
1529          Opc = NVPTXISD::LoadParamV2;
1530        }
1531        EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
1532        for (unsigned i = 0; i < NumElts; i += VecSize) {
1533          SmallVector<EVT, 8> LoadRetVTs;
1534          if (EltVT == MVT::i1 || EltVT == MVT::i8) {
1535            // If loading i1/i8 result, generate
1536            //   load.b8 i16
1537            //   if i1
1538            //   trunc i16 to i1
1539            for (unsigned j = 0; j < VecSize; ++j)
1540              LoadRetVTs.push_back(MVT::i16);
1541          } else {
1542            for (unsigned j = 0; j < VecSize; ++j)
1543              LoadRetVTs.push_back(EltVT);
1544          }
1545          LoadRetVTs.push_back(MVT::Other);
1546          LoadRetVTs.push_back(MVT::Glue);
1547          SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32),
1548                                  DAG.getConstant(Ofst, MVT::i32), InFlag};
1549          SDValue retval = DAG.getMemIntrinsicNode(
1550              Opc, dl, DAG.getVTList(LoadRetVTs),
1551              LoadRetOps, EltVT, MachinePointerInfo());
1552          if (VecSize == 2) {
1553            Chain = retval.getValue(2);
1554            InFlag = retval.getValue(3);
1555          } else {
1556            Chain = retval.getValue(4);
1557            InFlag = retval.getValue(5);
1558          }
1559
1560          for (unsigned j = 0; j < VecSize; ++j) {
1561            if (i + j >= NumElts)
1562              break;
1563            SDValue Elt = retval.getValue(j);
1564            if (needTruncate)
1565              Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
1566            InVals.push_back(Elt);
1567          }
1568          Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
1569        }
1570      }
1571    } else {
1572      SmallVector<EVT, 16> VTs;
1573      SmallVector<uint64_t, 16> Offsets;
1574      ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0);
1575      assert(VTs.size() == Ins.size() && "Bad value decomposition");
1576      unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0);
1577      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
1578        unsigned sz = VTs[i].getSizeInBits();
1579        unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
1580        bool needTruncate = sz < 8;
1581        if (VTs[i].isInteger() && (sz < 8))
1582          sz = 8;
1583
1584        SmallVector<EVT, 4> LoadRetVTs;
1585        EVT TheLoadType = VTs[i];
1586        if (retTy->isIntegerTy() &&
1587            TD->getTypeAllocSizeInBits(retTy) < 32) {
1588          // This is for integer types only, and specifically not for
1589          // aggregates.
1590          LoadRetVTs.push_back(MVT::i32);
1591          TheLoadType = MVT::i32;
1592        } else if (sz < 16) {
1593          // If loading i1/i8 result, generate
1594          //   load i8 (-> i16)
1595          //   trunc i16 to i1/i8
1596          LoadRetVTs.push_back(MVT::i16);
1597        } else
1598          LoadRetVTs.push_back(Ins[i].VT);
1599        LoadRetVTs.push_back(MVT::Other);
1600        LoadRetVTs.push_back(MVT::Glue);
1601
1602        SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32),
1603                                DAG.getConstant(Offsets[i], MVT::i32), InFlag};
1604        SDValue retval = DAG.getMemIntrinsicNode(
1605            NVPTXISD::LoadParam, dl,
1606            DAG.getVTList(LoadRetVTs), LoadRetOps,
1607            TheLoadType, MachinePointerInfo(), AlignI);
1608        Chain = retval.getValue(1);
1609        InFlag = retval.getValue(2);
1610        SDValue Ret0 = retval.getValue(0);
1611        if (needTruncate)
1612          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0);
1613        InVals.push_back(Ret0);
1614      }
1615    }
1616  }
1617
1618  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
1619                             DAG.getIntPtrConstant(uniqueCallSite + 1, true),
1620                             InFlag, dl);
1621  uniqueCallSite++;
1622
1623  // set isTailCall to false for now, until we figure out how to express
1624  // tail call optimization in PTX
1625  isTailCall = false;
1626  return Chain;
1627}
1628
1629// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
1630// (see LegalizeDAG.cpp). This is slow and uses local memory.
1631// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
1632SDValue
1633NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
1634  SDNode *Node = Op.getNode();
1635  SDLoc dl(Node);
1636  SmallVector<SDValue, 8> Ops;
1637  unsigned NumOperands = Node->getNumOperands();
1638  for (unsigned i = 0; i < NumOperands; ++i) {
1639    SDValue SubOp = Node->getOperand(i);
1640    EVT VVT = SubOp.getNode()->getValueType(0);
1641    EVT EltVT = VVT.getVectorElementType();
1642    unsigned NumSubElem = VVT.getVectorNumElements();
1643    for (unsigned j = 0; j < NumSubElem; ++j) {
1644      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
1645                                DAG.getIntPtrConstant(j)));
1646    }
1647  }
1648  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
1649}
1650
1651/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
1652/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
1653///    amount, or
1654/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
1655///    amount.
1656SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
1657                                                  SelectionDAG &DAG) const {
1658  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
1659  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
1660
1661  EVT VT = Op.getValueType();
1662  unsigned VTBits = VT.getSizeInBits();
1663  SDLoc dl(Op);
1664  SDValue ShOpLo = Op.getOperand(0);
1665  SDValue ShOpHi = Op.getOperand(1);
1666  SDValue ShAmt  = Op.getOperand(2);
1667  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
1668
1669  if (VTBits == 32 && STI.getSmVersion() >= 35) {
1670
1671    // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
1672    // {dHi, dLo} = {aHi, aLo} >> Amt
1673    //   dHi = aHi >> Amt
1674    //   dLo = shf.r.clamp aLo, aHi, Amt
1675
1676    SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
1677    SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
1678                             ShAmt);
1679
1680    SDValue Ops[2] = { Lo, Hi };
1681    return DAG.getMergeValues(Ops, dl);
1682  }
1683  else {
1684
1685    // {dHi, dLo} = {aHi, aLo} >> Amt
1686    // - if (Amt>=size) then
1687    //      dLo = aHi >> (Amt-size)
1688    //      dHi = aHi >> Amt (this is either all 0 or all 1)
1689    //   else
1690    //      dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
1691    //      dHi = aHi >> Amt
1692
1693    SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
1694                                   DAG.getConstant(VTBits, MVT::i32), ShAmt);
1695    SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
1696    SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
1697                                     DAG.getConstant(VTBits, MVT::i32));
1698    SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
1699    SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
1700    SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
1701
1702    SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
1703                               DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
1704    SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
1705    SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
1706
1707    SDValue Ops[2] = { Lo, Hi };
1708    return DAG.getMergeValues(Ops, dl);
1709  }
1710}
1711
1712/// LowerShiftLeftParts - Lower SHL_PARTS, which
1713/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
1714///    amount, or
1715/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
1716///    amount.
1717SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
1718                                                 SelectionDAG &DAG) const {
1719  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
1720  assert(Op.getOpcode() == ISD::SHL_PARTS);
1721
1722  EVT VT = Op.getValueType();
1723  unsigned VTBits = VT.getSizeInBits();
1724  SDLoc dl(Op);
1725  SDValue ShOpLo = Op.getOperand(0);
1726  SDValue ShOpHi = Op.getOperand(1);
1727  SDValue ShAmt  = Op.getOperand(2);
1728
1729  if (VTBits == 32 && STI.getSmVersion() >= 35) {
1730
1731    // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
1732    // {dHi, dLo} = {aHi, aLo} << Amt
1733    //   dHi = shf.l.clamp aLo, aHi, Amt
1734    //   dLo = aLo << Amt
1735
1736    SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
1737                             ShAmt);
1738    SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
1739
1740    SDValue Ops[2] = { Lo, Hi };
1741    return DAG.getMergeValues(Ops, dl);
1742  }
1743  else {
1744
1745    // {dHi, dLo} = {aHi, aLo} << Amt
1746    // - if (Amt>=size) then
1747    //      dLo = aLo << Amt (all 0)
1748    //      dLo = aLo << (Amt-size)
1749    //   else
1750    //      dLo = aLo << Amt
1751    //      dHi = (aHi << Amt) | (aLo >> (size-Amt))
1752
1753    SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
1754                                   DAG.getConstant(VTBits, MVT::i32), ShAmt);
1755    SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
1756    SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
1757                                     DAG.getConstant(VTBits, MVT::i32));
1758    SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
1759    SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
1760    SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
1761
1762    SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
1763                               DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
1764    SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
1765    SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
1766
1767    SDValue Ops[2] = { Lo, Hi };
1768    return DAG.getMergeValues(Ops, dl);
1769  }
1770}
1771
1772SDValue
1773NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1774  switch (Op.getOpcode()) {
1775  case ISD::RETURNADDR:
1776    return SDValue();
1777  case ISD::FRAMEADDR:
1778    return SDValue();
1779  case ISD::GlobalAddress:
1780    return LowerGlobalAddress(Op, DAG);
1781  case ISD::INTRINSIC_W_CHAIN:
1782    return Op;
1783  case ISD::BUILD_VECTOR:
1784  case ISD::EXTRACT_SUBVECTOR:
1785    return Op;
1786  case ISD::CONCAT_VECTORS:
1787    return LowerCONCAT_VECTORS(Op, DAG);
1788  case ISD::STORE:
1789    return LowerSTORE(Op, DAG);
1790  case ISD::LOAD:
1791    return LowerLOAD(Op, DAG);
1792  case ISD::SHL_PARTS:
1793    return LowerShiftLeftParts(Op, DAG);
1794  case ISD::SRA_PARTS:
1795  case ISD::SRL_PARTS:
1796    return LowerShiftRightParts(Op, DAG);
1797  case ISD::SELECT:
1798    return LowerSelect(Op, DAG);
1799  default:
1800    llvm_unreachable("Custom lowering not defined for operation");
1801  }
1802}
1803
1804SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
1805  SDValue Op0 = Op->getOperand(0);
1806  SDValue Op1 = Op->getOperand(1);
1807  SDValue Op2 = Op->getOperand(2);
1808  SDLoc DL(Op.getNode());
1809
1810  assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
1811
1812  Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
1813  Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
1814  SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
1815  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
1816
1817  return Trunc;
1818}
1819
1820SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1821  if (Op.getValueType() == MVT::i1)
1822    return LowerLOADi1(Op, DAG);
1823  else
1824    return SDValue();
1825}
1826
1827// v = ld i1* addr
1828//   =>
1829// v1 = ld i8* addr (-> i16)
1830// v = trunc i16 to i1
1831SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
1832  SDNode *Node = Op.getNode();
1833  LoadSDNode *LD = cast<LoadSDNode>(Node);
1834  SDLoc dl(Node);
1835  assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
1836  assert(Node->getValueType(0) == MVT::i1 &&
1837         "Custom lowering for i1 load only");
1838  SDValue newLD =
1839      DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
1840                  LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
1841                  LD->isInvariant(), LD->getAlignment());
1842  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
1843  // The legalizer (the caller) is expecting two values from the legalized
1844  // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
1845  // in LegalizeDAG.cpp which also uses MergeValues.
1846  SDValue Ops[] = { result, LD->getChain() };
1847  return DAG.getMergeValues(Ops, dl);
1848}
1849
1850SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1851  EVT ValVT = Op.getOperand(1).getValueType();
1852  if (ValVT == MVT::i1)
1853    return LowerSTOREi1(Op, DAG);
1854  else if (ValVT.isVector())
1855    return LowerSTOREVector(Op, DAG);
1856  else
1857    return SDValue();
1858}
1859
1860SDValue
1861NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
1862  SDNode *N = Op.getNode();
1863  SDValue Val = N->getOperand(1);
1864  SDLoc DL(N);
1865  EVT ValVT = Val.getValueType();
1866
1867  if (ValVT.isVector()) {
1868    // We only handle "native" vector sizes for now, e.g. <4 x double> is not
1869    // legal.  We can (and should) split that into 2 stores of <2 x double> here
1870    // but I'm leaving that as a TODO for now.
1871    if (!ValVT.isSimple())
1872      return SDValue();
1873    switch (ValVT.getSimpleVT().SimpleTy) {
1874    default:
1875      return SDValue();
1876    case MVT::v2i8:
1877    case MVT::v2i16:
1878    case MVT::v2i32:
1879    case MVT::v2i64:
1880    case MVT::v2f32:
1881    case MVT::v2f64:
1882    case MVT::v4i8:
1883    case MVT::v4i16:
1884    case MVT::v4i32:
1885    case MVT::v4f32:
1886      // This is a "native" vector type
1887      break;
1888    }
1889
1890    MemSDNode *MemSD = cast<MemSDNode>(N);
1891    const DataLayout *TD = getDataLayout();
1892
1893    unsigned Align = MemSD->getAlignment();
1894    unsigned PrefAlign =
1895      TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
1896    if (Align < PrefAlign) {
1897      // This store is not sufficiently aligned, so bail out and let this vector
1898      // store be scalarized.  Note that we may still be able to emit smaller
1899      // vector stores.  For example, if we are storing a <4 x float> with an
1900      // alignment of 8, this check will fail but the legalizer will try again
1901      // with 2 x <2 x float>, which will succeed with an alignment of 8.
1902      return SDValue();
1903    }
1904
1905    unsigned Opcode = 0;
1906    EVT EltVT = ValVT.getVectorElementType();
1907    unsigned NumElts = ValVT.getVectorNumElements();
1908
1909    // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
1910    // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
1911    // stored type to i16 and propagate the "real" type as the memory type.
1912    bool NeedExt = false;
1913    if (EltVT.getSizeInBits() < 16)
1914      NeedExt = true;
1915
1916    switch (NumElts) {
1917    default:
1918      return SDValue();
1919    case 2:
1920      Opcode = NVPTXISD::StoreV2;
1921      break;
1922    case 4: {
1923      Opcode = NVPTXISD::StoreV4;
1924      break;
1925    }
1926    }
1927
1928    SmallVector<SDValue, 8> Ops;
1929
1930    // First is the chain
1931    Ops.push_back(N->getOperand(0));
1932
1933    // Then the split values
1934    for (unsigned i = 0; i < NumElts; ++i) {
1935      SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
1936                                   DAG.getIntPtrConstant(i));
1937      if (NeedExt)
1938        ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
1939      Ops.push_back(ExtVal);
1940    }
1941
1942    // Then any remaining arguments
1943    Ops.append(N->op_begin() + 2, N->op_end());
1944
1945    SDValue NewSt = DAG.getMemIntrinsicNode(
1946        Opcode, DL, DAG.getVTList(MVT::Other), Ops,
1947        MemSD->getMemoryVT(), MemSD->getMemOperand());
1948
1949    //return DCI.CombineTo(N, NewSt, true);
1950    return NewSt;
1951  }
1952
1953  return SDValue();
1954}
1955
1956// st i1 v, addr
1957//    =>
1958// v1 = zxt v to i16
1959// st.u8 i16, addr
1960SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
1961  SDNode *Node = Op.getNode();
1962  SDLoc dl(Node);
1963  StoreSDNode *ST = cast<StoreSDNode>(Node);
1964  SDValue Tmp1 = ST->getChain();
1965  SDValue Tmp2 = ST->getBasePtr();
1966  SDValue Tmp3 = ST->getValue();
1967  assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
1968  unsigned Alignment = ST->getAlignment();
1969  bool isVolatile = ST->isVolatile();
1970  bool isNonTemporal = ST->isNonTemporal();
1971  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
1972  SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
1973                                     ST->getPointerInfo(), MVT::i8, isNonTemporal,
1974                                     isVolatile, Alignment);
1975  return Result;
1976}
1977
1978SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
1979                                        int idx, EVT v) const {
1980  std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
1981  std::stringstream suffix;
1982  suffix << idx;
1983  *name += suffix.str();
1984  return DAG.getTargetExternalSymbol(name->c_str(), v);
1985}
1986
1987SDValue
1988NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
1989  std::string ParamSym;
1990  raw_string_ostream ParamStr(ParamSym);
1991
1992  ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx;
1993  ParamStr.flush();
1994
1995  std::string *SavedStr =
1996    nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str());
1997  return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
1998}
1999
2000SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
2001  return getExtSymb(DAG, ".HLPPARAM", idx);
2002}
2003
2004// Check to see if the kernel argument is image*_t or sampler_t
2005
2006bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
2007  static const char *const specialTypes[] = { "struct._image2d_t",
2008                                              "struct._image3d_t",
2009                                              "struct._sampler_t" };
2010
2011  const Type *Ty = arg->getType();
2012  const PointerType *PTy = dyn_cast<PointerType>(Ty);
2013
2014  if (!PTy)
2015    return false;
2016
2017  if (!context)
2018    return false;
2019
2020  const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
2021  const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
2022
2023  for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
2024    if (TypeName == specialTypes[i])
2025      return true;
2026
2027  return false;
2028}
2029
2030SDValue NVPTXTargetLowering::LowerFormalArguments(
2031    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2032    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
2033    SmallVectorImpl<SDValue> &InVals) const {
2034  MachineFunction &MF = DAG.getMachineFunction();
2035  const DataLayout *TD = getDataLayout();
2036
2037  const Function *F = MF.getFunction();
2038  const AttributeSet &PAL = F->getAttributes();
2039  const TargetLowering *TLI = STI.getTargetLowering();
2040
2041  SDValue Root = DAG.getRoot();
2042  std::vector<SDValue> OutChains;
2043
2044  bool isKernel = llvm::isKernelFunction(*F);
2045  bool isABI = (STI.getSmVersion() >= 20);
2046  assert(isABI && "Non-ABI compilation is not supported");
2047  if (!isABI)
2048    return Chain;
2049
2050  std::vector<Type *> argTypes;
2051  std::vector<const Argument *> theArgs;
2052  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
2053       I != E; ++I) {
2054    theArgs.push_back(I);
2055    argTypes.push_back(I->getType());
2056  }
2057  // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
2058  // Ins.size() will be larger
2059  //   * if there is an aggregate argument with multiple fields (each field
2060  //     showing up separately in Ins)
2061  //   * if there is a vector argument with more than typical vector-length
2062  //     elements (generally if more than 4) where each vector element is
2063  //     individually present in Ins.
2064  // So a different index should be used for indexing into Ins.
2065  // See similar issue in LowerCall.
2066  unsigned InsIdx = 0;
2067
2068  int idx = 0;
2069  for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
2070    Type *Ty = argTypes[i];
2071
2072    // If the kernel argument is image*_t or sampler_t, convert it to
2073    // a i32 constant holding the parameter position. This can later
2074    // matched in the AsmPrinter to output the correct mangled name.
2075    if (isImageOrSamplerVal(
2076            theArgs[i],
2077            (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
2078                                     : nullptr))) {
2079      assert(isKernel && "Only kernels can have image/sampler params");
2080      InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
2081      continue;
2082    }
2083
2084    if (theArgs[i]->use_empty()) {
2085      // argument is dead
2086      if (Ty->isAggregateType()) {
2087        SmallVector<EVT, 16> vtparts;
2088
2089        ComputePTXValueVTs(*this, Ty, vtparts);
2090        assert(vtparts.size() > 0 && "empty aggregate type not expected");
2091        for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
2092             ++parti) {
2093          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2094          ++InsIdx;
2095        }
2096        if (vtparts.size() > 0)
2097          --InsIdx;
2098        continue;
2099      }
2100      if (Ty->isVectorTy()) {
2101        EVT ObjectVT = getValueType(Ty);
2102        unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
2103        for (unsigned parti = 0; parti < NumRegs; ++parti) {
2104          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2105          ++InsIdx;
2106        }
2107        if (NumRegs > 0)
2108          --InsIdx;
2109        continue;
2110      }
2111      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2112      continue;
2113    }
2114
2115    // In the following cases, assign a node order of "idx+1"
2116    // to newly created nodes. The SDNodes for params have to
2117    // appear in the same order as their order of appearance
2118    // in the original function. "idx+1" holds that order.
2119    if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) {
2120      if (Ty->isAggregateType()) {
2121        SmallVector<EVT, 16> vtparts;
2122        SmallVector<uint64_t, 16> offsets;
2123
2124        // NOTE: Here, we lose the ability to issue vector loads for vectors
2125        // that are a part of a struct.  This should be investigated in the
2126        // future.
2127        ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0);
2128        assert(vtparts.size() > 0 && "empty aggregate type not expected");
2129        bool aggregateIsPacked = false;
2130        if (StructType *STy = llvm::dyn_cast<StructType>(Ty))
2131          aggregateIsPacked = STy->isPacked();
2132
2133        SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
2134        for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
2135             ++parti) {
2136          EVT partVT = vtparts[parti];
2137          Value *srcValue = Constant::getNullValue(
2138              PointerType::get(partVT.getTypeForEVT(F->getContext()),
2139                               llvm::ADDRESS_SPACE_PARAM));
2140          SDValue srcAddr =
2141              DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
2142                          DAG.getConstant(offsets[parti], getPointerTy()));
2143          unsigned partAlign =
2144              aggregateIsPacked ? 1
2145                                : TD->getABITypeAlignment(
2146                                      partVT.getTypeForEVT(F->getContext()));
2147          SDValue p;
2148          if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) {
2149            ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
2150                                     ISD::SEXTLOAD : ISD::ZEXTLOAD;
2151            p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
2152                               MachinePointerInfo(srcValue), partVT, false,
2153                               false, false, partAlign);
2154          } else {
2155            p = DAG.getLoad(partVT, dl, Root, srcAddr,
2156                            MachinePointerInfo(srcValue), false, false, false,
2157                            partAlign);
2158          }
2159          if (p.getNode())
2160            p.getNode()->setIROrder(idx + 1);
2161          InVals.push_back(p);
2162          ++InsIdx;
2163        }
2164        if (vtparts.size() > 0)
2165          --InsIdx;
2166        continue;
2167      }
2168      if (Ty->isVectorTy()) {
2169        EVT ObjectVT = getValueType(Ty);
2170        SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
2171        unsigned NumElts = ObjectVT.getVectorNumElements();
2172        assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
2173               "Vector was not scalarized");
2174        EVT EltVT = ObjectVT.getVectorElementType();
2175
2176        // V1 load
2177        // f32 = load ...
2178        if (NumElts == 1) {
2179          // We only have one element, so just directly load it
2180          Value *SrcValue = Constant::getNullValue(PointerType::get(
2181              EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
2182          SDValue P = DAG.getLoad(
2183              EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
2184              false, true,
2185              TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
2186          if (P.getNode())
2187            P.getNode()->setIROrder(idx + 1);
2188
2189          if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
2190            P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
2191          InVals.push_back(P);
2192          ++InsIdx;
2193        } else if (NumElts == 2) {
2194          // V2 load
2195          // f32,f32 = load ...
2196          EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
2197          Value *SrcValue = Constant::getNullValue(PointerType::get(
2198              VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
2199          SDValue P = DAG.getLoad(
2200              VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
2201              false, true,
2202              TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
2203          if (P.getNode())
2204            P.getNode()->setIROrder(idx + 1);
2205
2206          SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
2207                                     DAG.getIntPtrConstant(0));
2208          SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
2209                                     DAG.getIntPtrConstant(1));
2210
2211          if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) {
2212            Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0);
2213            Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1);
2214          }
2215
2216          InVals.push_back(Elt0);
2217          InVals.push_back(Elt1);
2218          InsIdx += 2;
2219        } else {
2220          // V4 loads
2221          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
2222          // the
2223          // vector will be expanded to a power of 2 elements, so we know we can
2224          // always round up to the next multiple of 4 when creating the vector
2225          // loads.
2226          // e.g.  4 elem => 1 ld.v4
2227          //       6 elem => 2 ld.v4
2228          //       8 elem => 2 ld.v4
2229          //      11 elem => 3 ld.v4
2230          unsigned VecSize = 4;
2231          if (EltVT.getSizeInBits() == 64) {
2232            VecSize = 2;
2233          }
2234          EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
2235          unsigned Ofst = 0;
2236          for (unsigned i = 0; i < NumElts; i += VecSize) {
2237            Value *SrcValue = Constant::getNullValue(
2238                PointerType::get(VecVT.getTypeForEVT(F->getContext()),
2239                                 llvm::ADDRESS_SPACE_PARAM));
2240            SDValue SrcAddr =
2241                DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
2242                            DAG.getConstant(Ofst, getPointerTy()));
2243            SDValue P = DAG.getLoad(
2244                VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
2245                false, true,
2246                TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
2247            if (P.getNode())
2248              P.getNode()->setIROrder(idx + 1);
2249
2250            for (unsigned j = 0; j < VecSize; ++j) {
2251              if (i + j >= NumElts)
2252                break;
2253              SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
2254                                        DAG.getIntPtrConstant(j));
2255              if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
2256                Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
2257              InVals.push_back(Elt);
2258            }
2259            Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
2260          }
2261          InsIdx += NumElts;
2262        }
2263
2264        if (NumElts > 0)
2265          --InsIdx;
2266        continue;
2267      }
2268      // A plain scalar.
2269      EVT ObjectVT = getValueType(Ty);
2270      // If ABI, load from the param symbol
2271      SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
2272      Value *srcValue = Constant::getNullValue(PointerType::get(
2273          ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
2274      SDValue p;
2275       if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
2276        ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
2277                                       ISD::SEXTLOAD : ISD::ZEXTLOAD;
2278        p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg,
2279                           MachinePointerInfo(srcValue), ObjectVT, false, false,
2280                           false,
2281        TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
2282      } else {
2283        p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
2284                        MachinePointerInfo(srcValue), false, false, false,
2285        TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
2286      }
2287      if (p.getNode())
2288        p.getNode()->setIROrder(idx + 1);
2289      InVals.push_back(p);
2290      continue;
2291    }
2292
2293    // Param has ByVal attribute
2294    // Return MoveParam(param symbol).
2295    // Ideally, the param symbol can be returned directly,
2296    // but when SDNode builder decides to use it in a CopyToReg(),
2297    // machine instruction fails because TargetExternalSymbol
2298    // (not lowered) is target dependent, and CopyToReg assumes
2299    // the source is lowered.
2300    EVT ObjectVT = getValueType(Ty);
2301    assert(ObjectVT == Ins[InsIdx].VT &&
2302           "Ins type did not match function type");
2303    SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
2304    SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
2305    if (p.getNode())
2306      p.getNode()->setIROrder(idx + 1);
2307    if (isKernel)
2308      InVals.push_back(p);
2309    else {
2310      SDValue p2 = DAG.getNode(
2311          ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
2312          DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
2313      InVals.push_back(p2);
2314    }
2315  }
2316
2317  // Clang will check explicit VarArg and issue error if any. However, Clang
2318  // will let code with
2319  // implicit var arg like f() pass. See bug 617733.
2320  // We treat this case as if the arg list is empty.
2321  // if (F.isVarArg()) {
2322  // assert(0 && "VarArg not supported yet!");
2323  //}
2324
2325  if (!OutChains.empty())
2326    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));
2327
2328  return Chain;
2329}
2330
2331
2332SDValue
2333NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2334                                 bool isVarArg,
2335                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
2336                                 const SmallVectorImpl<SDValue> &OutVals,
2337                                 SDLoc dl, SelectionDAG &DAG) const {
2338  MachineFunction &MF = DAG.getMachineFunction();
2339  const Function *F = MF.getFunction();
2340  Type *RetTy = F->getReturnType();
2341  const DataLayout *TD = getDataLayout();
2342
2343  bool isABI = (STI.getSmVersion() >= 20);
2344  assert(isABI && "Non-ABI compilation is not supported");
2345  if (!isABI)
2346    return Chain;
2347
2348  if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) {
2349    // If we have a vector type, the OutVals array will be the scalarized
2350    // components and we have combine them into 1 or more vector stores.
2351    unsigned NumElts = VTy->getNumElements();
2352    assert(NumElts == Outs.size() && "Bad scalarization of return value");
2353
2354    // const_cast can be removed in later LLVM versions
2355    EVT EltVT = getValueType(RetTy).getVectorElementType();
2356    bool NeedExtend = false;
2357    if (EltVT.getSizeInBits() < 16)
2358      NeedExtend = true;
2359
2360    // V1 store
2361    if (NumElts == 1) {
2362      SDValue StoreVal = OutVals[0];
2363      // We only have one element, so just directly store it
2364      if (NeedExtend)
2365        StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
2366      SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal };
2367      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
2368                                      DAG.getVTList(MVT::Other), Ops,
2369                                      EltVT, MachinePointerInfo());
2370
2371    } else if (NumElts == 2) {
2372      // V2 store
2373      SDValue StoreVal0 = OutVals[0];
2374      SDValue StoreVal1 = OutVals[1];
2375
2376      if (NeedExtend) {
2377        StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0);
2378        StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1);
2379      }
2380
2381      SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0,
2382                        StoreVal1 };
2383      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
2384                                      DAG.getVTList(MVT::Other), Ops,
2385                                      EltVT, MachinePointerInfo());
2386    } else {
2387      // V4 stores
2388      // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the
2389      // vector will be expanded to a power of 2 elements, so we know we can
2390      // always round up to the next multiple of 4 when creating the vector
2391      // stores.
2392      // e.g.  4 elem => 1 st.v4
2393      //       6 elem => 2 st.v4
2394      //       8 elem => 2 st.v4
2395      //      11 elem => 3 st.v4
2396
2397      unsigned VecSize = 4;
2398      if (OutVals[0].getValueType().getSizeInBits() == 64)
2399        VecSize = 2;
2400
2401      unsigned Offset = 0;
2402
2403      EVT VecVT =
2404          EVT::getVectorVT(F->getContext(), EltVT, VecSize);
2405      unsigned PerStoreOffset =
2406          TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
2407
2408      for (unsigned i = 0; i < NumElts; i += VecSize) {
2409        // Get values
2410        SDValue StoreVal;
2411        SmallVector<SDValue, 8> Ops;
2412        Ops.push_back(Chain);
2413        Ops.push_back(DAG.getConstant(Offset, MVT::i32));
2414        unsigned Opc = NVPTXISD::StoreRetvalV2;
2415        EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType();
2416
2417        StoreVal = OutVals[i];
2418        if (NeedExtend)
2419          StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2420        Ops.push_back(StoreVal);
2421
2422        if (i + 1 < NumElts) {
2423          StoreVal = OutVals[i + 1];
2424          if (NeedExtend)
2425            StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2426        } else {
2427          StoreVal = DAG.getUNDEF(ExtendedVT);
2428        }
2429        Ops.push_back(StoreVal);
2430
2431        if (VecSize == 4) {
2432          Opc = NVPTXISD::StoreRetvalV4;
2433          if (i + 2 < NumElts) {
2434            StoreVal = OutVals[i + 2];
2435            if (NeedExtend)
2436              StoreVal =
2437                  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2438          } else {
2439            StoreVal = DAG.getUNDEF(ExtendedVT);
2440          }
2441          Ops.push_back(StoreVal);
2442
2443          if (i + 3 < NumElts) {
2444            StoreVal = OutVals[i + 3];
2445            if (NeedExtend)
2446              StoreVal =
2447                  DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
2448          } else {
2449            StoreVal = DAG.getUNDEF(ExtendedVT);
2450          }
2451          Ops.push_back(StoreVal);
2452        }
2453
2454        // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size());
2455        Chain =
2456            DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops,
2457                                    EltVT, MachinePointerInfo());
2458        Offset += PerStoreOffset;
2459      }
2460    }
2461  } else {
2462    SmallVector<EVT, 16> ValVTs;
2463    SmallVector<uint64_t, 16> Offsets;
2464    ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0);
2465    assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
2466
2467    for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2468      SDValue theVal = OutVals[i];
2469      EVT TheValType = theVal.getValueType();
2470      unsigned numElems = 1;
2471      if (TheValType.isVector())
2472        numElems = TheValType.getVectorNumElements();
2473      for (unsigned j = 0, je = numElems; j != je; ++j) {
2474        SDValue TmpVal = theVal;
2475        if (TheValType.isVector())
2476          TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
2477                               TheValType.getVectorElementType(), TmpVal,
2478                               DAG.getIntPtrConstant(j));
2479        EVT TheStoreType = ValVTs[i];
2480        if (RetTy->isIntegerTy() &&
2481            TD->getTypeAllocSizeInBits(RetTy) < 32) {
2482          // The following zero-extension is for integer types only, and
2483          // specifically not for aggregates.
2484          TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
2485          TheStoreType = MVT::i32;
2486        }
2487        else if (TmpVal.getValueType().getSizeInBits() < 16)
2488          TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);
2489
2490        SDValue Ops[] = {
2491          Chain,
2492          DAG.getConstant(Offsets[i], MVT::i32),
2493          TmpVal };
2494        Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
2495                                        DAG.getVTList(MVT::Other), Ops,
2496                                        TheStoreType,
2497                                        MachinePointerInfo());
2498      }
2499    }
2500  }
2501
2502  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
2503}
2504
2505
2506void NVPTXTargetLowering::LowerAsmOperandForConstraint(
2507    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
2508    SelectionDAG &DAG) const {
2509  if (Constraint.length() > 1)
2510    return;
2511  else
2512    TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
2513}
2514
2515// NVPTX suuport vector of legal types of any length in Intrinsics because the
2516// NVPTX specific type legalizer
2517// will legalize them to the PTX supported length.
2518bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
2519  if (isTypeLegal(VT))
2520    return true;
2521  if (VT.isVector()) {
2522    MVT eVT = VT.getVectorElementType();
2523    if (isTypeLegal(eVT))
2524      return true;
2525  }
2526  return false;
2527}
2528
2529static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
2530  switch (Intrinsic) {
2531  default:
2532    return 0;
2533
2534  case Intrinsic::nvvm_tex_1d_v4f32_s32:
2535    return NVPTXISD::Tex1DFloatS32;
2536  case Intrinsic::nvvm_tex_1d_v4f32_f32:
2537    return NVPTXISD::Tex1DFloatFloat;
2538  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
2539    return NVPTXISD::Tex1DFloatFloatLevel;
2540  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
2541    return NVPTXISD::Tex1DFloatFloatGrad;
2542  case Intrinsic::nvvm_tex_1d_v4s32_s32:
2543    return NVPTXISD::Tex1DS32S32;
2544  case Intrinsic::nvvm_tex_1d_v4s32_f32:
2545    return NVPTXISD::Tex1DS32Float;
2546  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
2547    return NVPTXISD::Tex1DS32FloatLevel;
2548  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
2549    return NVPTXISD::Tex1DS32FloatGrad;
2550  case Intrinsic::nvvm_tex_1d_v4u32_s32:
2551    return NVPTXISD::Tex1DU32S32;
2552  case Intrinsic::nvvm_tex_1d_v4u32_f32:
2553    return NVPTXISD::Tex1DU32Float;
2554  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
2555    return NVPTXISD::Tex1DU32FloatLevel;
2556  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
2557    return NVPTXISD::Tex1DU32FloatGrad;
2558
2559  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
2560    return NVPTXISD::Tex1DArrayFloatS32;
2561  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
2562    return NVPTXISD::Tex1DArrayFloatFloat;
2563  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
2564    return NVPTXISD::Tex1DArrayFloatFloatLevel;
2565  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
2566    return NVPTXISD::Tex1DArrayFloatFloatGrad;
2567  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
2568    return NVPTXISD::Tex1DArrayS32S32;
2569  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
2570    return NVPTXISD::Tex1DArrayS32Float;
2571  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
2572    return NVPTXISD::Tex1DArrayS32FloatLevel;
2573  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
2574    return NVPTXISD::Tex1DArrayS32FloatGrad;
2575  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
2576    return NVPTXISD::Tex1DArrayU32S32;
2577  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
2578    return NVPTXISD::Tex1DArrayU32Float;
2579  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
2580    return NVPTXISD::Tex1DArrayU32FloatLevel;
2581  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
2582    return NVPTXISD::Tex1DArrayU32FloatGrad;
2583
2584  case Intrinsic::nvvm_tex_2d_v4f32_s32:
2585    return NVPTXISD::Tex2DFloatS32;
2586  case Intrinsic::nvvm_tex_2d_v4f32_f32:
2587    return NVPTXISD::Tex2DFloatFloat;
2588  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
2589    return NVPTXISD::Tex2DFloatFloatLevel;
2590  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
2591    return NVPTXISD::Tex2DFloatFloatGrad;
2592  case Intrinsic::nvvm_tex_2d_v4s32_s32:
2593    return NVPTXISD::Tex2DS32S32;
2594  case Intrinsic::nvvm_tex_2d_v4s32_f32:
2595    return NVPTXISD::Tex2DS32Float;
2596  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
2597    return NVPTXISD::Tex2DS32FloatLevel;
2598  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
2599    return NVPTXISD::Tex2DS32FloatGrad;
2600  case Intrinsic::nvvm_tex_2d_v4u32_s32:
2601    return NVPTXISD::Tex2DU32S32;
2602  case Intrinsic::nvvm_tex_2d_v4u32_f32:
2603    return NVPTXISD::Tex2DU32Float;
2604  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
2605    return NVPTXISD::Tex2DU32FloatLevel;
2606  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
2607    return NVPTXISD::Tex2DU32FloatGrad;
2608
2609  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
2610    return NVPTXISD::Tex2DArrayFloatS32;
2611  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
2612    return NVPTXISD::Tex2DArrayFloatFloat;
2613  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
2614    return NVPTXISD::Tex2DArrayFloatFloatLevel;
2615  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
2616    return NVPTXISD::Tex2DArrayFloatFloatGrad;
2617  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
2618    return NVPTXISD::Tex2DArrayS32S32;
2619  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
2620    return NVPTXISD::Tex2DArrayS32Float;
2621  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
2622    return NVPTXISD::Tex2DArrayS32FloatLevel;
2623  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
2624    return NVPTXISD::Tex2DArrayS32FloatGrad;
2625  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
2626    return NVPTXISD::Tex2DArrayU32S32;
2627  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
2628    return NVPTXISD::Tex2DArrayU32Float;
2629  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
2630    return NVPTXISD::Tex2DArrayU32FloatLevel;
2631  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
2632    return NVPTXISD::Tex2DArrayU32FloatGrad;
2633
2634  case Intrinsic::nvvm_tex_3d_v4f32_s32:
2635    return NVPTXISD::Tex3DFloatS32;
2636  case Intrinsic::nvvm_tex_3d_v4f32_f32:
2637    return NVPTXISD::Tex3DFloatFloat;
2638  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
2639    return NVPTXISD::Tex3DFloatFloatLevel;
2640  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
2641    return NVPTXISD::Tex3DFloatFloatGrad;
2642  case Intrinsic::nvvm_tex_3d_v4s32_s32:
2643    return NVPTXISD::Tex3DS32S32;
2644  case Intrinsic::nvvm_tex_3d_v4s32_f32:
2645    return NVPTXISD::Tex3DS32Float;
2646  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
2647    return NVPTXISD::Tex3DS32FloatLevel;
2648  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
2649    return NVPTXISD::Tex3DS32FloatGrad;
2650  case Intrinsic::nvvm_tex_3d_v4u32_s32:
2651    return NVPTXISD::Tex3DU32S32;
2652  case Intrinsic::nvvm_tex_3d_v4u32_f32:
2653    return NVPTXISD::Tex3DU32Float;
2654  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
2655    return NVPTXISD::Tex3DU32FloatLevel;
2656  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
2657    return NVPTXISD::Tex3DU32FloatGrad;
2658
2659  case Intrinsic::nvvm_tex_cube_v4f32_f32:
2660    return NVPTXISD::TexCubeFloatFloat;
2661  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
2662    return NVPTXISD::TexCubeFloatFloatLevel;
2663  case Intrinsic::nvvm_tex_cube_v4s32_f32:
2664    return NVPTXISD::TexCubeS32Float;
2665  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
2666    return NVPTXISD::TexCubeS32FloatLevel;
2667  case Intrinsic::nvvm_tex_cube_v4u32_f32:
2668    return NVPTXISD::TexCubeU32Float;
2669  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
2670    return NVPTXISD::TexCubeU32FloatLevel;
2671
2672  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
2673    return NVPTXISD::TexCubeArrayFloatFloat;
2674  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
2675    return NVPTXISD::TexCubeArrayFloatFloatLevel;
2676  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
2677    return NVPTXISD::TexCubeArrayS32Float;
2678  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
2679    return NVPTXISD::TexCubeArrayS32FloatLevel;
2680  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
2681    return NVPTXISD::TexCubeArrayU32Float;
2682  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
2683    return NVPTXISD::TexCubeArrayU32FloatLevel;
2684
2685  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
2686    return NVPTXISD::Tld4R2DFloatFloat;
2687  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
2688    return NVPTXISD::Tld4G2DFloatFloat;
2689  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
2690    return NVPTXISD::Tld4B2DFloatFloat;
2691  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
2692    return NVPTXISD::Tld4A2DFloatFloat;
2693  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
2694    return NVPTXISD::Tld4R2DS64Float;
2695  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
2696    return NVPTXISD::Tld4G2DS64Float;
2697  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
2698    return NVPTXISD::Tld4B2DS64Float;
2699  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
2700    return NVPTXISD::Tld4A2DS64Float;
2701  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
2702    return NVPTXISD::Tld4R2DU64Float;
2703  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
2704    return NVPTXISD::Tld4G2DU64Float;
2705  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
2706    return NVPTXISD::Tld4B2DU64Float;
2707  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
2708    return NVPTXISD::Tld4A2DU64Float;
2709
2710  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
2711    return NVPTXISD::TexUnified1DFloatS32;
2712  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
2713    return NVPTXISD::TexUnified1DFloatFloat;
2714  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
2715    return NVPTXISD::TexUnified1DFloatFloatLevel;
2716  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
2717    return NVPTXISD::TexUnified1DFloatFloatGrad;
2718  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
2719    return NVPTXISD::TexUnified1DS32S32;
2720  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
2721    return NVPTXISD::TexUnified1DS32Float;
2722  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
2723    return NVPTXISD::TexUnified1DS32FloatLevel;
2724  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
2725    return NVPTXISD::TexUnified1DS32FloatGrad;
2726  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
2727    return NVPTXISD::TexUnified1DU32S32;
2728  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
2729    return NVPTXISD::TexUnified1DU32Float;
2730  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
2731    return NVPTXISD::TexUnified1DU32FloatLevel;
2732  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
2733    return NVPTXISD::TexUnified1DU32FloatGrad;
2734
2735  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
2736    return NVPTXISD::TexUnified1DArrayFloatS32;
2737  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
2738    return NVPTXISD::TexUnified1DArrayFloatFloat;
2739  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
2740    return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
2741  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
2742    return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
2743  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
2744    return NVPTXISD::TexUnified1DArrayS32S32;
2745  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
2746    return NVPTXISD::TexUnified1DArrayS32Float;
2747  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
2748    return NVPTXISD::TexUnified1DArrayS32FloatLevel;
2749  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
2750    return NVPTXISD::TexUnified1DArrayS32FloatGrad;
2751  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
2752    return NVPTXISD::TexUnified1DArrayU32S32;
2753  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
2754    return NVPTXISD::TexUnified1DArrayU32Float;
2755  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
2756    return NVPTXISD::TexUnified1DArrayU32FloatLevel;
2757  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
2758    return NVPTXISD::TexUnified1DArrayU32FloatGrad;
2759
2760  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
2761    return NVPTXISD::TexUnified2DFloatS32;
2762  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
2763    return NVPTXISD::TexUnified2DFloatFloat;
2764  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
2765    return NVPTXISD::TexUnified2DFloatFloatLevel;
2766  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
2767    return NVPTXISD::TexUnified2DFloatFloatGrad;
2768  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
2769    return NVPTXISD::TexUnified2DS32S32;
2770  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
2771    return NVPTXISD::TexUnified2DS32Float;
2772  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
2773    return NVPTXISD::TexUnified2DS32FloatLevel;
2774  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
2775    return NVPTXISD::TexUnified2DS32FloatGrad;
2776  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
2777    return NVPTXISD::TexUnified2DU32S32;
2778  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
2779    return NVPTXISD::TexUnified2DU32Float;
2780  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
2781    return NVPTXISD::TexUnified2DU32FloatLevel;
2782  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
2783    return NVPTXISD::TexUnified2DU32FloatGrad;
2784
2785  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
2786    return NVPTXISD::TexUnified2DArrayFloatS32;
2787  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
2788    return NVPTXISD::TexUnified2DArrayFloatFloat;
2789  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
2790    return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
2791  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
2792    return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
2793  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
2794    return NVPTXISD::TexUnified2DArrayS32S32;
2795  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
2796    return NVPTXISD::TexUnified2DArrayS32Float;
2797  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
2798    return NVPTXISD::TexUnified2DArrayS32FloatLevel;
2799  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
2800    return NVPTXISD::TexUnified2DArrayS32FloatGrad;
2801  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
2802    return NVPTXISD::TexUnified2DArrayU32S32;
2803  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
2804    return NVPTXISD::TexUnified2DArrayU32Float;
2805  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
2806    return NVPTXISD::TexUnified2DArrayU32FloatLevel;
2807  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
2808    return NVPTXISD::TexUnified2DArrayU32FloatGrad;
2809
2810  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
2811    return NVPTXISD::TexUnified3DFloatS32;
2812  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
2813    return NVPTXISD::TexUnified3DFloatFloat;
2814  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
2815    return NVPTXISD::TexUnified3DFloatFloatLevel;
2816  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
2817    return NVPTXISD::TexUnified3DFloatFloatGrad;
2818  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
2819    return NVPTXISD::TexUnified3DS32S32;
2820  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
2821    return NVPTXISD::TexUnified3DS32Float;
2822  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
2823    return NVPTXISD::TexUnified3DS32FloatLevel;
2824  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
2825    return NVPTXISD::TexUnified3DS32FloatGrad;
2826  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
2827    return NVPTXISD::TexUnified3DU32S32;
2828  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
2829    return NVPTXISD::TexUnified3DU32Float;
2830  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
2831    return NVPTXISD::TexUnified3DU32FloatLevel;
2832  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
2833    return NVPTXISD::TexUnified3DU32FloatGrad;
2834
2835  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
2836    return NVPTXISD::TexUnifiedCubeFloatFloat;
2837  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
2838    return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
2839  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
2840    return NVPTXISD::TexUnifiedCubeS32Float;
2841  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
2842    return NVPTXISD::TexUnifiedCubeS32FloatLevel;
2843  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
2844    return NVPTXISD::TexUnifiedCubeU32Float;
2845  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
2846    return NVPTXISD::TexUnifiedCubeU32FloatLevel;
2847
2848  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
2849    return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
2850  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
2851    return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
2852  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
2853    return NVPTXISD::TexUnifiedCubeArrayS32Float;
2854  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
2855    return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
2856  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
2857    return NVPTXISD::TexUnifiedCubeArrayU32Float;
2858  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
2859    return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
2860
2861  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
2862    return NVPTXISD::Tld4UnifiedR2DFloatFloat;
2863  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
2864    return NVPTXISD::Tld4UnifiedG2DFloatFloat;
2865  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
2866    return NVPTXISD::Tld4UnifiedB2DFloatFloat;
2867  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
2868    return NVPTXISD::Tld4UnifiedA2DFloatFloat;
2869  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
2870    return NVPTXISD::Tld4UnifiedR2DS64Float;
2871  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
2872    return NVPTXISD::Tld4UnifiedG2DS64Float;
2873  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
2874    return NVPTXISD::Tld4UnifiedB2DS64Float;
2875  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
2876    return NVPTXISD::Tld4UnifiedA2DS64Float;
2877  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
2878    return NVPTXISD::Tld4UnifiedR2DU64Float;
2879  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
2880    return NVPTXISD::Tld4UnifiedG2DU64Float;
2881  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
2882    return NVPTXISD::Tld4UnifiedB2DU64Float;
2883  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
2884    return NVPTXISD::Tld4UnifiedA2DU64Float;
2885  }
2886}
2887
2888static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
2889  switch (Intrinsic) {
2890  default:
2891    return 0;
2892  case Intrinsic::nvvm_suld_1d_i8_clamp:
2893    return NVPTXISD::Suld1DI8Clamp;
2894  case Intrinsic::nvvm_suld_1d_i16_clamp:
2895    return NVPTXISD::Suld1DI16Clamp;
2896  case Intrinsic::nvvm_suld_1d_i32_clamp:
2897    return NVPTXISD::Suld1DI32Clamp;
2898  case Intrinsic::nvvm_suld_1d_i64_clamp:
2899    return NVPTXISD::Suld1DI64Clamp;
2900  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
2901    return NVPTXISD::Suld1DV2I8Clamp;
2902  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
2903    return NVPTXISD::Suld1DV2I16Clamp;
2904  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
2905    return NVPTXISD::Suld1DV2I32Clamp;
2906  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
2907    return NVPTXISD::Suld1DV2I64Clamp;
2908  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
2909    return NVPTXISD::Suld1DV4I8Clamp;
2910  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
2911    return NVPTXISD::Suld1DV4I16Clamp;
2912  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
2913    return NVPTXISD::Suld1DV4I32Clamp;
2914  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
2915    return NVPTXISD::Suld1DArrayI8Clamp;
2916  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
2917    return NVPTXISD::Suld1DArrayI16Clamp;
2918  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
2919    return NVPTXISD::Suld1DArrayI32Clamp;
2920  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
2921    return NVPTXISD::Suld1DArrayI64Clamp;
2922  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
2923    return NVPTXISD::Suld1DArrayV2I8Clamp;
2924  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
2925    return NVPTXISD::Suld1DArrayV2I16Clamp;
2926  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
2927    return NVPTXISD::Suld1DArrayV2I32Clamp;
2928  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
2929    return NVPTXISD::Suld1DArrayV2I64Clamp;
2930  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
2931    return NVPTXISD::Suld1DArrayV4I8Clamp;
2932  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
2933    return NVPTXISD::Suld1DArrayV4I16Clamp;
2934  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
2935    return NVPTXISD::Suld1DArrayV4I32Clamp;
2936  case Intrinsic::nvvm_suld_2d_i8_clamp:
2937    return NVPTXISD::Suld2DI8Clamp;
2938  case Intrinsic::nvvm_suld_2d_i16_clamp:
2939    return NVPTXISD::Suld2DI16Clamp;
2940  case Intrinsic::nvvm_suld_2d_i32_clamp:
2941    return NVPTXISD::Suld2DI32Clamp;
2942  case Intrinsic::nvvm_suld_2d_i64_clamp:
2943    return NVPTXISD::Suld2DI64Clamp;
2944  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
2945    return NVPTXISD::Suld2DV2I8Clamp;
2946  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
2947    return NVPTXISD::Suld2DV2I16Clamp;
2948  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
2949    return NVPTXISD::Suld2DV2I32Clamp;
2950  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
2951    return NVPTXISD::Suld2DV2I64Clamp;
2952  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
2953    return NVPTXISD::Suld2DV4I8Clamp;
2954  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
2955    return NVPTXISD::Suld2DV4I16Clamp;
2956  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
2957    return NVPTXISD::Suld2DV4I32Clamp;
2958  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
2959    return NVPTXISD::Suld2DArrayI8Clamp;
2960  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
2961    return NVPTXISD::Suld2DArrayI16Clamp;
2962  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
2963    return NVPTXISD::Suld2DArrayI32Clamp;
2964  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
2965    return NVPTXISD::Suld2DArrayI64Clamp;
2966  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
2967    return NVPTXISD::Suld2DArrayV2I8Clamp;
2968  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
2969    return NVPTXISD::Suld2DArrayV2I16Clamp;
2970  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
2971    return NVPTXISD::Suld2DArrayV2I32Clamp;
2972  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
2973    return NVPTXISD::Suld2DArrayV2I64Clamp;
2974  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
2975    return NVPTXISD::Suld2DArrayV4I8Clamp;
2976  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
2977    return NVPTXISD::Suld2DArrayV4I16Clamp;
2978  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
2979    return NVPTXISD::Suld2DArrayV4I32Clamp;
2980  case Intrinsic::nvvm_suld_3d_i8_clamp:
2981    return NVPTXISD::Suld3DI8Clamp;
2982  case Intrinsic::nvvm_suld_3d_i16_clamp:
2983    return NVPTXISD::Suld3DI16Clamp;
2984  case Intrinsic::nvvm_suld_3d_i32_clamp:
2985    return NVPTXISD::Suld3DI32Clamp;
2986  case Intrinsic::nvvm_suld_3d_i64_clamp:
2987    return NVPTXISD::Suld3DI64Clamp;
2988  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
2989    return NVPTXISD::Suld3DV2I8Clamp;
2990  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
2991    return NVPTXISD::Suld3DV2I16Clamp;
2992  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
2993    return NVPTXISD::Suld3DV2I32Clamp;
2994  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
2995    return NVPTXISD::Suld3DV2I64Clamp;
2996  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
2997    return NVPTXISD::Suld3DV4I8Clamp;
2998  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
2999    return NVPTXISD::Suld3DV4I16Clamp;
3000  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
3001    return NVPTXISD::Suld3DV4I32Clamp;
3002  case Intrinsic::nvvm_suld_1d_i8_trap:
3003    return NVPTXISD::Suld1DI8Trap;
3004  case Intrinsic::nvvm_suld_1d_i16_trap:
3005    return NVPTXISD::Suld1DI16Trap;
3006  case Intrinsic::nvvm_suld_1d_i32_trap:
3007    return NVPTXISD::Suld1DI32Trap;
3008  case Intrinsic::nvvm_suld_1d_i64_trap:
3009    return NVPTXISD::Suld1DI64Trap;
3010  case Intrinsic::nvvm_suld_1d_v2i8_trap:
3011    return NVPTXISD::Suld1DV2I8Trap;
3012  case Intrinsic::nvvm_suld_1d_v2i16_trap:
3013    return NVPTXISD::Suld1DV2I16Trap;
3014  case Intrinsic::nvvm_suld_1d_v2i32_trap:
3015    return NVPTXISD::Suld1DV2I32Trap;
3016  case Intrinsic::nvvm_suld_1d_v2i64_trap:
3017    return NVPTXISD::Suld1DV2I64Trap;
3018  case Intrinsic::nvvm_suld_1d_v4i8_trap:
3019    return NVPTXISD::Suld1DV4I8Trap;
3020  case Intrinsic::nvvm_suld_1d_v4i16_trap:
3021    return NVPTXISD::Suld1DV4I16Trap;
3022  case Intrinsic::nvvm_suld_1d_v4i32_trap:
3023    return NVPTXISD::Suld1DV4I32Trap;
3024  case Intrinsic::nvvm_suld_1d_array_i8_trap:
3025    return NVPTXISD::Suld1DArrayI8Trap;
3026  case Intrinsic::nvvm_suld_1d_array_i16_trap:
3027    return NVPTXISD::Suld1DArrayI16Trap;
3028  case Intrinsic::nvvm_suld_1d_array_i32_trap:
3029    return NVPTXISD::Suld1DArrayI32Trap;
3030  case Intrinsic::nvvm_suld_1d_array_i64_trap:
3031    return NVPTXISD::Suld1DArrayI64Trap;
3032  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
3033    return NVPTXISD::Suld1DArrayV2I8Trap;
3034  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
3035    return NVPTXISD::Suld1DArrayV2I16Trap;
3036  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
3037    return NVPTXISD::Suld1DArrayV2I32Trap;
3038  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
3039    return NVPTXISD::Suld1DArrayV2I64Trap;
3040  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
3041    return NVPTXISD::Suld1DArrayV4I8Trap;
3042  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
3043    return NVPTXISD::Suld1DArrayV4I16Trap;
3044  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
3045    return NVPTXISD::Suld1DArrayV4I32Trap;
3046  case Intrinsic::nvvm_suld_2d_i8_trap:
3047    return NVPTXISD::Suld2DI8Trap;
3048  case Intrinsic::nvvm_suld_2d_i16_trap:
3049    return NVPTXISD::Suld2DI16Trap;
3050  case Intrinsic::nvvm_suld_2d_i32_trap:
3051    return NVPTXISD::Suld2DI32Trap;
3052  case Intrinsic::nvvm_suld_2d_i64_trap:
3053    return NVPTXISD::Suld2DI64Trap;
3054  case Intrinsic::nvvm_suld_2d_v2i8_trap:
3055    return NVPTXISD::Suld2DV2I8Trap;
3056  case Intrinsic::nvvm_suld_2d_v2i16_trap:
3057    return NVPTXISD::Suld2DV2I16Trap;
3058  case Intrinsic::nvvm_suld_2d_v2i32_trap:
3059    return NVPTXISD::Suld2DV2I32Trap;
3060  case Intrinsic::nvvm_suld_2d_v2i64_trap:
3061    return NVPTXISD::Suld2DV2I64Trap;
3062  case Intrinsic::nvvm_suld_2d_v4i8_trap:
3063    return NVPTXISD::Suld2DV4I8Trap;
3064  case Intrinsic::nvvm_suld_2d_v4i16_trap:
3065    return NVPTXISD::Suld2DV4I16Trap;
3066  case Intrinsic::nvvm_suld_2d_v4i32_trap:
3067    return NVPTXISD::Suld2DV4I32Trap;
3068  case Intrinsic::nvvm_suld_2d_array_i8_trap:
3069    return NVPTXISD::Suld2DArrayI8Trap;
3070  case Intrinsic::nvvm_suld_2d_array_i16_trap:
3071    return NVPTXISD::Suld2DArrayI16Trap;
3072  case Intrinsic::nvvm_suld_2d_array_i32_trap:
3073    return NVPTXISD::Suld2DArrayI32Trap;
3074  case Intrinsic::nvvm_suld_2d_array_i64_trap:
3075    return NVPTXISD::Suld2DArrayI64Trap;
3076  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
3077    return NVPTXISD::Suld2DArrayV2I8Trap;
3078  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
3079    return NVPTXISD::Suld2DArrayV2I16Trap;
3080  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
3081    return NVPTXISD::Suld2DArrayV2I32Trap;
3082  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
3083    return NVPTXISD::Suld2DArrayV2I64Trap;
3084  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
3085    return NVPTXISD::Suld2DArrayV4I8Trap;
3086  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
3087    return NVPTXISD::Suld2DArrayV4I16Trap;
3088  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
3089    return NVPTXISD::Suld2DArrayV4I32Trap;
3090  case Intrinsic::nvvm_suld_3d_i8_trap:
3091    return NVPTXISD::Suld3DI8Trap;
3092  case Intrinsic::nvvm_suld_3d_i16_trap:
3093    return NVPTXISD::Suld3DI16Trap;
3094  case Intrinsic::nvvm_suld_3d_i32_trap:
3095    return NVPTXISD::Suld3DI32Trap;
3096  case Intrinsic::nvvm_suld_3d_i64_trap:
3097    return NVPTXISD::Suld3DI64Trap;
3098  case Intrinsic::nvvm_suld_3d_v2i8_trap:
3099    return NVPTXISD::Suld3DV2I8Trap;
3100  case Intrinsic::nvvm_suld_3d_v2i16_trap:
3101    return NVPTXISD::Suld3DV2I16Trap;
3102  case Intrinsic::nvvm_suld_3d_v2i32_trap:
3103    return NVPTXISD::Suld3DV2I32Trap;
3104  case Intrinsic::nvvm_suld_3d_v2i64_trap:
3105    return NVPTXISD::Suld3DV2I64Trap;
3106  case Intrinsic::nvvm_suld_3d_v4i8_trap:
3107    return NVPTXISD::Suld3DV4I8Trap;
3108  case Intrinsic::nvvm_suld_3d_v4i16_trap:
3109    return NVPTXISD::Suld3DV4I16Trap;
3110  case Intrinsic::nvvm_suld_3d_v4i32_trap:
3111    return NVPTXISD::Suld3DV4I32Trap;
3112  case Intrinsic::nvvm_suld_1d_i8_zero:
3113    return NVPTXISD::Suld1DI8Zero;
3114  case Intrinsic::nvvm_suld_1d_i16_zero:
3115    return NVPTXISD::Suld1DI16Zero;
3116  case Intrinsic::nvvm_suld_1d_i32_zero:
3117    return NVPTXISD::Suld1DI32Zero;
3118  case Intrinsic::nvvm_suld_1d_i64_zero:
3119    return NVPTXISD::Suld1DI64Zero;
3120  case Intrinsic::nvvm_suld_1d_v2i8_zero:
3121    return NVPTXISD::Suld1DV2I8Zero;
3122  case Intrinsic::nvvm_suld_1d_v2i16_zero:
3123    return NVPTXISD::Suld1DV2I16Zero;
3124  case Intrinsic::nvvm_suld_1d_v2i32_zero:
3125    return NVPTXISD::Suld1DV2I32Zero;
3126  case Intrinsic::nvvm_suld_1d_v2i64_zero:
3127    return NVPTXISD::Suld1DV2I64Zero;
3128  case Intrinsic::nvvm_suld_1d_v4i8_zero:
3129    return NVPTXISD::Suld1DV4I8Zero;
3130  case Intrinsic::nvvm_suld_1d_v4i16_zero:
3131    return NVPTXISD::Suld1DV4I16Zero;
3132  case Intrinsic::nvvm_suld_1d_v4i32_zero:
3133    return NVPTXISD::Suld1DV4I32Zero;
3134  case Intrinsic::nvvm_suld_1d_array_i8_zero:
3135    return NVPTXISD::Suld1DArrayI8Zero;
3136  case Intrinsic::nvvm_suld_1d_array_i16_zero:
3137    return NVPTXISD::Suld1DArrayI16Zero;
3138  case Intrinsic::nvvm_suld_1d_array_i32_zero:
3139    return NVPTXISD::Suld1DArrayI32Zero;
3140  case Intrinsic::nvvm_suld_1d_array_i64_zero:
3141    return NVPTXISD::Suld1DArrayI64Zero;
3142  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
3143    return NVPTXISD::Suld1DArrayV2I8Zero;
3144  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
3145    return NVPTXISD::Suld1DArrayV2I16Zero;
3146  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
3147    return NVPTXISD::Suld1DArrayV2I32Zero;
3148  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
3149    return NVPTXISD::Suld1DArrayV2I64Zero;
3150  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
3151    return NVPTXISD::Suld1DArrayV4I8Zero;
3152  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
3153    return NVPTXISD::Suld1DArrayV4I16Zero;
3154  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
3155    return NVPTXISD::Suld1DArrayV4I32Zero;
3156  case Intrinsic::nvvm_suld_2d_i8_zero:
3157    return NVPTXISD::Suld2DI8Zero;
3158  case Intrinsic::nvvm_suld_2d_i16_zero:
3159    return NVPTXISD::Suld2DI16Zero;
3160  case Intrinsic::nvvm_suld_2d_i32_zero:
3161    return NVPTXISD::Suld2DI32Zero;
3162  case Intrinsic::nvvm_suld_2d_i64_zero:
3163    return NVPTXISD::Suld2DI64Zero;
3164  case Intrinsic::nvvm_suld_2d_v2i8_zero:
3165    return NVPTXISD::Suld2DV2I8Zero;
3166  case Intrinsic::nvvm_suld_2d_v2i16_zero:
3167    return NVPTXISD::Suld2DV2I16Zero;
3168  case Intrinsic::nvvm_suld_2d_v2i32_zero:
3169    return NVPTXISD::Suld2DV2I32Zero;
3170  case Intrinsic::nvvm_suld_2d_v2i64_zero:
3171    return NVPTXISD::Suld2DV2I64Zero;
3172  case Intrinsic::nvvm_suld_2d_v4i8_zero:
3173    return NVPTXISD::Suld2DV4I8Zero;
3174  case Intrinsic::nvvm_suld_2d_v4i16_zero:
3175    return NVPTXISD::Suld2DV4I16Zero;
3176  case Intrinsic::nvvm_suld_2d_v4i32_zero:
3177    return NVPTXISD::Suld2DV4I32Zero;
3178  case Intrinsic::nvvm_suld_2d_array_i8_zero:
3179    return NVPTXISD::Suld2DArrayI8Zero;
3180  case Intrinsic::nvvm_suld_2d_array_i16_zero:
3181    return NVPTXISD::Suld2DArrayI16Zero;
3182  case Intrinsic::nvvm_suld_2d_array_i32_zero:
3183    return NVPTXISD::Suld2DArrayI32Zero;
3184  case Intrinsic::nvvm_suld_2d_array_i64_zero:
3185    return NVPTXISD::Suld2DArrayI64Zero;
3186  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
3187    return NVPTXISD::Suld2DArrayV2I8Zero;
3188  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
3189    return NVPTXISD::Suld2DArrayV2I16Zero;
3190  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
3191    return NVPTXISD::Suld2DArrayV2I32Zero;
3192  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
3193    return NVPTXISD::Suld2DArrayV2I64Zero;
3194  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
3195    return NVPTXISD::Suld2DArrayV4I8Zero;
3196  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
3197    return NVPTXISD::Suld2DArrayV4I16Zero;
3198  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
3199    return NVPTXISD::Suld2DArrayV4I32Zero;
3200  case Intrinsic::nvvm_suld_3d_i8_zero:
3201    return NVPTXISD::Suld3DI8Zero;
3202  case Intrinsic::nvvm_suld_3d_i16_zero:
3203    return NVPTXISD::Suld3DI16Zero;
3204  case Intrinsic::nvvm_suld_3d_i32_zero:
3205    return NVPTXISD::Suld3DI32Zero;
3206  case Intrinsic::nvvm_suld_3d_i64_zero:
3207    return NVPTXISD::Suld3DI64Zero;
3208  case Intrinsic::nvvm_suld_3d_v2i8_zero:
3209    return NVPTXISD::Suld3DV2I8Zero;
3210  case Intrinsic::nvvm_suld_3d_v2i16_zero:
3211    return NVPTXISD::Suld3DV2I16Zero;
3212  case Intrinsic::nvvm_suld_3d_v2i32_zero:
3213    return NVPTXISD::Suld3DV2I32Zero;
3214  case Intrinsic::nvvm_suld_3d_v2i64_zero:
3215    return NVPTXISD::Suld3DV2I64Zero;
3216  case Intrinsic::nvvm_suld_3d_v4i8_zero:
3217    return NVPTXISD::Suld3DV4I8Zero;
3218  case Intrinsic::nvvm_suld_3d_v4i16_zero:
3219    return NVPTXISD::Suld3DV4I16Zero;
3220  case Intrinsic::nvvm_suld_3d_v4i32_zero:
3221    return NVPTXISD::Suld3DV4I32Zero;
3222  }
3223}
3224
3225// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
3226// TgtMemIntrinsic
3227// because we need the information that is only available in the "Value" type
3228// of destination
3229// pointer. In particular, the address space information.
3230bool NVPTXTargetLowering::getTgtMemIntrinsic(
3231    IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
3232  switch (Intrinsic) {
3233  default:
3234    return false;
3235
3236  case Intrinsic::nvvm_atomic_load_add_f32:
3237    Info.opc = ISD::INTRINSIC_W_CHAIN;
3238    Info.memVT = MVT::f32;
3239    Info.ptrVal = I.getArgOperand(0);
3240    Info.offset = 0;
3241    Info.vol = 0;
3242    Info.readMem = true;
3243    Info.writeMem = true;
3244    Info.align = 0;
3245    return true;
3246
3247  case Intrinsic::nvvm_atomic_load_inc_32:
3248  case Intrinsic::nvvm_atomic_load_dec_32:
3249    Info.opc = ISD::INTRINSIC_W_CHAIN;
3250    Info.memVT = MVT::i32;
3251    Info.ptrVal = I.getArgOperand(0);
3252    Info.offset = 0;
3253    Info.vol = 0;
3254    Info.readMem = true;
3255    Info.writeMem = true;
3256    Info.align = 0;
3257    return true;
3258
3259  case Intrinsic::nvvm_ldu_global_i:
3260  case Intrinsic::nvvm_ldu_global_f:
3261  case Intrinsic::nvvm_ldu_global_p: {
3262
3263    Info.opc = ISD::INTRINSIC_W_CHAIN;
3264    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
3265      Info.memVT = getValueType(I.getType());
3266    else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
3267      Info.memVT = getPointerTy();
3268    else
3269      Info.memVT = getValueType(I.getType());
3270    Info.ptrVal = I.getArgOperand(0);
3271    Info.offset = 0;
3272    Info.vol = 0;
3273    Info.readMem = true;
3274    Info.writeMem = false;
3275    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3276
3277    return true;
3278  }
3279  case Intrinsic::nvvm_ldg_global_i:
3280  case Intrinsic::nvvm_ldg_global_f:
3281  case Intrinsic::nvvm_ldg_global_p: {
3282
3283    Info.opc = ISD::INTRINSIC_W_CHAIN;
3284    if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
3285      Info.memVT = getValueType(I.getType());
3286    else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
3287      Info.memVT = getPointerTy();
3288    else
3289      Info.memVT = getValueType(I.getType());
3290    Info.ptrVal = I.getArgOperand(0);
3291    Info.offset = 0;
3292    Info.vol = 0;
3293    Info.readMem = true;
3294    Info.writeMem = false;
3295    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3296
3297    return true;
3298  }
3299
3300  case Intrinsic::nvvm_tex_1d_v4f32_s32:
3301  case Intrinsic::nvvm_tex_1d_v4f32_f32:
3302  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
3303  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
3304  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
3305  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
3306  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
3307  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
3308  case Intrinsic::nvvm_tex_2d_v4f32_s32:
3309  case Intrinsic::nvvm_tex_2d_v4f32_f32:
3310  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
3311  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
3312  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
3313  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
3314  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
3315  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
3316  case Intrinsic::nvvm_tex_3d_v4f32_s32:
3317  case Intrinsic::nvvm_tex_3d_v4f32_f32:
3318  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
3319  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
3320  case Intrinsic::nvvm_tex_cube_v4f32_f32:
3321  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
3322  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
3323  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
3324  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
3325  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
3326  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
3327  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
3328  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
3329  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
3330  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
3331  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
3332  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
3333  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
3334  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
3335  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
3336  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
3337  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
3338  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
3339  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
3340  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
3341  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
3342  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
3343  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
3344  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
3345  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
3346  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
3347  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
3348  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
3349  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
3350  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
3351  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
3352  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
3353  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
3354  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
3355  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: {
3356    Info.opc = getOpcForTextureInstr(Intrinsic);
3357    Info.memVT = MVT::v4f32;
3358    Info.ptrVal = nullptr;
3359    Info.offset = 0;
3360    Info.vol = 0;
3361    Info.readMem = true;
3362    Info.writeMem = false;
3363    Info.align = 16;
3364    return true;
3365  }
3366  case Intrinsic::nvvm_tex_1d_v4s32_s32:
3367  case Intrinsic::nvvm_tex_1d_v4s32_f32:
3368  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
3369  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
3370  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
3371  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
3372  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
3373  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
3374  case Intrinsic::nvvm_tex_2d_v4s32_s32:
3375  case Intrinsic::nvvm_tex_2d_v4s32_f32:
3376  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
3377  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
3378  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
3379  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
3380  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
3381  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
3382  case Intrinsic::nvvm_tex_3d_v4s32_s32:
3383  case Intrinsic::nvvm_tex_3d_v4s32_f32:
3384  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
3385  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
3386  case Intrinsic::nvvm_tex_cube_v4s32_f32:
3387  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
3388  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
3389  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
3390  case Intrinsic::nvvm_tex_cube_v4u32_f32:
3391  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
3392  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
3393  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
3394  case Intrinsic::nvvm_tex_1d_v4u32_s32:
3395  case Intrinsic::nvvm_tex_1d_v4u32_f32:
3396  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
3397  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
3398  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
3399  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
3400  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
3401  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
3402  case Intrinsic::nvvm_tex_2d_v4u32_s32:
3403  case Intrinsic::nvvm_tex_2d_v4u32_f32:
3404  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
3405  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
3406  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
3407  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
3408  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
3409  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
3410  case Intrinsic::nvvm_tex_3d_v4u32_s32:
3411  case Intrinsic::nvvm_tex_3d_v4u32_f32:
3412  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
3413  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
3414  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
3415  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
3416  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
3417  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
3418  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
3419  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
3420  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
3421  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
3422  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
3423  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
3424  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
3425  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
3426  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
3427  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
3428  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
3429  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
3430  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
3431  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
3432  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
3433  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
3434  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
3435  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
3436  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
3437  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
3438  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
3439  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
3440  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
3441  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
3442  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
3443  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
3444  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
3445  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
3446  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
3447  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
3448  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
3449  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
3450  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
3451  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
3452  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
3453  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
3454  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
3455  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
3456  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
3457  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
3458  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
3459  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
3460  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
3461  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
3462  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
3463  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
3464  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
3465  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
3466  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
3467  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
3468  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
3469  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
3470  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
3471  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
3472  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
3473  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
3474  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
3475  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
3476  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
3477  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: {
3478    Info.opc = getOpcForTextureInstr(Intrinsic);
3479    Info.memVT = MVT::v4i32;
3480    Info.ptrVal = nullptr;
3481    Info.offset = 0;
3482    Info.vol = 0;
3483    Info.readMem = true;
3484    Info.writeMem = false;
3485    Info.align = 16;
3486    return true;
3487  }
3488  case Intrinsic::nvvm_suld_1d_i8_clamp:
3489  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
3490  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
3491  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
3492  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
3493  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
3494  case Intrinsic::nvvm_suld_2d_i8_clamp:
3495  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
3496  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
3497  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
3498  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
3499  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
3500  case Intrinsic::nvvm_suld_3d_i8_clamp:
3501  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
3502  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
3503  case Intrinsic::nvvm_suld_1d_i8_trap:
3504  case Intrinsic::nvvm_suld_1d_v2i8_trap:
3505  case Intrinsic::nvvm_suld_1d_v4i8_trap:
3506  case Intrinsic::nvvm_suld_1d_array_i8_trap:
3507  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
3508  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
3509  case Intrinsic::nvvm_suld_2d_i8_trap:
3510  case Intrinsic::nvvm_suld_2d_v2i8_trap:
3511  case Intrinsic::nvvm_suld_2d_v4i8_trap:
3512  case Intrinsic::nvvm_suld_2d_array_i8_trap:
3513  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
3514  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
3515  case Intrinsic::nvvm_suld_3d_i8_trap:
3516  case Intrinsic::nvvm_suld_3d_v2i8_trap:
3517  case Intrinsic::nvvm_suld_3d_v4i8_trap:
3518  case Intrinsic::nvvm_suld_1d_i8_zero:
3519  case Intrinsic::nvvm_suld_1d_v2i8_zero:
3520  case Intrinsic::nvvm_suld_1d_v4i8_zero:
3521  case Intrinsic::nvvm_suld_1d_array_i8_zero:
3522  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
3523  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
3524  case Intrinsic::nvvm_suld_2d_i8_zero:
3525  case Intrinsic::nvvm_suld_2d_v2i8_zero:
3526  case Intrinsic::nvvm_suld_2d_v4i8_zero:
3527  case Intrinsic::nvvm_suld_2d_array_i8_zero:
3528  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
3529  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
3530  case Intrinsic::nvvm_suld_3d_i8_zero:
3531  case Intrinsic::nvvm_suld_3d_v2i8_zero:
3532  case Intrinsic::nvvm_suld_3d_v4i8_zero: {
3533    Info.opc = getOpcForSurfaceInstr(Intrinsic);
3534    Info.memVT = MVT::i8;
3535    Info.ptrVal = nullptr;
3536    Info.offset = 0;
3537    Info.vol = 0;
3538    Info.readMem = true;
3539    Info.writeMem = false;
3540    Info.align = 16;
3541    return true;
3542  }
3543  case Intrinsic::nvvm_suld_1d_i16_clamp:
3544  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
3545  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
3546  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
3547  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
3548  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
3549  case Intrinsic::nvvm_suld_2d_i16_clamp:
3550  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
3551  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
3552  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
3553  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
3554  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
3555  case Intrinsic::nvvm_suld_3d_i16_clamp:
3556  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
3557  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
3558  case Intrinsic::nvvm_suld_1d_i16_trap:
3559  case Intrinsic::nvvm_suld_1d_v2i16_trap:
3560  case Intrinsic::nvvm_suld_1d_v4i16_trap:
3561  case Intrinsic::nvvm_suld_1d_array_i16_trap:
3562  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
3563  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
3564  case Intrinsic::nvvm_suld_2d_i16_trap:
3565  case Intrinsic::nvvm_suld_2d_v2i16_trap:
3566  case Intrinsic::nvvm_suld_2d_v4i16_trap:
3567  case Intrinsic::nvvm_suld_2d_array_i16_trap:
3568  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
3569  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
3570  case Intrinsic::nvvm_suld_3d_i16_trap:
3571  case Intrinsic::nvvm_suld_3d_v2i16_trap:
3572  case Intrinsic::nvvm_suld_3d_v4i16_trap:
3573  case Intrinsic::nvvm_suld_1d_i16_zero:
3574  case Intrinsic::nvvm_suld_1d_v2i16_zero:
3575  case Intrinsic::nvvm_suld_1d_v4i16_zero:
3576  case Intrinsic::nvvm_suld_1d_array_i16_zero:
3577  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
3578  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
3579  case Intrinsic::nvvm_suld_2d_i16_zero:
3580  case Intrinsic::nvvm_suld_2d_v2i16_zero:
3581  case Intrinsic::nvvm_suld_2d_v4i16_zero:
3582  case Intrinsic::nvvm_suld_2d_array_i16_zero:
3583  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
3584  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
3585  case Intrinsic::nvvm_suld_3d_i16_zero:
3586  case Intrinsic::nvvm_suld_3d_v2i16_zero:
3587  case Intrinsic::nvvm_suld_3d_v4i16_zero: {
3588    Info.opc = getOpcForSurfaceInstr(Intrinsic);
3589    Info.memVT = MVT::i16;
3590    Info.ptrVal = nullptr;
3591    Info.offset = 0;
3592    Info.vol = 0;
3593    Info.readMem = true;
3594    Info.writeMem = false;
3595    Info.align = 16;
3596    return true;
3597  }
3598  case Intrinsic::nvvm_suld_1d_i32_clamp:
3599  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
3600  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
3601  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
3602  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
3603  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
3604  case Intrinsic::nvvm_suld_2d_i32_clamp:
3605  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
3606  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
3607  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
3608  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
3609  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
3610  case Intrinsic::nvvm_suld_3d_i32_clamp:
3611  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
3612  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
3613  case Intrinsic::nvvm_suld_1d_i32_trap:
3614  case Intrinsic::nvvm_suld_1d_v2i32_trap:
3615  case Intrinsic::nvvm_suld_1d_v4i32_trap:
3616  case Intrinsic::nvvm_suld_1d_array_i32_trap:
3617  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
3618  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
3619  case Intrinsic::nvvm_suld_2d_i32_trap:
3620  case Intrinsic::nvvm_suld_2d_v2i32_trap:
3621  case Intrinsic::nvvm_suld_2d_v4i32_trap:
3622  case Intrinsic::nvvm_suld_2d_array_i32_trap:
3623  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
3624  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
3625  case Intrinsic::nvvm_suld_3d_i32_trap:
3626  case Intrinsic::nvvm_suld_3d_v2i32_trap:
3627  case Intrinsic::nvvm_suld_3d_v4i32_trap:
3628  case Intrinsic::nvvm_suld_1d_i32_zero:
3629  case Intrinsic::nvvm_suld_1d_v2i32_zero:
3630  case Intrinsic::nvvm_suld_1d_v4i32_zero:
3631  case Intrinsic::nvvm_suld_1d_array_i32_zero:
3632  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
3633  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
3634  case Intrinsic::nvvm_suld_2d_i32_zero:
3635  case Intrinsic::nvvm_suld_2d_v2i32_zero:
3636  case Intrinsic::nvvm_suld_2d_v4i32_zero:
3637  case Intrinsic::nvvm_suld_2d_array_i32_zero:
3638  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
3639  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
3640  case Intrinsic::nvvm_suld_3d_i32_zero:
3641  case Intrinsic::nvvm_suld_3d_v2i32_zero:
3642  case Intrinsic::nvvm_suld_3d_v4i32_zero: {
3643    Info.opc = getOpcForSurfaceInstr(Intrinsic);
3644    Info.memVT = MVT::i32;
3645    Info.ptrVal = nullptr;
3646    Info.offset = 0;
3647    Info.vol = 0;
3648    Info.readMem = true;
3649    Info.writeMem = false;
3650    Info.align = 16;
3651    return true;
3652  }
3653  case Intrinsic::nvvm_suld_1d_i64_clamp:
3654  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
3655  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
3656  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
3657  case Intrinsic::nvvm_suld_2d_i64_clamp:
3658  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
3659  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
3660  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
3661  case Intrinsic::nvvm_suld_3d_i64_clamp:
3662  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
3663  case Intrinsic::nvvm_suld_1d_i64_trap:
3664  case Intrinsic::nvvm_suld_1d_v2i64_trap:
3665  case Intrinsic::nvvm_suld_1d_array_i64_trap:
3666  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
3667  case Intrinsic::nvvm_suld_2d_i64_trap:
3668  case Intrinsic::nvvm_suld_2d_v2i64_trap:
3669  case Intrinsic::nvvm_suld_2d_array_i64_trap:
3670  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
3671  case Intrinsic::nvvm_suld_3d_i64_trap:
3672  case Intrinsic::nvvm_suld_3d_v2i64_trap:
3673  case Intrinsic::nvvm_suld_1d_i64_zero:
3674  case Intrinsic::nvvm_suld_1d_v2i64_zero:
3675  case Intrinsic::nvvm_suld_1d_array_i64_zero:
3676  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
3677  case Intrinsic::nvvm_suld_2d_i64_zero:
3678  case Intrinsic::nvvm_suld_2d_v2i64_zero:
3679  case Intrinsic::nvvm_suld_2d_array_i64_zero:
3680  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
3681  case Intrinsic::nvvm_suld_3d_i64_zero:
3682  case Intrinsic::nvvm_suld_3d_v2i64_zero: {
3683    Info.opc = getOpcForSurfaceInstr(Intrinsic);
3684    Info.memVT = MVT::i64;
3685    Info.ptrVal = nullptr;
3686    Info.offset = 0;
3687    Info.vol = 0;
3688    Info.readMem = true;
3689    Info.writeMem = false;
3690    Info.align = 16;
3691    return true;
3692  }
3693  }
3694  return false;
3695}
3696
3697/// isLegalAddressingMode - Return true if the addressing mode represented
3698/// by AM is legal for this target, for a load/store of the specified type.
3699/// Used to guide target specific optimizations, like loop strength reduction
3700/// (LoopStrengthReduce.cpp) and memory optimization for address mode
3701/// (CodeGenPrepare.cpp)
3702bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
3703                                                Type *Ty) const {
3704
3705  // AddrMode - This represents an addressing mode of:
3706  //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
3707  //
3708  // The legal address modes are
3709  // - [avar]
3710  // - [areg]
3711  // - [areg+immoff]
3712  // - [immAddr]
3713
3714  if (AM.BaseGV) {
3715    if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
3716      return false;
3717    return true;
3718  }
3719
3720  switch (AM.Scale) {
3721  case 0: // "r", "r+i" or "i" is allowed
3722    break;
3723  case 1:
3724    if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
3725      return false;
3726    // Otherwise we have r+i.
3727    break;
3728  default:
3729    // No scale > 1 is allowed
3730    return false;
3731  }
3732  return true;
3733}
3734
3735//===----------------------------------------------------------------------===//
3736//                         NVPTX Inline Assembly Support
3737//===----------------------------------------------------------------------===//
3738
3739/// getConstraintType - Given a constraint letter, return the type of
3740/// constraint it is for this target.
3741NVPTXTargetLowering::ConstraintType
3742NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
3743  if (Constraint.size() == 1) {
3744    switch (Constraint[0]) {
3745    default:
3746      break;
3747    case 'b':
3748    case 'r':
3749    case 'h':
3750    case 'c':
3751    case 'l':
3752    case 'f':
3753    case 'd':
3754    case '0':
3755    case 'N':
3756      return C_RegisterClass;
3757    }
3758  }
3759  return TargetLowering::getConstraintType(Constraint);
3760}
3761
3762std::pair<unsigned, const TargetRegisterClass *>
3763NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3764                                                  const std::string &Constraint,
3765                                                  MVT VT) const {
3766  if (Constraint.size() == 1) {
3767    switch (Constraint[0]) {
3768    case 'b':
3769      return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
3770    case 'c':
3771      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
3772    case 'h':
3773      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
3774    case 'r':
3775      return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
3776    case 'l':
3777    case 'N':
3778      return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
3779    case 'f':
3780      return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
3781    case 'd':
3782      return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
3783    }
3784  }
3785  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3786}
3787
3788/// getFunctionAlignment - Return the Log2 alignment of this function.
3789unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
3790  return 4;
3791}
3792
3793//===----------------------------------------------------------------------===//
3794//                         NVPTX DAG Combining
3795//===----------------------------------------------------------------------===//
3796
3797bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
3798                                   CodeGenOpt::Level OptLevel) const {
3799  const Function *F = MF.getFunction();
3800  const TargetOptions &TO = MF.getTarget().Options;
3801
3802  // Always honor command-line argument
3803  if (FMAContractLevelOpt.getNumOccurrences() > 0) {
3804    return FMAContractLevelOpt > 0;
3805  } else if (OptLevel == 0) {
3806    // Do not contract if we're not optimizing the code
3807    return false;
3808  } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
3809    // Honor TargetOptions flags that explicitly say fusion is okay
3810    return true;
3811  } else if (F->hasFnAttribute("unsafe-fp-math")) {
3812    // Check for unsafe-fp-math=true coming from Clang
3813    Attribute Attr = F->getFnAttribute("unsafe-fp-math");
3814    StringRef Val = Attr.getValueAsString();
3815    if (Val == "true")
3816      return true;
3817  }
3818
3819  // We did not have a clear indication that fusion is allowed, so assume not
3820  return false;
3821}
3822
3823/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
3824/// operands N0 and N1.  This is a helper for PerformADDCombine that is
3825/// called with the default operands, and if that fails, with commuted
3826/// operands.
3827static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
3828                                           TargetLowering::DAGCombinerInfo &DCI,
3829                                             const NVPTXSubtarget &Subtarget,
3830                                             CodeGenOpt::Level OptLevel) {
3831  SelectionDAG  &DAG = DCI.DAG;
3832  // Skip non-integer, non-scalar case
3833  EVT VT=N0.getValueType();
3834  if (VT.isVector())
3835    return SDValue();
3836
3837  // fold (add (mul a, b), c) -> (mad a, b, c)
3838  //
3839  if (N0.getOpcode() == ISD::MUL) {
3840    assert (VT.isInteger());
3841    // For integer:
3842    // Since integer multiply-add costs the same as integer multiply
3843    // but is more costly than integer add, do the fusion only when
3844    // the mul is only used in the add.
3845    if (OptLevel==CodeGenOpt::None || VT != MVT::i32 ||
3846        !N0.getNode()->hasOneUse())
3847      return SDValue();
3848
3849    // Do the folding
3850    return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
3851                       N0.getOperand(0), N0.getOperand(1), N1);
3852  }
3853  else if (N0.getOpcode() == ISD::FMUL) {
3854    if (VT == MVT::f32 || VT == MVT::f64) {
3855      const auto *TLI = static_cast<const NVPTXTargetLowering *>(
3856          &DAG.getTargetLoweringInfo());
3857      if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
3858        return SDValue();
3859
3860      // For floating point:
3861      // Do the fusion only when the mul has less than 5 uses and all
3862      // are add.
3863      // The heuristic is that if a use is not an add, then that use
3864      // cannot be fused into fma, therefore mul is still needed anyway.
3865      // If there are more than 4 uses, even if they are all add, fusing
3866      // them will increase register pressue.
3867      //
3868      int numUses = 0;
3869      int nonAddCount = 0;
3870      for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
3871           UE = N0.getNode()->use_end();
3872           UI != UE; ++UI) {
3873        numUses++;
3874        SDNode *User = *UI;
3875        if (User->getOpcode() != ISD::FADD)
3876          ++nonAddCount;
3877      }
3878      if (numUses >= 5)
3879        return SDValue();
3880      if (nonAddCount) {
3881        int orderNo = N->getIROrder();
3882        int orderNo2 = N0.getNode()->getIROrder();
3883        // simple heuristics here for considering potential register
3884        // pressure, the logics here is that the differnce are used
3885        // to measure the distance between def and use, the longer distance
3886        // more likely cause register pressure.
3887        if (orderNo - orderNo2 < 500)
3888          return SDValue();
3889
3890        // Now, check if at least one of the FMUL's operands is live beyond the node N,
3891        // which guarantees that the FMA will not increase register pressure at node N.
3892        bool opIsLive = false;
3893        const SDNode *left = N0.getOperand(0).getNode();
3894        const SDNode *right = N0.getOperand(1).getNode();
3895
3896        if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
3897          opIsLive = true;
3898
3899        if (!opIsLive)
3900          for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) {
3901            SDNode *User = *UI;
3902            int orderNo3 = User->getIROrder();
3903            if (orderNo3 > orderNo) {
3904              opIsLive = true;
3905              break;
3906            }
3907          }
3908
3909        if (!opIsLive)
3910          for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) {
3911            SDNode *User = *UI;
3912            int orderNo3 = User->getIROrder();
3913            if (orderNo3 > orderNo) {
3914              opIsLive = true;
3915              break;
3916            }
3917          }
3918
3919        if (!opIsLive)
3920          return SDValue();
3921      }
3922
3923      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
3924                         N0.getOperand(0), N0.getOperand(1), N1);
3925    }
3926  }
3927
3928  return SDValue();
3929}
3930
3931/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
3932///
3933static SDValue PerformADDCombine(SDNode *N,
3934                                 TargetLowering::DAGCombinerInfo &DCI,
3935                                 const NVPTXSubtarget &Subtarget,
3936                                 CodeGenOpt::Level OptLevel) {
3937  SDValue N0 = N->getOperand(0);
3938  SDValue N1 = N->getOperand(1);
3939
3940  // First try with the default operand order.
3941  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget,
3942                                                 OptLevel);
3943  if (Result.getNode())
3944    return Result;
3945
3946  // If that didn't work, try again with the operands commuted.
3947  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
3948}
3949
3950static SDValue PerformANDCombine(SDNode *N,
3951                                 TargetLowering::DAGCombinerInfo &DCI) {
3952  // The type legalizer turns a vector load of i8 values into a zextload to i16
3953  // registers, optionally ANY_EXTENDs it (if target type is integer),
3954  // and ANDs off the high 8 bits. Since we turn this load into a
3955  // target-specific DAG node, the DAG combiner fails to eliminate these AND
3956  // nodes. Do that here.
3957  SDValue Val = N->getOperand(0);
3958  SDValue Mask = N->getOperand(1);
3959
3960  if (isa<ConstantSDNode>(Val)) {
3961    std::swap(Val, Mask);
3962  }
3963
3964  SDValue AExt;
3965  // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
3966  if (Val.getOpcode() == ISD::ANY_EXTEND) {
3967    AExt = Val;
3968    Val = Val->getOperand(0);
3969  }
3970
3971  if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
3972    Val = Val->getOperand(0);
3973  }
3974
3975  if (Val->getOpcode() == NVPTXISD::LoadV2 ||
3976      Val->getOpcode() == NVPTXISD::LoadV4) {
3977    ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
3978    if (!MaskCnst) {
3979      // Not an AND with a constant
3980      return SDValue();
3981    }
3982
3983    uint64_t MaskVal = MaskCnst->getZExtValue();
3984    if (MaskVal != 0xff) {
3985      // Not an AND that chops off top 8 bits
3986      return SDValue();
3987    }
3988
3989    MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
3990    if (!Mem) {
3991      // Not a MemSDNode?!?
3992      return SDValue();
3993    }
3994
3995    EVT MemVT = Mem->getMemoryVT();
3996    if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
3997      // We only handle the i8 case
3998      return SDValue();
3999    }
4000
4001    unsigned ExtType =
4002      cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))->
4003        getZExtValue();
4004    if (ExtType == ISD::SEXTLOAD) {
4005      // If for some reason the load is a sextload, the and is needed to zero
4006      // out the high 8 bits
4007      return SDValue();
4008    }
4009
4010    bool AddTo = false;
4011    if (AExt.getNode() != 0) {
4012      // Re-insert the ext as a zext.
4013      Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4014                            AExt.getValueType(), Val);
4015      AddTo = true;
4016    }
4017
4018    // If we get here, the AND is unnecessary.  Just replace it with the load
4019    DCI.CombineTo(N, Val, AddTo);
4020  }
4021
4022  return SDValue();
4023}
4024
4025enum OperandSignedness {
4026  Signed = 0,
4027  Unsigned,
4028  Unknown
4029};
4030
4031/// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
4032/// that can be demoted to \p OptSize bits without loss of information. The
4033/// signedness of the operand, if determinable, is placed in \p S.
4034static bool IsMulWideOperandDemotable(SDValue Op,
4035                                      unsigned OptSize,
4036                                      OperandSignedness &S) {
4037  S = Unknown;
4038
4039  if (Op.getOpcode() == ISD::SIGN_EXTEND ||
4040      Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4041    EVT OrigVT = Op.getOperand(0).getValueType();
4042    if (OrigVT.getSizeInBits() <= OptSize) {
4043      S = Signed;
4044      return true;
4045    }
4046  } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
4047    EVT OrigVT = Op.getOperand(0).getValueType();
4048    if (OrigVT.getSizeInBits() <= OptSize) {
4049      S = Unsigned;
4050      return true;
4051    }
4052  }
4053
4054  return false;
4055}
4056
4057/// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
4058/// be demoted to \p OptSize bits without loss of information. If the operands
4059/// contain a constant, it should appear as the RHS operand. The signedness of
4060/// the operands is placed in \p IsSigned.
4061static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
4062                                        unsigned OptSize,
4063                                        bool &IsSigned) {
4064
4065  OperandSignedness LHSSign;
4066
4067  // The LHS operand must be a demotable op
4068  if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
4069    return false;
4070
4071  // We should have been able to determine the signedness from the LHS
4072  if (LHSSign == Unknown)
4073    return false;
4074
4075  IsSigned = (LHSSign == Signed);
4076
4077  // The RHS can be a demotable op or a constant
4078  if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
4079    APInt Val = CI->getAPIntValue();
4080    if (LHSSign == Unsigned) {
4081      if (Val.isIntN(OptSize)) {
4082        return true;
4083      }
4084      return false;
4085    } else {
4086      if (Val.isSignedIntN(OptSize)) {
4087        return true;
4088      }
4089      return false;
4090    }
4091  } else {
4092    OperandSignedness RHSSign;
4093    if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
4094      return false;
4095
4096    if (LHSSign != RHSSign)
4097      return false;
4098
4099    return true;
4100  }
4101}
4102
4103/// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
4104/// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
4105/// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
4106/// amount.
4107static SDValue TryMULWIDECombine(SDNode *N,
4108                                 TargetLowering::DAGCombinerInfo &DCI) {
4109  EVT MulType = N->getValueType(0);
4110  if (MulType != MVT::i32 && MulType != MVT::i64) {
4111    return SDValue();
4112  }
4113
4114  unsigned OptSize = MulType.getSizeInBits() >> 1;
4115  SDValue LHS = N->getOperand(0);
4116  SDValue RHS = N->getOperand(1);
4117
4118  // Canonicalize the multiply so the constant (if any) is on the right
4119  if (N->getOpcode() == ISD::MUL) {
4120    if (isa<ConstantSDNode>(LHS)) {
4121      std::swap(LHS, RHS);
4122    }
4123  }
4124
4125  // If we have a SHL, determine the actual multiply amount
4126  if (N->getOpcode() == ISD::SHL) {
4127    ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
4128    if (!ShlRHS) {
4129      return SDValue();
4130    }
4131
4132    APInt ShiftAmt = ShlRHS->getAPIntValue();
4133    unsigned BitWidth = MulType.getSizeInBits();
4134    if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
4135      APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
4136      RHS = DCI.DAG.getConstant(MulVal, MulType);
4137    } else {
4138      return SDValue();
4139    }
4140  }
4141
4142  bool Signed;
4143  // Verify that our operands are demotable
4144  if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
4145    return SDValue();
4146  }
4147
4148  EVT DemotedVT;
4149  if (MulType == MVT::i32) {
4150    DemotedVT = MVT::i16;
4151  } else {
4152    DemotedVT = MVT::i32;
4153  }
4154
4155  // Truncate the operands to the correct size. Note that these are just for
4156  // type consistency and will (likely) be eliminated in later phases.
4157  SDValue TruncLHS =
4158    DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, LHS);
4159  SDValue TruncRHS =
4160    DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, RHS);
4161
4162  unsigned Opc;
4163  if (Signed) {
4164    Opc = NVPTXISD::MUL_WIDE_SIGNED;
4165  } else {
4166    Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
4167  }
4168
4169  return DCI.DAG.getNode(Opc, SDLoc(N), MulType, TruncLHS, TruncRHS);
4170}
4171
4172/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
4173static SDValue PerformMULCombine(SDNode *N,
4174                                 TargetLowering::DAGCombinerInfo &DCI,
4175                                 CodeGenOpt::Level OptLevel) {
4176  if (OptLevel > 0) {
4177    // Try mul.wide combining at OptLevel > 0
4178    SDValue Ret = TryMULWIDECombine(N, DCI);
4179    if (Ret.getNode())
4180      return Ret;
4181  }
4182
4183  return SDValue();
4184}
4185
4186/// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
4187static SDValue PerformSHLCombine(SDNode *N,
4188                                 TargetLowering::DAGCombinerInfo &DCI,
4189                                 CodeGenOpt::Level OptLevel) {
4190  if (OptLevel > 0) {
4191    // Try mul.wide combining at OptLevel > 0
4192    SDValue Ret = TryMULWIDECombine(N, DCI);
4193    if (Ret.getNode())
4194      return Ret;
4195  }
4196
4197  return SDValue();
4198}
4199
4200SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
4201                                               DAGCombinerInfo &DCI) const {
4202  CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
4203  switch (N->getOpcode()) {
4204    default: break;
4205    case ISD::ADD:
4206    case ISD::FADD:
4207      return PerformADDCombine(N, DCI, STI, OptLevel);
4208    case ISD::MUL:
4209      return PerformMULCombine(N, DCI, OptLevel);
4210    case ISD::SHL:
4211      return PerformSHLCombine(N, DCI, OptLevel);
4212    case ISD::AND:
4213      return PerformANDCombine(N, DCI);
4214  }
4215  return SDValue();
4216}
4217
4218/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
4219static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
4220                              const DataLayout *TD,
4221                              SmallVectorImpl<SDValue> &Results) {
4222  EVT ResVT = N->getValueType(0);
4223  SDLoc DL(N);
4224
4225  assert(ResVT.isVector() && "Vector load must have vector type");
4226
4227  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
4228  // legal.  We can (and should) split that into 2 loads of <2 x double> here
4229  // but I'm leaving that as a TODO for now.
4230  assert(ResVT.isSimple() && "Can only handle simple types");
4231  switch (ResVT.getSimpleVT().SimpleTy) {
4232  default:
4233    return;
4234  case MVT::v2i8:
4235  case MVT::v2i16:
4236  case MVT::v2i32:
4237  case MVT::v2i64:
4238  case MVT::v2f32:
4239  case MVT::v2f64:
4240  case MVT::v4i8:
4241  case MVT::v4i16:
4242  case MVT::v4i32:
4243  case MVT::v4f32:
4244    // This is a "native" vector type
4245    break;
4246  }
4247
4248  LoadSDNode *LD = cast<LoadSDNode>(N);
4249
4250  unsigned Align = LD->getAlignment();
4251  unsigned PrefAlign =
4252    TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
4253  if (Align < PrefAlign) {
4254    // This load is not sufficiently aligned, so bail out and let this vector
4255    // load be scalarized.  Note that we may still be able to emit smaller
4256    // vector loads.  For example, if we are loading a <4 x float> with an
4257    // alignment of 8, this check will fail but the legalizer will try again
4258    // with 2 x <2 x float>, which will succeed with an alignment of 8.
4259    return;
4260  }
4261
4262  EVT EltVT = ResVT.getVectorElementType();
4263  unsigned NumElts = ResVT.getVectorNumElements();
4264
4265  // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
4266  // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
4267  // loaded type to i16 and propagate the "real" type as the memory type.
4268  bool NeedTrunc = false;
4269  if (EltVT.getSizeInBits() < 16) {
4270    EltVT = MVT::i16;
4271    NeedTrunc = true;
4272  }
4273
4274  unsigned Opcode = 0;
4275  SDVTList LdResVTs;
4276
4277  switch (NumElts) {
4278  default:
4279    return;
4280  case 2:
4281    Opcode = NVPTXISD::LoadV2;
4282    LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
4283    break;
4284  case 4: {
4285    Opcode = NVPTXISD::LoadV4;
4286    EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
4287    LdResVTs = DAG.getVTList(ListVTs);
4288    break;
4289  }
4290  }
4291
4292  // Copy regular operands
4293  SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
4294
4295  // The select routine does not have access to the LoadSDNode instance, so
4296  // pass along the extension information
4297  OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
4298
4299  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
4300                                          LD->getMemoryVT(),
4301                                          LD->getMemOperand());
4302
4303  SmallVector<SDValue, 4> ScalarRes;
4304
4305  for (unsigned i = 0; i < NumElts; ++i) {
4306    SDValue Res = NewLD.getValue(i);
4307    if (NeedTrunc)
4308      Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
4309    ScalarRes.push_back(Res);
4310  }
4311
4312  SDValue LoadChain = NewLD.getValue(NumElts);
4313
4314  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);
4315
4316  Results.push_back(BuildVec);
4317  Results.push_back(LoadChain);
4318}
4319
4320static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
4321                                     SmallVectorImpl<SDValue> &Results) {
4322  SDValue Chain = N->getOperand(0);
4323  SDValue Intrin = N->getOperand(1);
4324  SDLoc DL(N);
4325
4326  // Get the intrinsic ID
4327  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
4328  switch (IntrinNo) {
4329  default:
4330    return;
4331  case Intrinsic::nvvm_ldg_global_i:
4332  case Intrinsic::nvvm_ldg_global_f:
4333  case Intrinsic::nvvm_ldg_global_p:
4334  case Intrinsic::nvvm_ldu_global_i:
4335  case Intrinsic::nvvm_ldu_global_f:
4336  case Intrinsic::nvvm_ldu_global_p: {
4337    EVT ResVT = N->getValueType(0);
4338
4339    if (ResVT.isVector()) {
4340      // Vector LDG/LDU
4341
4342      unsigned NumElts = ResVT.getVectorNumElements();
4343      EVT EltVT = ResVT.getVectorElementType();
4344
4345      // Since LDU/LDG are target nodes, we cannot rely on DAG type
4346      // legalization.
4347      // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
4348      // loaded type to i16 and propagate the "real" type as the memory type.
4349      bool NeedTrunc = false;
4350      if (EltVT.getSizeInBits() < 16) {
4351        EltVT = MVT::i16;
4352        NeedTrunc = true;
4353      }
4354
4355      unsigned Opcode = 0;
4356      SDVTList LdResVTs;
4357
4358      switch (NumElts) {
4359      default:
4360        return;
4361      case 2:
4362        switch (IntrinNo) {
4363        default:
4364          return;
4365        case Intrinsic::nvvm_ldg_global_i:
4366        case Intrinsic::nvvm_ldg_global_f:
4367        case Intrinsic::nvvm_ldg_global_p:
4368          Opcode = NVPTXISD::LDGV2;
4369          break;
4370        case Intrinsic::nvvm_ldu_global_i:
4371        case Intrinsic::nvvm_ldu_global_f:
4372        case Intrinsic::nvvm_ldu_global_p:
4373          Opcode = NVPTXISD::LDUV2;
4374          break;
4375        }
4376        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
4377        break;
4378      case 4: {
4379        switch (IntrinNo) {
4380        default:
4381          return;
4382        case Intrinsic::nvvm_ldg_global_i:
4383        case Intrinsic::nvvm_ldg_global_f:
4384        case Intrinsic::nvvm_ldg_global_p:
4385          Opcode = NVPTXISD::LDGV4;
4386          break;
4387        case Intrinsic::nvvm_ldu_global_i:
4388        case Intrinsic::nvvm_ldu_global_f:
4389        case Intrinsic::nvvm_ldu_global_p:
4390          Opcode = NVPTXISD::LDUV4;
4391          break;
4392        }
4393        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
4394        LdResVTs = DAG.getVTList(ListVTs);
4395        break;
4396      }
4397      }
4398
4399      SmallVector<SDValue, 8> OtherOps;
4400
4401      // Copy regular operands
4402
4403      OtherOps.push_back(Chain); // Chain
4404                                 // Skip operand 1 (intrinsic ID)
4405      // Others
4406      OtherOps.append(N->op_begin() + 2, N->op_end());
4407
4408      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
4409
4410      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
4411                                              MemSD->getMemoryVT(),
4412                                              MemSD->getMemOperand());
4413
4414      SmallVector<SDValue, 4> ScalarRes;
4415
4416      for (unsigned i = 0; i < NumElts; ++i) {
4417        SDValue Res = NewLD.getValue(i);
4418        if (NeedTrunc)
4419          Res =
4420              DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
4421        ScalarRes.push_back(Res);
4422      }
4423
4424      SDValue LoadChain = NewLD.getValue(NumElts);
4425
4426      SDValue BuildVec =
4427          DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);
4428
4429      Results.push_back(BuildVec);
4430      Results.push_back(LoadChain);
4431    } else {
4432      // i8 LDG/LDU
4433      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
4434             "Custom handling of non-i8 ldu/ldg?");
4435
4436      // Just copy all operands as-is
4437      SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
4438
4439      // Force output to i16
4440      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
4441
4442      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
4443
4444      // We make sure the memory type is i8, which will be used during isel
4445      // to select the proper instruction.
4446      SDValue NewLD =
4447          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
4448                                  MVT::i8, MemSD->getMemOperand());
4449
4450      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
4451                                    NewLD.getValue(0)));
4452      Results.push_back(NewLD.getValue(1));
4453    }
4454  }
4455  }
4456}
4457
4458void NVPTXTargetLowering::ReplaceNodeResults(
4459    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4460  switch (N->getOpcode()) {
4461  default:
4462    report_fatal_error("Unhandled custom legalization");
4463  case ISD::LOAD:
4464    ReplaceLoadVector(N, DAG, getDataLayout(), Results);
4465    return;
4466  case ISD::INTRINSIC_W_CHAIN:
4467    ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
4468    return;
4469  }
4470}
4471
4472// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
4473void NVPTXSection::anchor() {}
4474
4475NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
4476  delete TextSection;
4477  delete DataSection;
4478  delete BSSSection;
4479  delete ReadOnlySection;
4480
4481  delete StaticCtorSection;
4482  delete StaticDtorSection;
4483  delete LSDASection;
4484  delete EHFrameSection;
4485  delete DwarfAbbrevSection;
4486  delete DwarfInfoSection;
4487  delete DwarfLineSection;
4488  delete DwarfFrameSection;
4489  delete DwarfPubTypesSection;
4490  delete DwarfDebugInlineSection;
4491  delete DwarfStrSection;
4492  delete DwarfLocSection;
4493  delete DwarfARangesSection;
4494  delete DwarfRangesSection;
4495}
4496
4497const MCSection *
4498NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
4499                                              SectionKind Kind, Mangler &Mang,
4500                                              const TargetMachine &TM) const {
4501  return getDataSection();
4502}
4503