1//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXISelDAGToDAG.h"
15#include "NVPTXUtilities.h"
16#include "llvm/Analysis/ValueTracking.h"
17#include "llvm/IR/GlobalValue.h"
18#include "llvm/IR/Instructions.h"
19#include "llvm/Support/CommandLine.h"
20#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
22#include "llvm/Support/raw_ostream.h"
23#include "llvm/Target/TargetIntrinsicInfo.h"
24
25using namespace llvm;
26
27#define DEBUG_TYPE "nvptx-isel"
28
29static cl::opt<int> UsePrecDivF32(
30    "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
31    cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
32             " IEEE Compliant F32 div.rnd if available."),
33    cl::init(2));
34
35static cl::opt<bool>
36UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
37          cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
38          cl::init(true));
39
40static cl::opt<bool>
41FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
42           cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
43           cl::init(false));
44
45
46/// createNVPTXISelDag - This pass converts a legalized DAG into a
47/// NVPTX-specific DAG, ready for instruction scheduling.
48FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
49                                       llvm::CodeGenOpt::Level OptLevel) {
50  return new NVPTXDAGToDAGISel(TM, OptLevel);
51}
52
53NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
54                                     CodeGenOpt::Level OptLevel)
55    : SelectionDAGISel(tm, OptLevel), TM(tm) {
56  doMulWide = (OptLevel > 0);
57}
58
59bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
60    Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
61    return SelectionDAGISel::runOnMachineFunction(MF);
62}
63
64int NVPTXDAGToDAGISel::getDivF32Level() const {
65  if (UsePrecDivF32.getNumOccurrences() > 0) {
66    // If nvptx-prec-div32=N is used on the command-line, always honor it
67    return UsePrecDivF32;
68  } else {
69    // Otherwise, use div.approx if fast math is enabled
70    if (TM.Options.UnsafeFPMath)
71      return 0;
72    else
73      return 2;
74  }
75}
76
77bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
78  if (UsePrecSqrtF32.getNumOccurrences() > 0) {
79    // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
80    return UsePrecSqrtF32;
81  } else {
82    // Otherwise, use sqrt.approx if fast math is enabled
83    return !TM.Options.UnsafeFPMath;
84  }
85}
86
87bool NVPTXDAGToDAGISel::useF32FTZ() const {
88  if (FtzEnabled.getNumOccurrences() > 0) {
89    // If nvptx-f32ftz is used on the command-line, always honor it
90    return FtzEnabled;
91  } else {
92    const Function *F = MF->getFunction();
93    // Otherwise, check for an nvptx-f32ftz attribute on the function
94    if (F->hasFnAttribute("nvptx-f32ftz"))
95      return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
96    else
97      return false;
98  }
99}
100
101bool NVPTXDAGToDAGISel::allowFMA() const {
102  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
103  return TL->allowFMA(*MF, OptLevel);
104}
105
106/// Select - Select instructions not customized! Used for
107/// expanded, promoted and normal instructions.
108SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
109
110  if (N->isMachineOpcode()) {
111    N->setNodeId(-1);
112    return nullptr; // Already selected.
113  }
114
115  SDNode *ResNode = nullptr;
116  switch (N->getOpcode()) {
117  case ISD::LOAD:
118    ResNode = SelectLoad(N);
119    break;
120  case ISD::STORE:
121    ResNode = SelectStore(N);
122    break;
123  case NVPTXISD::LoadV2:
124  case NVPTXISD::LoadV4:
125    ResNode = SelectLoadVector(N);
126    break;
127  case NVPTXISD::LDGV2:
128  case NVPTXISD::LDGV4:
129  case NVPTXISD::LDUV2:
130  case NVPTXISD::LDUV4:
131    ResNode = SelectLDGLDU(N);
132    break;
133  case NVPTXISD::StoreV2:
134  case NVPTXISD::StoreV4:
135    ResNode = SelectStoreVector(N);
136    break;
137  case NVPTXISD::LoadParam:
138  case NVPTXISD::LoadParamV2:
139  case NVPTXISD::LoadParamV4:
140    ResNode = SelectLoadParam(N);
141    break;
142  case NVPTXISD::StoreRetval:
143  case NVPTXISD::StoreRetvalV2:
144  case NVPTXISD::StoreRetvalV4:
145    ResNode = SelectStoreRetval(N);
146    break;
147  case NVPTXISD::StoreParam:
148  case NVPTXISD::StoreParamV2:
149  case NVPTXISD::StoreParamV4:
150  case NVPTXISD::StoreParamS32:
151  case NVPTXISD::StoreParamU32:
152    ResNode = SelectStoreParam(N);
153    break;
154  case ISD::INTRINSIC_WO_CHAIN:
155    ResNode = SelectIntrinsicNoChain(N);
156    break;
157  case ISD::INTRINSIC_W_CHAIN:
158    ResNode = SelectIntrinsicChain(N);
159    break;
160  case NVPTXISD::Tex1DFloatS32:
161  case NVPTXISD::Tex1DFloatFloat:
162  case NVPTXISD::Tex1DFloatFloatLevel:
163  case NVPTXISD::Tex1DFloatFloatGrad:
164  case NVPTXISD::Tex1DS32S32:
165  case NVPTXISD::Tex1DS32Float:
166  case NVPTXISD::Tex1DS32FloatLevel:
167  case NVPTXISD::Tex1DS32FloatGrad:
168  case NVPTXISD::Tex1DU32S32:
169  case NVPTXISD::Tex1DU32Float:
170  case NVPTXISD::Tex1DU32FloatLevel:
171  case NVPTXISD::Tex1DU32FloatGrad:
172  case NVPTXISD::Tex1DArrayFloatS32:
173  case NVPTXISD::Tex1DArrayFloatFloat:
174  case NVPTXISD::Tex1DArrayFloatFloatLevel:
175  case NVPTXISD::Tex1DArrayFloatFloatGrad:
176  case NVPTXISD::Tex1DArrayS32S32:
177  case NVPTXISD::Tex1DArrayS32Float:
178  case NVPTXISD::Tex1DArrayS32FloatLevel:
179  case NVPTXISD::Tex1DArrayS32FloatGrad:
180  case NVPTXISD::Tex1DArrayU32S32:
181  case NVPTXISD::Tex1DArrayU32Float:
182  case NVPTXISD::Tex1DArrayU32FloatLevel:
183  case NVPTXISD::Tex1DArrayU32FloatGrad:
184  case NVPTXISD::Tex2DFloatS32:
185  case NVPTXISD::Tex2DFloatFloat:
186  case NVPTXISD::Tex2DFloatFloatLevel:
187  case NVPTXISD::Tex2DFloatFloatGrad:
188  case NVPTXISD::Tex2DS32S32:
189  case NVPTXISD::Tex2DS32Float:
190  case NVPTXISD::Tex2DS32FloatLevel:
191  case NVPTXISD::Tex2DS32FloatGrad:
192  case NVPTXISD::Tex2DU32S32:
193  case NVPTXISD::Tex2DU32Float:
194  case NVPTXISD::Tex2DU32FloatLevel:
195  case NVPTXISD::Tex2DU32FloatGrad:
196  case NVPTXISD::Tex2DArrayFloatS32:
197  case NVPTXISD::Tex2DArrayFloatFloat:
198  case NVPTXISD::Tex2DArrayFloatFloatLevel:
199  case NVPTXISD::Tex2DArrayFloatFloatGrad:
200  case NVPTXISD::Tex2DArrayS32S32:
201  case NVPTXISD::Tex2DArrayS32Float:
202  case NVPTXISD::Tex2DArrayS32FloatLevel:
203  case NVPTXISD::Tex2DArrayS32FloatGrad:
204  case NVPTXISD::Tex2DArrayU32S32:
205  case NVPTXISD::Tex2DArrayU32Float:
206  case NVPTXISD::Tex2DArrayU32FloatLevel:
207  case NVPTXISD::Tex2DArrayU32FloatGrad:
208  case NVPTXISD::Tex3DFloatS32:
209  case NVPTXISD::Tex3DFloatFloat:
210  case NVPTXISD::Tex3DFloatFloatLevel:
211  case NVPTXISD::Tex3DFloatFloatGrad:
212  case NVPTXISD::Tex3DS32S32:
213  case NVPTXISD::Tex3DS32Float:
214  case NVPTXISD::Tex3DS32FloatLevel:
215  case NVPTXISD::Tex3DS32FloatGrad:
216  case NVPTXISD::Tex3DU32S32:
217  case NVPTXISD::Tex3DU32Float:
218  case NVPTXISD::Tex3DU32FloatLevel:
219  case NVPTXISD::Tex3DU32FloatGrad:
220  case NVPTXISD::TexCubeFloatFloat:
221  case NVPTXISD::TexCubeFloatFloatLevel:
222  case NVPTXISD::TexCubeS32Float:
223  case NVPTXISD::TexCubeS32FloatLevel:
224  case NVPTXISD::TexCubeU32Float:
225  case NVPTXISD::TexCubeU32FloatLevel:
226  case NVPTXISD::TexCubeArrayFloatFloat:
227  case NVPTXISD::TexCubeArrayFloatFloatLevel:
228  case NVPTXISD::TexCubeArrayS32Float:
229  case NVPTXISD::TexCubeArrayS32FloatLevel:
230  case NVPTXISD::TexCubeArrayU32Float:
231  case NVPTXISD::TexCubeArrayU32FloatLevel:
232  case NVPTXISD::Tld4R2DFloatFloat:
233  case NVPTXISD::Tld4G2DFloatFloat:
234  case NVPTXISD::Tld4B2DFloatFloat:
235  case NVPTXISD::Tld4A2DFloatFloat:
236  case NVPTXISD::Tld4R2DS64Float:
237  case NVPTXISD::Tld4G2DS64Float:
238  case NVPTXISD::Tld4B2DS64Float:
239  case NVPTXISD::Tld4A2DS64Float:
240  case NVPTXISD::Tld4R2DU64Float:
241  case NVPTXISD::Tld4G2DU64Float:
242  case NVPTXISD::Tld4B2DU64Float:
243  case NVPTXISD::Tld4A2DU64Float:
244  case NVPTXISD::TexUnified1DFloatS32:
245  case NVPTXISD::TexUnified1DFloatFloat:
246  case NVPTXISD::TexUnified1DFloatFloatLevel:
247  case NVPTXISD::TexUnified1DFloatFloatGrad:
248  case NVPTXISD::TexUnified1DS32S32:
249  case NVPTXISD::TexUnified1DS32Float:
250  case NVPTXISD::TexUnified1DS32FloatLevel:
251  case NVPTXISD::TexUnified1DS32FloatGrad:
252  case NVPTXISD::TexUnified1DU32S32:
253  case NVPTXISD::TexUnified1DU32Float:
254  case NVPTXISD::TexUnified1DU32FloatLevel:
255  case NVPTXISD::TexUnified1DU32FloatGrad:
256  case NVPTXISD::TexUnified1DArrayFloatS32:
257  case NVPTXISD::TexUnified1DArrayFloatFloat:
258  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
259  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
260  case NVPTXISD::TexUnified1DArrayS32S32:
261  case NVPTXISD::TexUnified1DArrayS32Float:
262  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
263  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
264  case NVPTXISD::TexUnified1DArrayU32S32:
265  case NVPTXISD::TexUnified1DArrayU32Float:
266  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
267  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
268  case NVPTXISD::TexUnified2DFloatS32:
269  case NVPTXISD::TexUnified2DFloatFloat:
270  case NVPTXISD::TexUnified2DFloatFloatLevel:
271  case NVPTXISD::TexUnified2DFloatFloatGrad:
272  case NVPTXISD::TexUnified2DS32S32:
273  case NVPTXISD::TexUnified2DS32Float:
274  case NVPTXISD::TexUnified2DS32FloatLevel:
275  case NVPTXISD::TexUnified2DS32FloatGrad:
276  case NVPTXISD::TexUnified2DU32S32:
277  case NVPTXISD::TexUnified2DU32Float:
278  case NVPTXISD::TexUnified2DU32FloatLevel:
279  case NVPTXISD::TexUnified2DU32FloatGrad:
280  case NVPTXISD::TexUnified2DArrayFloatS32:
281  case NVPTXISD::TexUnified2DArrayFloatFloat:
282  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
283  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
284  case NVPTXISD::TexUnified2DArrayS32S32:
285  case NVPTXISD::TexUnified2DArrayS32Float:
286  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
287  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
288  case NVPTXISD::TexUnified2DArrayU32S32:
289  case NVPTXISD::TexUnified2DArrayU32Float:
290  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
291  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
292  case NVPTXISD::TexUnified3DFloatS32:
293  case NVPTXISD::TexUnified3DFloatFloat:
294  case NVPTXISD::TexUnified3DFloatFloatLevel:
295  case NVPTXISD::TexUnified3DFloatFloatGrad:
296  case NVPTXISD::TexUnified3DS32S32:
297  case NVPTXISD::TexUnified3DS32Float:
298  case NVPTXISD::TexUnified3DS32FloatLevel:
299  case NVPTXISD::TexUnified3DS32FloatGrad:
300  case NVPTXISD::TexUnified3DU32S32:
301  case NVPTXISD::TexUnified3DU32Float:
302  case NVPTXISD::TexUnified3DU32FloatLevel:
303  case NVPTXISD::TexUnified3DU32FloatGrad:
304  case NVPTXISD::TexUnifiedCubeFloatFloat:
305  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
306  case NVPTXISD::TexUnifiedCubeS32Float:
307  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
308  case NVPTXISD::TexUnifiedCubeU32Float:
309  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
310  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
311  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
312  case NVPTXISD::TexUnifiedCubeArrayS32Float:
313  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
314  case NVPTXISD::TexUnifiedCubeArrayU32Float:
315  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
316  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
317  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
318  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
319  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
320  case NVPTXISD::Tld4UnifiedR2DS64Float:
321  case NVPTXISD::Tld4UnifiedG2DS64Float:
322  case NVPTXISD::Tld4UnifiedB2DS64Float:
323  case NVPTXISD::Tld4UnifiedA2DS64Float:
324  case NVPTXISD::Tld4UnifiedR2DU64Float:
325  case NVPTXISD::Tld4UnifiedG2DU64Float:
326  case NVPTXISD::Tld4UnifiedB2DU64Float:
327  case NVPTXISD::Tld4UnifiedA2DU64Float:
328    ResNode = SelectTextureIntrinsic(N);
329    break;
330  case NVPTXISD::Suld1DI8Clamp:
331  case NVPTXISD::Suld1DI16Clamp:
332  case NVPTXISD::Suld1DI32Clamp:
333  case NVPTXISD::Suld1DI64Clamp:
334  case NVPTXISD::Suld1DV2I8Clamp:
335  case NVPTXISD::Suld1DV2I16Clamp:
336  case NVPTXISD::Suld1DV2I32Clamp:
337  case NVPTXISD::Suld1DV2I64Clamp:
338  case NVPTXISD::Suld1DV4I8Clamp:
339  case NVPTXISD::Suld1DV4I16Clamp:
340  case NVPTXISD::Suld1DV4I32Clamp:
341  case NVPTXISD::Suld1DArrayI8Clamp:
342  case NVPTXISD::Suld1DArrayI16Clamp:
343  case NVPTXISD::Suld1DArrayI32Clamp:
344  case NVPTXISD::Suld1DArrayI64Clamp:
345  case NVPTXISD::Suld1DArrayV2I8Clamp:
346  case NVPTXISD::Suld1DArrayV2I16Clamp:
347  case NVPTXISD::Suld1DArrayV2I32Clamp:
348  case NVPTXISD::Suld1DArrayV2I64Clamp:
349  case NVPTXISD::Suld1DArrayV4I8Clamp:
350  case NVPTXISD::Suld1DArrayV4I16Clamp:
351  case NVPTXISD::Suld1DArrayV4I32Clamp:
352  case NVPTXISD::Suld2DI8Clamp:
353  case NVPTXISD::Suld2DI16Clamp:
354  case NVPTXISD::Suld2DI32Clamp:
355  case NVPTXISD::Suld2DI64Clamp:
356  case NVPTXISD::Suld2DV2I8Clamp:
357  case NVPTXISD::Suld2DV2I16Clamp:
358  case NVPTXISD::Suld2DV2I32Clamp:
359  case NVPTXISD::Suld2DV2I64Clamp:
360  case NVPTXISD::Suld2DV4I8Clamp:
361  case NVPTXISD::Suld2DV4I16Clamp:
362  case NVPTXISD::Suld2DV4I32Clamp:
363  case NVPTXISD::Suld2DArrayI8Clamp:
364  case NVPTXISD::Suld2DArrayI16Clamp:
365  case NVPTXISD::Suld2DArrayI32Clamp:
366  case NVPTXISD::Suld2DArrayI64Clamp:
367  case NVPTXISD::Suld2DArrayV2I8Clamp:
368  case NVPTXISD::Suld2DArrayV2I16Clamp:
369  case NVPTXISD::Suld2DArrayV2I32Clamp:
370  case NVPTXISD::Suld2DArrayV2I64Clamp:
371  case NVPTXISD::Suld2DArrayV4I8Clamp:
372  case NVPTXISD::Suld2DArrayV4I16Clamp:
373  case NVPTXISD::Suld2DArrayV4I32Clamp:
374  case NVPTXISD::Suld3DI8Clamp:
375  case NVPTXISD::Suld3DI16Clamp:
376  case NVPTXISD::Suld3DI32Clamp:
377  case NVPTXISD::Suld3DI64Clamp:
378  case NVPTXISD::Suld3DV2I8Clamp:
379  case NVPTXISD::Suld3DV2I16Clamp:
380  case NVPTXISD::Suld3DV2I32Clamp:
381  case NVPTXISD::Suld3DV2I64Clamp:
382  case NVPTXISD::Suld3DV4I8Clamp:
383  case NVPTXISD::Suld3DV4I16Clamp:
384  case NVPTXISD::Suld3DV4I32Clamp:
385  case NVPTXISD::Suld1DI8Trap:
386  case NVPTXISD::Suld1DI16Trap:
387  case NVPTXISD::Suld1DI32Trap:
388  case NVPTXISD::Suld1DI64Trap:
389  case NVPTXISD::Suld1DV2I8Trap:
390  case NVPTXISD::Suld1DV2I16Trap:
391  case NVPTXISD::Suld1DV2I32Trap:
392  case NVPTXISD::Suld1DV2I64Trap:
393  case NVPTXISD::Suld1DV4I8Trap:
394  case NVPTXISD::Suld1DV4I16Trap:
395  case NVPTXISD::Suld1DV4I32Trap:
396  case NVPTXISD::Suld1DArrayI8Trap:
397  case NVPTXISD::Suld1DArrayI16Trap:
398  case NVPTXISD::Suld1DArrayI32Trap:
399  case NVPTXISD::Suld1DArrayI64Trap:
400  case NVPTXISD::Suld1DArrayV2I8Trap:
401  case NVPTXISD::Suld1DArrayV2I16Trap:
402  case NVPTXISD::Suld1DArrayV2I32Trap:
403  case NVPTXISD::Suld1DArrayV2I64Trap:
404  case NVPTXISD::Suld1DArrayV4I8Trap:
405  case NVPTXISD::Suld1DArrayV4I16Trap:
406  case NVPTXISD::Suld1DArrayV4I32Trap:
407  case NVPTXISD::Suld2DI8Trap:
408  case NVPTXISD::Suld2DI16Trap:
409  case NVPTXISD::Suld2DI32Trap:
410  case NVPTXISD::Suld2DI64Trap:
411  case NVPTXISD::Suld2DV2I8Trap:
412  case NVPTXISD::Suld2DV2I16Trap:
413  case NVPTXISD::Suld2DV2I32Trap:
414  case NVPTXISD::Suld2DV2I64Trap:
415  case NVPTXISD::Suld2DV4I8Trap:
416  case NVPTXISD::Suld2DV4I16Trap:
417  case NVPTXISD::Suld2DV4I32Trap:
418  case NVPTXISD::Suld2DArrayI8Trap:
419  case NVPTXISD::Suld2DArrayI16Trap:
420  case NVPTXISD::Suld2DArrayI32Trap:
421  case NVPTXISD::Suld2DArrayI64Trap:
422  case NVPTXISD::Suld2DArrayV2I8Trap:
423  case NVPTXISD::Suld2DArrayV2I16Trap:
424  case NVPTXISD::Suld2DArrayV2I32Trap:
425  case NVPTXISD::Suld2DArrayV2I64Trap:
426  case NVPTXISD::Suld2DArrayV4I8Trap:
427  case NVPTXISD::Suld2DArrayV4I16Trap:
428  case NVPTXISD::Suld2DArrayV4I32Trap:
429  case NVPTXISD::Suld3DI8Trap:
430  case NVPTXISD::Suld3DI16Trap:
431  case NVPTXISD::Suld3DI32Trap:
432  case NVPTXISD::Suld3DI64Trap:
433  case NVPTXISD::Suld3DV2I8Trap:
434  case NVPTXISD::Suld3DV2I16Trap:
435  case NVPTXISD::Suld3DV2I32Trap:
436  case NVPTXISD::Suld3DV2I64Trap:
437  case NVPTXISD::Suld3DV4I8Trap:
438  case NVPTXISD::Suld3DV4I16Trap:
439  case NVPTXISD::Suld3DV4I32Trap:
440  case NVPTXISD::Suld1DI8Zero:
441  case NVPTXISD::Suld1DI16Zero:
442  case NVPTXISD::Suld1DI32Zero:
443  case NVPTXISD::Suld1DI64Zero:
444  case NVPTXISD::Suld1DV2I8Zero:
445  case NVPTXISD::Suld1DV2I16Zero:
446  case NVPTXISD::Suld1DV2I32Zero:
447  case NVPTXISD::Suld1DV2I64Zero:
448  case NVPTXISD::Suld1DV4I8Zero:
449  case NVPTXISD::Suld1DV4I16Zero:
450  case NVPTXISD::Suld1DV4I32Zero:
451  case NVPTXISD::Suld1DArrayI8Zero:
452  case NVPTXISD::Suld1DArrayI16Zero:
453  case NVPTXISD::Suld1DArrayI32Zero:
454  case NVPTXISD::Suld1DArrayI64Zero:
455  case NVPTXISD::Suld1DArrayV2I8Zero:
456  case NVPTXISD::Suld1DArrayV2I16Zero:
457  case NVPTXISD::Suld1DArrayV2I32Zero:
458  case NVPTXISD::Suld1DArrayV2I64Zero:
459  case NVPTXISD::Suld1DArrayV4I8Zero:
460  case NVPTXISD::Suld1DArrayV4I16Zero:
461  case NVPTXISD::Suld1DArrayV4I32Zero:
462  case NVPTXISD::Suld2DI8Zero:
463  case NVPTXISD::Suld2DI16Zero:
464  case NVPTXISD::Suld2DI32Zero:
465  case NVPTXISD::Suld2DI64Zero:
466  case NVPTXISD::Suld2DV2I8Zero:
467  case NVPTXISD::Suld2DV2I16Zero:
468  case NVPTXISD::Suld2DV2I32Zero:
469  case NVPTXISD::Suld2DV2I64Zero:
470  case NVPTXISD::Suld2DV4I8Zero:
471  case NVPTXISD::Suld2DV4I16Zero:
472  case NVPTXISD::Suld2DV4I32Zero:
473  case NVPTXISD::Suld2DArrayI8Zero:
474  case NVPTXISD::Suld2DArrayI16Zero:
475  case NVPTXISD::Suld2DArrayI32Zero:
476  case NVPTXISD::Suld2DArrayI64Zero:
477  case NVPTXISD::Suld2DArrayV2I8Zero:
478  case NVPTXISD::Suld2DArrayV2I16Zero:
479  case NVPTXISD::Suld2DArrayV2I32Zero:
480  case NVPTXISD::Suld2DArrayV2I64Zero:
481  case NVPTXISD::Suld2DArrayV4I8Zero:
482  case NVPTXISD::Suld2DArrayV4I16Zero:
483  case NVPTXISD::Suld2DArrayV4I32Zero:
484  case NVPTXISD::Suld3DI8Zero:
485  case NVPTXISD::Suld3DI16Zero:
486  case NVPTXISD::Suld3DI32Zero:
487  case NVPTXISD::Suld3DI64Zero:
488  case NVPTXISD::Suld3DV2I8Zero:
489  case NVPTXISD::Suld3DV2I16Zero:
490  case NVPTXISD::Suld3DV2I32Zero:
491  case NVPTXISD::Suld3DV2I64Zero:
492  case NVPTXISD::Suld3DV4I8Zero:
493  case NVPTXISD::Suld3DV4I16Zero:
494  case NVPTXISD::Suld3DV4I32Zero:
495    ResNode = SelectSurfaceIntrinsic(N);
496    break;
497  case ISD::AND:
498  case ISD::SRA:
499  case ISD::SRL:
500    // Try to select BFE
501    ResNode = SelectBFE(N);
502    break;
503  case ISD::ADDRSPACECAST:
504    ResNode = SelectAddrSpaceCast(N);
505    break;
506  default:
507    break;
508  }
509  if (ResNode)
510    return ResNode;
511  return SelectCode(N);
512}
513
514SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
515  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
516  switch (IID) {
517  default:
518    return NULL;
519  case Intrinsic::nvvm_ldg_global_f:
520  case Intrinsic::nvvm_ldg_global_i:
521  case Intrinsic::nvvm_ldg_global_p:
522  case Intrinsic::nvvm_ldu_global_f:
523  case Intrinsic::nvvm_ldu_global_i:
524  case Intrinsic::nvvm_ldu_global_p:
525    return SelectLDGLDU(N);
526  }
527}
528
529static unsigned int getCodeAddrSpace(MemSDNode *N) {
530  const Value *Src = N->getMemOperand()->getValue();
531
532  if (!Src)
533    return NVPTX::PTXLdStInstCode::GENERIC;
534
535  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
536    switch (PT->getAddressSpace()) {
537    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
538    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
539    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
540    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
541    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
542    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
543    default: break;
544    }
545  }
546  return NVPTX::PTXLdStInstCode::GENERIC;
547}
548
549static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
550                          unsigned CodeAddrSpace, MachineFunction *F) {
551  // To use non-coherent caching, the load has to be from global
552  // memory and we have to prove that the memory area is not written
553  // to anywhere for the duration of the kernel call, not even after
554  // the load.
555  //
556  // To ensure that there are no writes to the memory, we require the
557  // underlying pointer to be a noalias (__restrict) kernel parameter
558  // that is never used for a write. We can only do this for kernel
559  // functions since from within a device function, we cannot know if
560  // there were or will be writes to the memory from the caller - or we
561  // could, but then we would have to do inter-procedural analysis.
562  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
563      !isKernelFunction(*F->getFunction())) {
564    return false;
565  }
566
567  // We use GetUnderlyingObjects() here instead of
568  // GetUnderlyingObject() mainly because the former looks through phi
569  // nodes while the latter does not. We need to look through phi
570  // nodes to handle pointer induction variables.
571  SmallVector<Value *, 8> Objs;
572  GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
573                       Objs, F->getDataLayout());
574  for (Value *Obj : Objs) {
575    auto *A = dyn_cast<const Argument>(Obj);
576    if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
577  }
578
579  return true;
580}
581
582SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
583  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
584  switch (IID) {
585  default:
586    return nullptr;
587  case Intrinsic::nvvm_texsurf_handle_internal:
588    return SelectTexSurfHandle(N);
589  }
590}
591
592SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
593  // Op 0 is the intrinsic ID
594  SDValue Wrapper = N->getOperand(1);
595  SDValue GlobalVal = Wrapper.getOperand(0);
596  return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
597                                GlobalVal);
598}
599
600SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
601  SDValue Src = N->getOperand(0);
602  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
603  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
604  unsigned DstAddrSpace = CastN->getDestAddressSpace();
605
606  assert(SrcAddrSpace != DstAddrSpace &&
607         "addrspacecast must be between different address spaces");
608
609  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
610    // Specific to generic
611    unsigned Opc;
612    switch (SrcAddrSpace) {
613    default: report_fatal_error("Bad address space in addrspacecast");
614    case ADDRESS_SPACE_GLOBAL:
615      Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
616      break;
617    case ADDRESS_SPACE_SHARED:
618      Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
619      break;
620    case ADDRESS_SPACE_CONST:
621      Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
622      break;
623    case ADDRESS_SPACE_LOCAL:
624      Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
625      break;
626    }
627    return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
628  } else {
629    // Generic to specific
630    if (SrcAddrSpace != 0)
631      report_fatal_error("Cannot cast between two non-generic address spaces");
632    unsigned Opc;
633    switch (DstAddrSpace) {
634    default: report_fatal_error("Bad address space in addrspacecast");
635    case ADDRESS_SPACE_GLOBAL:
636      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
637                         : NVPTX::cvta_to_global_yes;
638      break;
639    case ADDRESS_SPACE_SHARED:
640      Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
641                         : NVPTX::cvta_to_shared_yes;
642      break;
643    case ADDRESS_SPACE_CONST:
644      Opc =
645          TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
646      break;
647    case ADDRESS_SPACE_LOCAL:
648      Opc =
649          TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
650      break;
651    case ADDRESS_SPACE_PARAM:
652      Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
653                         : NVPTX::nvvm_ptr_gen_to_param;
654      break;
655    }
656    return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
657  }
658}
659
660SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
661  SDLoc dl(N);
662  LoadSDNode *LD = cast<LoadSDNode>(N);
663  EVT LoadedVT = LD->getMemoryVT();
664  SDNode *NVPTXLD = nullptr;
665
666  // do not support pre/post inc/dec
667  if (LD->isIndexed())
668    return nullptr;
669
670  if (!LoadedVT.isSimple())
671    return nullptr;
672
673  // Address Space Setting
674  unsigned int codeAddrSpace = getCodeAddrSpace(LD);
675
676  if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
677    return SelectLDGLDU(N);
678  }
679
680  // Volatile Setting
681  // - .volatile is only availalble for .global and .shared
682  bool isVolatile = LD->isVolatile();
683  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
684      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
685      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
686    isVolatile = false;
687
688  // Vector Setting
689  MVT SimpleVT = LoadedVT.getSimpleVT();
690  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
691  if (SimpleVT.isVector()) {
692    unsigned num = SimpleVT.getVectorNumElements();
693    if (num == 2)
694      vecType = NVPTX::PTXLdStInstCode::V2;
695    else if (num == 4)
696      vecType = NVPTX::PTXLdStInstCode::V4;
697    else
698      return nullptr;
699  }
700
701  // Type Setting: fromType + fromTypeWidth
702  //
703  // Sign   : ISD::SEXTLOAD
704  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
705  //          type is integer
706  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
707  MVT ScalarVT = SimpleVT.getScalarType();
708  // Read at least 8 bits (predicates are stored as 8-bit values)
709  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
710  unsigned int fromType;
711  if ((LD->getExtensionType() == ISD::SEXTLOAD))
712    fromType = NVPTX::PTXLdStInstCode::Signed;
713  else if (ScalarVT.isFloatingPoint())
714    fromType = NVPTX::PTXLdStInstCode::Float;
715  else
716    fromType = NVPTX::PTXLdStInstCode::Unsigned;
717
718  // Create the machine instruction DAG
719  SDValue Chain = N->getOperand(0);
720  SDValue N1 = N->getOperand(1);
721  SDValue Addr;
722  SDValue Offset, Base;
723  unsigned Opcode;
724  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
725
726  if (SelectDirectAddr(N1, Addr)) {
727    switch (TargetVT) {
728    case MVT::i8:
729      Opcode = NVPTX::LD_i8_avar;
730      break;
731    case MVT::i16:
732      Opcode = NVPTX::LD_i16_avar;
733      break;
734    case MVT::i32:
735      Opcode = NVPTX::LD_i32_avar;
736      break;
737    case MVT::i64:
738      Opcode = NVPTX::LD_i64_avar;
739      break;
740    case MVT::f32:
741      Opcode = NVPTX::LD_f32_avar;
742      break;
743    case MVT::f64:
744      Opcode = NVPTX::LD_f64_avar;
745      break;
746    default:
747      return nullptr;
748    }
749    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
750                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
751                      getI32Imm(fromTypeWidth, dl), Addr, Chain };
752    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
753  } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
754                          : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
755    switch (TargetVT) {
756    case MVT::i8:
757      Opcode = NVPTX::LD_i8_asi;
758      break;
759    case MVT::i16:
760      Opcode = NVPTX::LD_i16_asi;
761      break;
762    case MVT::i32:
763      Opcode = NVPTX::LD_i32_asi;
764      break;
765    case MVT::i64:
766      Opcode = NVPTX::LD_i64_asi;
767      break;
768    case MVT::f32:
769      Opcode = NVPTX::LD_f32_asi;
770      break;
771    case MVT::f64:
772      Opcode = NVPTX::LD_f64_asi;
773      break;
774    default:
775      return nullptr;
776    }
777    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
778                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
779                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
780    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
781  } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
782                          : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
783    if (TM.is64Bit()) {
784      switch (TargetVT) {
785      case MVT::i8:
786        Opcode = NVPTX::LD_i8_ari_64;
787        break;
788      case MVT::i16:
789        Opcode = NVPTX::LD_i16_ari_64;
790        break;
791      case MVT::i32:
792        Opcode = NVPTX::LD_i32_ari_64;
793        break;
794      case MVT::i64:
795        Opcode = NVPTX::LD_i64_ari_64;
796        break;
797      case MVT::f32:
798        Opcode = NVPTX::LD_f32_ari_64;
799        break;
800      case MVT::f64:
801        Opcode = NVPTX::LD_f64_ari_64;
802        break;
803      default:
804        return nullptr;
805      }
806    } else {
807      switch (TargetVT) {
808      case MVT::i8:
809        Opcode = NVPTX::LD_i8_ari;
810        break;
811      case MVT::i16:
812        Opcode = NVPTX::LD_i16_ari;
813        break;
814      case MVT::i32:
815        Opcode = NVPTX::LD_i32_ari;
816        break;
817      case MVT::i64:
818        Opcode = NVPTX::LD_i64_ari;
819        break;
820      case MVT::f32:
821        Opcode = NVPTX::LD_f32_ari;
822        break;
823      case MVT::f64:
824        Opcode = NVPTX::LD_f64_ari;
825        break;
826      default:
827        return nullptr;
828      }
829    }
830    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
831                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
832                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
833    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
834  } else {
835    if (TM.is64Bit()) {
836      switch (TargetVT) {
837      case MVT::i8:
838        Opcode = NVPTX::LD_i8_areg_64;
839        break;
840      case MVT::i16:
841        Opcode = NVPTX::LD_i16_areg_64;
842        break;
843      case MVT::i32:
844        Opcode = NVPTX::LD_i32_areg_64;
845        break;
846      case MVT::i64:
847        Opcode = NVPTX::LD_i64_areg_64;
848        break;
849      case MVT::f32:
850        Opcode = NVPTX::LD_f32_areg_64;
851        break;
852      case MVT::f64:
853        Opcode = NVPTX::LD_f64_areg_64;
854        break;
855      default:
856        return nullptr;
857      }
858    } else {
859      switch (TargetVT) {
860      case MVT::i8:
861        Opcode = NVPTX::LD_i8_areg;
862        break;
863      case MVT::i16:
864        Opcode = NVPTX::LD_i16_areg;
865        break;
866      case MVT::i32:
867        Opcode = NVPTX::LD_i32_areg;
868        break;
869      case MVT::i64:
870        Opcode = NVPTX::LD_i64_areg;
871        break;
872      case MVT::f32:
873        Opcode = NVPTX::LD_f32_areg;
874        break;
875      case MVT::f64:
876        Opcode = NVPTX::LD_f64_areg;
877        break;
878      default:
879        return nullptr;
880      }
881    }
882    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
883                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
884                      getI32Imm(fromTypeWidth, dl), N1, Chain };
885    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
886  }
887
888  if (NVPTXLD) {
889    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
890    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
891    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
892  }
893
894  return NVPTXLD;
895}
896
897SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
898
899  SDValue Chain = N->getOperand(0);
900  SDValue Op1 = N->getOperand(1);
901  SDValue Addr, Offset, Base;
902  unsigned Opcode;
903  SDLoc DL(N);
904  SDNode *LD;
905  MemSDNode *MemSD = cast<MemSDNode>(N);
906  EVT LoadedVT = MemSD->getMemoryVT();
907
908  if (!LoadedVT.isSimple())
909    return nullptr;
910
911  // Address Space Setting
912  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
913
914  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
915    return SelectLDGLDU(N);
916  }
917
918  // Volatile Setting
919  // - .volatile is only availalble for .global and .shared
920  bool IsVolatile = MemSD->isVolatile();
921  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
922      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
923      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
924    IsVolatile = false;
925
926  // Vector Setting
927  MVT SimpleVT = LoadedVT.getSimpleVT();
928
929  // Type Setting: fromType + fromTypeWidth
930  //
931  // Sign   : ISD::SEXTLOAD
932  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
933  //          type is integer
934  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
935  MVT ScalarVT = SimpleVT.getScalarType();
936  // Read at least 8 bits (predicates are stored as 8-bit values)
937  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
938  unsigned int FromType;
939  // The last operand holds the original LoadSDNode::getExtensionType() value
940  unsigned ExtensionType = cast<ConstantSDNode>(
941      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
942  if (ExtensionType == ISD::SEXTLOAD)
943    FromType = NVPTX::PTXLdStInstCode::Signed;
944  else if (ScalarVT.isFloatingPoint())
945    FromType = NVPTX::PTXLdStInstCode::Float;
946  else
947    FromType = NVPTX::PTXLdStInstCode::Unsigned;
948
949  unsigned VecType;
950
951  switch (N->getOpcode()) {
952  case NVPTXISD::LoadV2:
953    VecType = NVPTX::PTXLdStInstCode::V2;
954    break;
955  case NVPTXISD::LoadV4:
956    VecType = NVPTX::PTXLdStInstCode::V4;
957    break;
958  default:
959    return nullptr;
960  }
961
962  EVT EltVT = N->getValueType(0);
963
964  if (SelectDirectAddr(Op1, Addr)) {
965    switch (N->getOpcode()) {
966    default:
967      return nullptr;
968    case NVPTXISD::LoadV2:
969      switch (EltVT.getSimpleVT().SimpleTy) {
970      default:
971        return nullptr;
972      case MVT::i8:
973        Opcode = NVPTX::LDV_i8_v2_avar;
974        break;
975      case MVT::i16:
976        Opcode = NVPTX::LDV_i16_v2_avar;
977        break;
978      case MVT::i32:
979        Opcode = NVPTX::LDV_i32_v2_avar;
980        break;
981      case MVT::i64:
982        Opcode = NVPTX::LDV_i64_v2_avar;
983        break;
984      case MVT::f32:
985        Opcode = NVPTX::LDV_f32_v2_avar;
986        break;
987      case MVT::f64:
988        Opcode = NVPTX::LDV_f64_v2_avar;
989        break;
990      }
991      break;
992    case NVPTXISD::LoadV4:
993      switch (EltVT.getSimpleVT().SimpleTy) {
994      default:
995        return nullptr;
996      case MVT::i8:
997        Opcode = NVPTX::LDV_i8_v4_avar;
998        break;
999      case MVT::i16:
1000        Opcode = NVPTX::LDV_i16_v4_avar;
1001        break;
1002      case MVT::i32:
1003        Opcode = NVPTX::LDV_i32_v4_avar;
1004        break;
1005      case MVT::f32:
1006        Opcode = NVPTX::LDV_f32_v4_avar;
1007        break;
1008      }
1009      break;
1010    }
1011
1012    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1013                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1014                      getI32Imm(FromTypeWidth, DL), Addr, Chain };
1015    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1016  } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1017                          : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1018    switch (N->getOpcode()) {
1019    default:
1020      return nullptr;
1021    case NVPTXISD::LoadV2:
1022      switch (EltVT.getSimpleVT().SimpleTy) {
1023      default:
1024        return nullptr;
1025      case MVT::i8:
1026        Opcode = NVPTX::LDV_i8_v2_asi;
1027        break;
1028      case MVT::i16:
1029        Opcode = NVPTX::LDV_i16_v2_asi;
1030        break;
1031      case MVT::i32:
1032        Opcode = NVPTX::LDV_i32_v2_asi;
1033        break;
1034      case MVT::i64:
1035        Opcode = NVPTX::LDV_i64_v2_asi;
1036        break;
1037      case MVT::f32:
1038        Opcode = NVPTX::LDV_f32_v2_asi;
1039        break;
1040      case MVT::f64:
1041        Opcode = NVPTX::LDV_f64_v2_asi;
1042        break;
1043      }
1044      break;
1045    case NVPTXISD::LoadV4:
1046      switch (EltVT.getSimpleVT().SimpleTy) {
1047      default:
1048        return nullptr;
1049      case MVT::i8:
1050        Opcode = NVPTX::LDV_i8_v4_asi;
1051        break;
1052      case MVT::i16:
1053        Opcode = NVPTX::LDV_i16_v4_asi;
1054        break;
1055      case MVT::i32:
1056        Opcode = NVPTX::LDV_i32_v4_asi;
1057        break;
1058      case MVT::f32:
1059        Opcode = NVPTX::LDV_f32_v4_asi;
1060        break;
1061      }
1062      break;
1063    }
1064
1065    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1066                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1067                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1068    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1069  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1070                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1071    if (TM.is64Bit()) {
1072      switch (N->getOpcode()) {
1073      default:
1074        return nullptr;
1075      case NVPTXISD::LoadV2:
1076        switch (EltVT.getSimpleVT().SimpleTy) {
1077        default:
1078          return nullptr;
1079        case MVT::i8:
1080          Opcode = NVPTX::LDV_i8_v2_ari_64;
1081          break;
1082        case MVT::i16:
1083          Opcode = NVPTX::LDV_i16_v2_ari_64;
1084          break;
1085        case MVT::i32:
1086          Opcode = NVPTX::LDV_i32_v2_ari_64;
1087          break;
1088        case MVT::i64:
1089          Opcode = NVPTX::LDV_i64_v2_ari_64;
1090          break;
1091        case MVT::f32:
1092          Opcode = NVPTX::LDV_f32_v2_ari_64;
1093          break;
1094        case MVT::f64:
1095          Opcode = NVPTX::LDV_f64_v2_ari_64;
1096          break;
1097        }
1098        break;
1099      case NVPTXISD::LoadV4:
1100        switch (EltVT.getSimpleVT().SimpleTy) {
1101        default:
1102          return nullptr;
1103        case MVT::i8:
1104          Opcode = NVPTX::LDV_i8_v4_ari_64;
1105          break;
1106        case MVT::i16:
1107          Opcode = NVPTX::LDV_i16_v4_ari_64;
1108          break;
1109        case MVT::i32:
1110          Opcode = NVPTX::LDV_i32_v4_ari_64;
1111          break;
1112        case MVT::f32:
1113          Opcode = NVPTX::LDV_f32_v4_ari_64;
1114          break;
1115        }
1116        break;
1117      }
1118    } else {
1119      switch (N->getOpcode()) {
1120      default:
1121        return nullptr;
1122      case NVPTXISD::LoadV2:
1123        switch (EltVT.getSimpleVT().SimpleTy) {
1124        default:
1125          return nullptr;
1126        case MVT::i8:
1127          Opcode = NVPTX::LDV_i8_v2_ari;
1128          break;
1129        case MVT::i16:
1130          Opcode = NVPTX::LDV_i16_v2_ari;
1131          break;
1132        case MVT::i32:
1133          Opcode = NVPTX::LDV_i32_v2_ari;
1134          break;
1135        case MVT::i64:
1136          Opcode = NVPTX::LDV_i64_v2_ari;
1137          break;
1138        case MVT::f32:
1139          Opcode = NVPTX::LDV_f32_v2_ari;
1140          break;
1141        case MVT::f64:
1142          Opcode = NVPTX::LDV_f64_v2_ari;
1143          break;
1144        }
1145        break;
1146      case NVPTXISD::LoadV4:
1147        switch (EltVT.getSimpleVT().SimpleTy) {
1148        default:
1149          return nullptr;
1150        case MVT::i8:
1151          Opcode = NVPTX::LDV_i8_v4_ari;
1152          break;
1153        case MVT::i16:
1154          Opcode = NVPTX::LDV_i16_v4_ari;
1155          break;
1156        case MVT::i32:
1157          Opcode = NVPTX::LDV_i32_v4_ari;
1158          break;
1159        case MVT::f32:
1160          Opcode = NVPTX::LDV_f32_v4_ari;
1161          break;
1162        }
1163        break;
1164      }
1165    }
1166
1167    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1168                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1169                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1170
1171    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1172  } else {
1173    if (TM.is64Bit()) {
1174      switch (N->getOpcode()) {
1175      default:
1176        return nullptr;
1177      case NVPTXISD::LoadV2:
1178        switch (EltVT.getSimpleVT().SimpleTy) {
1179        default:
1180          return nullptr;
1181        case MVT::i8:
1182          Opcode = NVPTX::LDV_i8_v2_areg_64;
1183          break;
1184        case MVT::i16:
1185          Opcode = NVPTX::LDV_i16_v2_areg_64;
1186          break;
1187        case MVT::i32:
1188          Opcode = NVPTX::LDV_i32_v2_areg_64;
1189          break;
1190        case MVT::i64:
1191          Opcode = NVPTX::LDV_i64_v2_areg_64;
1192          break;
1193        case MVT::f32:
1194          Opcode = NVPTX::LDV_f32_v2_areg_64;
1195          break;
1196        case MVT::f64:
1197          Opcode = NVPTX::LDV_f64_v2_areg_64;
1198          break;
1199        }
1200        break;
1201      case NVPTXISD::LoadV4:
1202        switch (EltVT.getSimpleVT().SimpleTy) {
1203        default:
1204          return nullptr;
1205        case MVT::i8:
1206          Opcode = NVPTX::LDV_i8_v4_areg_64;
1207          break;
1208        case MVT::i16:
1209          Opcode = NVPTX::LDV_i16_v4_areg_64;
1210          break;
1211        case MVT::i32:
1212          Opcode = NVPTX::LDV_i32_v4_areg_64;
1213          break;
1214        case MVT::f32:
1215          Opcode = NVPTX::LDV_f32_v4_areg_64;
1216          break;
1217        }
1218        break;
1219      }
1220    } else {
1221      switch (N->getOpcode()) {
1222      default:
1223        return nullptr;
1224      case NVPTXISD::LoadV2:
1225        switch (EltVT.getSimpleVT().SimpleTy) {
1226        default:
1227          return nullptr;
1228        case MVT::i8:
1229          Opcode = NVPTX::LDV_i8_v2_areg;
1230          break;
1231        case MVT::i16:
1232          Opcode = NVPTX::LDV_i16_v2_areg;
1233          break;
1234        case MVT::i32:
1235          Opcode = NVPTX::LDV_i32_v2_areg;
1236          break;
1237        case MVT::i64:
1238          Opcode = NVPTX::LDV_i64_v2_areg;
1239          break;
1240        case MVT::f32:
1241          Opcode = NVPTX::LDV_f32_v2_areg;
1242          break;
1243        case MVT::f64:
1244          Opcode = NVPTX::LDV_f64_v2_areg;
1245          break;
1246        }
1247        break;
1248      case NVPTXISD::LoadV4:
1249        switch (EltVT.getSimpleVT().SimpleTy) {
1250        default:
1251          return nullptr;
1252        case MVT::i8:
1253          Opcode = NVPTX::LDV_i8_v4_areg;
1254          break;
1255        case MVT::i16:
1256          Opcode = NVPTX::LDV_i16_v4_areg;
1257          break;
1258        case MVT::i32:
1259          Opcode = NVPTX::LDV_i32_v4_areg;
1260          break;
1261        case MVT::f32:
1262          Opcode = NVPTX::LDV_f32_v4_areg;
1263          break;
1264        }
1265        break;
1266      }
1267    }
1268
1269    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1270                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1271                      getI32Imm(FromTypeWidth, DL), Op1, Chain };
1272    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1273  }
1274
1275  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1276  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1277  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1278
1279  return LD;
1280}
1281
1282SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1283
1284  SDValue Chain = N->getOperand(0);
1285  SDValue Op1;
1286  MemSDNode *Mem;
1287  bool IsLDG = true;
1288
  // If this is an LDG intrinsic, the address is the third operand. If it's an
  // LDG/LDU SD node (from custom vector handling), then it's the second operand
1291  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1292    Op1 = N->getOperand(2);
1293    Mem = cast<MemIntrinsicSDNode>(N);
1294    unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1295    switch (IID) {
1296    default:
1297      return NULL;
1298    case Intrinsic::nvvm_ldg_global_f:
1299    case Intrinsic::nvvm_ldg_global_i:
1300    case Intrinsic::nvvm_ldg_global_p:
1301      IsLDG = true;
1302      break;
1303    case Intrinsic::nvvm_ldu_global_f:
1304    case Intrinsic::nvvm_ldu_global_i:
1305    case Intrinsic::nvvm_ldu_global_p:
1306      IsLDG = false;
1307      break;
1308    }
1309  } else {
1310    Op1 = N->getOperand(1);
1311    Mem = cast<MemSDNode>(N);
1312  }
1313
1314  unsigned Opcode;
1315  SDLoc DL(N);
1316  SDNode *LD;
1317  SDValue Base, Offset, Addr;
1318
1319  EVT EltVT = Mem->getMemoryVT();
1320  if (EltVT.isVector()) {
1321    EltVT = EltVT.getVectorElementType();
1322  }
1323
1324  if (SelectDirectAddr(Op1, Addr)) {
1325    switch (N->getOpcode()) {
1326    default:
1327      return nullptr;
1328    case ISD::INTRINSIC_W_CHAIN:
1329      if (IsLDG) {
1330        switch (EltVT.getSimpleVT().SimpleTy) {
1331        default:
1332          return nullptr;
1333        case MVT::i8:
1334          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1335          break;
1336        case MVT::i16:
1337          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1338          break;
1339        case MVT::i32:
1340          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1341          break;
1342        case MVT::i64:
1343          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1344          break;
1345        case MVT::f32:
1346          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1347          break;
1348        case MVT::f64:
1349          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1350          break;
1351        }
1352      } else {
1353        switch (EltVT.getSimpleVT().SimpleTy) {
1354        default:
1355          return nullptr;
1356        case MVT::i8:
1357          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1358          break;
1359        case MVT::i16:
1360          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1361          break;
1362        case MVT::i32:
1363          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1364          break;
1365        case MVT::i64:
1366          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1367          break;
1368        case MVT::f32:
1369          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1370          break;
1371        case MVT::f64:
1372          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1373          break;
1374        }
1375      }
1376      break;
1377    case NVPTXISD::LDGV2:
1378      switch (EltVT.getSimpleVT().SimpleTy) {
1379      default:
1380        return nullptr;
1381      case MVT::i8:
1382        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1383        break;
1384      case MVT::i16:
1385        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1386        break;
1387      case MVT::i32:
1388        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1389        break;
1390      case MVT::i64:
1391        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1392        break;
1393      case MVT::f32:
1394        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1395        break;
1396      case MVT::f64:
1397        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1398        break;
1399      }
1400      break;
1401    case NVPTXISD::LDUV2:
1402      switch (EltVT.getSimpleVT().SimpleTy) {
1403      default:
1404        return nullptr;
1405      case MVT::i8:
1406        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1407        break;
1408      case MVT::i16:
1409        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1410        break;
1411      case MVT::i32:
1412        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1413        break;
1414      case MVT::i64:
1415        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1416        break;
1417      case MVT::f32:
1418        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1419        break;
1420      case MVT::f64:
1421        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1422        break;
1423      }
1424      break;
1425    case NVPTXISD::LDGV4:
1426      switch (EltVT.getSimpleVT().SimpleTy) {
1427      default:
1428        return nullptr;
1429      case MVT::i8:
1430        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1431        break;
1432      case MVT::i16:
1433        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1434        break;
1435      case MVT::i32:
1436        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1437        break;
1438      case MVT::f32:
1439        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1440        break;
1441      }
1442      break;
1443    case NVPTXISD::LDUV4:
1444      switch (EltVT.getSimpleVT().SimpleTy) {
1445      default:
1446        return nullptr;
1447      case MVT::i8:
1448        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1449        break;
1450      case MVT::i16:
1451        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1452        break;
1453      case MVT::i32:
1454        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1455        break;
1456      case MVT::f32:
1457        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1458        break;
1459      }
1460      break;
1461    }
1462
1463    SDValue Ops[] = { Addr, Chain };
1464    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1465  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1466                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1467    if (TM.is64Bit()) {
1468      switch (N->getOpcode()) {
1469      default:
1470        return nullptr;
1471      case ISD::LOAD:
1472      case ISD::INTRINSIC_W_CHAIN:
1473        if (IsLDG) {
1474          switch (EltVT.getSimpleVT().SimpleTy) {
1475          default:
1476            return nullptr;
1477          case MVT::i8:
1478            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1479            break;
1480          case MVT::i16:
1481            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1482            break;
1483          case MVT::i32:
1484            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1485            break;
1486          case MVT::i64:
1487            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1488            break;
1489          case MVT::f32:
1490            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1491            break;
1492          case MVT::f64:
1493            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1494            break;
1495          }
1496        } else {
1497          switch (EltVT.getSimpleVT().SimpleTy) {
1498          default:
1499            return nullptr;
1500          case MVT::i8:
1501            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1502            break;
1503          case MVT::i16:
1504            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1505            break;
1506          case MVT::i32:
1507            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1508            break;
1509          case MVT::i64:
1510            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1511            break;
1512          case MVT::f32:
1513            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1514            break;
1515          case MVT::f64:
1516            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1517            break;
1518          }
1519        }
1520        break;
1521      case NVPTXISD::LoadV2:
1522      case NVPTXISD::LDGV2:
1523        switch (EltVT.getSimpleVT().SimpleTy) {
1524        default:
1525          return nullptr;
1526        case MVT::i8:
1527          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1528          break;
1529        case MVT::i16:
1530          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1531          break;
1532        case MVT::i32:
1533          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1534          break;
1535        case MVT::i64:
1536          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1537          break;
1538        case MVT::f32:
1539          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1540          break;
1541        case MVT::f64:
1542          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1543          break;
1544        }
1545        break;
1546      case NVPTXISD::LDUV2:
1547        switch (EltVT.getSimpleVT().SimpleTy) {
1548        default:
1549          return nullptr;
1550        case MVT::i8:
1551          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1552          break;
1553        case MVT::i16:
1554          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1555          break;
1556        case MVT::i32:
1557          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1558          break;
1559        case MVT::i64:
1560          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1561          break;
1562        case MVT::f32:
1563          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1564          break;
1565        case MVT::f64:
1566          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1567          break;
1568        }
1569        break;
1570      case NVPTXISD::LoadV4:
1571      case NVPTXISD::LDGV4:
1572        switch (EltVT.getSimpleVT().SimpleTy) {
1573        default:
1574          return nullptr;
1575        case MVT::i8:
1576          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1577          break;
1578        case MVT::i16:
1579          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1580          break;
1581        case MVT::i32:
1582          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1583          break;
1584        case MVT::f32:
1585          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1586          break;
1587        }
1588        break;
1589      case NVPTXISD::LDUV4:
1590        switch (EltVT.getSimpleVT().SimpleTy) {
1591        default:
1592          return nullptr;
1593        case MVT::i8:
1594          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1595          break;
1596        case MVT::i16:
1597          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1598          break;
1599        case MVT::i32:
1600          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1601          break;
1602        case MVT::f32:
1603          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1604          break;
1605        }
1606        break;
1607      }
1608    } else {
1609      switch (N->getOpcode()) {
1610      default:
1611        return nullptr;
1612      case ISD::LOAD:
1613      case ISD::INTRINSIC_W_CHAIN:
1614        if (IsLDG) {
1615          switch (EltVT.getSimpleVT().SimpleTy) {
1616          default:
1617            return nullptr;
1618          case MVT::i8:
1619            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1620            break;
1621          case MVT::i16:
1622            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1623            break;
1624          case MVT::i32:
1625            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1626            break;
1627          case MVT::i64:
1628            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1629            break;
1630          case MVT::f32:
1631            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1632            break;
1633          case MVT::f64:
1634            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1635            break;
1636          }
1637        } else {
1638          switch (EltVT.getSimpleVT().SimpleTy) {
1639          default:
1640            return nullptr;
1641          case MVT::i8:
1642            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1643            break;
1644          case MVT::i16:
1645            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1646            break;
1647          case MVT::i32:
1648            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1649            break;
1650          case MVT::i64:
1651            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1652            break;
1653          case MVT::f32:
1654            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1655            break;
1656          case MVT::f64:
1657            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1658            break;
1659          }
1660        }
1661        break;
1662      case NVPTXISD::LoadV2:
1663      case NVPTXISD::LDGV2:
1664        switch (EltVT.getSimpleVT().SimpleTy) {
1665        default:
1666          return nullptr;
1667        case MVT::i8:
1668          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1669          break;
1670        case MVT::i16:
1671          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1672          break;
1673        case MVT::i32:
1674          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1675          break;
1676        case MVT::i64:
1677          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1678          break;
1679        case MVT::f32:
1680          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1681          break;
1682        case MVT::f64:
1683          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1684          break;
1685        }
1686        break;
1687      case NVPTXISD::LDUV2:
1688        switch (EltVT.getSimpleVT().SimpleTy) {
1689        default:
1690          return nullptr;
1691        case MVT::i8:
1692          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1693          break;
1694        case MVT::i16:
1695          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1696          break;
1697        case MVT::i32:
1698          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1699          break;
1700        case MVT::i64:
1701          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1702          break;
1703        case MVT::f32:
1704          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1705          break;
1706        case MVT::f64:
1707          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1708          break;
1709        }
1710        break;
1711      case NVPTXISD::LoadV4:
1712      case NVPTXISD::LDGV4:
1713        switch (EltVT.getSimpleVT().SimpleTy) {
1714        default:
1715          return nullptr;
1716        case MVT::i8:
1717          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1718          break;
1719        case MVT::i16:
1720          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1721          break;
1722        case MVT::i32:
1723          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1724          break;
1725        case MVT::f32:
1726          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1727          break;
1728        }
1729        break;
1730      case NVPTXISD::LDUV4:
1731        switch (EltVT.getSimpleVT().SimpleTy) {
1732        default:
1733          return nullptr;
1734        case MVT::i8:
1735          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1736          break;
1737        case MVT::i16:
1738          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1739          break;
1740        case MVT::i32:
1741          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1742          break;
1743        case MVT::f32:
1744          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1745          break;
1746        }
1747        break;
1748      }
1749    }
1750
1751    SDValue Ops[] = { Base, Offset, Chain };
1752
1753    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1754  } else {
1755    if (TM.is64Bit()) {
1756      switch (N->getOpcode()) {
1757      default:
1758        return nullptr;
1759      case ISD::LOAD:
1760      case ISD::INTRINSIC_W_CHAIN:
1761        if (IsLDG) {
1762          switch (EltVT.getSimpleVT().SimpleTy) {
1763          default:
1764            return nullptr;
1765          case MVT::i8:
1766            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1767            break;
1768          case MVT::i16:
1769            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1770            break;
1771          case MVT::i32:
1772            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1773            break;
1774          case MVT::i64:
1775            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1776            break;
1777          case MVT::f32:
1778            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1779            break;
1780          case MVT::f64:
1781            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1782            break;
1783          }
1784        } else {
1785          switch (EltVT.getSimpleVT().SimpleTy) {
1786          default:
1787            return nullptr;
1788          case MVT::i8:
1789            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1790            break;
1791          case MVT::i16:
1792            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1793            break;
1794          case MVT::i32:
1795            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1796            break;
1797          case MVT::i64:
1798            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1799            break;
1800          case MVT::f32:
1801            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1802            break;
1803          case MVT::f64:
1804            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1805            break;
1806          }
1807        }
1808        break;
1809      case NVPTXISD::LoadV2:
1810      case NVPTXISD::LDGV2:
1811        switch (EltVT.getSimpleVT().SimpleTy) {
1812        default:
1813          return nullptr;
1814        case MVT::i8:
1815          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1816          break;
1817        case MVT::i16:
1818          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1819          break;
1820        case MVT::i32:
1821          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1822          break;
1823        case MVT::i64:
1824          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1825          break;
1826        case MVT::f32:
1827          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1828          break;
1829        case MVT::f64:
1830          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1831          break;
1832        }
1833        break;
1834      case NVPTXISD::LDUV2:
1835        switch (EltVT.getSimpleVT().SimpleTy) {
1836        default:
1837          return nullptr;
1838        case MVT::i8:
1839          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1840          break;
1841        case MVT::i16:
1842          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1843          break;
1844        case MVT::i32:
1845          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1846          break;
1847        case MVT::i64:
1848          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1849          break;
1850        case MVT::f32:
1851          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1852          break;
1853        case MVT::f64:
1854          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1855          break;
1856        }
1857        break;
1858      case NVPTXISD::LoadV4:
1859      case NVPTXISD::LDGV4:
1860        switch (EltVT.getSimpleVT().SimpleTy) {
1861        default:
1862          return nullptr;
1863        case MVT::i8:
1864          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1865          break;
1866        case MVT::i16:
1867          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1868          break;
1869        case MVT::i32:
1870          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1871          break;
1872        case MVT::f32:
1873          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1874          break;
1875        }
1876        break;
1877      case NVPTXISD::LDUV4:
1878        switch (EltVT.getSimpleVT().SimpleTy) {
1879        default:
1880          return nullptr;
1881        case MVT::i8:
1882          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1883          break;
1884        case MVT::i16:
1885          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1886          break;
1887        case MVT::i32:
1888          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1889          break;
1890        case MVT::f32:
1891          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1892          break;
1893        }
1894        break;
1895      }
1896    } else {
1897      switch (N->getOpcode()) {
1898      default:
1899        return nullptr;
1900      case ISD::LOAD:
1901      case ISD::INTRINSIC_W_CHAIN:
1902        if (IsLDG) {
1903          switch (EltVT.getSimpleVT().SimpleTy) {
1904          default:
1905            return nullptr;
1906          case MVT::i8:
1907            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1908            break;
1909          case MVT::i16:
1910            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1911            break;
1912          case MVT::i32:
1913            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1914            break;
1915          case MVT::i64:
1916            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1917            break;
1918          case MVT::f32:
1919            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1920            break;
1921          case MVT::f64:
1922            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1923            break;
1924          }
1925        } else {
1926          switch (EltVT.getSimpleVT().SimpleTy) {
1927          default:
1928            return nullptr;
1929          case MVT::i8:
1930            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1931            break;
1932          case MVT::i16:
1933            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1934            break;
1935          case MVT::i32:
1936            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1937            break;
1938          case MVT::i64:
1939            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1940            break;
1941          case MVT::f32:
1942            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1943            break;
1944          case MVT::f64:
1945            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1946            break;
1947          }
1948        }
1949        break;
1950      case NVPTXISD::LoadV2:
1951      case NVPTXISD::LDGV2:
1952        switch (EltVT.getSimpleVT().SimpleTy) {
1953        default:
1954          return nullptr;
1955        case MVT::i8:
1956          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1957          break;
1958        case MVT::i16:
1959          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1960          break;
1961        case MVT::i32:
1962          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1963          break;
1964        case MVT::i64:
1965          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1966          break;
1967        case MVT::f32:
1968          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1969          break;
1970        case MVT::f64:
1971          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1972          break;
1973        }
1974        break;
1975      case NVPTXISD::LDUV2:
1976        switch (EltVT.getSimpleVT().SimpleTy) {
1977        default:
1978          return nullptr;
1979        case MVT::i8:
1980          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1981          break;
1982        case MVT::i16:
1983          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1984          break;
1985        case MVT::i32:
1986          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1987          break;
1988        case MVT::i64:
1989          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1990          break;
1991        case MVT::f32:
1992          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1993          break;
1994        case MVT::f64:
1995          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1996          break;
1997        }
1998        break;
1999      case NVPTXISD::LoadV4:
2000      case NVPTXISD::LDGV4:
2001        switch (EltVT.getSimpleVT().SimpleTy) {
2002        default:
2003          return nullptr;
2004        case MVT::i8:
2005          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
2006          break;
2007        case MVT::i16:
2008          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
2009          break;
2010        case MVT::i32:
2011          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
2012          break;
2013        case MVT::f32:
2014          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
2015          break;
2016        }
2017        break;
2018      case NVPTXISD::LDUV4:
2019        switch (EltVT.getSimpleVT().SimpleTy) {
2020        default:
2021          return nullptr;
2022        case MVT::i8:
2023          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
2024          break;
2025        case MVT::i16:
2026          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
2027          break;
2028        case MVT::i32:
2029          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
2030          break;
2031        case MVT::f32:
2032          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
2033          break;
2034        }
2035        break;
2036      }
2037    }
2038
2039    SDValue Ops[] = { Op1, Chain };
2040    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
2041  }
2042
2043  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2044  MemRefs0[0] = Mem->getMemOperand();
2045  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
2046
2047  return LD;
2048}
2049
2050SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
2051  SDLoc dl(N);
2052  StoreSDNode *ST = cast<StoreSDNode>(N);
2053  EVT StoreVT = ST->getMemoryVT();
2054  SDNode *NVPTXST = nullptr;
2055
2056  // do not support pre/post inc/dec
2057  if (ST->isIndexed())
2058    return nullptr;
2059
2060  if (!StoreVT.isSimple())
2061    return nullptr;
2062
2063  // Address Space Setting
2064  unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2065
2066  // Volatile Setting
2067  // - .volatile is only availalble for .global and .shared
2068  bool isVolatile = ST->isVolatile();
2069  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2070      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2071      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2072    isVolatile = false;
2073
2074  // Vector Setting
2075  MVT SimpleVT = StoreVT.getSimpleVT();
2076  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2077  if (SimpleVT.isVector()) {
2078    unsigned num = SimpleVT.getVectorNumElements();
2079    if (num == 2)
2080      vecType = NVPTX::PTXLdStInstCode::V2;
2081    else if (num == 4)
2082      vecType = NVPTX::PTXLdStInstCode::V4;
2083    else
2084      return nullptr;
2085  }
2086
2087  // Type Setting: toType + toTypeWidth
2088  // - for integer type, always use 'u'
2089  //
2090  MVT ScalarVT = SimpleVT.getScalarType();
2091  unsigned toTypeWidth = ScalarVT.getSizeInBits();
2092  unsigned int toType;
2093  if (ScalarVT.isFloatingPoint())
2094    toType = NVPTX::PTXLdStInstCode::Float;
2095  else
2096    toType = NVPTX::PTXLdStInstCode::Unsigned;
2097
2098  // Create the machine instruction DAG
2099  SDValue Chain = N->getOperand(0);
2100  SDValue N1 = N->getOperand(1);
2101  SDValue N2 = N->getOperand(2);
2102  SDValue Addr;
2103  SDValue Offset, Base;
2104  unsigned Opcode;
2105  MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
2106
2107  if (SelectDirectAddr(N2, Addr)) {
2108    switch (SourceVT) {
2109    case MVT::i8:
2110      Opcode = NVPTX::ST_i8_avar;
2111      break;
2112    case MVT::i16:
2113      Opcode = NVPTX::ST_i16_avar;
2114      break;
2115    case MVT::i32:
2116      Opcode = NVPTX::ST_i32_avar;
2117      break;
2118    case MVT::i64:
2119      Opcode = NVPTX::ST_i64_avar;
2120      break;
2121    case MVT::f32:
2122      Opcode = NVPTX::ST_f32_avar;
2123      break;
2124    case MVT::f64:
2125      Opcode = NVPTX::ST_f64_avar;
2126      break;
2127    default:
2128      return nullptr;
2129    }
2130    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2131                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2132                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2133                      Chain };
2134    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2135  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2136                          : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2137    switch (SourceVT) {
2138    case MVT::i8:
2139      Opcode = NVPTX::ST_i8_asi;
2140      break;
2141    case MVT::i16:
2142      Opcode = NVPTX::ST_i16_asi;
2143      break;
2144    case MVT::i32:
2145      Opcode = NVPTX::ST_i32_asi;
2146      break;
2147    case MVT::i64:
2148      Opcode = NVPTX::ST_i64_asi;
2149      break;
2150    case MVT::f32:
2151      Opcode = NVPTX::ST_f32_asi;
2152      break;
2153    case MVT::f64:
2154      Opcode = NVPTX::ST_f64_asi;
2155      break;
2156    default:
2157      return nullptr;
2158    }
2159    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2160                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2161                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2162                      Offset, Chain };
2163    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2164  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2165                          : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2166    if (TM.is64Bit()) {
2167      switch (SourceVT) {
2168      case MVT::i8:
2169        Opcode = NVPTX::ST_i8_ari_64;
2170        break;
2171      case MVT::i16:
2172        Opcode = NVPTX::ST_i16_ari_64;
2173        break;
2174      case MVT::i32:
2175        Opcode = NVPTX::ST_i32_ari_64;
2176        break;
2177      case MVT::i64:
2178        Opcode = NVPTX::ST_i64_ari_64;
2179        break;
2180      case MVT::f32:
2181        Opcode = NVPTX::ST_f32_ari_64;
2182        break;
2183      case MVT::f64:
2184        Opcode = NVPTX::ST_f64_ari_64;
2185        break;
2186      default:
2187        return nullptr;
2188      }
2189    } else {
2190      switch (SourceVT) {
2191      case MVT::i8:
2192        Opcode = NVPTX::ST_i8_ari;
2193        break;
2194      case MVT::i16:
2195        Opcode = NVPTX::ST_i16_ari;
2196        break;
2197      case MVT::i32:
2198        Opcode = NVPTX::ST_i32_ari;
2199        break;
2200      case MVT::i64:
2201        Opcode = NVPTX::ST_i64_ari;
2202        break;
2203      case MVT::f32:
2204        Opcode = NVPTX::ST_f32_ari;
2205        break;
2206      case MVT::f64:
2207        Opcode = NVPTX::ST_f64_ari;
2208        break;
2209      default:
2210        return nullptr;
2211      }
2212    }
2213    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2214                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2215                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2216                      Offset, Chain };
2217    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2218  } else {
2219    if (TM.is64Bit()) {
2220      switch (SourceVT) {
2221      case MVT::i8:
2222        Opcode = NVPTX::ST_i8_areg_64;
2223        break;
2224      case MVT::i16:
2225        Opcode = NVPTX::ST_i16_areg_64;
2226        break;
2227      case MVT::i32:
2228        Opcode = NVPTX::ST_i32_areg_64;
2229        break;
2230      case MVT::i64:
2231        Opcode = NVPTX::ST_i64_areg_64;
2232        break;
2233      case MVT::f32:
2234        Opcode = NVPTX::ST_f32_areg_64;
2235        break;
2236      case MVT::f64:
2237        Opcode = NVPTX::ST_f64_areg_64;
2238        break;
2239      default:
2240        return nullptr;
2241      }
2242    } else {
2243      switch (SourceVT) {
2244      case MVT::i8:
2245        Opcode = NVPTX::ST_i8_areg;
2246        break;
2247      case MVT::i16:
2248        Opcode = NVPTX::ST_i16_areg;
2249        break;
2250      case MVT::i32:
2251        Opcode = NVPTX::ST_i32_areg;
2252        break;
2253      case MVT::i64:
2254        Opcode = NVPTX::ST_i64_areg;
2255        break;
2256      case MVT::f32:
2257        Opcode = NVPTX::ST_f32_areg;
2258        break;
2259      case MVT::f64:
2260        Opcode = NVPTX::ST_f64_areg;
2261        break;
2262      default:
2263        return nullptr;
2264      }
2265    }
2266    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2267                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2268                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2269                      Chain };
2270    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2271  }
2272
2273  if (NVPTXST) {
2274    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2275    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2276    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2277  }
2278
2279  return NVPTXST;
2280}
2281
2282SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2283  SDValue Chain = N->getOperand(0);
2284  SDValue Op1 = N->getOperand(1);
2285  SDValue Addr, Offset, Base;
2286  unsigned Opcode;
2287  SDLoc DL(N);
2288  SDNode *ST;
2289  EVT EltVT = Op1.getValueType();
2290  MemSDNode *MemSD = cast<MemSDNode>(N);
2291  EVT StoreVT = MemSD->getMemoryVT();
2292
2293  // Address Space Setting
2294  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
2295
2296  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2297    report_fatal_error("Cannot store to pointer that points to constant "
2298                       "memory space");
2299  }
2300
2301  // Volatile Setting
2302  // - .volatile is only availalble for .global and .shared
2303  bool IsVolatile = MemSD->isVolatile();
2304  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2305      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2306      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2307    IsVolatile = false;
2308
2309  // Type Setting: toType + toTypeWidth
2310  // - for integer type, always use 'u'
2311  assert(StoreVT.isSimple() && "Store value is not simple");
2312  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2313  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2314  unsigned ToType;
2315  if (ScalarVT.isFloatingPoint())
2316    ToType = NVPTX::PTXLdStInstCode::Float;
2317  else
2318    ToType = NVPTX::PTXLdStInstCode::Unsigned;
2319
2320  SmallVector<SDValue, 12> StOps;
2321  SDValue N2;
2322  unsigned VecType;
2323
2324  switch (N->getOpcode()) {
2325  case NVPTXISD::StoreV2:
2326    VecType = NVPTX::PTXLdStInstCode::V2;
2327    StOps.push_back(N->getOperand(1));
2328    StOps.push_back(N->getOperand(2));
2329    N2 = N->getOperand(3);
2330    break;
2331  case NVPTXISD::StoreV4:
2332    VecType = NVPTX::PTXLdStInstCode::V4;
2333    StOps.push_back(N->getOperand(1));
2334    StOps.push_back(N->getOperand(2));
2335    StOps.push_back(N->getOperand(3));
2336    StOps.push_back(N->getOperand(4));
2337    N2 = N->getOperand(5);
2338    break;
2339  default:
2340    return nullptr;
2341  }
2342
2343  StOps.push_back(getI32Imm(IsVolatile, DL));
2344  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2345  StOps.push_back(getI32Imm(VecType, DL));
2346  StOps.push_back(getI32Imm(ToType, DL));
2347  StOps.push_back(getI32Imm(ToTypeWidth, DL));
2348
2349  if (SelectDirectAddr(N2, Addr)) {
2350    switch (N->getOpcode()) {
2351    default:
2352      return nullptr;
2353    case NVPTXISD::StoreV2:
2354      switch (EltVT.getSimpleVT().SimpleTy) {
2355      default:
2356        return nullptr;
2357      case MVT::i8:
2358        Opcode = NVPTX::STV_i8_v2_avar;
2359        break;
2360      case MVT::i16:
2361        Opcode = NVPTX::STV_i16_v2_avar;
2362        break;
2363      case MVT::i32:
2364        Opcode = NVPTX::STV_i32_v2_avar;
2365        break;
2366      case MVT::i64:
2367        Opcode = NVPTX::STV_i64_v2_avar;
2368        break;
2369      case MVT::f32:
2370        Opcode = NVPTX::STV_f32_v2_avar;
2371        break;
2372      case MVT::f64:
2373        Opcode = NVPTX::STV_f64_v2_avar;
2374        break;
2375      }
2376      break;
2377    case NVPTXISD::StoreV4:
2378      switch (EltVT.getSimpleVT().SimpleTy) {
2379      default:
2380        return nullptr;
2381      case MVT::i8:
2382        Opcode = NVPTX::STV_i8_v4_avar;
2383        break;
2384      case MVT::i16:
2385        Opcode = NVPTX::STV_i16_v4_avar;
2386        break;
2387      case MVT::i32:
2388        Opcode = NVPTX::STV_i32_v4_avar;
2389        break;
2390      case MVT::f32:
2391        Opcode = NVPTX::STV_f32_v4_avar;
2392        break;
2393      }
2394      break;
2395    }
2396    StOps.push_back(Addr);
2397  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2398                          : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2399    switch (N->getOpcode()) {
2400    default:
2401      return nullptr;
2402    case NVPTXISD::StoreV2:
2403      switch (EltVT.getSimpleVT().SimpleTy) {
2404      default:
2405        return nullptr;
2406      case MVT::i8:
2407        Opcode = NVPTX::STV_i8_v2_asi;
2408        break;
2409      case MVT::i16:
2410        Opcode = NVPTX::STV_i16_v2_asi;
2411        break;
2412      case MVT::i32:
2413        Opcode = NVPTX::STV_i32_v2_asi;
2414        break;
2415      case MVT::i64:
2416        Opcode = NVPTX::STV_i64_v2_asi;
2417        break;
2418      case MVT::f32:
2419        Opcode = NVPTX::STV_f32_v2_asi;
2420        break;
2421      case MVT::f64:
2422        Opcode = NVPTX::STV_f64_v2_asi;
2423        break;
2424      }
2425      break;
2426    case NVPTXISD::StoreV4:
2427      switch (EltVT.getSimpleVT().SimpleTy) {
2428      default:
2429        return nullptr;
2430      case MVT::i8:
2431        Opcode = NVPTX::STV_i8_v4_asi;
2432        break;
2433      case MVT::i16:
2434        Opcode = NVPTX::STV_i16_v4_asi;
2435        break;
2436      case MVT::i32:
2437        Opcode = NVPTX::STV_i32_v4_asi;
2438        break;
2439      case MVT::f32:
2440        Opcode = NVPTX::STV_f32_v4_asi;
2441        break;
2442      }
2443      break;
2444    }
2445    StOps.push_back(Base);
2446    StOps.push_back(Offset);
2447  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2448                          : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2449    if (TM.is64Bit()) {
2450      switch (N->getOpcode()) {
2451      default:
2452        return nullptr;
2453      case NVPTXISD::StoreV2:
2454        switch (EltVT.getSimpleVT().SimpleTy) {
2455        default:
2456          return nullptr;
2457        case MVT::i8:
2458          Opcode = NVPTX::STV_i8_v2_ari_64;
2459          break;
2460        case MVT::i16:
2461          Opcode = NVPTX::STV_i16_v2_ari_64;
2462          break;
2463        case MVT::i32:
2464          Opcode = NVPTX::STV_i32_v2_ari_64;
2465          break;
2466        case MVT::i64:
2467          Opcode = NVPTX::STV_i64_v2_ari_64;
2468          break;
2469        case MVT::f32:
2470          Opcode = NVPTX::STV_f32_v2_ari_64;
2471          break;
2472        case MVT::f64:
2473          Opcode = NVPTX::STV_f64_v2_ari_64;
2474          break;
2475        }
2476        break;
2477      case NVPTXISD::StoreV4:
2478        switch (EltVT.getSimpleVT().SimpleTy) {
2479        default:
2480          return nullptr;
2481        case MVT::i8:
2482          Opcode = NVPTX::STV_i8_v4_ari_64;
2483          break;
2484        case MVT::i16:
2485          Opcode = NVPTX::STV_i16_v4_ari_64;
2486          break;
2487        case MVT::i32:
2488          Opcode = NVPTX::STV_i32_v4_ari_64;
2489          break;
2490        case MVT::f32:
2491          Opcode = NVPTX::STV_f32_v4_ari_64;
2492          break;
2493        }
2494        break;
2495      }
2496    } else {
2497      switch (N->getOpcode()) {
2498      default:
2499        return nullptr;
2500      case NVPTXISD::StoreV2:
2501        switch (EltVT.getSimpleVT().SimpleTy) {
2502        default:
2503          return nullptr;
2504        case MVT::i8:
2505          Opcode = NVPTX::STV_i8_v2_ari;
2506          break;
2507        case MVT::i16:
2508          Opcode = NVPTX::STV_i16_v2_ari;
2509          break;
2510        case MVT::i32:
2511          Opcode = NVPTX::STV_i32_v2_ari;
2512          break;
2513        case MVT::i64:
2514          Opcode = NVPTX::STV_i64_v2_ari;
2515          break;
2516        case MVT::f32:
2517          Opcode = NVPTX::STV_f32_v2_ari;
2518          break;
2519        case MVT::f64:
2520          Opcode = NVPTX::STV_f64_v2_ari;
2521          break;
2522        }
2523        break;
2524      case NVPTXISD::StoreV4:
2525        switch (EltVT.getSimpleVT().SimpleTy) {
2526        default:
2527          return nullptr;
2528        case MVT::i8:
2529          Opcode = NVPTX::STV_i8_v4_ari;
2530          break;
2531        case MVT::i16:
2532          Opcode = NVPTX::STV_i16_v4_ari;
2533          break;
2534        case MVT::i32:
2535          Opcode = NVPTX::STV_i32_v4_ari;
2536          break;
2537        case MVT::f32:
2538          Opcode = NVPTX::STV_f32_v4_ari;
2539          break;
2540        }
2541        break;
2542      }
2543    }
2544    StOps.push_back(Base);
2545    StOps.push_back(Offset);
2546  } else {
2547    if (TM.is64Bit()) {
2548      switch (N->getOpcode()) {
2549      default:
2550        return nullptr;
2551      case NVPTXISD::StoreV2:
2552        switch (EltVT.getSimpleVT().SimpleTy) {
2553        default:
2554          return nullptr;
2555        case MVT::i8:
2556          Opcode = NVPTX::STV_i8_v2_areg_64;
2557          break;
2558        case MVT::i16:
2559          Opcode = NVPTX::STV_i16_v2_areg_64;
2560          break;
2561        case MVT::i32:
2562          Opcode = NVPTX::STV_i32_v2_areg_64;
2563          break;
2564        case MVT::i64:
2565          Opcode = NVPTX::STV_i64_v2_areg_64;
2566          break;
2567        case MVT::f32:
2568          Opcode = NVPTX::STV_f32_v2_areg_64;
2569          break;
2570        case MVT::f64:
2571          Opcode = NVPTX::STV_f64_v2_areg_64;
2572          break;
2573        }
2574        break;
2575      case NVPTXISD::StoreV4:
2576        switch (EltVT.getSimpleVT().SimpleTy) {
2577        default:
2578          return nullptr;
2579        case MVT::i8:
2580          Opcode = NVPTX::STV_i8_v4_areg_64;
2581          break;
2582        case MVT::i16:
2583          Opcode = NVPTX::STV_i16_v4_areg_64;
2584          break;
2585        case MVT::i32:
2586          Opcode = NVPTX::STV_i32_v4_areg_64;
2587          break;
2588        case MVT::f32:
2589          Opcode = NVPTX::STV_f32_v4_areg_64;
2590          break;
2591        }
2592        break;
2593      }
2594    } else {
2595      switch (N->getOpcode()) {
2596      default:
2597        return nullptr;
2598      case NVPTXISD::StoreV2:
2599        switch (EltVT.getSimpleVT().SimpleTy) {
2600        default:
2601          return nullptr;
2602        case MVT::i8:
2603          Opcode = NVPTX::STV_i8_v2_areg;
2604          break;
2605        case MVT::i16:
2606          Opcode = NVPTX::STV_i16_v2_areg;
2607          break;
2608        case MVT::i32:
2609          Opcode = NVPTX::STV_i32_v2_areg;
2610          break;
2611        case MVT::i64:
2612          Opcode = NVPTX::STV_i64_v2_areg;
2613          break;
2614        case MVT::f32:
2615          Opcode = NVPTX::STV_f32_v2_areg;
2616          break;
2617        case MVT::f64:
2618          Opcode = NVPTX::STV_f64_v2_areg;
2619          break;
2620        }
2621        break;
2622      case NVPTXISD::StoreV4:
2623        switch (EltVT.getSimpleVT().SimpleTy) {
2624        default:
2625          return nullptr;
2626        case MVT::i8:
2627          Opcode = NVPTX::STV_i8_v4_areg;
2628          break;
2629        case MVT::i16:
2630          Opcode = NVPTX::STV_i16_v4_areg;
2631          break;
2632        case MVT::i32:
2633          Opcode = NVPTX::STV_i32_v4_areg;
2634          break;
2635        case MVT::f32:
2636          Opcode = NVPTX::STV_f32_v4_areg;
2637          break;
2638        }
2639        break;
2640      }
2641    }
2642    StOps.push_back(N2);
2643  }
2644
2645  StOps.push_back(Chain);
2646
2647  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
2648
2649  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2650  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2651  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2652
2653  return ST;
2654}
2655
2656SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2657  SDValue Chain = Node->getOperand(0);
2658  SDValue Offset = Node->getOperand(2);
2659  SDValue Flag = Node->getOperand(3);
2660  SDLoc DL(Node);
2661  MemSDNode *Mem = cast<MemSDNode>(Node);
2662
2663  unsigned VecSize;
2664  switch (Node->getOpcode()) {
2665  default:
2666    return nullptr;
2667  case NVPTXISD::LoadParam:
2668    VecSize = 1;
2669    break;
2670  case NVPTXISD::LoadParamV2:
2671    VecSize = 2;
2672    break;
2673  case NVPTXISD::LoadParamV4:
2674    VecSize = 4;
2675    break;
2676  }
2677
2678  EVT EltVT = Node->getValueType(0);
2679  EVT MemVT = Mem->getMemoryVT();
2680
2681  unsigned Opc = 0;
2682
2683  switch (VecSize) {
2684  default:
2685    return nullptr;
2686  case 1:
2687    switch (MemVT.getSimpleVT().SimpleTy) {
2688    default:
2689      return nullptr;
2690    case MVT::i1:
2691      Opc = NVPTX::LoadParamMemI8;
2692      break;
2693    case MVT::i8:
2694      Opc = NVPTX::LoadParamMemI8;
2695      break;
2696    case MVT::i16:
2697      Opc = NVPTX::LoadParamMemI16;
2698      break;
2699    case MVT::i32:
2700      Opc = NVPTX::LoadParamMemI32;
2701      break;
2702    case MVT::i64:
2703      Opc = NVPTX::LoadParamMemI64;
2704      break;
2705    case MVT::f32:
2706      Opc = NVPTX::LoadParamMemF32;
2707      break;
2708    case MVT::f64:
2709      Opc = NVPTX::LoadParamMemF64;
2710      break;
2711    }
2712    break;
2713  case 2:
2714    switch (MemVT.getSimpleVT().SimpleTy) {
2715    default:
2716      return nullptr;
2717    case MVT::i1:
2718      Opc = NVPTX::LoadParamMemV2I8;
2719      break;
2720    case MVT::i8:
2721      Opc = NVPTX::LoadParamMemV2I8;
2722      break;
2723    case MVT::i16:
2724      Opc = NVPTX::LoadParamMemV2I16;
2725      break;
2726    case MVT::i32:
2727      Opc = NVPTX::LoadParamMemV2I32;
2728      break;
2729    case MVT::i64:
2730      Opc = NVPTX::LoadParamMemV2I64;
2731      break;
2732    case MVT::f32:
2733      Opc = NVPTX::LoadParamMemV2F32;
2734      break;
2735    case MVT::f64:
2736      Opc = NVPTX::LoadParamMemV2F64;
2737      break;
2738    }
2739    break;
2740  case 4:
2741    switch (MemVT.getSimpleVT().SimpleTy) {
2742    default:
2743      return nullptr;
2744    case MVT::i1:
2745      Opc = NVPTX::LoadParamMemV4I8;
2746      break;
2747    case MVT::i8:
2748      Opc = NVPTX::LoadParamMemV4I8;
2749      break;
2750    case MVT::i16:
2751      Opc = NVPTX::LoadParamMemV4I16;
2752      break;
2753    case MVT::i32:
2754      Opc = NVPTX::LoadParamMemV4I32;
2755      break;
2756    case MVT::f32:
2757      Opc = NVPTX::LoadParamMemV4F32;
2758      break;
2759    }
2760    break;
2761  }
2762
2763  SDVTList VTs;
2764  if (VecSize == 1) {
2765    VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2766  } else if (VecSize == 2) {
2767    VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2768  } else {
2769    EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2770    VTs = CurDAG->getVTList(EVTs);
2771  }
2772
2773  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2774
2775  SmallVector<SDValue, 2> Ops;
2776  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2777  Ops.push_back(Chain);
2778  Ops.push_back(Flag);
2779
2780  return CurDAG->getMachineNode(Opc, DL, VTs, Ops);
2781}
2782
2783SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2784  SDLoc DL(N);
2785  SDValue Chain = N->getOperand(0);
2786  SDValue Offset = N->getOperand(1);
2787  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2788  MemSDNode *Mem = cast<MemSDNode>(N);
2789
2790  // How many elements do we have?
2791  unsigned NumElts = 1;
2792  switch (N->getOpcode()) {
2793  default:
2794    return nullptr;
2795  case NVPTXISD::StoreRetval:
2796    NumElts = 1;
2797    break;
2798  case NVPTXISD::StoreRetvalV2:
2799    NumElts = 2;
2800    break;
2801  case NVPTXISD::StoreRetvalV4:
2802    NumElts = 4;
2803    break;
2804  }
2805
2806  // Build vector of operands
2807  SmallVector<SDValue, 6> Ops;
2808  for (unsigned i = 0; i < NumElts; ++i)
2809    Ops.push_back(N->getOperand(i + 2));
2810  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2811  Ops.push_back(Chain);
2812
2813  // Determine target opcode
2814  // If we have an i1, use an 8-bit store. The lowering code in
2815  // NVPTXISelLowering will have already emitted an upcast.
2816  unsigned Opcode = 0;
2817  switch (NumElts) {
2818  default:
2819    return nullptr;
2820  case 1:
2821    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2822    default:
2823      return nullptr;
2824    case MVT::i1:
2825      Opcode = NVPTX::StoreRetvalI8;
2826      break;
2827    case MVT::i8:
2828      Opcode = NVPTX::StoreRetvalI8;
2829      break;
2830    case MVT::i16:
2831      Opcode = NVPTX::StoreRetvalI16;
2832      break;
2833    case MVT::i32:
2834      Opcode = NVPTX::StoreRetvalI32;
2835      break;
2836    case MVT::i64:
2837      Opcode = NVPTX::StoreRetvalI64;
2838      break;
2839    case MVT::f32:
2840      Opcode = NVPTX::StoreRetvalF32;
2841      break;
2842    case MVT::f64:
2843      Opcode = NVPTX::StoreRetvalF64;
2844      break;
2845    }
2846    break;
2847  case 2:
2848    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2849    default:
2850      return nullptr;
2851    case MVT::i1:
2852      Opcode = NVPTX::StoreRetvalV2I8;
2853      break;
2854    case MVT::i8:
2855      Opcode = NVPTX::StoreRetvalV2I8;
2856      break;
2857    case MVT::i16:
2858      Opcode = NVPTX::StoreRetvalV2I16;
2859      break;
2860    case MVT::i32:
2861      Opcode = NVPTX::StoreRetvalV2I32;
2862      break;
2863    case MVT::i64:
2864      Opcode = NVPTX::StoreRetvalV2I64;
2865      break;
2866    case MVT::f32:
2867      Opcode = NVPTX::StoreRetvalV2F32;
2868      break;
2869    case MVT::f64:
2870      Opcode = NVPTX::StoreRetvalV2F64;
2871      break;
2872    }
2873    break;
2874  case 4:
2875    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2876    default:
2877      return nullptr;
2878    case MVT::i1:
2879      Opcode = NVPTX::StoreRetvalV4I8;
2880      break;
2881    case MVT::i8:
2882      Opcode = NVPTX::StoreRetvalV4I8;
2883      break;
2884    case MVT::i16:
2885      Opcode = NVPTX::StoreRetvalV4I16;
2886      break;
2887    case MVT::i32:
2888      Opcode = NVPTX::StoreRetvalV4I32;
2889      break;
2890    case MVT::f32:
2891      Opcode = NVPTX::StoreRetvalV4F32;
2892      break;
2893    }
2894    break;
2895  }
2896
2897  SDNode *Ret =
2898      CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2899  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2900  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2901  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2902
2903  return Ret;
2904}
2905
2906SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2907  SDLoc DL(N);
2908  SDValue Chain = N->getOperand(0);
2909  SDValue Param = N->getOperand(1);
2910  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2911  SDValue Offset = N->getOperand(2);
2912  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2913  MemSDNode *Mem = cast<MemSDNode>(N);
2914  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2915
2916  // How many elements do we have?
2917  unsigned NumElts = 1;
2918  switch (N->getOpcode()) {
2919  default:
2920    return nullptr;
2921  case NVPTXISD::StoreParamU32:
2922  case NVPTXISD::StoreParamS32:
2923  case NVPTXISD::StoreParam:
2924    NumElts = 1;
2925    break;
2926  case NVPTXISD::StoreParamV2:
2927    NumElts = 2;
2928    break;
2929  case NVPTXISD::StoreParamV4:
2930    NumElts = 4;
2931    break;
2932  }
2933
2934  // Build vector of operands
2935  SmallVector<SDValue, 8> Ops;
2936  for (unsigned i = 0; i < NumElts; ++i)
2937    Ops.push_back(N->getOperand(i + 3));
2938  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2939  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2940  Ops.push_back(Chain);
2941  Ops.push_back(Flag);
2942
2943  // Determine target opcode
2944  // If we have an i1, use an 8-bit store. The lowering code in
2945  // NVPTXISelLowering will have already emitted an upcast.
2946  unsigned Opcode = 0;
2947  switch (N->getOpcode()) {
2948  default:
2949    switch (NumElts) {
2950    default:
2951      return nullptr;
2952    case 1:
2953      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2954      default:
2955        return nullptr;
2956      case MVT::i1:
2957        Opcode = NVPTX::StoreParamI8;
2958        break;
2959      case MVT::i8:
2960        Opcode = NVPTX::StoreParamI8;
2961        break;
2962      case MVT::i16:
2963        Opcode = NVPTX::StoreParamI16;
2964        break;
2965      case MVT::i32:
2966        Opcode = NVPTX::StoreParamI32;
2967        break;
2968      case MVT::i64:
2969        Opcode = NVPTX::StoreParamI64;
2970        break;
2971      case MVT::f32:
2972        Opcode = NVPTX::StoreParamF32;
2973        break;
2974      case MVT::f64:
2975        Opcode = NVPTX::StoreParamF64;
2976        break;
2977      }
2978      break;
2979    case 2:
2980      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2981      default:
2982        return nullptr;
2983      case MVT::i1:
2984        Opcode = NVPTX::StoreParamV2I8;
2985        break;
2986      case MVT::i8:
2987        Opcode = NVPTX::StoreParamV2I8;
2988        break;
2989      case MVT::i16:
2990        Opcode = NVPTX::StoreParamV2I16;
2991        break;
2992      case MVT::i32:
2993        Opcode = NVPTX::StoreParamV2I32;
2994        break;
2995      case MVT::i64:
2996        Opcode = NVPTX::StoreParamV2I64;
2997        break;
2998      case MVT::f32:
2999        Opcode = NVPTX::StoreParamV2F32;
3000        break;
3001      case MVT::f64:
3002        Opcode = NVPTX::StoreParamV2F64;
3003        break;
3004      }
3005      break;
3006    case 4:
3007      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3008      default:
3009        return nullptr;
3010      case MVT::i1:
3011        Opcode = NVPTX::StoreParamV4I8;
3012        break;
3013      case MVT::i8:
3014        Opcode = NVPTX::StoreParamV4I8;
3015        break;
3016      case MVT::i16:
3017        Opcode = NVPTX::StoreParamV4I16;
3018        break;
3019      case MVT::i32:
3020        Opcode = NVPTX::StoreParamV4I32;
3021        break;
3022      case MVT::f32:
3023        Opcode = NVPTX::StoreParamV4F32;
3024        break;
3025      }
3026      break;
3027    }
3028    break;
3029  // Special case: if we have a sign-extend/zero-extend node, insert the
3030  // conversion instruction first, and use that as the value operand to
3031  // the selected StoreParam node.
3032  case NVPTXISD::StoreParamU32: {
3033    Opcode = NVPTX::StoreParamI32;
3034    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3035                                                MVT::i32);
3036    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
3037                                         MVT::i32, Ops[0], CvtNone);
3038    Ops[0] = SDValue(Cvt, 0);
3039    break;
3040  }
3041  case NVPTXISD::StoreParamS32: {
3042    Opcode = NVPTX::StoreParamI32;
3043    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3044                                                MVT::i32);
3045    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3046                                         MVT::i32, Ops[0], CvtNone);
3047    Ops[0] = SDValue(Cvt, 0);
3048    break;
3049  }
3050  }
3051
3052  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3053  SDNode *Ret =
3054      CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
3055  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3056  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3057  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3058
3059  return Ret;
3060}
3061
3062SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3063  SDValue Chain = N->getOperand(0);
3064  SDNode *Ret = nullptr;
3065  unsigned Opc = 0;
3066  SmallVector<SDValue, 8> Ops;
3067
3068  switch (N->getOpcode()) {
3069  default: return nullptr;
3070  case NVPTXISD::Tex1DFloatS32:
3071    Opc = NVPTX::TEX_1D_F32_S32;
3072    break;
3073  case NVPTXISD::Tex1DFloatFloat:
3074    Opc = NVPTX::TEX_1D_F32_F32;
3075    break;
3076  case NVPTXISD::Tex1DFloatFloatLevel:
3077    Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3078    break;
3079  case NVPTXISD::Tex1DFloatFloatGrad:
3080    Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3081    break;
3082  case NVPTXISD::Tex1DS32S32:
3083    Opc = NVPTX::TEX_1D_S32_S32;
3084    break;
3085  case NVPTXISD::Tex1DS32Float:
3086    Opc = NVPTX::TEX_1D_S32_F32;
3087    break;
3088  case NVPTXISD::Tex1DS32FloatLevel:
3089    Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3090    break;
3091  case NVPTXISD::Tex1DS32FloatGrad:
3092    Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3093    break;
3094  case NVPTXISD::Tex1DU32S32:
3095    Opc = NVPTX::TEX_1D_U32_S32;
3096    break;
3097  case NVPTXISD::Tex1DU32Float:
3098    Opc = NVPTX::TEX_1D_U32_F32;
3099    break;
3100  case NVPTXISD::Tex1DU32FloatLevel:
3101    Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3102    break;
3103  case NVPTXISD::Tex1DU32FloatGrad:
3104    Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3105    break;
3106  case NVPTXISD::Tex1DArrayFloatS32:
3107    Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3108    break;
3109  case NVPTXISD::Tex1DArrayFloatFloat:
3110    Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3111    break;
3112  case NVPTXISD::Tex1DArrayFloatFloatLevel:
3113    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3114    break;
3115  case NVPTXISD::Tex1DArrayFloatFloatGrad:
3116    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3117    break;
3118  case NVPTXISD::Tex1DArrayS32S32:
3119    Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3120    break;
3121  case NVPTXISD::Tex1DArrayS32Float:
3122    Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3123    break;
3124  case NVPTXISD::Tex1DArrayS32FloatLevel:
3125    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3126    break;
3127  case NVPTXISD::Tex1DArrayS32FloatGrad:
3128    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3129    break;
3130  case NVPTXISD::Tex1DArrayU32S32:
3131    Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3132    break;
3133  case NVPTXISD::Tex1DArrayU32Float:
3134    Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3135    break;
3136  case NVPTXISD::Tex1DArrayU32FloatLevel:
3137    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3138    break;
3139  case NVPTXISD::Tex1DArrayU32FloatGrad:
3140    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3141    break;
3142  case NVPTXISD::Tex2DFloatS32:
3143    Opc = NVPTX::TEX_2D_F32_S32;
3144    break;
3145  case NVPTXISD::Tex2DFloatFloat:
3146    Opc = NVPTX::TEX_2D_F32_F32;
3147    break;
3148  case NVPTXISD::Tex2DFloatFloatLevel:
3149    Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3150    break;
3151  case NVPTXISD::Tex2DFloatFloatGrad:
3152    Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3153    break;
3154  case NVPTXISD::Tex2DS32S32:
3155    Opc = NVPTX::TEX_2D_S32_S32;
3156    break;
3157  case NVPTXISD::Tex2DS32Float:
3158    Opc = NVPTX::TEX_2D_S32_F32;
3159    break;
3160  case NVPTXISD::Tex2DS32FloatLevel:
3161    Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3162    break;
3163  case NVPTXISD::Tex2DS32FloatGrad:
3164    Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3165    break;
3166  case NVPTXISD::Tex2DU32S32:
3167    Opc = NVPTX::TEX_2D_U32_S32;
3168    break;
3169  case NVPTXISD::Tex2DU32Float:
3170    Opc = NVPTX::TEX_2D_U32_F32;
3171    break;
3172  case NVPTXISD::Tex2DU32FloatLevel:
3173    Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3174    break;
3175  case NVPTXISD::Tex2DU32FloatGrad:
3176    Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3177    break;
3178  case NVPTXISD::Tex2DArrayFloatS32:
3179    Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3180    break;
3181  case NVPTXISD::Tex2DArrayFloatFloat:
3182    Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3183    break;
3184  case NVPTXISD::Tex2DArrayFloatFloatLevel:
3185    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3186    break;
3187  case NVPTXISD::Tex2DArrayFloatFloatGrad:
3188    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3189    break;
3190  case NVPTXISD::Tex2DArrayS32S32:
3191    Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3192    break;
3193  case NVPTXISD::Tex2DArrayS32Float:
3194    Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3195    break;
3196  case NVPTXISD::Tex2DArrayS32FloatLevel:
3197    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3198    break;
3199  case NVPTXISD::Tex2DArrayS32FloatGrad:
3200    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3201    break;
3202  case NVPTXISD::Tex2DArrayU32S32:
3203    Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3204    break;
3205  case NVPTXISD::Tex2DArrayU32Float:
3206    Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3207    break;
3208  case NVPTXISD::Tex2DArrayU32FloatLevel:
3209    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3210    break;
3211  case NVPTXISD::Tex2DArrayU32FloatGrad:
3212    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3213    break;
3214  case NVPTXISD::Tex3DFloatS32:
3215    Opc = NVPTX::TEX_3D_F32_S32;
3216    break;
3217  case NVPTXISD::Tex3DFloatFloat:
3218    Opc = NVPTX::TEX_3D_F32_F32;
3219    break;
3220  case NVPTXISD::Tex3DFloatFloatLevel:
3221    Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3222    break;
3223  case NVPTXISD::Tex3DFloatFloatGrad:
3224    Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3225    break;
3226  case NVPTXISD::Tex3DS32S32:
3227    Opc = NVPTX::TEX_3D_S32_S32;
3228    break;
3229  case NVPTXISD::Tex3DS32Float:
3230    Opc = NVPTX::TEX_3D_S32_F32;
3231    break;
3232  case NVPTXISD::Tex3DS32FloatLevel:
3233    Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3234    break;
3235  case NVPTXISD::Tex3DS32FloatGrad:
3236    Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3237    break;
3238  case NVPTXISD::Tex3DU32S32:
3239    Opc = NVPTX::TEX_3D_U32_S32;
3240    break;
3241  case NVPTXISD::Tex3DU32Float:
3242    Opc = NVPTX::TEX_3D_U32_F32;
3243    break;
3244  case NVPTXISD::Tex3DU32FloatLevel:
3245    Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3246    break;
3247  case NVPTXISD::Tex3DU32FloatGrad:
3248    Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3249    break;
3250  case NVPTXISD::TexCubeFloatFloat:
3251    Opc = NVPTX::TEX_CUBE_F32_F32;
3252    break;
3253  case NVPTXISD::TexCubeFloatFloatLevel:
3254    Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3255    break;
3256  case NVPTXISD::TexCubeS32Float:
3257    Opc = NVPTX::TEX_CUBE_S32_F32;
3258    break;
3259  case NVPTXISD::TexCubeS32FloatLevel:
3260    Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3261    break;
3262  case NVPTXISD::TexCubeU32Float:
3263    Opc = NVPTX::TEX_CUBE_U32_F32;
3264    break;
3265  case NVPTXISD::TexCubeU32FloatLevel:
3266    Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3267    break;
3268  case NVPTXISD::TexCubeArrayFloatFloat:
3269    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3270    break;
3271  case NVPTXISD::TexCubeArrayFloatFloatLevel:
3272    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3273    break;
3274  case NVPTXISD::TexCubeArrayS32Float:
3275    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3276    break;
3277  case NVPTXISD::TexCubeArrayS32FloatLevel:
3278    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3279    break;
3280  case NVPTXISD::TexCubeArrayU32Float:
3281    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3282    break;
3283  case NVPTXISD::TexCubeArrayU32FloatLevel:
3284    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3285    break;
3286  case NVPTXISD::Tld4R2DFloatFloat:
3287    Opc = NVPTX::TLD4_R_2D_F32_F32;
3288    break;
3289  case NVPTXISD::Tld4G2DFloatFloat:
3290    Opc = NVPTX::TLD4_G_2D_F32_F32;
3291    break;
3292  case NVPTXISD::Tld4B2DFloatFloat:
3293    Opc = NVPTX::TLD4_B_2D_F32_F32;
3294    break;
3295  case NVPTXISD::Tld4A2DFloatFloat:
3296    Opc = NVPTX::TLD4_A_2D_F32_F32;
3297    break;
3298  case NVPTXISD::Tld4R2DS64Float:
3299    Opc = NVPTX::TLD4_R_2D_S32_F32;
3300    break;
3301  case NVPTXISD::Tld4G2DS64Float:
3302    Opc = NVPTX::TLD4_G_2D_S32_F32;
3303    break;
3304  case NVPTXISD::Tld4B2DS64Float:
3305    Opc = NVPTX::TLD4_B_2D_S32_F32;
3306    break;
3307  case NVPTXISD::Tld4A2DS64Float:
3308    Opc = NVPTX::TLD4_A_2D_S32_F32;
3309    break;
3310  case NVPTXISD::Tld4R2DU64Float:
3311    Opc = NVPTX::TLD4_R_2D_U32_F32;
3312    break;
3313  case NVPTXISD::Tld4G2DU64Float:
3314    Opc = NVPTX::TLD4_G_2D_U32_F32;
3315    break;
3316  case NVPTXISD::Tld4B2DU64Float:
3317    Opc = NVPTX::TLD4_B_2D_U32_F32;
3318    break;
3319  case NVPTXISD::Tld4A2DU64Float:
3320    Opc = NVPTX::TLD4_A_2D_U32_F32;
3321    break;
3322  case NVPTXISD::TexUnified1DFloatS32:
3323    Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3324    break;
3325  case NVPTXISD::TexUnified1DFloatFloat:
3326    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3327    break;
3328  case NVPTXISD::TexUnified1DFloatFloatLevel:
3329    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3330    break;
3331  case NVPTXISD::TexUnified1DFloatFloatGrad:
3332    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3333    break;
3334  case NVPTXISD::TexUnified1DS32S32:
3335    Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3336    break;
3337  case NVPTXISD::TexUnified1DS32Float:
3338    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3339    break;
3340  case NVPTXISD::TexUnified1DS32FloatLevel:
3341    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3342    break;
3343  case NVPTXISD::TexUnified1DS32FloatGrad:
3344    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3345    break;
3346  case NVPTXISD::TexUnified1DU32S32:
3347    Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3348    break;
3349  case NVPTXISD::TexUnified1DU32Float:
3350    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3351    break;
3352  case NVPTXISD::TexUnified1DU32FloatLevel:
3353    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3354    break;
3355  case NVPTXISD::TexUnified1DU32FloatGrad:
3356    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3357    break;
3358  case NVPTXISD::TexUnified1DArrayFloatS32:
3359    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3360    break;
3361  case NVPTXISD::TexUnified1DArrayFloatFloat:
3362    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3363    break;
3364  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3365    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3366    break;
3367  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3368    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3369    break;
3370  case NVPTXISD::TexUnified1DArrayS32S32:
3371    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3372    break;
3373  case NVPTXISD::TexUnified1DArrayS32Float:
3374    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3375    break;
3376  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3377    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3378    break;
3379  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3380    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3381    break;
3382  case NVPTXISD::TexUnified1DArrayU32S32:
3383    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3384    break;
3385  case NVPTXISD::TexUnified1DArrayU32Float:
3386    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3387    break;
3388  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3389    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3390    break;
3391  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3392    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3393    break;
3394  case NVPTXISD::TexUnified2DFloatS32:
3395    Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3396    break;
3397  case NVPTXISD::TexUnified2DFloatFloat:
3398    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3399    break;
3400  case NVPTXISD::TexUnified2DFloatFloatLevel:
3401    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3402    break;
3403  case NVPTXISD::TexUnified2DFloatFloatGrad:
3404    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3405    break;
3406  case NVPTXISD::TexUnified2DS32S32:
3407    Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3408    break;
3409  case NVPTXISD::TexUnified2DS32Float:
3410    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3411    break;
3412  case NVPTXISD::TexUnified2DS32FloatLevel:
3413    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3414    break;
3415  case NVPTXISD::TexUnified2DS32FloatGrad:
3416    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3417    break;
3418  case NVPTXISD::TexUnified2DU32S32:
3419    Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3420    break;
3421  case NVPTXISD::TexUnified2DU32Float:
3422    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3423    break;
3424  case NVPTXISD::TexUnified2DU32FloatLevel:
3425    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3426    break;
3427  case NVPTXISD::TexUnified2DU32FloatGrad:
3428    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3429    break;
3430  case NVPTXISD::TexUnified2DArrayFloatS32:
3431    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3432    break;
3433  case NVPTXISD::TexUnified2DArrayFloatFloat:
3434    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3435    break;
3436  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3437    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3438    break;
3439  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3440    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3441    break;
3442  case NVPTXISD::TexUnified2DArrayS32S32:
3443    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3444    break;
3445  case NVPTXISD::TexUnified2DArrayS32Float:
3446    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3447    break;
3448  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3449    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3450    break;
3451  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3452    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3453    break;
3454  case NVPTXISD::TexUnified2DArrayU32S32:
3455    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3456    break;
3457  case NVPTXISD::TexUnified2DArrayU32Float:
3458    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3459    break;
3460  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3461    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3462    break;
3463  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3464    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3465    break;
3466  case NVPTXISD::TexUnified3DFloatS32:
3467    Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3468    break;
3469  case NVPTXISD::TexUnified3DFloatFloat:
3470    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3471    break;
3472  case NVPTXISD::TexUnified3DFloatFloatLevel:
3473    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3474    break;
3475  case NVPTXISD::TexUnified3DFloatFloatGrad:
3476    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3477    break;
3478  case NVPTXISD::TexUnified3DS32S32:
3479    Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3480    break;
3481  case NVPTXISD::TexUnified3DS32Float:
3482    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3483    break;
3484  case NVPTXISD::TexUnified3DS32FloatLevel:
3485    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3486    break;
3487  case NVPTXISD::TexUnified3DS32FloatGrad:
3488    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3489    break;
3490  case NVPTXISD::TexUnified3DU32S32:
3491    Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3492    break;
3493  case NVPTXISD::TexUnified3DU32Float:
3494    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3495    break;
3496  case NVPTXISD::TexUnified3DU32FloatLevel:
3497    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3498    break;
3499  case NVPTXISD::TexUnified3DU32FloatGrad:
3500    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3501    break;
3502  case NVPTXISD::TexUnifiedCubeFloatFloat:
3503    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3504    break;
3505  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3506    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3507    break;
3508  case NVPTXISD::TexUnifiedCubeS32Float:
3509    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3510    break;
3511  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3512    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3513    break;
3514  case NVPTXISD::TexUnifiedCubeU32Float:
3515    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3516    break;
3517  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3518    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3519    break;
3520  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3521    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3522    break;
3523  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3524    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3525    break;
3526  case NVPTXISD::TexUnifiedCubeArrayS32Float:
3527    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3528    break;
3529  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3530    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3531    break;
3532  case NVPTXISD::TexUnifiedCubeArrayU32Float:
3533    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3534    break;
3535  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3536    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3537    break;
3538  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3539    Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3540    break;
3541  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3542    Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3543    break;
3544  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3545    Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3546    break;
3547  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3548    Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3549    break;
3550  case NVPTXISD::Tld4UnifiedR2DS64Float:
3551    Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3552    break;
3553  case NVPTXISD::Tld4UnifiedG2DS64Float:
3554    Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3555    break;
3556  case NVPTXISD::Tld4UnifiedB2DS64Float:
3557    Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3558    break;
3559  case NVPTXISD::Tld4UnifiedA2DS64Float:
3560    Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3561    break;
3562  case NVPTXISD::Tld4UnifiedR2DU64Float:
3563    Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3564    break;
3565  case NVPTXISD::Tld4UnifiedG2DU64Float:
3566    Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3567    break;
3568  case NVPTXISD::Tld4UnifiedB2DU64Float:
3569    Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3570    break;
3571  case NVPTXISD::Tld4UnifiedA2DU64Float:
3572    Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3573    break;
3574  }
3575
3576  // Copy over operands
3577  for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3578    Ops.push_back(N->getOperand(i));
3579  }
3580
3581  Ops.push_back(Chain);
3582  Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3583  return Ret;
3584}
3585
3586SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3587  SDValue Chain = N->getOperand(0);
3588  SDValue TexHandle = N->getOperand(1);
3589  SDNode *Ret = nullptr;
3590  unsigned Opc = 0;
3591  SmallVector<SDValue, 8> Ops;
3592  switch (N->getOpcode()) {
3593  default: return nullptr;
3594  case NVPTXISD::Suld1DI8Clamp:
3595    Opc = NVPTX::SULD_1D_I8_CLAMP;
3596    Ops.push_back(TexHandle);
3597    Ops.push_back(N->getOperand(2));
3598    Ops.push_back(Chain);
3599    break;
3600  case NVPTXISD::Suld1DI16Clamp:
3601    Opc = NVPTX::SULD_1D_I16_CLAMP;
3602    Ops.push_back(TexHandle);
3603    Ops.push_back(N->getOperand(2));
3604    Ops.push_back(Chain);
3605    break;
3606  case NVPTXISD::Suld1DI32Clamp:
3607    Opc = NVPTX::SULD_1D_I32_CLAMP;
3608    Ops.push_back(TexHandle);
3609    Ops.push_back(N->getOperand(2));
3610    Ops.push_back(Chain);
3611    break;
3612  case NVPTXISD::Suld1DI64Clamp:
3613    Opc = NVPTX::SULD_1D_I64_CLAMP;
3614    Ops.push_back(TexHandle);
3615    Ops.push_back(N->getOperand(2));
3616    Ops.push_back(Chain);
3617    break;
3618  case NVPTXISD::Suld1DV2I8Clamp:
3619    Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3620    Ops.push_back(TexHandle);
3621    Ops.push_back(N->getOperand(2));
3622    Ops.push_back(Chain);
3623    break;
3624  case NVPTXISD::Suld1DV2I16Clamp:
3625    Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3626    Ops.push_back(TexHandle);
3627    Ops.push_back(N->getOperand(2));
3628    Ops.push_back(Chain);
3629    break;
3630  case NVPTXISD::Suld1DV2I32Clamp:
3631    Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3632    Ops.push_back(TexHandle);
3633    Ops.push_back(N->getOperand(2));
3634    Ops.push_back(Chain);
3635    break;
3636  case NVPTXISD::Suld1DV2I64Clamp:
3637    Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3638    Ops.push_back(TexHandle);
3639    Ops.push_back(N->getOperand(2));
3640    Ops.push_back(Chain);
3641    break;
3642  case NVPTXISD::Suld1DV4I8Clamp:
3643    Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3644    Ops.push_back(TexHandle);
3645    Ops.push_back(N->getOperand(2));
3646    Ops.push_back(Chain);
3647    break;
3648  case NVPTXISD::Suld1DV4I16Clamp:
3649    Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3650    Ops.push_back(TexHandle);
3651    Ops.push_back(N->getOperand(2));
3652    Ops.push_back(Chain);
3653    break;
3654  case NVPTXISD::Suld1DV4I32Clamp:
3655    Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3656    Ops.push_back(TexHandle);
3657    Ops.push_back(N->getOperand(2));
3658    Ops.push_back(Chain);
3659    break;
3660  case NVPTXISD::Suld1DArrayI8Clamp:
3661    Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3662    Ops.push_back(TexHandle);
3663    Ops.push_back(N->getOperand(2));
3664    Ops.push_back(N->getOperand(3));
3665    Ops.push_back(Chain);
3666    break;
3667  case NVPTXISD::Suld1DArrayI16Clamp:
3668    Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3669    Ops.push_back(TexHandle);
3670    Ops.push_back(N->getOperand(2));
3671    Ops.push_back(N->getOperand(3));
3672    Ops.push_back(Chain);
3673    break;
3674  case NVPTXISD::Suld1DArrayI32Clamp:
3675    Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3676    Ops.push_back(TexHandle);
3677    Ops.push_back(N->getOperand(2));
3678    Ops.push_back(N->getOperand(3));
3679    Ops.push_back(Chain);
3680    break;
3681  case NVPTXISD::Suld1DArrayI64Clamp:
3682    Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3683    Ops.push_back(TexHandle);
3684    Ops.push_back(N->getOperand(2));
3685    Ops.push_back(N->getOperand(3));
3686    Ops.push_back(Chain);
3687    break;
3688  case NVPTXISD::Suld1DArrayV2I8Clamp:
3689    Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3690    Ops.push_back(TexHandle);
3691    Ops.push_back(N->getOperand(2));
3692    Ops.push_back(N->getOperand(3));
3693    Ops.push_back(Chain);
3694    break;
3695  case NVPTXISD::Suld1DArrayV2I16Clamp:
3696    Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3697    Ops.push_back(TexHandle);
3698    Ops.push_back(N->getOperand(2));
3699    Ops.push_back(N->getOperand(3));
3700    Ops.push_back(Chain);
3701    break;
3702  case NVPTXISD::Suld1DArrayV2I32Clamp:
3703    Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3704    Ops.push_back(TexHandle);
3705    Ops.push_back(N->getOperand(2));
3706    Ops.push_back(N->getOperand(3));
3707    Ops.push_back(Chain);
3708    break;
3709  case NVPTXISD::Suld1DArrayV2I64Clamp:
3710    Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3711    Ops.push_back(TexHandle);
3712    Ops.push_back(N->getOperand(2));
3713    Ops.push_back(N->getOperand(3));
3714    Ops.push_back(Chain);
3715    break;
3716  case NVPTXISD::Suld1DArrayV4I8Clamp:
3717    Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3718    Ops.push_back(TexHandle);
3719    Ops.push_back(N->getOperand(2));
3720    Ops.push_back(N->getOperand(3));
3721    Ops.push_back(Chain);
3722    break;
3723  case NVPTXISD::Suld1DArrayV4I16Clamp:
3724    Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3725    Ops.push_back(TexHandle);
3726    Ops.push_back(N->getOperand(2));
3727    Ops.push_back(N->getOperand(3));
3728    Ops.push_back(Chain);
3729    break;
3730  case NVPTXISD::Suld1DArrayV4I32Clamp:
3731    Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3732    Ops.push_back(TexHandle);
3733    Ops.push_back(N->getOperand(2));
3734    Ops.push_back(N->getOperand(3));
3735    Ops.push_back(Chain);
3736    break;
3737  case NVPTXISD::Suld2DI8Clamp:
3738    Opc = NVPTX::SULD_2D_I8_CLAMP;
3739    Ops.push_back(TexHandle);
3740    Ops.push_back(N->getOperand(2));
3741    Ops.push_back(N->getOperand(3));
3742    Ops.push_back(Chain);
3743    break;
3744  case NVPTXISD::Suld2DI16Clamp:
3745    Opc = NVPTX::SULD_2D_I16_CLAMP;
3746    Ops.push_back(TexHandle);
3747    Ops.push_back(N->getOperand(2));
3748    Ops.push_back(N->getOperand(3));
3749    Ops.push_back(Chain);
3750    break;
3751  case NVPTXISD::Suld2DI32Clamp:
3752    Opc = NVPTX::SULD_2D_I32_CLAMP;
3753    Ops.push_back(TexHandle);
3754    Ops.push_back(N->getOperand(2));
3755    Ops.push_back(N->getOperand(3));
3756    Ops.push_back(Chain);
3757    break;
3758  case NVPTXISD::Suld2DI64Clamp:
3759    Opc = NVPTX::SULD_2D_I64_CLAMP;
3760    Ops.push_back(TexHandle);
3761    Ops.push_back(N->getOperand(2));
3762    Ops.push_back(N->getOperand(3));
3763    Ops.push_back(Chain);
3764    break;
3765  case NVPTXISD::Suld2DV2I8Clamp:
3766    Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3767    Ops.push_back(TexHandle);
3768    Ops.push_back(N->getOperand(2));
3769    Ops.push_back(N->getOperand(3));
3770    Ops.push_back(Chain);
3771    break;
3772  case NVPTXISD::Suld2DV2I16Clamp:
3773    Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3774    Ops.push_back(TexHandle);
3775    Ops.push_back(N->getOperand(2));
3776    Ops.push_back(N->getOperand(3));
3777    Ops.push_back(Chain);
3778    break;
3779  case NVPTXISD::Suld2DV2I32Clamp:
3780    Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3781    Ops.push_back(TexHandle);
3782    Ops.push_back(N->getOperand(2));
3783    Ops.push_back(N->getOperand(3));
3784    Ops.push_back(Chain);
3785    break;
3786  case NVPTXISD::Suld2DV2I64Clamp:
3787    Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3788    Ops.push_back(TexHandle);
3789    Ops.push_back(N->getOperand(2));
3790    Ops.push_back(N->getOperand(3));
3791    Ops.push_back(Chain);
3792    break;
3793  case NVPTXISD::Suld2DV4I8Clamp:
3794    Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3795    Ops.push_back(TexHandle);
3796    Ops.push_back(N->getOperand(2));
3797    Ops.push_back(N->getOperand(3));
3798    Ops.push_back(Chain);
3799    break;
3800  case NVPTXISD::Suld2DV4I16Clamp:
3801    Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3802    Ops.push_back(TexHandle);
3803    Ops.push_back(N->getOperand(2));
3804    Ops.push_back(N->getOperand(3));
3805    Ops.push_back(Chain);
3806    break;
3807  case NVPTXISD::Suld2DV4I32Clamp:
3808    Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3809    Ops.push_back(TexHandle);
3810    Ops.push_back(N->getOperand(2));
3811    Ops.push_back(N->getOperand(3));
3812    Ops.push_back(Chain);
3813    break;
3814  case NVPTXISD::Suld2DArrayI8Clamp:
3815    Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3816    Ops.push_back(TexHandle);
3817    Ops.push_back(N->getOperand(2));
3818    Ops.push_back(N->getOperand(3));
3819    Ops.push_back(N->getOperand(4));
3820    Ops.push_back(Chain);
3821    break;
3822  case NVPTXISD::Suld2DArrayI16Clamp:
3823    Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3824    Ops.push_back(TexHandle);
3825    Ops.push_back(N->getOperand(2));
3826    Ops.push_back(N->getOperand(3));
3827    Ops.push_back(N->getOperand(4));
3828    Ops.push_back(Chain);
3829    break;
3830  case NVPTXISD::Suld2DArrayI32Clamp:
3831    Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3832    Ops.push_back(TexHandle);
3833    Ops.push_back(N->getOperand(2));
3834    Ops.push_back(N->getOperand(3));
3835    Ops.push_back(N->getOperand(4));
3836    Ops.push_back(Chain);
3837    break;
3838  case NVPTXISD::Suld2DArrayI64Clamp:
3839    Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3840    Ops.push_back(TexHandle);
3841    Ops.push_back(N->getOperand(2));
3842    Ops.push_back(N->getOperand(3));
3843    Ops.push_back(N->getOperand(4));
3844    Ops.push_back(Chain);
3845    break;
3846  case NVPTXISD::Suld2DArrayV2I8Clamp:
3847    Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3848    Ops.push_back(TexHandle);
3849    Ops.push_back(N->getOperand(2));
3850    Ops.push_back(N->getOperand(3));
3851    Ops.push_back(N->getOperand(4));
3852    Ops.push_back(Chain);
3853    break;
3854  case NVPTXISD::Suld2DArrayV2I16Clamp:
3855    Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3856    Ops.push_back(TexHandle);
3857    Ops.push_back(N->getOperand(2));
3858    Ops.push_back(N->getOperand(3));
3859    Ops.push_back(N->getOperand(4));
3860    Ops.push_back(Chain);
3861    break;
3862  case NVPTXISD::Suld2DArrayV2I32Clamp:
3863    Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3864    Ops.push_back(TexHandle);
3865    Ops.push_back(N->getOperand(2));
3866    Ops.push_back(N->getOperand(3));
3867    Ops.push_back(N->getOperand(4));
3868    Ops.push_back(Chain);
3869    break;
3870  case NVPTXISD::Suld2DArrayV2I64Clamp:
3871    Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3872    Ops.push_back(TexHandle);
3873    Ops.push_back(N->getOperand(2));
3874    Ops.push_back(N->getOperand(3));
3875    Ops.push_back(N->getOperand(4));
3876    Ops.push_back(Chain);
3877    break;
3878  case NVPTXISD::Suld2DArrayV4I8Clamp:
3879    Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3880    Ops.push_back(TexHandle);
3881    Ops.push_back(N->getOperand(2));
3882    Ops.push_back(N->getOperand(3));
3883    Ops.push_back(N->getOperand(4));
3884    Ops.push_back(Chain);
3885    break;
3886  case NVPTXISD::Suld2DArrayV4I16Clamp:
3887    Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3888    Ops.push_back(TexHandle);
3889    Ops.push_back(N->getOperand(2));
3890    Ops.push_back(N->getOperand(3));
3891    Ops.push_back(N->getOperand(4));
3892    Ops.push_back(Chain);
3893    break;
3894  case NVPTXISD::Suld2DArrayV4I32Clamp:
3895    Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3896    Ops.push_back(TexHandle);
3897    Ops.push_back(N->getOperand(2));
3898    Ops.push_back(N->getOperand(3));
3899    Ops.push_back(N->getOperand(4));
3900    Ops.push_back(Chain);
3901    break;
3902  case NVPTXISD::Suld3DI8Clamp:
3903    Opc = NVPTX::SULD_3D_I8_CLAMP;
3904    Ops.push_back(TexHandle);
3905    Ops.push_back(N->getOperand(2));
3906    Ops.push_back(N->getOperand(3));
3907    Ops.push_back(N->getOperand(4));
3908    Ops.push_back(Chain);
3909    break;
3910  case NVPTXISD::Suld3DI16Clamp:
3911    Opc = NVPTX::SULD_3D_I16_CLAMP;
3912    Ops.push_back(TexHandle);
3913    Ops.push_back(N->getOperand(2));
3914    Ops.push_back(N->getOperand(3));
3915    Ops.push_back(N->getOperand(4));
3916    Ops.push_back(Chain);
3917    break;
3918  case NVPTXISD::Suld3DI32Clamp:
3919    Opc = NVPTX::SULD_3D_I32_CLAMP;
3920    Ops.push_back(TexHandle);
3921    Ops.push_back(N->getOperand(2));
3922    Ops.push_back(N->getOperand(3));
3923    Ops.push_back(N->getOperand(4));
3924    Ops.push_back(Chain);
3925    break;
3926  case NVPTXISD::Suld3DI64Clamp:
3927    Opc = NVPTX::SULD_3D_I64_CLAMP;
3928    Ops.push_back(TexHandle);
3929    Ops.push_back(N->getOperand(2));
3930    Ops.push_back(N->getOperand(3));
3931    Ops.push_back(N->getOperand(4));
3932    Ops.push_back(Chain);
3933    break;
3934  case NVPTXISD::Suld3DV2I8Clamp:
3935    Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3936    Ops.push_back(TexHandle);
3937    Ops.push_back(N->getOperand(2));
3938    Ops.push_back(N->getOperand(3));
3939    Ops.push_back(N->getOperand(4));
3940    Ops.push_back(Chain);
3941    break;
3942  case NVPTXISD::Suld3DV2I16Clamp:
3943    Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3944    Ops.push_back(TexHandle);
3945    Ops.push_back(N->getOperand(2));
3946    Ops.push_back(N->getOperand(3));
3947    Ops.push_back(N->getOperand(4));
3948    Ops.push_back(Chain);
3949    break;
3950  case NVPTXISD::Suld3DV2I32Clamp:
3951    Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3952    Ops.push_back(TexHandle);
3953    Ops.push_back(N->getOperand(2));
3954    Ops.push_back(N->getOperand(3));
3955    Ops.push_back(N->getOperand(4));
3956    Ops.push_back(Chain);
3957    break;
3958  case NVPTXISD::Suld3DV2I64Clamp:
3959    Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3960    Ops.push_back(TexHandle);
3961    Ops.push_back(N->getOperand(2));
3962    Ops.push_back(N->getOperand(3));
3963    Ops.push_back(N->getOperand(4));
3964    Ops.push_back(Chain);
3965    break;
3966  case NVPTXISD::Suld3DV4I8Clamp:
3967    Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3968    Ops.push_back(TexHandle);
3969    Ops.push_back(N->getOperand(2));
3970    Ops.push_back(N->getOperand(3));
3971    Ops.push_back(N->getOperand(4));
3972    Ops.push_back(Chain);
3973    break;
3974  case NVPTXISD::Suld3DV4I16Clamp:
3975    Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3976    Ops.push_back(TexHandle);
3977    Ops.push_back(N->getOperand(2));
3978    Ops.push_back(N->getOperand(3));
3979    Ops.push_back(N->getOperand(4));
3980    Ops.push_back(Chain);
3981    break;
3982  case NVPTXISD::Suld3DV4I32Clamp:
3983    Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3984    Ops.push_back(TexHandle);
3985    Ops.push_back(N->getOperand(2));
3986    Ops.push_back(N->getOperand(3));
3987    Ops.push_back(N->getOperand(4));
3988    Ops.push_back(Chain);
3989    break;
3990  case NVPTXISD::Suld1DI8Trap:
3991    Opc = NVPTX::SULD_1D_I8_TRAP;
3992    Ops.push_back(TexHandle);
3993    Ops.push_back(N->getOperand(2));
3994    Ops.push_back(Chain);
3995    break;
3996  case NVPTXISD::Suld1DI16Trap:
3997    Opc = NVPTX::SULD_1D_I16_TRAP;
3998    Ops.push_back(TexHandle);
3999    Ops.push_back(N->getOperand(2));
4000    Ops.push_back(Chain);
4001    break;
4002  case NVPTXISD::Suld1DI32Trap:
4003    Opc = NVPTX::SULD_1D_I32_TRAP;
4004    Ops.push_back(TexHandle);
4005    Ops.push_back(N->getOperand(2));
4006    Ops.push_back(Chain);
4007    break;
4008  case NVPTXISD::Suld1DI64Trap:
4009    Opc = NVPTX::SULD_1D_I64_TRAP;
4010    Ops.push_back(TexHandle);
4011    Ops.push_back(N->getOperand(2));
4012    Ops.push_back(Chain);
4013    break;
4014  case NVPTXISD::Suld1DV2I8Trap:
4015    Opc = NVPTX::SULD_1D_V2I8_TRAP;
4016    Ops.push_back(TexHandle);
4017    Ops.push_back(N->getOperand(2));
4018    Ops.push_back(Chain);
4019    break;
4020  case NVPTXISD::Suld1DV2I16Trap:
4021    Opc = NVPTX::SULD_1D_V2I16_TRAP;
4022    Ops.push_back(TexHandle);
4023    Ops.push_back(N->getOperand(2));
4024    Ops.push_back(Chain);
4025    break;
4026  case NVPTXISD::Suld1DV2I32Trap:
4027    Opc = NVPTX::SULD_1D_V2I32_TRAP;
4028    Ops.push_back(TexHandle);
4029    Ops.push_back(N->getOperand(2));
4030    Ops.push_back(Chain);
4031    break;
4032  case NVPTXISD::Suld1DV2I64Trap:
4033    Opc = NVPTX::SULD_1D_V2I64_TRAP;
4034    Ops.push_back(TexHandle);
4035    Ops.push_back(N->getOperand(2));
4036    Ops.push_back(Chain);
4037    break;
4038  case NVPTXISD::Suld1DV4I8Trap:
4039    Opc = NVPTX::SULD_1D_V4I8_TRAP;
4040    Ops.push_back(TexHandle);
4041    Ops.push_back(N->getOperand(2));
4042    Ops.push_back(Chain);
4043    break;
4044  case NVPTXISD::Suld1DV4I16Trap:
4045    Opc = NVPTX::SULD_1D_V4I16_TRAP;
4046    Ops.push_back(TexHandle);
4047    Ops.push_back(N->getOperand(2));
4048    Ops.push_back(Chain);
4049    break;
4050  case NVPTXISD::Suld1DV4I32Trap:
4051    Opc = NVPTX::SULD_1D_V4I32_TRAP;
4052    Ops.push_back(TexHandle);
4053    Ops.push_back(N->getOperand(2));
4054    Ops.push_back(Chain);
4055    break;
4056  case NVPTXISD::Suld1DArrayI8Trap:
4057    Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4058    Ops.push_back(TexHandle);
4059    Ops.push_back(N->getOperand(2));
4060    Ops.push_back(N->getOperand(3));
4061    Ops.push_back(Chain);
4062    break;
4063  case NVPTXISD::Suld1DArrayI16Trap:
4064    Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4065    Ops.push_back(TexHandle);
4066    Ops.push_back(N->getOperand(2));
4067    Ops.push_back(N->getOperand(3));
4068    Ops.push_back(Chain);
4069    break;
4070  case NVPTXISD::Suld1DArrayI32Trap:
4071    Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4072    Ops.push_back(TexHandle);
4073    Ops.push_back(N->getOperand(2));
4074    Ops.push_back(N->getOperand(3));
4075    Ops.push_back(Chain);
4076    break;
4077  case NVPTXISD::Suld1DArrayI64Trap:
4078    Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4079    Ops.push_back(TexHandle);
4080    Ops.push_back(N->getOperand(2));
4081    Ops.push_back(N->getOperand(3));
4082    Ops.push_back(Chain);
4083    break;
4084  case NVPTXISD::Suld1DArrayV2I8Trap:
4085    Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4086    Ops.push_back(TexHandle);
4087    Ops.push_back(N->getOperand(2));
4088    Ops.push_back(N->getOperand(3));
4089    Ops.push_back(Chain);
4090    break;
4091  case NVPTXISD::Suld1DArrayV2I16Trap:
4092    Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4093    Ops.push_back(TexHandle);
4094    Ops.push_back(N->getOperand(2));
4095    Ops.push_back(N->getOperand(3));
4096    Ops.push_back(Chain);
4097    break;
4098  case NVPTXISD::Suld1DArrayV2I32Trap:
4099    Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4100    Ops.push_back(TexHandle);
4101    Ops.push_back(N->getOperand(2));
4102    Ops.push_back(N->getOperand(3));
4103    Ops.push_back(Chain);
4104    break;
4105  case NVPTXISD::Suld1DArrayV2I64Trap:
4106    Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4107    Ops.push_back(TexHandle);
4108    Ops.push_back(N->getOperand(2));
4109    Ops.push_back(N->getOperand(3));
4110    Ops.push_back(Chain);
4111    break;
4112  case NVPTXISD::Suld1DArrayV4I8Trap:
4113    Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4114    Ops.push_back(TexHandle);
4115    Ops.push_back(N->getOperand(2));
4116    Ops.push_back(N->getOperand(3));
4117    Ops.push_back(Chain);
4118    break;
4119  case NVPTXISD::Suld1DArrayV4I16Trap:
4120    Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4121    Ops.push_back(TexHandle);
4122    Ops.push_back(N->getOperand(2));
4123    Ops.push_back(N->getOperand(3));
4124    Ops.push_back(Chain);
4125    break;
4126  case NVPTXISD::Suld1DArrayV4I32Trap:
4127    Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4128    Ops.push_back(TexHandle);
4129    Ops.push_back(N->getOperand(2));
4130    Ops.push_back(N->getOperand(3));
4131    Ops.push_back(Chain);
4132    break;
4133  case NVPTXISD::Suld2DI8Trap:
4134    Opc = NVPTX::SULD_2D_I8_TRAP;
4135    Ops.push_back(TexHandle);
4136    Ops.push_back(N->getOperand(2));
4137    Ops.push_back(N->getOperand(3));
4138    Ops.push_back(Chain);
4139    break;
4140  case NVPTXISD::Suld2DI16Trap:
4141    Opc = NVPTX::SULD_2D_I16_TRAP;
4142    Ops.push_back(TexHandle);
4143    Ops.push_back(N->getOperand(2));
4144    Ops.push_back(N->getOperand(3));
4145    Ops.push_back(Chain);
4146    break;
4147  case NVPTXISD::Suld2DI32Trap:
4148    Opc = NVPTX::SULD_2D_I32_TRAP;
4149    Ops.push_back(TexHandle);
4150    Ops.push_back(N->getOperand(2));
4151    Ops.push_back(N->getOperand(3));
4152    Ops.push_back(Chain);
4153    break;
4154  case NVPTXISD::Suld2DI64Trap:
4155    Opc = NVPTX::SULD_2D_I64_TRAP;
4156    Ops.push_back(TexHandle);
4157    Ops.push_back(N->getOperand(2));
4158    Ops.push_back(N->getOperand(3));
4159    Ops.push_back(Chain);
4160    break;
4161  case NVPTXISD::Suld2DV2I8Trap:
4162    Opc = NVPTX::SULD_2D_V2I8_TRAP;
4163    Ops.push_back(TexHandle);
4164    Ops.push_back(N->getOperand(2));
4165    Ops.push_back(N->getOperand(3));
4166    Ops.push_back(Chain);
4167    break;
4168  case NVPTXISD::Suld2DV2I16Trap:
4169    Opc = NVPTX::SULD_2D_V2I16_TRAP;
4170    Ops.push_back(TexHandle);
4171    Ops.push_back(N->getOperand(2));
4172    Ops.push_back(N->getOperand(3));
4173    Ops.push_back(Chain);
4174    break;
4175  case NVPTXISD::Suld2DV2I32Trap:
4176    Opc = NVPTX::SULD_2D_V2I32_TRAP;
4177    Ops.push_back(TexHandle);
4178    Ops.push_back(N->getOperand(2));
4179    Ops.push_back(N->getOperand(3));
4180    Ops.push_back(Chain);
4181    break;
4182  case NVPTXISD::Suld2DV2I64Trap:
4183    Opc = NVPTX::SULD_2D_V2I64_TRAP;
4184    Ops.push_back(TexHandle);
4185    Ops.push_back(N->getOperand(2));
4186    Ops.push_back(N->getOperand(3));
4187    Ops.push_back(Chain);
4188    break;
4189  case NVPTXISD::Suld2DV4I8Trap:
4190    Opc = NVPTX::SULD_2D_V4I8_TRAP;
4191    Ops.push_back(TexHandle);
4192    Ops.push_back(N->getOperand(2));
4193    Ops.push_back(N->getOperand(3));
4194    Ops.push_back(Chain);
4195    break;
4196  case NVPTXISD::Suld2DV4I16Trap:
4197    Opc = NVPTX::SULD_2D_V4I16_TRAP;
4198    Ops.push_back(TexHandle);
4199    Ops.push_back(N->getOperand(2));
4200    Ops.push_back(N->getOperand(3));
4201    Ops.push_back(Chain);
4202    break;
4203  case NVPTXISD::Suld2DV4I32Trap:
4204    Opc = NVPTX::SULD_2D_V4I32_TRAP;
4205    Ops.push_back(TexHandle);
4206    Ops.push_back(N->getOperand(2));
4207    Ops.push_back(N->getOperand(3));
4208    Ops.push_back(Chain);
4209    break;
4210  case NVPTXISD::Suld2DArrayI8Trap:
4211    Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4212    Ops.push_back(TexHandle);
4213    Ops.push_back(N->getOperand(2));
4214    Ops.push_back(N->getOperand(3));
4215    Ops.push_back(N->getOperand(4));
4216    Ops.push_back(Chain);
4217    break;
4218  case NVPTXISD::Suld2DArrayI16Trap:
4219    Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4220    Ops.push_back(TexHandle);
4221    Ops.push_back(N->getOperand(2));
4222    Ops.push_back(N->getOperand(3));
4223    Ops.push_back(N->getOperand(4));
4224    Ops.push_back(Chain);
4225    break;
4226  case NVPTXISD::Suld2DArrayI32Trap:
4227    Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4228    Ops.push_back(TexHandle);
4229    Ops.push_back(N->getOperand(2));
4230    Ops.push_back(N->getOperand(3));
4231    Ops.push_back(N->getOperand(4));
4232    Ops.push_back(Chain);
4233    break;
4234  case NVPTXISD::Suld2DArrayI64Trap:
4235    Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4236    Ops.push_back(TexHandle);
4237    Ops.push_back(N->getOperand(2));
4238    Ops.push_back(N->getOperand(3));
4239    Ops.push_back(N->getOperand(4));
4240    Ops.push_back(Chain);
4241    break;
4242  case NVPTXISD::Suld2DArrayV2I8Trap:
4243    Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4244    Ops.push_back(TexHandle);
4245    Ops.push_back(N->getOperand(2));
4246    Ops.push_back(N->getOperand(3));
4247    Ops.push_back(N->getOperand(4));
4248    Ops.push_back(Chain);
4249    break;
4250  case NVPTXISD::Suld2DArrayV2I16Trap:
4251    Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4252    Ops.push_back(TexHandle);
4253    Ops.push_back(N->getOperand(2));
4254    Ops.push_back(N->getOperand(3));
4255    Ops.push_back(N->getOperand(4));
4256    Ops.push_back(Chain);
4257    break;
4258  case NVPTXISD::Suld2DArrayV2I32Trap:
4259    Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4260    Ops.push_back(TexHandle);
4261    Ops.push_back(N->getOperand(2));
4262    Ops.push_back(N->getOperand(3));
4263    Ops.push_back(N->getOperand(4));
4264    Ops.push_back(Chain);
4265    break;
4266  case NVPTXISD::Suld2DArrayV2I64Trap:
4267    Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4268    Ops.push_back(TexHandle);
4269    Ops.push_back(N->getOperand(2));
4270    Ops.push_back(N->getOperand(3));
4271    Ops.push_back(N->getOperand(4));
4272    Ops.push_back(Chain);
4273    break;
4274  case NVPTXISD::Suld2DArrayV4I8Trap:
4275    Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4276    Ops.push_back(TexHandle);
4277    Ops.push_back(N->getOperand(2));
4278    Ops.push_back(N->getOperand(3));
4279    Ops.push_back(N->getOperand(4));
4280    Ops.push_back(Chain);
4281    break;
4282  case NVPTXISD::Suld2DArrayV4I16Trap:
4283    Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4284    Ops.push_back(TexHandle);
4285    Ops.push_back(N->getOperand(2));
4286    Ops.push_back(N->getOperand(3));
4287    Ops.push_back(N->getOperand(4));
4288    Ops.push_back(Chain);
4289    break;
4290  case NVPTXISD::Suld2DArrayV4I32Trap:
4291    Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4292    Ops.push_back(TexHandle);
4293    Ops.push_back(N->getOperand(2));
4294    Ops.push_back(N->getOperand(3));
4295    Ops.push_back(N->getOperand(4));
4296    Ops.push_back(Chain);
4297    break;
4298  case NVPTXISD::Suld3DI8Trap:
4299    Opc = NVPTX::SULD_3D_I8_TRAP;
4300    Ops.push_back(TexHandle);
4301    Ops.push_back(N->getOperand(2));
4302    Ops.push_back(N->getOperand(3));
4303    Ops.push_back(N->getOperand(4));
4304    Ops.push_back(Chain);
4305    break;
4306  case NVPTXISD::Suld3DI16Trap:
4307    Opc = NVPTX::SULD_3D_I16_TRAP;
4308    Ops.push_back(TexHandle);
4309    Ops.push_back(N->getOperand(2));
4310    Ops.push_back(N->getOperand(3));
4311    Ops.push_back(N->getOperand(4));
4312    Ops.push_back(Chain);
4313    break;
4314  case NVPTXISD::Suld3DI32Trap:
4315    Opc = NVPTX::SULD_3D_I32_TRAP;
4316    Ops.push_back(TexHandle);
4317    Ops.push_back(N->getOperand(2));
4318    Ops.push_back(N->getOperand(3));
4319    Ops.push_back(N->getOperand(4));
4320    Ops.push_back(Chain);
4321    break;
4322  case NVPTXISD::Suld3DI64Trap:
4323    Opc = NVPTX::SULD_3D_I64_TRAP;
4324    Ops.push_back(TexHandle);
4325    Ops.push_back(N->getOperand(2));
4326    Ops.push_back(N->getOperand(3));
4327    Ops.push_back(N->getOperand(4));
4328    Ops.push_back(Chain);
4329    break;
4330  case NVPTXISD::Suld3DV2I8Trap:
4331    Opc = NVPTX::SULD_3D_V2I8_TRAP;
4332    Ops.push_back(TexHandle);
4333    Ops.push_back(N->getOperand(2));
4334    Ops.push_back(N->getOperand(3));
4335    Ops.push_back(N->getOperand(4));
4336    Ops.push_back(Chain);
4337    break;
4338  case NVPTXISD::Suld3DV2I16Trap:
4339    Opc = NVPTX::SULD_3D_V2I16_TRAP;
4340    Ops.push_back(TexHandle);
4341    Ops.push_back(N->getOperand(2));
4342    Ops.push_back(N->getOperand(3));
4343    Ops.push_back(N->getOperand(4));
4344    Ops.push_back(Chain);
4345    break;
4346  case NVPTXISD::Suld3DV2I32Trap:
4347    Opc = NVPTX::SULD_3D_V2I32_TRAP;
4348    Ops.push_back(TexHandle);
4349    Ops.push_back(N->getOperand(2));
4350    Ops.push_back(N->getOperand(3));
4351    Ops.push_back(N->getOperand(4));
4352    Ops.push_back(Chain);
4353    break;
4354  case NVPTXISD::Suld3DV2I64Trap:
4355    Opc = NVPTX::SULD_3D_V2I64_TRAP;
4356    Ops.push_back(TexHandle);
4357    Ops.push_back(N->getOperand(2));
4358    Ops.push_back(N->getOperand(3));
4359    Ops.push_back(N->getOperand(4));
4360    Ops.push_back(Chain);
4361    break;
4362  case NVPTXISD::Suld3DV4I8Trap:
4363    Opc = NVPTX::SULD_3D_V4I8_TRAP;
4364    Ops.push_back(TexHandle);
4365    Ops.push_back(N->getOperand(2));
4366    Ops.push_back(N->getOperand(3));
4367    Ops.push_back(N->getOperand(4));
4368    Ops.push_back(Chain);
4369    break;
4370  case NVPTXISD::Suld3DV4I16Trap:
4371    Opc = NVPTX::SULD_3D_V4I16_TRAP;
4372    Ops.push_back(TexHandle);
4373    Ops.push_back(N->getOperand(2));
4374    Ops.push_back(N->getOperand(3));
4375    Ops.push_back(N->getOperand(4));
4376    Ops.push_back(Chain);
4377    break;
4378  case NVPTXISD::Suld3DV4I32Trap:
4379    Opc = NVPTX::SULD_3D_V4I32_TRAP;
4380    Ops.push_back(TexHandle);
4381    Ops.push_back(N->getOperand(2));
4382    Ops.push_back(N->getOperand(3));
4383    Ops.push_back(N->getOperand(4));
4384    Ops.push_back(Chain);
4385    break;
4386  case NVPTXISD::Suld1DI8Zero:
4387    Opc = NVPTX::SULD_1D_I8_ZERO;
4388    Ops.push_back(TexHandle);
4389    Ops.push_back(N->getOperand(2));
4390    Ops.push_back(Chain);
4391    break;
4392  case NVPTXISD::Suld1DI16Zero:
4393    Opc = NVPTX::SULD_1D_I16_ZERO;
4394    Ops.push_back(TexHandle);
4395    Ops.push_back(N->getOperand(2));
4396    Ops.push_back(Chain);
4397    break;
4398  case NVPTXISD::Suld1DI32Zero:
4399    Opc = NVPTX::SULD_1D_I32_ZERO;
4400    Ops.push_back(TexHandle);
4401    Ops.push_back(N->getOperand(2));
4402    Ops.push_back(Chain);
4403    break;
4404  case NVPTXISD::Suld1DI64Zero:
4405    Opc = NVPTX::SULD_1D_I64_ZERO;
4406    Ops.push_back(TexHandle);
4407    Ops.push_back(N->getOperand(2));
4408    Ops.push_back(Chain);
4409    break;
4410  case NVPTXISD::Suld1DV2I8Zero:
4411    Opc = NVPTX::SULD_1D_V2I8_ZERO;
4412    Ops.push_back(TexHandle);
4413    Ops.push_back(N->getOperand(2));
4414    Ops.push_back(Chain);
4415    break;
4416  case NVPTXISD::Suld1DV2I16Zero:
4417    Opc = NVPTX::SULD_1D_V2I16_ZERO;
4418    Ops.push_back(TexHandle);
4419    Ops.push_back(N->getOperand(2));
4420    Ops.push_back(Chain);
4421    break;
4422  case NVPTXISD::Suld1DV2I32Zero:
4423    Opc = NVPTX::SULD_1D_V2I32_ZERO;
4424    Ops.push_back(TexHandle);
4425    Ops.push_back(N->getOperand(2));
4426    Ops.push_back(Chain);
4427    break;
4428  case NVPTXISD::Suld1DV2I64Zero:
4429    Opc = NVPTX::SULD_1D_V2I64_ZERO;
4430    Ops.push_back(TexHandle);
4431    Ops.push_back(N->getOperand(2));
4432    Ops.push_back(Chain);
4433    break;
4434  case NVPTXISD::Suld1DV4I8Zero:
4435    Opc = NVPTX::SULD_1D_V4I8_ZERO;
4436    Ops.push_back(TexHandle);
4437    Ops.push_back(N->getOperand(2));
4438    Ops.push_back(Chain);
4439    break;
4440  case NVPTXISD::Suld1DV4I16Zero:
4441    Opc = NVPTX::SULD_1D_V4I16_ZERO;
4442    Ops.push_back(TexHandle);
4443    Ops.push_back(N->getOperand(2));
4444    Ops.push_back(Chain);
4445    break;
4446  case NVPTXISD::Suld1DV4I32Zero:
4447    Opc = NVPTX::SULD_1D_V4I32_ZERO;
4448    Ops.push_back(TexHandle);
4449    Ops.push_back(N->getOperand(2));
4450    Ops.push_back(Chain);
4451    break;
4452  case NVPTXISD::Suld1DArrayI8Zero:
4453    Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4454    Ops.push_back(TexHandle);
4455    Ops.push_back(N->getOperand(2));
4456    Ops.push_back(N->getOperand(3));
4457    Ops.push_back(Chain);
4458    break;
4459  case NVPTXISD::Suld1DArrayI16Zero:
4460    Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4461    Ops.push_back(TexHandle);
4462    Ops.push_back(N->getOperand(2));
4463    Ops.push_back(N->getOperand(3));
4464    Ops.push_back(Chain);
4465    break;
4466  case NVPTXISD::Suld1DArrayI32Zero:
4467    Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4468    Ops.push_back(TexHandle);
4469    Ops.push_back(N->getOperand(2));
4470    Ops.push_back(N->getOperand(3));
4471    Ops.push_back(Chain);
4472    break;
4473  case NVPTXISD::Suld1DArrayI64Zero:
4474    Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4475    Ops.push_back(TexHandle);
4476    Ops.push_back(N->getOperand(2));
4477    Ops.push_back(N->getOperand(3));
4478    Ops.push_back(Chain);
4479    break;
4480  case NVPTXISD::Suld1DArrayV2I8Zero:
4481    Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4482    Ops.push_back(TexHandle);
4483    Ops.push_back(N->getOperand(2));
4484    Ops.push_back(N->getOperand(3));
4485    Ops.push_back(Chain);
4486    break;
4487  case NVPTXISD::Suld1DArrayV2I16Zero:
4488    Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4489    Ops.push_back(TexHandle);
4490    Ops.push_back(N->getOperand(2));
4491    Ops.push_back(N->getOperand(3));
4492    Ops.push_back(Chain);
4493    break;
4494  case NVPTXISD::Suld1DArrayV2I32Zero:
4495    Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4496    Ops.push_back(TexHandle);
4497    Ops.push_back(N->getOperand(2));
4498    Ops.push_back(N->getOperand(3));
4499    Ops.push_back(Chain);
4500    break;
4501  case NVPTXISD::Suld1DArrayV2I64Zero:
4502    Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4503    Ops.push_back(TexHandle);
4504    Ops.push_back(N->getOperand(2));
4505    Ops.push_back(N->getOperand(3));
4506    Ops.push_back(Chain);
4507    break;
4508  case NVPTXISD::Suld1DArrayV4I8Zero:
4509    Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4510    Ops.push_back(TexHandle);
4511    Ops.push_back(N->getOperand(2));
4512    Ops.push_back(N->getOperand(3));
4513    Ops.push_back(Chain);
4514    break;
4515  case NVPTXISD::Suld1DArrayV4I16Zero:
4516    Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4517    Ops.push_back(TexHandle);
4518    Ops.push_back(N->getOperand(2));
4519    Ops.push_back(N->getOperand(3));
4520    Ops.push_back(Chain);
4521    break;
4522  case NVPTXISD::Suld1DArrayV4I32Zero:
4523    Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4524    Ops.push_back(TexHandle);
4525    Ops.push_back(N->getOperand(2));
4526    Ops.push_back(N->getOperand(3));
4527    Ops.push_back(Chain);
4528    break;
4529  case NVPTXISD::Suld2DI8Zero:
4530    Opc = NVPTX::SULD_2D_I8_ZERO;
4531    Ops.push_back(TexHandle);
4532    Ops.push_back(N->getOperand(2));
4533    Ops.push_back(N->getOperand(3));
4534    Ops.push_back(Chain);
4535    break;
4536  case NVPTXISD::Suld2DI16Zero:
4537    Opc = NVPTX::SULD_2D_I16_ZERO;
4538    Ops.push_back(TexHandle);
4539    Ops.push_back(N->getOperand(2));
4540    Ops.push_back(N->getOperand(3));
4541    Ops.push_back(Chain);
4542    break;
4543  case NVPTXISD::Suld2DI32Zero:
4544    Opc = NVPTX::SULD_2D_I32_ZERO;
4545    Ops.push_back(TexHandle);
4546    Ops.push_back(N->getOperand(2));
4547    Ops.push_back(N->getOperand(3));
4548    Ops.push_back(Chain);
4549    break;
4550  case NVPTXISD::Suld2DI64Zero:
4551    Opc = NVPTX::SULD_2D_I64_ZERO;
4552    Ops.push_back(TexHandle);
4553    Ops.push_back(N->getOperand(2));
4554    Ops.push_back(N->getOperand(3));
4555    Ops.push_back(Chain);
4556    break;
4557  case NVPTXISD::Suld2DV2I8Zero:
4558    Opc = NVPTX::SULD_2D_V2I8_ZERO;
4559    Ops.push_back(TexHandle);
4560    Ops.push_back(N->getOperand(2));
4561    Ops.push_back(N->getOperand(3));
4562    Ops.push_back(Chain);
4563    break;
4564  case NVPTXISD::Suld2DV2I16Zero:
4565    Opc = NVPTX::SULD_2D_V2I16_ZERO;
4566    Ops.push_back(TexHandle);
4567    Ops.push_back(N->getOperand(2));
4568    Ops.push_back(N->getOperand(3));
4569    Ops.push_back(Chain);
4570    break;
4571  case NVPTXISD::Suld2DV2I32Zero:
4572    Opc = NVPTX::SULD_2D_V2I32_ZERO;
4573    Ops.push_back(TexHandle);
4574    Ops.push_back(N->getOperand(2));
4575    Ops.push_back(N->getOperand(3));
4576    Ops.push_back(Chain);
4577    break;
4578  case NVPTXISD::Suld2DV2I64Zero:
4579    Opc = NVPTX::SULD_2D_V2I64_ZERO;
4580    Ops.push_back(TexHandle);
4581    Ops.push_back(N->getOperand(2));
4582    Ops.push_back(N->getOperand(3));
4583    Ops.push_back(Chain);
4584    break;
4585  case NVPTXISD::Suld2DV4I8Zero:
4586    Opc = NVPTX::SULD_2D_V4I8_ZERO;
4587    Ops.push_back(TexHandle);
4588    Ops.push_back(N->getOperand(2));
4589    Ops.push_back(N->getOperand(3));
4590    Ops.push_back(Chain);
4591    break;
4592  case NVPTXISD::Suld2DV4I16Zero:
4593    Opc = NVPTX::SULD_2D_V4I16_ZERO;
4594    Ops.push_back(TexHandle);
4595    Ops.push_back(N->getOperand(2));
4596    Ops.push_back(N->getOperand(3));
4597    Ops.push_back(Chain);
4598    break;
4599  case NVPTXISD::Suld2DV4I32Zero:
4600    Opc = NVPTX::SULD_2D_V4I32_ZERO;
4601    Ops.push_back(TexHandle);
4602    Ops.push_back(N->getOperand(2));
4603    Ops.push_back(N->getOperand(3));
4604    Ops.push_back(Chain);
4605    break;
4606  case NVPTXISD::Suld2DArrayI8Zero:
4607    Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4608    Ops.push_back(TexHandle);
4609    Ops.push_back(N->getOperand(2));
4610    Ops.push_back(N->getOperand(3));
4611    Ops.push_back(N->getOperand(4));
4612    Ops.push_back(Chain);
4613    break;
4614  case NVPTXISD::Suld2DArrayI16Zero:
4615    Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4616    Ops.push_back(TexHandle);
4617    Ops.push_back(N->getOperand(2));
4618    Ops.push_back(N->getOperand(3));
4619    Ops.push_back(N->getOperand(4));
4620    Ops.push_back(Chain);
4621    break;
4622  case NVPTXISD::Suld2DArrayI32Zero:
4623    Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4624    Ops.push_back(TexHandle);
4625    Ops.push_back(N->getOperand(2));
4626    Ops.push_back(N->getOperand(3));
4627    Ops.push_back(N->getOperand(4));
4628    Ops.push_back(Chain);
4629    break;
4630  case NVPTXISD::Suld2DArrayI64Zero:
4631    Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4632    Ops.push_back(TexHandle);
4633    Ops.push_back(N->getOperand(2));
4634    Ops.push_back(N->getOperand(3));
4635    Ops.push_back(N->getOperand(4));
4636    Ops.push_back(Chain);
4637    break;
4638  case NVPTXISD::Suld2DArrayV2I8Zero:
4639    Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4640    Ops.push_back(TexHandle);
4641    Ops.push_back(N->getOperand(2));
4642    Ops.push_back(N->getOperand(3));
4643    Ops.push_back(N->getOperand(4));
4644    Ops.push_back(Chain);
4645    break;
4646  case NVPTXISD::Suld2DArrayV2I16Zero:
4647    Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4648    Ops.push_back(TexHandle);
4649    Ops.push_back(N->getOperand(2));
4650    Ops.push_back(N->getOperand(3));
4651    Ops.push_back(N->getOperand(4));
4652    Ops.push_back(Chain);
4653    break;
4654  case NVPTXISD::Suld2DArrayV2I32Zero:
4655    Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4656    Ops.push_back(TexHandle);
4657    Ops.push_back(N->getOperand(2));
4658    Ops.push_back(N->getOperand(3));
4659    Ops.push_back(N->getOperand(4));
4660    Ops.push_back(Chain);
4661    break;
4662  case NVPTXISD::Suld2DArrayV2I64Zero:
4663    Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4664    Ops.push_back(TexHandle);
4665    Ops.push_back(N->getOperand(2));
4666    Ops.push_back(N->getOperand(3));
4667    Ops.push_back(N->getOperand(4));
4668    Ops.push_back(Chain);
4669    break;
4670  case NVPTXISD::Suld2DArrayV4I8Zero:
4671    Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4672    Ops.push_back(TexHandle);
4673    Ops.push_back(N->getOperand(2));
4674    Ops.push_back(N->getOperand(3));
4675    Ops.push_back(N->getOperand(4));
4676    Ops.push_back(Chain);
4677    break;
4678  case NVPTXISD::Suld2DArrayV4I16Zero:
4679    Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4680    Ops.push_back(TexHandle);
4681    Ops.push_back(N->getOperand(2));
4682    Ops.push_back(N->getOperand(3));
4683    Ops.push_back(N->getOperand(4));
4684    Ops.push_back(Chain);
4685    break;
4686  case NVPTXISD::Suld2DArrayV4I32Zero:
4687    Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4688    Ops.push_back(TexHandle);
4689    Ops.push_back(N->getOperand(2));
4690    Ops.push_back(N->getOperand(3));
4691    Ops.push_back(N->getOperand(4));
4692    Ops.push_back(Chain);
4693    break;
4694  case NVPTXISD::Suld3DI8Zero:
4695    Opc = NVPTX::SULD_3D_I8_ZERO;
4696    Ops.push_back(TexHandle);
4697    Ops.push_back(N->getOperand(2));
4698    Ops.push_back(N->getOperand(3));
4699    Ops.push_back(N->getOperand(4));
4700    Ops.push_back(Chain);
4701    break;
4702  case NVPTXISD::Suld3DI16Zero:
4703    Opc = NVPTX::SULD_3D_I16_ZERO;
4704    Ops.push_back(TexHandle);
4705    Ops.push_back(N->getOperand(2));
4706    Ops.push_back(N->getOperand(3));
4707    Ops.push_back(N->getOperand(4));
4708    Ops.push_back(Chain);
4709    break;
4710  case NVPTXISD::Suld3DI32Zero:
4711    Opc = NVPTX::SULD_3D_I32_ZERO;
4712    Ops.push_back(TexHandle);
4713    Ops.push_back(N->getOperand(2));
4714    Ops.push_back(N->getOperand(3));
4715    Ops.push_back(N->getOperand(4));
4716    Ops.push_back(Chain);
4717    break;
4718  case NVPTXISD::Suld3DI64Zero:
4719    Opc = NVPTX::SULD_3D_I64_ZERO;
4720    Ops.push_back(TexHandle);
4721    Ops.push_back(N->getOperand(2));
4722    Ops.push_back(N->getOperand(3));
4723    Ops.push_back(N->getOperand(4));
4724    Ops.push_back(Chain);
4725    break;
4726  case NVPTXISD::Suld3DV2I8Zero:
4727    Opc = NVPTX::SULD_3D_V2I8_ZERO;
4728    Ops.push_back(TexHandle);
4729    Ops.push_back(N->getOperand(2));
4730    Ops.push_back(N->getOperand(3));
4731    Ops.push_back(N->getOperand(4));
4732    Ops.push_back(Chain);
4733    break;
4734  case NVPTXISD::Suld3DV2I16Zero:
4735    Opc = NVPTX::SULD_3D_V2I16_ZERO;
4736    Ops.push_back(TexHandle);
4737    Ops.push_back(N->getOperand(2));
4738    Ops.push_back(N->getOperand(3));
4739    Ops.push_back(N->getOperand(4));
4740    Ops.push_back(Chain);
4741    break;
4742  case NVPTXISD::Suld3DV2I32Zero:
4743    Opc = NVPTX::SULD_3D_V2I32_ZERO;
4744    Ops.push_back(TexHandle);
4745    Ops.push_back(N->getOperand(2));
4746    Ops.push_back(N->getOperand(3));
4747    Ops.push_back(N->getOperand(4));
4748    Ops.push_back(Chain);
4749    break;
4750  case NVPTXISD::Suld3DV2I64Zero:
4751    Opc = NVPTX::SULD_3D_V2I64_ZERO;
4752    Ops.push_back(TexHandle);
4753    Ops.push_back(N->getOperand(2));
4754    Ops.push_back(N->getOperand(3));
4755    Ops.push_back(N->getOperand(4));
4756    Ops.push_back(Chain);
4757    break;
4758  case NVPTXISD::Suld3DV4I8Zero:
4759    Opc = NVPTX::SULD_3D_V4I8_ZERO;
4760    Ops.push_back(TexHandle);
4761    Ops.push_back(N->getOperand(2));
4762    Ops.push_back(N->getOperand(3));
4763    Ops.push_back(N->getOperand(4));
4764    Ops.push_back(Chain);
4765    break;
4766  case NVPTXISD::Suld3DV4I16Zero:
4767    Opc = NVPTX::SULD_3D_V4I16_ZERO;
4768    Ops.push_back(TexHandle);
4769    Ops.push_back(N->getOperand(2));
4770    Ops.push_back(N->getOperand(3));
4771    Ops.push_back(N->getOperand(4));
4772    Ops.push_back(Chain);
4773    break;
4774  case NVPTXISD::Suld3DV4I32Zero:
4775    Opc = NVPTX::SULD_3D_V4I32_ZERO;
4776    Ops.push_back(TexHandle);
4777    Ops.push_back(N->getOperand(2));
4778    Ops.push_back(N->getOperand(3));
4779    Ops.push_back(N->getOperand(4));
4780    Ops.push_back(Chain);
4781    break;
4782  }
4783  Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4784  return Ret;
4785}
4786
4787
4788/// SelectBFE - Look for instruction sequences that can be made more efficient
4789/// by using the 'bfe' (bit-field extract) PTX instruction
4790SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
4791  SDLoc DL(N);
4792  SDValue LHS = N->getOperand(0);
4793  SDValue RHS = N->getOperand(1);
4794  SDValue Len;
4795  SDValue Start;
4796  SDValue Val;
4797  bool IsSigned = false;
4798
4799  if (N->getOpcode() == ISD::AND) {
4800    // Canonicalize the operands
4801    // We want 'and %val, %mask'
4802    if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4803      std::swap(LHS, RHS);
4804    }
4805
4806    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4807    if (!Mask) {
4808      // We need a constant mask on the RHS of the AND
4809      return NULL;
4810    }
4811
4812    // Extract the mask bits
4813    uint64_t MaskVal = Mask->getZExtValue();
4814    if (!isMask_64(MaskVal)) {
4815      // We *could* handle shifted masks here, but doing so would require an
4816      // 'and' operation to fix up the low-order bits so we would trade
4817      // shr+and for bfe+and, which has the same throughput
4818      return NULL;
4819    }
4820
4821    // How many bits are in our mask?
4822    uint64_t NumBits = countTrailingOnes(MaskVal);
4823    Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4824
4825    if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4826      // We have a 'srl/and' pair, extract the effective start bit and length
4827      Val = LHS.getNode()->getOperand(0);
4828      Start = LHS.getNode()->getOperand(1);
4829      ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4830      if (StartConst) {
4831        uint64_t StartVal = StartConst->getZExtValue();
4832        // How many "good" bits do we have left?  "good" is defined here as bits
4833        // that exist in the original value, not shifted in.
4834        uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4835        if (NumBits > GoodBits) {
4836          // Do not handle the case where bits have been shifted in. In theory
4837          // we could handle this, but the cost is likely higher than just
4838          // emitting the srl/and pair.
4839          return NULL;
4840        }
4841        Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
4842      } else {
4843        // Do not handle the case where the shift amount (can be zero if no srl
4844        // was found) is not constant. We could handle this case, but it would
4845        // require run-time logic that would be more expensive than just
4846        // emitting the srl/and pair.
4847        return NULL;
4848      }
4849    } else {
4850      // Do not handle the case where the LHS of the and is not a shift. While
4851      // it would be trivial to handle this case, it would just transform
4852      // 'and' -> 'bfe', but 'and' has higher-throughput.
4853      return NULL;
4854    }
4855  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4856    if (LHS->getOpcode() == ISD::AND) {
4857      ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4858      if (!ShiftCnst) {
4859        // Shift amount must be constant
4860        return NULL;
4861      }
4862
4863      uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4864
4865      SDValue AndLHS = LHS->getOperand(0);
4866      SDValue AndRHS = LHS->getOperand(1);
4867
4868      // Canonicalize the AND to have the mask on the RHS
4869      if (isa<ConstantSDNode>(AndLHS)) {
4870        std::swap(AndLHS, AndRHS);
4871      }
4872
4873      ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4874      if (!MaskCnst) {
4875        // Mask must be constant
4876        return NULL;
4877      }
4878
4879      uint64_t MaskVal = MaskCnst->getZExtValue();
4880      uint64_t NumZeros;
4881      uint64_t NumBits;
4882      if (isMask_64(MaskVal)) {
4883        NumZeros = 0;
4884        // The number of bits in the result bitfield will be the number of
4885        // trailing ones (the AND) minus the number of bits we shift off
4886        NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4887      } else if (isShiftedMask_64(MaskVal)) {
4888        NumZeros = countTrailingZeros(MaskVal);
4889        unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4890        // The number of bits in the result bitfield will be the number of
4891        // trailing zeros plus the number of set bits in the mask minus the
4892        // number of bits we shift off
4893        NumBits = NumZeros + NumOnes - ShiftAmt;
4894      } else {
4895        // This is not a mask we can handle
4896        return NULL;
4897      }
4898
4899      if (ShiftAmt < NumZeros) {
4900        // Handling this case would require extra logic that would make this
4901        // transformation non-profitable
4902        return NULL;
4903      }
4904
4905      Val = AndLHS;
4906      Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4907      Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4908    } else if (LHS->getOpcode() == ISD::SHL) {
4909      // Here, we have a pattern like:
4910      //
4911      // (sra (shl val, NN), MM)
4912      // or
4913      // (srl (shl val, NN), MM)
4914      //
4915      // If MM >= NN, we can efficiently optimize this with bfe
4916      Val = LHS->getOperand(0);
4917
4918      SDValue ShlRHS = LHS->getOperand(1);
4919      ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4920      if (!ShlCnst) {
4921        // Shift amount must be constant
4922        return NULL;
4923      }
4924      uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4925
4926      SDValue ShrRHS = RHS;
4927      ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4928      if (!ShrCnst) {
4929        // Shift amount must be constant
4930        return NULL;
4931      }
4932      uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4933
4934      // To avoid extra codegen and be profitable, we need Outer >= Inner
4935      if (OuterShiftAmt < InnerShiftAmt) {
4936        return NULL;
4937      }
4938
4939      // If the outer shift is more than the type size, we have no bitfield to
4940      // extract (since we also check that the inner shift is <= the outer shift
4941      // then this also implies that the inner shift is < the type size)
4942      if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
4943        return NULL;
4944      }
4945
4946      Start =
4947        CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
4948      Len =
4949        CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
4950                                  OuterShiftAmt, DL, MVT::i32);
4951
4952      if (N->getOpcode() == ISD::SRA) {
4953        // If we have a arithmetic right shift, we need to use the signed bfe
4954        // variant
4955        IsSigned = true;
4956      }
4957    } else {
4958      // No can do...
4959      return NULL;
4960    }
4961  } else {
4962    // No can do...
4963    return NULL;
4964  }
4965
4966
4967  unsigned Opc;
4968  // For the BFE operations we form here from "and" and "srl", always use the
4969  // unsigned variants.
4970  if (Val.getValueType() == MVT::i32) {
4971    if (IsSigned) {
4972      Opc = NVPTX::BFE_S32rii;
4973    } else {
4974      Opc = NVPTX::BFE_U32rii;
4975    }
4976  } else if (Val.getValueType() == MVT::i64) {
4977    if (IsSigned) {
4978      Opc = NVPTX::BFE_S64rii;
4979    } else {
4980      Opc = NVPTX::BFE_U64rii;
4981    }
4982  } else {
4983    // We cannot handle this type
4984    return NULL;
4985  }
4986
4987  SDValue Ops[] = {
4988    Val, Start, Len
4989  };
4990
4991  return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops);
4992}
4993
4994// SelectDirectAddr - Match a direct address for DAG.
4995// A direct address could be a globaladdress or externalsymbol.
4996bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4997  // Return true if TGA or ES.
4998  if (N.getOpcode() == ISD::TargetGlobalAddress ||
4999      N.getOpcode() == ISD::TargetExternalSymbol) {
5000    Address = N;
5001    return true;
5002  }
5003  if (N.getOpcode() == NVPTXISD::Wrapper) {
5004    Address = N.getOperand(0);
5005    return true;
5006  }
5007  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
5008    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
5009    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
5010      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
5011        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
5012  }
5013  return false;
5014}
5015
5016// symbol+offset
5017bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
5018    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5019  if (Addr.getOpcode() == ISD::ADD) {
5020    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5021      SDValue base = Addr.getOperand(0);
5022      if (SelectDirectAddr(base, Base)) {
5023        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5024                                           mvt);
5025        return true;
5026      }
5027    }
5028  }
5029  return false;
5030}
5031
5032// symbol+offset
5033bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
5034                                     SDValue &Base, SDValue &Offset) {
5035  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
5036}
5037
5038// symbol+offset
5039bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
5040                                       SDValue &Base, SDValue &Offset) {
5041  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
5042}
5043
5044// register+offset
5045bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5046    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5047  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5048    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5049    Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
5050    return true;
5051  }
5052  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5053      Addr.getOpcode() == ISD::TargetGlobalAddress)
5054    return false; // direct calls.
5055
5056  if (Addr.getOpcode() == ISD::ADD) {
5057    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5058      return false;
5059    }
5060    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5061      if (FrameIndexSDNode *FIN =
5062              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5063        // Constant offset from frame ref.
5064        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5065      else
5066        Base = Addr.getOperand(0);
5067      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5068                                         mvt);
5069      return true;
5070    }
5071  }
5072  return false;
5073}
5074
5075// register+offset
5076bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5077                                     SDValue &Base, SDValue &Offset) {
5078  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
5079}
5080
5081// register+offset
5082bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5083                                       SDValue &Base, SDValue &Offset) {
5084  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
5085}
5086
5087bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5088                                                 unsigned int spN) const {
5089  const Value *Src = nullptr;
5090  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
5091    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5092      return true;
5093    Src = mN->getMemOperand()->getValue();
5094  }
5095  if (!Src)
5096    return false;
5097  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
5098    return (PT->getAddressSpace() == spN);
5099  return false;
5100}
5101
5102/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5103/// inline asm expressions.
5104bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5105    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
5106  SDValue Op0, Op1;
5107  switch (ConstraintID) {
5108  default:
5109    return true;
5110  case InlineAsm::Constraint_m: // memory
5111    if (SelectDirectAddr(Op, Op0)) {
5112      OutOps.push_back(Op0);
5113      OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
5114      return false;
5115    }
5116    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5117      OutOps.push_back(Op0);
5118      OutOps.push_back(Op1);
5119      return false;
5120    }
5121    break;
5122  }
5123  return true;
5124}
5125