1//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXISelDAGToDAG.h"
15#include "NVPTXUtilities.h"
16#include "llvm/Analysis/ValueTracking.h"
17#include "llvm/IR/GlobalValue.h"
18#include "llvm/IR/Instructions.h"
19#include "llvm/Support/CommandLine.h"
20#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
22#include "llvm/Support/raw_ostream.h"
23#include "llvm/Target/TargetIntrinsicInfo.h"
24
25using namespace llvm;
26
27#define DEBUG_TYPE "nvptx-isel"
28
29static cl::opt<int> UsePrecDivF32(
30    "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
31    cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
32             " IEEE Compliant F32 div.rnd if available."),
33    cl::init(2));
34
35static cl::opt<bool>
36UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
37          cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
38          cl::init(true));
39
40static cl::opt<bool>
41FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
42           cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
43           cl::init(false));
44
45
46/// createNVPTXISelDag - This pass converts a legalized DAG into a
47/// NVPTX-specific DAG, ready for instruction scheduling.
48FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
49                                       llvm::CodeGenOpt::Level OptLevel) {
50  return new NVPTXDAGToDAGISel(TM, OptLevel);
51}
52
53NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
54                                     CodeGenOpt::Level OptLevel)
55    : SelectionDAGISel(tm, OptLevel), TM(tm) {
56  doMulWide = (OptLevel > 0);
57}
58
59bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
60    Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
61    return SelectionDAGISel::runOnMachineFunction(MF);
62}
63
64int NVPTXDAGToDAGISel::getDivF32Level() const {
65  if (UsePrecDivF32.getNumOccurrences() > 0) {
66    // If nvptx-prec-div32=N is used on the command-line, always honor it
67    return UsePrecDivF32;
68  } else {
69    // Otherwise, use div.approx if fast math is enabled
70    if (TM.Options.UnsafeFPMath)
71      return 0;
72    else
73      return 2;
74  }
75}
76
77bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
78  if (UsePrecSqrtF32.getNumOccurrences() > 0) {
79    // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
80    return UsePrecSqrtF32;
81  } else {
82    // Otherwise, use sqrt.approx if fast math is enabled
83    return !TM.Options.UnsafeFPMath;
84  }
85}
86
87bool NVPTXDAGToDAGISel::useF32FTZ() const {
88  if (FtzEnabled.getNumOccurrences() > 0) {
89    // If nvptx-f32ftz is used on the command-line, always honor it
90    return FtzEnabled;
91  } else {
92    const Function *F = MF->getFunction();
93    // Otherwise, check for an nvptx-f32ftz attribute on the function
94    if (F->hasFnAttribute("nvptx-f32ftz"))
95      return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
96    else
97      return false;
98  }
99}
100
101bool NVPTXDAGToDAGISel::allowFMA() const {
102  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
103  return TL->allowFMA(*MF, OptLevel);
104}
105
106/// Select - Select instructions not customized! Used for
107/// expanded, promoted and normal instructions.
108void NVPTXDAGToDAGISel::Select(SDNode *N) {
109
110  if (N->isMachineOpcode()) {
111    N->setNodeId(-1);
112    return; // Already selected.
113  }
114
115  switch (N->getOpcode()) {
116  case ISD::LOAD:
117    if (tryLoad(N))
118      return;
119    break;
120  case ISD::STORE:
121    if (tryStore(N))
122      return;
123    break;
124  case NVPTXISD::LoadV2:
125  case NVPTXISD::LoadV4:
126    if (tryLoadVector(N))
127      return;
128    break;
129  case NVPTXISD::LDGV2:
130  case NVPTXISD::LDGV4:
131  case NVPTXISD::LDUV2:
132  case NVPTXISD::LDUV4:
133    if (tryLDGLDU(N))
134      return;
135    break;
136  case NVPTXISD::StoreV2:
137  case NVPTXISD::StoreV4:
138    if (tryStoreVector(N))
139      return;
140    break;
141  case NVPTXISD::LoadParam:
142  case NVPTXISD::LoadParamV2:
143  case NVPTXISD::LoadParamV4:
144    if (tryLoadParam(N))
145      return;
146    break;
147  case NVPTXISD::StoreRetval:
148  case NVPTXISD::StoreRetvalV2:
149  case NVPTXISD::StoreRetvalV4:
150    if (tryStoreRetval(N))
151      return;
152    break;
153  case NVPTXISD::StoreParam:
154  case NVPTXISD::StoreParamV2:
155  case NVPTXISD::StoreParamV4:
156  case NVPTXISD::StoreParamS32:
157  case NVPTXISD::StoreParamU32:
158    if (tryStoreParam(N))
159      return;
160    break;
161  case ISD::INTRINSIC_WO_CHAIN:
162    if (tryIntrinsicNoChain(N))
163      return;
164    break;
165  case ISD::INTRINSIC_W_CHAIN:
166    if (tryIntrinsicChain(N))
167      return;
168    break;
169  case NVPTXISD::Tex1DFloatS32:
170  case NVPTXISD::Tex1DFloatFloat:
171  case NVPTXISD::Tex1DFloatFloatLevel:
172  case NVPTXISD::Tex1DFloatFloatGrad:
173  case NVPTXISD::Tex1DS32S32:
174  case NVPTXISD::Tex1DS32Float:
175  case NVPTXISD::Tex1DS32FloatLevel:
176  case NVPTXISD::Tex1DS32FloatGrad:
177  case NVPTXISD::Tex1DU32S32:
178  case NVPTXISD::Tex1DU32Float:
179  case NVPTXISD::Tex1DU32FloatLevel:
180  case NVPTXISD::Tex1DU32FloatGrad:
181  case NVPTXISD::Tex1DArrayFloatS32:
182  case NVPTXISD::Tex1DArrayFloatFloat:
183  case NVPTXISD::Tex1DArrayFloatFloatLevel:
184  case NVPTXISD::Tex1DArrayFloatFloatGrad:
185  case NVPTXISD::Tex1DArrayS32S32:
186  case NVPTXISD::Tex1DArrayS32Float:
187  case NVPTXISD::Tex1DArrayS32FloatLevel:
188  case NVPTXISD::Tex1DArrayS32FloatGrad:
189  case NVPTXISD::Tex1DArrayU32S32:
190  case NVPTXISD::Tex1DArrayU32Float:
191  case NVPTXISD::Tex1DArrayU32FloatLevel:
192  case NVPTXISD::Tex1DArrayU32FloatGrad:
193  case NVPTXISD::Tex2DFloatS32:
194  case NVPTXISD::Tex2DFloatFloat:
195  case NVPTXISD::Tex2DFloatFloatLevel:
196  case NVPTXISD::Tex2DFloatFloatGrad:
197  case NVPTXISD::Tex2DS32S32:
198  case NVPTXISD::Tex2DS32Float:
199  case NVPTXISD::Tex2DS32FloatLevel:
200  case NVPTXISD::Tex2DS32FloatGrad:
201  case NVPTXISD::Tex2DU32S32:
202  case NVPTXISD::Tex2DU32Float:
203  case NVPTXISD::Tex2DU32FloatLevel:
204  case NVPTXISD::Tex2DU32FloatGrad:
205  case NVPTXISD::Tex2DArrayFloatS32:
206  case NVPTXISD::Tex2DArrayFloatFloat:
207  case NVPTXISD::Tex2DArrayFloatFloatLevel:
208  case NVPTXISD::Tex2DArrayFloatFloatGrad:
209  case NVPTXISD::Tex2DArrayS32S32:
210  case NVPTXISD::Tex2DArrayS32Float:
211  case NVPTXISD::Tex2DArrayS32FloatLevel:
212  case NVPTXISD::Tex2DArrayS32FloatGrad:
213  case NVPTXISD::Tex2DArrayU32S32:
214  case NVPTXISD::Tex2DArrayU32Float:
215  case NVPTXISD::Tex2DArrayU32FloatLevel:
216  case NVPTXISD::Tex2DArrayU32FloatGrad:
217  case NVPTXISD::Tex3DFloatS32:
218  case NVPTXISD::Tex3DFloatFloat:
219  case NVPTXISD::Tex3DFloatFloatLevel:
220  case NVPTXISD::Tex3DFloatFloatGrad:
221  case NVPTXISD::Tex3DS32S32:
222  case NVPTXISD::Tex3DS32Float:
223  case NVPTXISD::Tex3DS32FloatLevel:
224  case NVPTXISD::Tex3DS32FloatGrad:
225  case NVPTXISD::Tex3DU32S32:
226  case NVPTXISD::Tex3DU32Float:
227  case NVPTXISD::Tex3DU32FloatLevel:
228  case NVPTXISD::Tex3DU32FloatGrad:
229  case NVPTXISD::TexCubeFloatFloat:
230  case NVPTXISD::TexCubeFloatFloatLevel:
231  case NVPTXISD::TexCubeS32Float:
232  case NVPTXISD::TexCubeS32FloatLevel:
233  case NVPTXISD::TexCubeU32Float:
234  case NVPTXISD::TexCubeU32FloatLevel:
235  case NVPTXISD::TexCubeArrayFloatFloat:
236  case NVPTXISD::TexCubeArrayFloatFloatLevel:
237  case NVPTXISD::TexCubeArrayS32Float:
238  case NVPTXISD::TexCubeArrayS32FloatLevel:
239  case NVPTXISD::TexCubeArrayU32Float:
240  case NVPTXISD::TexCubeArrayU32FloatLevel:
241  case NVPTXISD::Tld4R2DFloatFloat:
242  case NVPTXISD::Tld4G2DFloatFloat:
243  case NVPTXISD::Tld4B2DFloatFloat:
244  case NVPTXISD::Tld4A2DFloatFloat:
245  case NVPTXISD::Tld4R2DS64Float:
246  case NVPTXISD::Tld4G2DS64Float:
247  case NVPTXISD::Tld4B2DS64Float:
248  case NVPTXISD::Tld4A2DS64Float:
249  case NVPTXISD::Tld4R2DU64Float:
250  case NVPTXISD::Tld4G2DU64Float:
251  case NVPTXISD::Tld4B2DU64Float:
252  case NVPTXISD::Tld4A2DU64Float:
253  case NVPTXISD::TexUnified1DFloatS32:
254  case NVPTXISD::TexUnified1DFloatFloat:
255  case NVPTXISD::TexUnified1DFloatFloatLevel:
256  case NVPTXISD::TexUnified1DFloatFloatGrad:
257  case NVPTXISD::TexUnified1DS32S32:
258  case NVPTXISD::TexUnified1DS32Float:
259  case NVPTXISD::TexUnified1DS32FloatLevel:
260  case NVPTXISD::TexUnified1DS32FloatGrad:
261  case NVPTXISD::TexUnified1DU32S32:
262  case NVPTXISD::TexUnified1DU32Float:
263  case NVPTXISD::TexUnified1DU32FloatLevel:
264  case NVPTXISD::TexUnified1DU32FloatGrad:
265  case NVPTXISD::TexUnified1DArrayFloatS32:
266  case NVPTXISD::TexUnified1DArrayFloatFloat:
267  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
268  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
269  case NVPTXISD::TexUnified1DArrayS32S32:
270  case NVPTXISD::TexUnified1DArrayS32Float:
271  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
272  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
273  case NVPTXISD::TexUnified1DArrayU32S32:
274  case NVPTXISD::TexUnified1DArrayU32Float:
275  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
276  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
277  case NVPTXISD::TexUnified2DFloatS32:
278  case NVPTXISD::TexUnified2DFloatFloat:
279  case NVPTXISD::TexUnified2DFloatFloatLevel:
280  case NVPTXISD::TexUnified2DFloatFloatGrad:
281  case NVPTXISD::TexUnified2DS32S32:
282  case NVPTXISD::TexUnified2DS32Float:
283  case NVPTXISD::TexUnified2DS32FloatLevel:
284  case NVPTXISD::TexUnified2DS32FloatGrad:
285  case NVPTXISD::TexUnified2DU32S32:
286  case NVPTXISD::TexUnified2DU32Float:
287  case NVPTXISD::TexUnified2DU32FloatLevel:
288  case NVPTXISD::TexUnified2DU32FloatGrad:
289  case NVPTXISD::TexUnified2DArrayFloatS32:
290  case NVPTXISD::TexUnified2DArrayFloatFloat:
291  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
292  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
293  case NVPTXISD::TexUnified2DArrayS32S32:
294  case NVPTXISD::TexUnified2DArrayS32Float:
295  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
296  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
297  case NVPTXISD::TexUnified2DArrayU32S32:
298  case NVPTXISD::TexUnified2DArrayU32Float:
299  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
300  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
301  case NVPTXISD::TexUnified3DFloatS32:
302  case NVPTXISD::TexUnified3DFloatFloat:
303  case NVPTXISD::TexUnified3DFloatFloatLevel:
304  case NVPTXISD::TexUnified3DFloatFloatGrad:
305  case NVPTXISD::TexUnified3DS32S32:
306  case NVPTXISD::TexUnified3DS32Float:
307  case NVPTXISD::TexUnified3DS32FloatLevel:
308  case NVPTXISD::TexUnified3DS32FloatGrad:
309  case NVPTXISD::TexUnified3DU32S32:
310  case NVPTXISD::TexUnified3DU32Float:
311  case NVPTXISD::TexUnified3DU32FloatLevel:
312  case NVPTXISD::TexUnified3DU32FloatGrad:
313  case NVPTXISD::TexUnifiedCubeFloatFloat:
314  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
315  case NVPTXISD::TexUnifiedCubeS32Float:
316  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
317  case NVPTXISD::TexUnifiedCubeU32Float:
318  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
319  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
320  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
321  case NVPTXISD::TexUnifiedCubeArrayS32Float:
322  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
323  case NVPTXISD::TexUnifiedCubeArrayU32Float:
324  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
325  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
326  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
327  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
328  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
329  case NVPTXISD::Tld4UnifiedR2DS64Float:
330  case NVPTXISD::Tld4UnifiedG2DS64Float:
331  case NVPTXISD::Tld4UnifiedB2DS64Float:
332  case NVPTXISD::Tld4UnifiedA2DS64Float:
333  case NVPTXISD::Tld4UnifiedR2DU64Float:
334  case NVPTXISD::Tld4UnifiedG2DU64Float:
335  case NVPTXISD::Tld4UnifiedB2DU64Float:
336  case NVPTXISD::Tld4UnifiedA2DU64Float:
337    if (tryTextureIntrinsic(N))
338      return;
339    break;
340  case NVPTXISD::Suld1DI8Clamp:
341  case NVPTXISD::Suld1DI16Clamp:
342  case NVPTXISD::Suld1DI32Clamp:
343  case NVPTXISD::Suld1DI64Clamp:
344  case NVPTXISD::Suld1DV2I8Clamp:
345  case NVPTXISD::Suld1DV2I16Clamp:
346  case NVPTXISD::Suld1DV2I32Clamp:
347  case NVPTXISD::Suld1DV2I64Clamp:
348  case NVPTXISD::Suld1DV4I8Clamp:
349  case NVPTXISD::Suld1DV4I16Clamp:
350  case NVPTXISD::Suld1DV4I32Clamp:
351  case NVPTXISD::Suld1DArrayI8Clamp:
352  case NVPTXISD::Suld1DArrayI16Clamp:
353  case NVPTXISD::Suld1DArrayI32Clamp:
354  case NVPTXISD::Suld1DArrayI64Clamp:
355  case NVPTXISD::Suld1DArrayV2I8Clamp:
356  case NVPTXISD::Suld1DArrayV2I16Clamp:
357  case NVPTXISD::Suld1DArrayV2I32Clamp:
358  case NVPTXISD::Suld1DArrayV2I64Clamp:
359  case NVPTXISD::Suld1DArrayV4I8Clamp:
360  case NVPTXISD::Suld1DArrayV4I16Clamp:
361  case NVPTXISD::Suld1DArrayV4I32Clamp:
362  case NVPTXISD::Suld2DI8Clamp:
363  case NVPTXISD::Suld2DI16Clamp:
364  case NVPTXISD::Suld2DI32Clamp:
365  case NVPTXISD::Suld2DI64Clamp:
366  case NVPTXISD::Suld2DV2I8Clamp:
367  case NVPTXISD::Suld2DV2I16Clamp:
368  case NVPTXISD::Suld2DV2I32Clamp:
369  case NVPTXISD::Suld2DV2I64Clamp:
370  case NVPTXISD::Suld2DV4I8Clamp:
371  case NVPTXISD::Suld2DV4I16Clamp:
372  case NVPTXISD::Suld2DV4I32Clamp:
373  case NVPTXISD::Suld2DArrayI8Clamp:
374  case NVPTXISD::Suld2DArrayI16Clamp:
375  case NVPTXISD::Suld2DArrayI32Clamp:
376  case NVPTXISD::Suld2DArrayI64Clamp:
377  case NVPTXISD::Suld2DArrayV2I8Clamp:
378  case NVPTXISD::Suld2DArrayV2I16Clamp:
379  case NVPTXISD::Suld2DArrayV2I32Clamp:
380  case NVPTXISD::Suld2DArrayV2I64Clamp:
381  case NVPTXISD::Suld2DArrayV4I8Clamp:
382  case NVPTXISD::Suld2DArrayV4I16Clamp:
383  case NVPTXISD::Suld2DArrayV4I32Clamp:
384  case NVPTXISD::Suld3DI8Clamp:
385  case NVPTXISD::Suld3DI16Clamp:
386  case NVPTXISD::Suld3DI32Clamp:
387  case NVPTXISD::Suld3DI64Clamp:
388  case NVPTXISD::Suld3DV2I8Clamp:
389  case NVPTXISD::Suld3DV2I16Clamp:
390  case NVPTXISD::Suld3DV2I32Clamp:
391  case NVPTXISD::Suld3DV2I64Clamp:
392  case NVPTXISD::Suld3DV4I8Clamp:
393  case NVPTXISD::Suld3DV4I16Clamp:
394  case NVPTXISD::Suld3DV4I32Clamp:
395  case NVPTXISD::Suld1DI8Trap:
396  case NVPTXISD::Suld1DI16Trap:
397  case NVPTXISD::Suld1DI32Trap:
398  case NVPTXISD::Suld1DI64Trap:
399  case NVPTXISD::Suld1DV2I8Trap:
400  case NVPTXISD::Suld1DV2I16Trap:
401  case NVPTXISD::Suld1DV2I32Trap:
402  case NVPTXISD::Suld1DV2I64Trap:
403  case NVPTXISD::Suld1DV4I8Trap:
404  case NVPTXISD::Suld1DV4I16Trap:
405  case NVPTXISD::Suld1DV4I32Trap:
406  case NVPTXISD::Suld1DArrayI8Trap:
407  case NVPTXISD::Suld1DArrayI16Trap:
408  case NVPTXISD::Suld1DArrayI32Trap:
409  case NVPTXISD::Suld1DArrayI64Trap:
410  case NVPTXISD::Suld1DArrayV2I8Trap:
411  case NVPTXISD::Suld1DArrayV2I16Trap:
412  case NVPTXISD::Suld1DArrayV2I32Trap:
413  case NVPTXISD::Suld1DArrayV2I64Trap:
414  case NVPTXISD::Suld1DArrayV4I8Trap:
415  case NVPTXISD::Suld1DArrayV4I16Trap:
416  case NVPTXISD::Suld1DArrayV4I32Trap:
417  case NVPTXISD::Suld2DI8Trap:
418  case NVPTXISD::Suld2DI16Trap:
419  case NVPTXISD::Suld2DI32Trap:
420  case NVPTXISD::Suld2DI64Trap:
421  case NVPTXISD::Suld2DV2I8Trap:
422  case NVPTXISD::Suld2DV2I16Trap:
423  case NVPTXISD::Suld2DV2I32Trap:
424  case NVPTXISD::Suld2DV2I64Trap:
425  case NVPTXISD::Suld2DV4I8Trap:
426  case NVPTXISD::Suld2DV4I16Trap:
427  case NVPTXISD::Suld2DV4I32Trap:
428  case NVPTXISD::Suld2DArrayI8Trap:
429  case NVPTXISD::Suld2DArrayI16Trap:
430  case NVPTXISD::Suld2DArrayI32Trap:
431  case NVPTXISD::Suld2DArrayI64Trap:
432  case NVPTXISD::Suld2DArrayV2I8Trap:
433  case NVPTXISD::Suld2DArrayV2I16Trap:
434  case NVPTXISD::Suld2DArrayV2I32Trap:
435  case NVPTXISD::Suld2DArrayV2I64Trap:
436  case NVPTXISD::Suld2DArrayV4I8Trap:
437  case NVPTXISD::Suld2DArrayV4I16Trap:
438  case NVPTXISD::Suld2DArrayV4I32Trap:
439  case NVPTXISD::Suld3DI8Trap:
440  case NVPTXISD::Suld3DI16Trap:
441  case NVPTXISD::Suld3DI32Trap:
442  case NVPTXISD::Suld3DI64Trap:
443  case NVPTXISD::Suld3DV2I8Trap:
444  case NVPTXISD::Suld3DV2I16Trap:
445  case NVPTXISD::Suld3DV2I32Trap:
446  case NVPTXISD::Suld3DV2I64Trap:
447  case NVPTXISD::Suld3DV4I8Trap:
448  case NVPTXISD::Suld3DV4I16Trap:
449  case NVPTXISD::Suld3DV4I32Trap:
450  case NVPTXISD::Suld1DI8Zero:
451  case NVPTXISD::Suld1DI16Zero:
452  case NVPTXISD::Suld1DI32Zero:
453  case NVPTXISD::Suld1DI64Zero:
454  case NVPTXISD::Suld1DV2I8Zero:
455  case NVPTXISD::Suld1DV2I16Zero:
456  case NVPTXISD::Suld1DV2I32Zero:
457  case NVPTXISD::Suld1DV2I64Zero:
458  case NVPTXISD::Suld1DV4I8Zero:
459  case NVPTXISD::Suld1DV4I16Zero:
460  case NVPTXISD::Suld1DV4I32Zero:
461  case NVPTXISD::Suld1DArrayI8Zero:
462  case NVPTXISD::Suld1DArrayI16Zero:
463  case NVPTXISD::Suld1DArrayI32Zero:
464  case NVPTXISD::Suld1DArrayI64Zero:
465  case NVPTXISD::Suld1DArrayV2I8Zero:
466  case NVPTXISD::Suld1DArrayV2I16Zero:
467  case NVPTXISD::Suld1DArrayV2I32Zero:
468  case NVPTXISD::Suld1DArrayV2I64Zero:
469  case NVPTXISD::Suld1DArrayV4I8Zero:
470  case NVPTXISD::Suld1DArrayV4I16Zero:
471  case NVPTXISD::Suld1DArrayV4I32Zero:
472  case NVPTXISD::Suld2DI8Zero:
473  case NVPTXISD::Suld2DI16Zero:
474  case NVPTXISD::Suld2DI32Zero:
475  case NVPTXISD::Suld2DI64Zero:
476  case NVPTXISD::Suld2DV2I8Zero:
477  case NVPTXISD::Suld2DV2I16Zero:
478  case NVPTXISD::Suld2DV2I32Zero:
479  case NVPTXISD::Suld2DV2I64Zero:
480  case NVPTXISD::Suld2DV4I8Zero:
481  case NVPTXISD::Suld2DV4I16Zero:
482  case NVPTXISD::Suld2DV4I32Zero:
483  case NVPTXISD::Suld2DArrayI8Zero:
484  case NVPTXISD::Suld2DArrayI16Zero:
485  case NVPTXISD::Suld2DArrayI32Zero:
486  case NVPTXISD::Suld2DArrayI64Zero:
487  case NVPTXISD::Suld2DArrayV2I8Zero:
488  case NVPTXISD::Suld2DArrayV2I16Zero:
489  case NVPTXISD::Suld2DArrayV2I32Zero:
490  case NVPTXISD::Suld2DArrayV2I64Zero:
491  case NVPTXISD::Suld2DArrayV4I8Zero:
492  case NVPTXISD::Suld2DArrayV4I16Zero:
493  case NVPTXISD::Suld2DArrayV4I32Zero:
494  case NVPTXISD::Suld3DI8Zero:
495  case NVPTXISD::Suld3DI16Zero:
496  case NVPTXISD::Suld3DI32Zero:
497  case NVPTXISD::Suld3DI64Zero:
498  case NVPTXISD::Suld3DV2I8Zero:
499  case NVPTXISD::Suld3DV2I16Zero:
500  case NVPTXISD::Suld3DV2I32Zero:
501  case NVPTXISD::Suld3DV2I64Zero:
502  case NVPTXISD::Suld3DV4I8Zero:
503  case NVPTXISD::Suld3DV4I16Zero:
504  case NVPTXISD::Suld3DV4I32Zero:
505    if (trySurfaceIntrinsic(N))
506      return;
507    break;
508  case ISD::AND:
509  case ISD::SRA:
510  case ISD::SRL:
511    // Try to select BFE
512    if (tryBFE(N))
513      return;
514    break;
515  case ISD::ADDRSPACECAST:
516    SelectAddrSpaceCast(N);
517    return;
518  default:
519    break;
520  }
521  SelectCode(N);
522}
523
524bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
525  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
526  switch (IID) {
527  default:
528    return false;
529  case Intrinsic::nvvm_ldg_global_f:
530  case Intrinsic::nvvm_ldg_global_i:
531  case Intrinsic::nvvm_ldg_global_p:
532  case Intrinsic::nvvm_ldu_global_f:
533  case Intrinsic::nvvm_ldu_global_i:
534  case Intrinsic::nvvm_ldu_global_p:
535    return tryLDGLDU(N);
536  }
537}
538
539static unsigned int getCodeAddrSpace(MemSDNode *N) {
540  const Value *Src = N->getMemOperand()->getValue();
541
542  if (!Src)
543    return NVPTX::PTXLdStInstCode::GENERIC;
544
545  if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
546    switch (PT->getAddressSpace()) {
547    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
548    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
549    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
550    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
551    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
552    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
553    default: break;
554    }
555  }
556  return NVPTX::PTXLdStInstCode::GENERIC;
557}
558
559static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
560                          unsigned CodeAddrSpace, MachineFunction *F) {
561  // To use non-coherent caching, the load has to be from global
562  // memory and we have to prove that the memory area is not written
563  // to anywhere for the duration of the kernel call, not even after
564  // the load.
565  //
566  // To ensure that there are no writes to the memory, we require the
567  // underlying pointer to be a noalias (__restrict) kernel parameter
568  // that is never used for a write. We can only do this for kernel
569  // functions since from within a device function, we cannot know if
570  // there were or will be writes to the memory from the caller - or we
571  // could, but then we would have to do inter-procedural analysis.
572  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
573      !isKernelFunction(*F->getFunction())) {
574    return false;
575  }
576
577  // We use GetUnderlyingObjects() here instead of
578  // GetUnderlyingObject() mainly because the former looks through phi
579  // nodes while the latter does not. We need to look through phi
580  // nodes to handle pointer induction variables.
581  SmallVector<Value *, 8> Objs;
582  GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
583                       Objs, F->getDataLayout());
584  for (Value *Obj : Objs) {
585    auto *A = dyn_cast<const Argument>(Obj);
586    if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
587  }
588
589  return true;
590}
591
592bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
593  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
594  switch (IID) {
595  default:
596    return false;
597  case Intrinsic::nvvm_texsurf_handle_internal:
598    SelectTexSurfHandle(N);
599    return true;
600  }
601}
602
603void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
604  // Op 0 is the intrinsic ID
605  SDValue Wrapper = N->getOperand(1);
606  SDValue GlobalVal = Wrapper.getOperand(0);
607  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
608                                        MVT::i64, GlobalVal));
609}
610
611void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
612  SDValue Src = N->getOperand(0);
613  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
614  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
615  unsigned DstAddrSpace = CastN->getDestAddressSpace();
616
617  assert(SrcAddrSpace != DstAddrSpace &&
618         "addrspacecast must be between different address spaces");
619
620  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
621    // Specific to generic
622    unsigned Opc;
623    switch (SrcAddrSpace) {
624    default: report_fatal_error("Bad address space in addrspacecast");
625    case ADDRESS_SPACE_GLOBAL:
626      Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
627      break;
628    case ADDRESS_SPACE_SHARED:
629      Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
630      break;
631    case ADDRESS_SPACE_CONST:
632      Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
633      break;
634    case ADDRESS_SPACE_LOCAL:
635      Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
636      break;
637    }
638    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
639                                          Src));
640    return;
641  } else {
642    // Generic to specific
643    if (SrcAddrSpace != 0)
644      report_fatal_error("Cannot cast between two non-generic address spaces");
645    unsigned Opc;
646    switch (DstAddrSpace) {
647    default: report_fatal_error("Bad address space in addrspacecast");
648    case ADDRESS_SPACE_GLOBAL:
649      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
650                         : NVPTX::cvta_to_global_yes;
651      break;
652    case ADDRESS_SPACE_SHARED:
653      Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
654                         : NVPTX::cvta_to_shared_yes;
655      break;
656    case ADDRESS_SPACE_CONST:
657      Opc =
658          TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
659      break;
660    case ADDRESS_SPACE_LOCAL:
661      Opc =
662          TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
663      break;
664    case ADDRESS_SPACE_PARAM:
665      Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
666                         : NVPTX::nvvm_ptr_gen_to_param;
667      break;
668    }
669    ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
670                                          Src));
671    return;
672  }
673}
674
675bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
676  SDLoc dl(N);
677  LoadSDNode *LD = cast<LoadSDNode>(N);
678  EVT LoadedVT = LD->getMemoryVT();
679  SDNode *NVPTXLD = nullptr;
680
681  // do not support pre/post inc/dec
682  if (LD->isIndexed())
683    return false;
684
685  if (!LoadedVT.isSimple())
686    return false;
687
688  // Address Space Setting
689  unsigned int codeAddrSpace = getCodeAddrSpace(LD);
690
691  if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
692    return tryLDGLDU(N);
693  }
694
695  // Volatile Setting
696  // - .volatile is only availalble for .global and .shared
697  bool isVolatile = LD->isVolatile();
698  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
699      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
700      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
701    isVolatile = false;
702
703  // Vector Setting
704  MVT SimpleVT = LoadedVT.getSimpleVT();
705  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
706  if (SimpleVT.isVector()) {
707    unsigned num = SimpleVT.getVectorNumElements();
708    if (num == 2)
709      vecType = NVPTX::PTXLdStInstCode::V2;
710    else if (num == 4)
711      vecType = NVPTX::PTXLdStInstCode::V4;
712    else
713      return false;
714  }
715
716  // Type Setting: fromType + fromTypeWidth
717  //
718  // Sign   : ISD::SEXTLOAD
719  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
720  //          type is integer
721  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
722  MVT ScalarVT = SimpleVT.getScalarType();
723  // Read at least 8 bits (predicates are stored as 8-bit values)
724  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
725  unsigned int fromType;
726  if ((LD->getExtensionType() == ISD::SEXTLOAD))
727    fromType = NVPTX::PTXLdStInstCode::Signed;
728  else if (ScalarVT.isFloatingPoint())
729    fromType = NVPTX::PTXLdStInstCode::Float;
730  else
731    fromType = NVPTX::PTXLdStInstCode::Unsigned;
732
733  // Create the machine instruction DAG
734  SDValue Chain = N->getOperand(0);
735  SDValue N1 = N->getOperand(1);
736  SDValue Addr;
737  SDValue Offset, Base;
738  unsigned Opcode;
739  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
740
741  if (SelectDirectAddr(N1, Addr)) {
742    switch (TargetVT) {
743    case MVT::i8:
744      Opcode = NVPTX::LD_i8_avar;
745      break;
746    case MVT::i16:
747      Opcode = NVPTX::LD_i16_avar;
748      break;
749    case MVT::i32:
750      Opcode = NVPTX::LD_i32_avar;
751      break;
752    case MVT::i64:
753      Opcode = NVPTX::LD_i64_avar;
754      break;
755    case MVT::f32:
756      Opcode = NVPTX::LD_f32_avar;
757      break;
758    case MVT::f64:
759      Opcode = NVPTX::LD_f64_avar;
760      break;
761    default:
762      return false;
763    }
764    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
765                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
766                      getI32Imm(fromTypeWidth, dl), Addr, Chain };
767    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
768  } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
769                          : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
770    switch (TargetVT) {
771    case MVT::i8:
772      Opcode = NVPTX::LD_i8_asi;
773      break;
774    case MVT::i16:
775      Opcode = NVPTX::LD_i16_asi;
776      break;
777    case MVT::i32:
778      Opcode = NVPTX::LD_i32_asi;
779      break;
780    case MVT::i64:
781      Opcode = NVPTX::LD_i64_asi;
782      break;
783    case MVT::f32:
784      Opcode = NVPTX::LD_f32_asi;
785      break;
786    case MVT::f64:
787      Opcode = NVPTX::LD_f64_asi;
788      break;
789    default:
790      return false;
791    }
792    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
793                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
794                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
795    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
796  } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
797                          : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
798    if (TM.is64Bit()) {
799      switch (TargetVT) {
800      case MVT::i8:
801        Opcode = NVPTX::LD_i8_ari_64;
802        break;
803      case MVT::i16:
804        Opcode = NVPTX::LD_i16_ari_64;
805        break;
806      case MVT::i32:
807        Opcode = NVPTX::LD_i32_ari_64;
808        break;
809      case MVT::i64:
810        Opcode = NVPTX::LD_i64_ari_64;
811        break;
812      case MVT::f32:
813        Opcode = NVPTX::LD_f32_ari_64;
814        break;
815      case MVT::f64:
816        Opcode = NVPTX::LD_f64_ari_64;
817        break;
818      default:
819        return false;
820      }
821    } else {
822      switch (TargetVT) {
823      case MVT::i8:
824        Opcode = NVPTX::LD_i8_ari;
825        break;
826      case MVT::i16:
827        Opcode = NVPTX::LD_i16_ari;
828        break;
829      case MVT::i32:
830        Opcode = NVPTX::LD_i32_ari;
831        break;
832      case MVT::i64:
833        Opcode = NVPTX::LD_i64_ari;
834        break;
835      case MVT::f32:
836        Opcode = NVPTX::LD_f32_ari;
837        break;
838      case MVT::f64:
839        Opcode = NVPTX::LD_f64_ari;
840        break;
841      default:
842        return false;
843      }
844    }
845    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
846                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
847                      getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
848    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
849  } else {
850    if (TM.is64Bit()) {
851      switch (TargetVT) {
852      case MVT::i8:
853        Opcode = NVPTX::LD_i8_areg_64;
854        break;
855      case MVT::i16:
856        Opcode = NVPTX::LD_i16_areg_64;
857        break;
858      case MVT::i32:
859        Opcode = NVPTX::LD_i32_areg_64;
860        break;
861      case MVT::i64:
862        Opcode = NVPTX::LD_i64_areg_64;
863        break;
864      case MVT::f32:
865        Opcode = NVPTX::LD_f32_areg_64;
866        break;
867      case MVT::f64:
868        Opcode = NVPTX::LD_f64_areg_64;
869        break;
870      default:
871        return false;
872      }
873    } else {
874      switch (TargetVT) {
875      case MVT::i8:
876        Opcode = NVPTX::LD_i8_areg;
877        break;
878      case MVT::i16:
879        Opcode = NVPTX::LD_i16_areg;
880        break;
881      case MVT::i32:
882        Opcode = NVPTX::LD_i32_areg;
883        break;
884      case MVT::i64:
885        Opcode = NVPTX::LD_i64_areg;
886        break;
887      case MVT::f32:
888        Opcode = NVPTX::LD_f32_areg;
889        break;
890      case MVT::f64:
891        Opcode = NVPTX::LD_f64_areg;
892        break;
893      default:
894        return false;
895      }
896    }
897    SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
898                      getI32Imm(vecType, dl), getI32Imm(fromType, dl),
899                      getI32Imm(fromTypeWidth, dl), N1, Chain };
900    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
901  }
902
903  if (!NVPTXLD)
904    return false;
905
906  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
907  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
908  cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
909
910  ReplaceNode(N, NVPTXLD);
911  return true;
912}
913
914bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
915
916  SDValue Chain = N->getOperand(0);
917  SDValue Op1 = N->getOperand(1);
918  SDValue Addr, Offset, Base;
919  unsigned Opcode;
920  SDLoc DL(N);
921  SDNode *LD;
922  MemSDNode *MemSD = cast<MemSDNode>(N);
923  EVT LoadedVT = MemSD->getMemoryVT();
924
925  if (!LoadedVT.isSimple())
926    return false;
927
928  // Address Space Setting
929  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
930
931  if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
932    return tryLDGLDU(N);
933  }
934
935  // Volatile Setting
936  // - .volatile is only availalble for .global and .shared
937  bool IsVolatile = MemSD->isVolatile();
938  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
939      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
940      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
941    IsVolatile = false;
942
943  // Vector Setting
944  MVT SimpleVT = LoadedVT.getSimpleVT();
945
946  // Type Setting: fromType + fromTypeWidth
947  //
948  // Sign   : ISD::SEXTLOAD
949  // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
950  //          type is integer
951  // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
952  MVT ScalarVT = SimpleVT.getScalarType();
953  // Read at least 8 bits (predicates are stored as 8-bit values)
954  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
955  unsigned int FromType;
956  // The last operand holds the original LoadSDNode::getExtensionType() value
957  unsigned ExtensionType = cast<ConstantSDNode>(
958      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
959  if (ExtensionType == ISD::SEXTLOAD)
960    FromType = NVPTX::PTXLdStInstCode::Signed;
961  else if (ScalarVT.isFloatingPoint())
962    FromType = NVPTX::PTXLdStInstCode::Float;
963  else
964    FromType = NVPTX::PTXLdStInstCode::Unsigned;
965
966  unsigned VecType;
967
968  switch (N->getOpcode()) {
969  case NVPTXISD::LoadV2:
970    VecType = NVPTX::PTXLdStInstCode::V2;
971    break;
972  case NVPTXISD::LoadV4:
973    VecType = NVPTX::PTXLdStInstCode::V4;
974    break;
975  default:
976    return false;
977  }
978
979  EVT EltVT = N->getValueType(0);
980
981  if (SelectDirectAddr(Op1, Addr)) {
982    switch (N->getOpcode()) {
983    default:
984      return false;
985    case NVPTXISD::LoadV2:
986      switch (EltVT.getSimpleVT().SimpleTy) {
987      default:
988        return false;
989      case MVT::i8:
990        Opcode = NVPTX::LDV_i8_v2_avar;
991        break;
992      case MVT::i16:
993        Opcode = NVPTX::LDV_i16_v2_avar;
994        break;
995      case MVT::i32:
996        Opcode = NVPTX::LDV_i32_v2_avar;
997        break;
998      case MVT::i64:
999        Opcode = NVPTX::LDV_i64_v2_avar;
1000        break;
1001      case MVT::f32:
1002        Opcode = NVPTX::LDV_f32_v2_avar;
1003        break;
1004      case MVT::f64:
1005        Opcode = NVPTX::LDV_f64_v2_avar;
1006        break;
1007      }
1008      break;
1009    case NVPTXISD::LoadV4:
1010      switch (EltVT.getSimpleVT().SimpleTy) {
1011      default:
1012        return false;
1013      case MVT::i8:
1014        Opcode = NVPTX::LDV_i8_v4_avar;
1015        break;
1016      case MVT::i16:
1017        Opcode = NVPTX::LDV_i16_v4_avar;
1018        break;
1019      case MVT::i32:
1020        Opcode = NVPTX::LDV_i32_v4_avar;
1021        break;
1022      case MVT::f32:
1023        Opcode = NVPTX::LDV_f32_v4_avar;
1024        break;
1025      }
1026      break;
1027    }
1028
1029    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1030                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1031                      getI32Imm(FromTypeWidth, DL), Addr, Chain };
1032    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1033  } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1034                          : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1035    switch (N->getOpcode()) {
1036    default:
1037      return false;
1038    case NVPTXISD::LoadV2:
1039      switch (EltVT.getSimpleVT().SimpleTy) {
1040      default:
1041        return false;
1042      case MVT::i8:
1043        Opcode = NVPTX::LDV_i8_v2_asi;
1044        break;
1045      case MVT::i16:
1046        Opcode = NVPTX::LDV_i16_v2_asi;
1047        break;
1048      case MVT::i32:
1049        Opcode = NVPTX::LDV_i32_v2_asi;
1050        break;
1051      case MVT::i64:
1052        Opcode = NVPTX::LDV_i64_v2_asi;
1053        break;
1054      case MVT::f32:
1055        Opcode = NVPTX::LDV_f32_v2_asi;
1056        break;
1057      case MVT::f64:
1058        Opcode = NVPTX::LDV_f64_v2_asi;
1059        break;
1060      }
1061      break;
1062    case NVPTXISD::LoadV4:
1063      switch (EltVT.getSimpleVT().SimpleTy) {
1064      default:
1065        return false;
1066      case MVT::i8:
1067        Opcode = NVPTX::LDV_i8_v4_asi;
1068        break;
1069      case MVT::i16:
1070        Opcode = NVPTX::LDV_i16_v4_asi;
1071        break;
1072      case MVT::i32:
1073        Opcode = NVPTX::LDV_i32_v4_asi;
1074        break;
1075      case MVT::f32:
1076        Opcode = NVPTX::LDV_f32_v4_asi;
1077        break;
1078      }
1079      break;
1080    }
1081
1082    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1083                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1084                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1085    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1086  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1087                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1088    if (TM.is64Bit()) {
1089      switch (N->getOpcode()) {
1090      default:
1091        return false;
1092      case NVPTXISD::LoadV2:
1093        switch (EltVT.getSimpleVT().SimpleTy) {
1094        default:
1095          return false;
1096        case MVT::i8:
1097          Opcode = NVPTX::LDV_i8_v2_ari_64;
1098          break;
1099        case MVT::i16:
1100          Opcode = NVPTX::LDV_i16_v2_ari_64;
1101          break;
1102        case MVT::i32:
1103          Opcode = NVPTX::LDV_i32_v2_ari_64;
1104          break;
1105        case MVT::i64:
1106          Opcode = NVPTX::LDV_i64_v2_ari_64;
1107          break;
1108        case MVT::f32:
1109          Opcode = NVPTX::LDV_f32_v2_ari_64;
1110          break;
1111        case MVT::f64:
1112          Opcode = NVPTX::LDV_f64_v2_ari_64;
1113          break;
1114        }
1115        break;
1116      case NVPTXISD::LoadV4:
1117        switch (EltVT.getSimpleVT().SimpleTy) {
1118        default:
1119          return false;
1120        case MVT::i8:
1121          Opcode = NVPTX::LDV_i8_v4_ari_64;
1122          break;
1123        case MVT::i16:
1124          Opcode = NVPTX::LDV_i16_v4_ari_64;
1125          break;
1126        case MVT::i32:
1127          Opcode = NVPTX::LDV_i32_v4_ari_64;
1128          break;
1129        case MVT::f32:
1130          Opcode = NVPTX::LDV_f32_v4_ari_64;
1131          break;
1132        }
1133        break;
1134      }
1135    } else {
1136      switch (N->getOpcode()) {
1137      default:
1138        return false;
1139      case NVPTXISD::LoadV2:
1140        switch (EltVT.getSimpleVT().SimpleTy) {
1141        default:
1142          return false;
1143        case MVT::i8:
1144          Opcode = NVPTX::LDV_i8_v2_ari;
1145          break;
1146        case MVT::i16:
1147          Opcode = NVPTX::LDV_i16_v2_ari;
1148          break;
1149        case MVT::i32:
1150          Opcode = NVPTX::LDV_i32_v2_ari;
1151          break;
1152        case MVT::i64:
1153          Opcode = NVPTX::LDV_i64_v2_ari;
1154          break;
1155        case MVT::f32:
1156          Opcode = NVPTX::LDV_f32_v2_ari;
1157          break;
1158        case MVT::f64:
1159          Opcode = NVPTX::LDV_f64_v2_ari;
1160          break;
1161        }
1162        break;
1163      case NVPTXISD::LoadV4:
1164        switch (EltVT.getSimpleVT().SimpleTy) {
1165        default:
1166          return false;
1167        case MVT::i8:
1168          Opcode = NVPTX::LDV_i8_v4_ari;
1169          break;
1170        case MVT::i16:
1171          Opcode = NVPTX::LDV_i16_v4_ari;
1172          break;
1173        case MVT::i32:
1174          Opcode = NVPTX::LDV_i32_v4_ari;
1175          break;
1176        case MVT::f32:
1177          Opcode = NVPTX::LDV_f32_v4_ari;
1178          break;
1179        }
1180        break;
1181      }
1182    }
1183
1184    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1185                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1186                      getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1187
1188    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1189  } else {
1190    if (TM.is64Bit()) {
1191      switch (N->getOpcode()) {
1192      default:
1193        return false;
1194      case NVPTXISD::LoadV2:
1195        switch (EltVT.getSimpleVT().SimpleTy) {
1196        default:
1197          return false;
1198        case MVT::i8:
1199          Opcode = NVPTX::LDV_i8_v2_areg_64;
1200          break;
1201        case MVT::i16:
1202          Opcode = NVPTX::LDV_i16_v2_areg_64;
1203          break;
1204        case MVT::i32:
1205          Opcode = NVPTX::LDV_i32_v2_areg_64;
1206          break;
1207        case MVT::i64:
1208          Opcode = NVPTX::LDV_i64_v2_areg_64;
1209          break;
1210        case MVT::f32:
1211          Opcode = NVPTX::LDV_f32_v2_areg_64;
1212          break;
1213        case MVT::f64:
1214          Opcode = NVPTX::LDV_f64_v2_areg_64;
1215          break;
1216        }
1217        break;
1218      case NVPTXISD::LoadV4:
1219        switch (EltVT.getSimpleVT().SimpleTy) {
1220        default:
1221          return false;
1222        case MVT::i8:
1223          Opcode = NVPTX::LDV_i8_v4_areg_64;
1224          break;
1225        case MVT::i16:
1226          Opcode = NVPTX::LDV_i16_v4_areg_64;
1227          break;
1228        case MVT::i32:
1229          Opcode = NVPTX::LDV_i32_v4_areg_64;
1230          break;
1231        case MVT::f32:
1232          Opcode = NVPTX::LDV_f32_v4_areg_64;
1233          break;
1234        }
1235        break;
1236      }
1237    } else {
1238      switch (N->getOpcode()) {
1239      default:
1240        return false;
1241      case NVPTXISD::LoadV2:
1242        switch (EltVT.getSimpleVT().SimpleTy) {
1243        default:
1244          return false;
1245        case MVT::i8:
1246          Opcode = NVPTX::LDV_i8_v2_areg;
1247          break;
1248        case MVT::i16:
1249          Opcode = NVPTX::LDV_i16_v2_areg;
1250          break;
1251        case MVT::i32:
1252          Opcode = NVPTX::LDV_i32_v2_areg;
1253          break;
1254        case MVT::i64:
1255          Opcode = NVPTX::LDV_i64_v2_areg;
1256          break;
1257        case MVT::f32:
1258          Opcode = NVPTX::LDV_f32_v2_areg;
1259          break;
1260        case MVT::f64:
1261          Opcode = NVPTX::LDV_f64_v2_areg;
1262          break;
1263        }
1264        break;
1265      case NVPTXISD::LoadV4:
1266        switch (EltVT.getSimpleVT().SimpleTy) {
1267        default:
1268          return false;
1269        case MVT::i8:
1270          Opcode = NVPTX::LDV_i8_v4_areg;
1271          break;
1272        case MVT::i16:
1273          Opcode = NVPTX::LDV_i16_v4_areg;
1274          break;
1275        case MVT::i32:
1276          Opcode = NVPTX::LDV_i32_v4_areg;
1277          break;
1278        case MVT::f32:
1279          Opcode = NVPTX::LDV_f32_v4_areg;
1280          break;
1281        }
1282        break;
1283      }
1284    }
1285
1286    SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1287                      getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1288                      getI32Imm(FromTypeWidth, DL), Op1, Chain };
1289    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1290  }
1291
1292  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1293  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1294  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1295
1296  ReplaceNode(N, LD);
1297  return true;
1298}
1299
1300bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1301
1302  SDValue Chain = N->getOperand(0);
1303  SDValue Op1;
1304  MemSDNode *Mem;
1305  bool IsLDG = true;
1306
1307  // If this is an LDG intrinsic, the address is the third operand. If its an
1308  // LDG/LDU SD node (from custom vector handling), then its the second operand
1309  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1310    Op1 = N->getOperand(2);
1311    Mem = cast<MemIntrinsicSDNode>(N);
1312    unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1313    switch (IID) {
1314    default:
1315      return false;
1316    case Intrinsic::nvvm_ldg_global_f:
1317    case Intrinsic::nvvm_ldg_global_i:
1318    case Intrinsic::nvvm_ldg_global_p:
1319      IsLDG = true;
1320      break;
1321    case Intrinsic::nvvm_ldu_global_f:
1322    case Intrinsic::nvvm_ldu_global_i:
1323    case Intrinsic::nvvm_ldu_global_p:
1324      IsLDG = false;
1325      break;
1326    }
1327  } else {
1328    Op1 = N->getOperand(1);
1329    Mem = cast<MemSDNode>(N);
1330  }
1331
1332  unsigned Opcode;
1333  SDLoc DL(N);
1334  SDNode *LD;
1335  SDValue Base, Offset, Addr;
1336
1337  EVT EltVT = Mem->getMemoryVT();
1338  unsigned NumElts = 1;
1339  if (EltVT.isVector()) {
1340    NumElts = EltVT.getVectorNumElements();
1341    EltVT = EltVT.getVectorElementType();
1342  }
1343
1344  // Build the "promoted" result VTList for the load. If we are really loading
1345  // i8s, then the return type will be promoted to i16 since we do not expose
1346  // 8-bit registers in NVPTX.
1347  EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1348  SmallVector<EVT, 5> InstVTs;
1349  for (unsigned i = 0; i != NumElts; ++i) {
1350    InstVTs.push_back(NodeVT);
1351  }
1352  InstVTs.push_back(MVT::Other);
1353  SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1354
1355  if (SelectDirectAddr(Op1, Addr)) {
1356    switch (N->getOpcode()) {
1357    default:
1358      return false;
1359    case ISD::INTRINSIC_W_CHAIN:
1360      if (IsLDG) {
1361        switch (EltVT.getSimpleVT().SimpleTy) {
1362        default:
1363          return false;
1364        case MVT::i8:
1365          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1366          break;
1367        case MVT::i16:
1368          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1369          break;
1370        case MVT::i32:
1371          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1372          break;
1373        case MVT::i64:
1374          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1375          break;
1376        case MVT::f32:
1377          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1378          break;
1379        case MVT::f64:
1380          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1381          break;
1382        }
1383      } else {
1384        switch (EltVT.getSimpleVT().SimpleTy) {
1385        default:
1386          return false;
1387        case MVT::i8:
1388          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1389          break;
1390        case MVT::i16:
1391          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1392          break;
1393        case MVT::i32:
1394          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1395          break;
1396        case MVT::i64:
1397          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1398          break;
1399        case MVT::f32:
1400          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1401          break;
1402        case MVT::f64:
1403          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1404          break;
1405        }
1406      }
1407      break;
1408    case NVPTXISD::LDGV2:
1409      switch (EltVT.getSimpleVT().SimpleTy) {
1410      default:
1411        return false;
1412      case MVT::i8:
1413        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1414        break;
1415      case MVT::i16:
1416        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1417        break;
1418      case MVT::i32:
1419        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1420        break;
1421      case MVT::i64:
1422        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1423        break;
1424      case MVT::f32:
1425        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1426        break;
1427      case MVT::f64:
1428        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1429        break;
1430      }
1431      break;
1432    case NVPTXISD::LDUV2:
1433      switch (EltVT.getSimpleVT().SimpleTy) {
1434      default:
1435        return false;
1436      case MVT::i8:
1437        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1438        break;
1439      case MVT::i16:
1440        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1441        break;
1442      case MVT::i32:
1443        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1444        break;
1445      case MVT::i64:
1446        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1447        break;
1448      case MVT::f32:
1449        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1450        break;
1451      case MVT::f64:
1452        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1453        break;
1454      }
1455      break;
1456    case NVPTXISD::LDGV4:
1457      switch (EltVT.getSimpleVT().SimpleTy) {
1458      default:
1459        return false;
1460      case MVT::i8:
1461        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1462        break;
1463      case MVT::i16:
1464        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1465        break;
1466      case MVT::i32:
1467        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1468        break;
1469      case MVT::f32:
1470        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1471        break;
1472      }
1473      break;
1474    case NVPTXISD::LDUV4:
1475      switch (EltVT.getSimpleVT().SimpleTy) {
1476      default:
1477        return false;
1478      case MVT::i8:
1479        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1480        break;
1481      case MVT::i16:
1482        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1483        break;
1484      case MVT::i32:
1485        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1486        break;
1487      case MVT::f32:
1488        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1489        break;
1490      }
1491      break;
1492    }
1493
1494    SDValue Ops[] = { Addr, Chain };
1495    LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
1496  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1497                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1498    if (TM.is64Bit()) {
1499      switch (N->getOpcode()) {
1500      default:
1501        return false;
1502      case ISD::LOAD:
1503      case ISD::INTRINSIC_W_CHAIN:
1504        if (IsLDG) {
1505          switch (EltVT.getSimpleVT().SimpleTy) {
1506          default:
1507            return false;
1508          case MVT::i8:
1509            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1510            break;
1511          case MVT::i16:
1512            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1513            break;
1514          case MVT::i32:
1515            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1516            break;
1517          case MVT::i64:
1518            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1519            break;
1520          case MVT::f32:
1521            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1522            break;
1523          case MVT::f64:
1524            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1525            break;
1526          }
1527        } else {
1528          switch (EltVT.getSimpleVT().SimpleTy) {
1529          default:
1530            return false;
1531          case MVT::i8:
1532            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1533            break;
1534          case MVT::i16:
1535            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1536            break;
1537          case MVT::i32:
1538            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1539            break;
1540          case MVT::i64:
1541            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1542            break;
1543          case MVT::f32:
1544            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1545            break;
1546          case MVT::f64:
1547            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1548            break;
1549          }
1550        }
1551        break;
1552      case NVPTXISD::LoadV2:
1553      case NVPTXISD::LDGV2:
1554        switch (EltVT.getSimpleVT().SimpleTy) {
1555        default:
1556          return false;
1557        case MVT::i8:
1558          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1559          break;
1560        case MVT::i16:
1561          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1562          break;
1563        case MVT::i32:
1564          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1565          break;
1566        case MVT::i64:
1567          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1568          break;
1569        case MVT::f32:
1570          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1571          break;
1572        case MVT::f64:
1573          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1574          break;
1575        }
1576        break;
1577      case NVPTXISD::LDUV2:
1578        switch (EltVT.getSimpleVT().SimpleTy) {
1579        default:
1580          return false;
1581        case MVT::i8:
1582          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1583          break;
1584        case MVT::i16:
1585          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1586          break;
1587        case MVT::i32:
1588          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1589          break;
1590        case MVT::i64:
1591          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1592          break;
1593        case MVT::f32:
1594          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1595          break;
1596        case MVT::f64:
1597          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1598          break;
1599        }
1600        break;
1601      case NVPTXISD::LoadV4:
1602      case NVPTXISD::LDGV4:
1603        switch (EltVT.getSimpleVT().SimpleTy) {
1604        default:
1605          return false;
1606        case MVT::i8:
1607          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1608          break;
1609        case MVT::i16:
1610          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1611          break;
1612        case MVT::i32:
1613          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1614          break;
1615        case MVT::f32:
1616          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1617          break;
1618        }
1619        break;
1620      case NVPTXISD::LDUV4:
1621        switch (EltVT.getSimpleVT().SimpleTy) {
1622        default:
1623          return false;
1624        case MVT::i8:
1625          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1626          break;
1627        case MVT::i16:
1628          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1629          break;
1630        case MVT::i32:
1631          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1632          break;
1633        case MVT::f32:
1634          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1635          break;
1636        }
1637        break;
1638      }
1639    } else {
1640      switch (N->getOpcode()) {
1641      default:
1642        return false;
1643      case ISD::LOAD:
1644      case ISD::INTRINSIC_W_CHAIN:
1645        if (IsLDG) {
1646          switch (EltVT.getSimpleVT().SimpleTy) {
1647          default:
1648            return false;
1649          case MVT::i8:
1650            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1651            break;
1652          case MVT::i16:
1653            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1654            break;
1655          case MVT::i32:
1656            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1657            break;
1658          case MVT::i64:
1659            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1660            break;
1661          case MVT::f32:
1662            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1663            break;
1664          case MVT::f64:
1665            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1666            break;
1667          }
1668        } else {
1669          switch (EltVT.getSimpleVT().SimpleTy) {
1670          default:
1671            return false;
1672          case MVT::i8:
1673            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1674            break;
1675          case MVT::i16:
1676            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1677            break;
1678          case MVT::i32:
1679            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1680            break;
1681          case MVT::i64:
1682            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1683            break;
1684          case MVT::f32:
1685            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1686            break;
1687          case MVT::f64:
1688            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1689            break;
1690          }
1691        }
1692        break;
1693      case NVPTXISD::LoadV2:
1694      case NVPTXISD::LDGV2:
1695        switch (EltVT.getSimpleVT().SimpleTy) {
1696        default:
1697          return false;
1698        case MVT::i8:
1699          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1700          break;
1701        case MVT::i16:
1702          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1703          break;
1704        case MVT::i32:
1705          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1706          break;
1707        case MVT::i64:
1708          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1709          break;
1710        case MVT::f32:
1711          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1712          break;
1713        case MVT::f64:
1714          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1715          break;
1716        }
1717        break;
1718      case NVPTXISD::LDUV2:
1719        switch (EltVT.getSimpleVT().SimpleTy) {
1720        default:
1721          return false;
1722        case MVT::i8:
1723          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1724          break;
1725        case MVT::i16:
1726          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1727          break;
1728        case MVT::i32:
1729          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1730          break;
1731        case MVT::i64:
1732          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1733          break;
1734        case MVT::f32:
1735          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1736          break;
1737        case MVT::f64:
1738          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1739          break;
1740        }
1741        break;
1742      case NVPTXISD::LoadV4:
1743      case NVPTXISD::LDGV4:
1744        switch (EltVT.getSimpleVT().SimpleTy) {
1745        default:
1746          return false;
1747        case MVT::i8:
1748          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1749          break;
1750        case MVT::i16:
1751          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1752          break;
1753        case MVT::i32:
1754          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1755          break;
1756        case MVT::f32:
1757          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1758          break;
1759        }
1760        break;
1761      case NVPTXISD::LDUV4:
1762        switch (EltVT.getSimpleVT().SimpleTy) {
1763        default:
1764          return false;
1765        case MVT::i8:
1766          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1767          break;
1768        case MVT::i16:
1769          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1770          break;
1771        case MVT::i32:
1772          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1773          break;
1774        case MVT::f32:
1775          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1776          break;
1777        }
1778        break;
1779      }
1780    }
1781
1782    SDValue Ops[] = { Base, Offset, Chain };
1783
1784    LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
1785  } else {
1786    if (TM.is64Bit()) {
1787      switch (N->getOpcode()) {
1788      default:
1789        return false;
1790      case ISD::LOAD:
1791      case ISD::INTRINSIC_W_CHAIN:
1792        if (IsLDG) {
1793          switch (EltVT.getSimpleVT().SimpleTy) {
1794          default:
1795            return false;
1796          case MVT::i8:
1797            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1798            break;
1799          case MVT::i16:
1800            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1801            break;
1802          case MVT::i32:
1803            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1804            break;
1805          case MVT::i64:
1806            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1807            break;
1808          case MVT::f32:
1809            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1810            break;
1811          case MVT::f64:
1812            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1813            break;
1814          }
1815        } else {
1816          switch (EltVT.getSimpleVT().SimpleTy) {
1817          default:
1818            return false;
1819          case MVT::i8:
1820            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1821            break;
1822          case MVT::i16:
1823            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1824            break;
1825          case MVT::i32:
1826            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1827            break;
1828          case MVT::i64:
1829            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1830            break;
1831          case MVT::f32:
1832            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1833            break;
1834          case MVT::f64:
1835            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1836            break;
1837          }
1838        }
1839        break;
1840      case NVPTXISD::LoadV2:
1841      case NVPTXISD::LDGV2:
1842        switch (EltVT.getSimpleVT().SimpleTy) {
1843        default:
1844          return false;
1845        case MVT::i8:
1846          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1847          break;
1848        case MVT::i16:
1849          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1850          break;
1851        case MVT::i32:
1852          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1853          break;
1854        case MVT::i64:
1855          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1856          break;
1857        case MVT::f32:
1858          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1859          break;
1860        case MVT::f64:
1861          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1862          break;
1863        }
1864        break;
1865      case NVPTXISD::LDUV2:
1866        switch (EltVT.getSimpleVT().SimpleTy) {
1867        default:
1868          return false;
1869        case MVT::i8:
1870          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1871          break;
1872        case MVT::i16:
1873          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1874          break;
1875        case MVT::i32:
1876          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1877          break;
1878        case MVT::i64:
1879          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1880          break;
1881        case MVT::f32:
1882          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1883          break;
1884        case MVT::f64:
1885          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1886          break;
1887        }
1888        break;
1889      case NVPTXISD::LoadV4:
1890      case NVPTXISD::LDGV4:
1891        switch (EltVT.getSimpleVT().SimpleTy) {
1892        default:
1893          return false;
1894        case MVT::i8:
1895          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1896          break;
1897        case MVT::i16:
1898          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1899          break;
1900        case MVT::i32:
1901          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1902          break;
1903        case MVT::f32:
1904          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1905          break;
1906        }
1907        break;
1908      case NVPTXISD::LDUV4:
1909        switch (EltVT.getSimpleVT().SimpleTy) {
1910        default:
1911          return false;
1912        case MVT::i8:
1913          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1914          break;
1915        case MVT::i16:
1916          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1917          break;
1918        case MVT::i32:
1919          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1920          break;
1921        case MVT::f32:
1922          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1923          break;
1924        }
1925        break;
1926      }
1927    } else {
1928      switch (N->getOpcode()) {
1929      default:
1930        return false;
1931      case ISD::LOAD:
1932      case ISD::INTRINSIC_W_CHAIN:
1933        if (IsLDG) {
1934          switch (EltVT.getSimpleVT().SimpleTy) {
1935          default:
1936            return false;
1937          case MVT::i8:
1938            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1939            break;
1940          case MVT::i16:
1941            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1942            break;
1943          case MVT::i32:
1944            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1945            break;
1946          case MVT::i64:
1947            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1948            break;
1949          case MVT::f32:
1950            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1951            break;
1952          case MVT::f64:
1953            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1954            break;
1955          }
1956        } else {
1957          switch (EltVT.getSimpleVT().SimpleTy) {
1958          default:
1959            return false;
1960          case MVT::i8:
1961            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1962            break;
1963          case MVT::i16:
1964            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1965            break;
1966          case MVT::i32:
1967            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1968            break;
1969          case MVT::i64:
1970            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1971            break;
1972          case MVT::f32:
1973            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1974            break;
1975          case MVT::f64:
1976            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1977            break;
1978          }
1979        }
1980        break;
1981      case NVPTXISD::LoadV2:
1982      case NVPTXISD::LDGV2:
1983        switch (EltVT.getSimpleVT().SimpleTy) {
1984        default:
1985          return false;
1986        case MVT::i8:
1987          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1988          break;
1989        case MVT::i16:
1990          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1991          break;
1992        case MVT::i32:
1993          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1994          break;
1995        case MVT::i64:
1996          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1997          break;
1998        case MVT::f32:
1999          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
2000          break;
2001        case MVT::f64:
2002          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
2003          break;
2004        }
2005        break;
2006      case NVPTXISD::LDUV2:
2007        switch (EltVT.getSimpleVT().SimpleTy) {
2008        default:
2009          return false;
2010        case MVT::i8:
2011          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
2012          break;
2013        case MVT::i16:
2014          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
2015          break;
2016        case MVT::i32:
2017          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
2018          break;
2019        case MVT::i64:
2020          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
2021          break;
2022        case MVT::f32:
2023          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
2024          break;
2025        case MVT::f64:
2026          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
2027          break;
2028        }
2029        break;
2030      case NVPTXISD::LoadV4:
2031      case NVPTXISD::LDGV4:
2032        switch (EltVT.getSimpleVT().SimpleTy) {
2033        default:
2034          return false;
2035        case MVT::i8:
2036          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
2037          break;
2038        case MVT::i16:
2039          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
2040          break;
2041        case MVT::i32:
2042          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
2043          break;
2044        case MVT::f32:
2045          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
2046          break;
2047        }
2048        break;
2049      case NVPTXISD::LDUV4:
2050        switch (EltVT.getSimpleVT().SimpleTy) {
2051        default:
2052          return false;
2053        case MVT::i8:
2054          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
2055          break;
2056        case MVT::i16:
2057          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
2058          break;
2059        case MVT::i32:
2060          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
2061          break;
2062        case MVT::f32:
2063          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
2064          break;
2065        }
2066        break;
2067      }
2068    }
2069
2070    SDValue Ops[] = { Op1, Chain };
2071    LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
2072  }
2073
2074  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2075  MemRefs0[0] = Mem->getMemOperand();
2076  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
2077
2078  // For automatic generation of LDG (through SelectLoad[Vector], not the
2079  // intrinsics), we may have an extending load like:
2080  //
2081  //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
2082  //
2083  // In this case, the matching logic above will select a load for the original
2084  // memory type (in this case, i8) and our types will not match (the node needs
2085  // to return an i32 in this case). Our LDG/LDU nodes do not support the
2086  // concept of sign-/zero-extension, so emulate it here by adding an explicit
2087  // CVT instruction. Ptxas should clean up any redundancies here.
2088
2089  EVT OrigType = N->getValueType(0);
2090  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
2091
2092  if (OrigType != EltVT && LdNode) {
2093    // We have an extending-load. The instruction we selected operates on the
2094    // smaller type, but the SDNode we are replacing has the larger type. We
2095    // need to emit a CVT to make the types match.
2096    bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
2097    unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
2098                                       EltVT.getSimpleVT(), IsSigned);
2099
2100    // For each output value, apply the manual sign/zero-extension and make sure
2101    // all users of the load go through that CVT.
2102    for (unsigned i = 0; i != NumElts; ++i) {
2103      SDValue Res(LD, i);
2104      SDValue OrigVal(N, i);
2105
2106      SDNode *CvtNode =
2107        CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
2108                               CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2109                                                         DL, MVT::i32));
2110      ReplaceUses(OrigVal, SDValue(CvtNode, 0));
2111    }
2112  }
2113
2114  ReplaceNode(N, LD);
2115  return true;
2116}
2117
2118bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
2119  SDLoc dl(N);
2120  StoreSDNode *ST = cast<StoreSDNode>(N);
2121  EVT StoreVT = ST->getMemoryVT();
2122  SDNode *NVPTXST = nullptr;
2123
2124  // do not support pre/post inc/dec
2125  if (ST->isIndexed())
2126    return false;
2127
2128  if (!StoreVT.isSimple())
2129    return false;
2130
2131  // Address Space Setting
2132  unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2133
2134  // Volatile Setting
2135  // - .volatile is only availalble for .global and .shared
2136  bool isVolatile = ST->isVolatile();
2137  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2138      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2139      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2140    isVolatile = false;
2141
2142  // Vector Setting
2143  MVT SimpleVT = StoreVT.getSimpleVT();
2144  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2145  if (SimpleVT.isVector()) {
2146    unsigned num = SimpleVT.getVectorNumElements();
2147    if (num == 2)
2148      vecType = NVPTX::PTXLdStInstCode::V2;
2149    else if (num == 4)
2150      vecType = NVPTX::PTXLdStInstCode::V4;
2151    else
2152      return false;
2153  }
2154
2155  // Type Setting: toType + toTypeWidth
2156  // - for integer type, always use 'u'
2157  //
2158  MVT ScalarVT = SimpleVT.getScalarType();
2159  unsigned toTypeWidth = ScalarVT.getSizeInBits();
2160  unsigned int toType;
2161  if (ScalarVT.isFloatingPoint())
2162    toType = NVPTX::PTXLdStInstCode::Float;
2163  else
2164    toType = NVPTX::PTXLdStInstCode::Unsigned;
2165
2166  // Create the machine instruction DAG
2167  SDValue Chain = N->getOperand(0);
2168  SDValue N1 = N->getOperand(1);
2169  SDValue N2 = N->getOperand(2);
2170  SDValue Addr;
2171  SDValue Offset, Base;
2172  unsigned Opcode;
2173  MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
2174
2175  if (SelectDirectAddr(N2, Addr)) {
2176    switch (SourceVT) {
2177    case MVT::i8:
2178      Opcode = NVPTX::ST_i8_avar;
2179      break;
2180    case MVT::i16:
2181      Opcode = NVPTX::ST_i16_avar;
2182      break;
2183    case MVT::i32:
2184      Opcode = NVPTX::ST_i32_avar;
2185      break;
2186    case MVT::i64:
2187      Opcode = NVPTX::ST_i64_avar;
2188      break;
2189    case MVT::f32:
2190      Opcode = NVPTX::ST_f32_avar;
2191      break;
2192    case MVT::f64:
2193      Opcode = NVPTX::ST_f64_avar;
2194      break;
2195    default:
2196      return false;
2197    }
2198    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2199                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2200                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2201                      Chain };
2202    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2203  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2204                          : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2205    switch (SourceVT) {
2206    case MVT::i8:
2207      Opcode = NVPTX::ST_i8_asi;
2208      break;
2209    case MVT::i16:
2210      Opcode = NVPTX::ST_i16_asi;
2211      break;
2212    case MVT::i32:
2213      Opcode = NVPTX::ST_i32_asi;
2214      break;
2215    case MVT::i64:
2216      Opcode = NVPTX::ST_i64_asi;
2217      break;
2218    case MVT::f32:
2219      Opcode = NVPTX::ST_f32_asi;
2220      break;
2221    case MVT::f64:
2222      Opcode = NVPTX::ST_f64_asi;
2223      break;
2224    default:
2225      return false;
2226    }
2227    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2228                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2229                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2230                      Offset, Chain };
2231    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2232  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2233                          : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2234    if (TM.is64Bit()) {
2235      switch (SourceVT) {
2236      case MVT::i8:
2237        Opcode = NVPTX::ST_i8_ari_64;
2238        break;
2239      case MVT::i16:
2240        Opcode = NVPTX::ST_i16_ari_64;
2241        break;
2242      case MVT::i32:
2243        Opcode = NVPTX::ST_i32_ari_64;
2244        break;
2245      case MVT::i64:
2246        Opcode = NVPTX::ST_i64_ari_64;
2247        break;
2248      case MVT::f32:
2249        Opcode = NVPTX::ST_f32_ari_64;
2250        break;
2251      case MVT::f64:
2252        Opcode = NVPTX::ST_f64_ari_64;
2253        break;
2254      default:
2255        return false;
2256      }
2257    } else {
2258      switch (SourceVT) {
2259      case MVT::i8:
2260        Opcode = NVPTX::ST_i8_ari;
2261        break;
2262      case MVT::i16:
2263        Opcode = NVPTX::ST_i16_ari;
2264        break;
2265      case MVT::i32:
2266        Opcode = NVPTX::ST_i32_ari;
2267        break;
2268      case MVT::i64:
2269        Opcode = NVPTX::ST_i64_ari;
2270        break;
2271      case MVT::f32:
2272        Opcode = NVPTX::ST_f32_ari;
2273        break;
2274      case MVT::f64:
2275        Opcode = NVPTX::ST_f64_ari;
2276        break;
2277      default:
2278        return false;
2279      }
2280    }
2281    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2282                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2283                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2284                      Offset, Chain };
2285    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2286  } else {
2287    if (TM.is64Bit()) {
2288      switch (SourceVT) {
2289      case MVT::i8:
2290        Opcode = NVPTX::ST_i8_areg_64;
2291        break;
2292      case MVT::i16:
2293        Opcode = NVPTX::ST_i16_areg_64;
2294        break;
2295      case MVT::i32:
2296        Opcode = NVPTX::ST_i32_areg_64;
2297        break;
2298      case MVT::i64:
2299        Opcode = NVPTX::ST_i64_areg_64;
2300        break;
2301      case MVT::f32:
2302        Opcode = NVPTX::ST_f32_areg_64;
2303        break;
2304      case MVT::f64:
2305        Opcode = NVPTX::ST_f64_areg_64;
2306        break;
2307      default:
2308        return false;
2309      }
2310    } else {
2311      switch (SourceVT) {
2312      case MVT::i8:
2313        Opcode = NVPTX::ST_i8_areg;
2314        break;
2315      case MVT::i16:
2316        Opcode = NVPTX::ST_i16_areg;
2317        break;
2318      case MVT::i32:
2319        Opcode = NVPTX::ST_i32_areg;
2320        break;
2321      case MVT::i64:
2322        Opcode = NVPTX::ST_i64_areg;
2323        break;
2324      case MVT::f32:
2325        Opcode = NVPTX::ST_f32_areg;
2326        break;
2327      case MVT::f64:
2328        Opcode = NVPTX::ST_f64_areg;
2329        break;
2330      default:
2331        return false;
2332      }
2333    }
2334    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2335                      getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2336                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2337                      Chain };
2338    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2339  }
2340
2341  if (!NVPTXST)
2342    return false;
2343
2344  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2345  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2346  cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2347  ReplaceNode(N, NVPTXST);
2348  return true;
2349}
2350
2351bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
2352  SDValue Chain = N->getOperand(0);
2353  SDValue Op1 = N->getOperand(1);
2354  SDValue Addr, Offset, Base;
2355  unsigned Opcode;
2356  SDLoc DL(N);
2357  SDNode *ST;
2358  EVT EltVT = Op1.getValueType();
2359  MemSDNode *MemSD = cast<MemSDNode>(N);
2360  EVT StoreVT = MemSD->getMemoryVT();
2361
2362  // Address Space Setting
2363  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
2364
2365  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2366    report_fatal_error("Cannot store to pointer that points to constant "
2367                       "memory space");
2368  }
2369
2370  // Volatile Setting
2371  // - .volatile is only availalble for .global and .shared
2372  bool IsVolatile = MemSD->isVolatile();
2373  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2374      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2375      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2376    IsVolatile = false;
2377
2378  // Type Setting: toType + toTypeWidth
2379  // - for integer type, always use 'u'
2380  assert(StoreVT.isSimple() && "Store value is not simple");
2381  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2382  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2383  unsigned ToType;
2384  if (ScalarVT.isFloatingPoint())
2385    ToType = NVPTX::PTXLdStInstCode::Float;
2386  else
2387    ToType = NVPTX::PTXLdStInstCode::Unsigned;
2388
2389  SmallVector<SDValue, 12> StOps;
2390  SDValue N2;
2391  unsigned VecType;
2392
2393  switch (N->getOpcode()) {
2394  case NVPTXISD::StoreV2:
2395    VecType = NVPTX::PTXLdStInstCode::V2;
2396    StOps.push_back(N->getOperand(1));
2397    StOps.push_back(N->getOperand(2));
2398    N2 = N->getOperand(3);
2399    break;
2400  case NVPTXISD::StoreV4:
2401    VecType = NVPTX::PTXLdStInstCode::V4;
2402    StOps.push_back(N->getOperand(1));
2403    StOps.push_back(N->getOperand(2));
2404    StOps.push_back(N->getOperand(3));
2405    StOps.push_back(N->getOperand(4));
2406    N2 = N->getOperand(5);
2407    break;
2408  default:
2409    return false;
2410  }
2411
2412  StOps.push_back(getI32Imm(IsVolatile, DL));
2413  StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2414  StOps.push_back(getI32Imm(VecType, DL));
2415  StOps.push_back(getI32Imm(ToType, DL));
2416  StOps.push_back(getI32Imm(ToTypeWidth, DL));
2417
2418  if (SelectDirectAddr(N2, Addr)) {
2419    switch (N->getOpcode()) {
2420    default:
2421      return false;
2422    case NVPTXISD::StoreV2:
2423      switch (EltVT.getSimpleVT().SimpleTy) {
2424      default:
2425        return false;
2426      case MVT::i8:
2427        Opcode = NVPTX::STV_i8_v2_avar;
2428        break;
2429      case MVT::i16:
2430        Opcode = NVPTX::STV_i16_v2_avar;
2431        break;
2432      case MVT::i32:
2433        Opcode = NVPTX::STV_i32_v2_avar;
2434        break;
2435      case MVT::i64:
2436        Opcode = NVPTX::STV_i64_v2_avar;
2437        break;
2438      case MVT::f32:
2439        Opcode = NVPTX::STV_f32_v2_avar;
2440        break;
2441      case MVT::f64:
2442        Opcode = NVPTX::STV_f64_v2_avar;
2443        break;
2444      }
2445      break;
2446    case NVPTXISD::StoreV4:
2447      switch (EltVT.getSimpleVT().SimpleTy) {
2448      default:
2449        return false;
2450      case MVT::i8:
2451        Opcode = NVPTX::STV_i8_v4_avar;
2452        break;
2453      case MVT::i16:
2454        Opcode = NVPTX::STV_i16_v4_avar;
2455        break;
2456      case MVT::i32:
2457        Opcode = NVPTX::STV_i32_v4_avar;
2458        break;
2459      case MVT::f32:
2460        Opcode = NVPTX::STV_f32_v4_avar;
2461        break;
2462      }
2463      break;
2464    }
2465    StOps.push_back(Addr);
2466  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2467                          : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2468    switch (N->getOpcode()) {
2469    default:
2470      return false;
2471    case NVPTXISD::StoreV2:
2472      switch (EltVT.getSimpleVT().SimpleTy) {
2473      default:
2474        return false;
2475      case MVT::i8:
2476        Opcode = NVPTX::STV_i8_v2_asi;
2477        break;
2478      case MVT::i16:
2479        Opcode = NVPTX::STV_i16_v2_asi;
2480        break;
2481      case MVT::i32:
2482        Opcode = NVPTX::STV_i32_v2_asi;
2483        break;
2484      case MVT::i64:
2485        Opcode = NVPTX::STV_i64_v2_asi;
2486        break;
2487      case MVT::f32:
2488        Opcode = NVPTX::STV_f32_v2_asi;
2489        break;
2490      case MVT::f64:
2491        Opcode = NVPTX::STV_f64_v2_asi;
2492        break;
2493      }
2494      break;
2495    case NVPTXISD::StoreV4:
2496      switch (EltVT.getSimpleVT().SimpleTy) {
2497      default:
2498        return false;
2499      case MVT::i8:
2500        Opcode = NVPTX::STV_i8_v4_asi;
2501        break;
2502      case MVT::i16:
2503        Opcode = NVPTX::STV_i16_v4_asi;
2504        break;
2505      case MVT::i32:
2506        Opcode = NVPTX::STV_i32_v4_asi;
2507        break;
2508      case MVT::f32:
2509        Opcode = NVPTX::STV_f32_v4_asi;
2510        break;
2511      }
2512      break;
2513    }
2514    StOps.push_back(Base);
2515    StOps.push_back(Offset);
2516  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2517                          : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2518    if (TM.is64Bit()) {
2519      switch (N->getOpcode()) {
2520      default:
2521        return false;
2522      case NVPTXISD::StoreV2:
2523        switch (EltVT.getSimpleVT().SimpleTy) {
2524        default:
2525          return false;
2526        case MVT::i8:
2527          Opcode = NVPTX::STV_i8_v2_ari_64;
2528          break;
2529        case MVT::i16:
2530          Opcode = NVPTX::STV_i16_v2_ari_64;
2531          break;
2532        case MVT::i32:
2533          Opcode = NVPTX::STV_i32_v2_ari_64;
2534          break;
2535        case MVT::i64:
2536          Opcode = NVPTX::STV_i64_v2_ari_64;
2537          break;
2538        case MVT::f32:
2539          Opcode = NVPTX::STV_f32_v2_ari_64;
2540          break;
2541        case MVT::f64:
2542          Opcode = NVPTX::STV_f64_v2_ari_64;
2543          break;
2544        }
2545        break;
2546      case NVPTXISD::StoreV4:
2547        switch (EltVT.getSimpleVT().SimpleTy) {
2548        default:
2549          return false;
2550        case MVT::i8:
2551          Opcode = NVPTX::STV_i8_v4_ari_64;
2552          break;
2553        case MVT::i16:
2554          Opcode = NVPTX::STV_i16_v4_ari_64;
2555          break;
2556        case MVT::i32:
2557          Opcode = NVPTX::STV_i32_v4_ari_64;
2558          break;
2559        case MVT::f32:
2560          Opcode = NVPTX::STV_f32_v4_ari_64;
2561          break;
2562        }
2563        break;
2564      }
2565    } else {
2566      switch (N->getOpcode()) {
2567      default:
2568        return false;
2569      case NVPTXISD::StoreV2:
2570        switch (EltVT.getSimpleVT().SimpleTy) {
2571        default:
2572          return false;
2573        case MVT::i8:
2574          Opcode = NVPTX::STV_i8_v2_ari;
2575          break;
2576        case MVT::i16:
2577          Opcode = NVPTX::STV_i16_v2_ari;
2578          break;
2579        case MVT::i32:
2580          Opcode = NVPTX::STV_i32_v2_ari;
2581          break;
2582        case MVT::i64:
2583          Opcode = NVPTX::STV_i64_v2_ari;
2584          break;
2585        case MVT::f32:
2586          Opcode = NVPTX::STV_f32_v2_ari;
2587          break;
2588        case MVT::f64:
2589          Opcode = NVPTX::STV_f64_v2_ari;
2590          break;
2591        }
2592        break;
2593      case NVPTXISD::StoreV4:
2594        switch (EltVT.getSimpleVT().SimpleTy) {
2595        default:
2596          return false;
2597        case MVT::i8:
2598          Opcode = NVPTX::STV_i8_v4_ari;
2599          break;
2600        case MVT::i16:
2601          Opcode = NVPTX::STV_i16_v4_ari;
2602          break;
2603        case MVT::i32:
2604          Opcode = NVPTX::STV_i32_v4_ari;
2605          break;
2606        case MVT::f32:
2607          Opcode = NVPTX::STV_f32_v4_ari;
2608          break;
2609        }
2610        break;
2611      }
2612    }
2613    StOps.push_back(Base);
2614    StOps.push_back(Offset);
2615  } else {
2616    if (TM.is64Bit()) {
2617      switch (N->getOpcode()) {
2618      default:
2619        return false;
2620      case NVPTXISD::StoreV2:
2621        switch (EltVT.getSimpleVT().SimpleTy) {
2622        default:
2623          return false;
2624        case MVT::i8:
2625          Opcode = NVPTX::STV_i8_v2_areg_64;
2626          break;
2627        case MVT::i16:
2628          Opcode = NVPTX::STV_i16_v2_areg_64;
2629          break;
2630        case MVT::i32:
2631          Opcode = NVPTX::STV_i32_v2_areg_64;
2632          break;
2633        case MVT::i64:
2634          Opcode = NVPTX::STV_i64_v2_areg_64;
2635          break;
2636        case MVT::f32:
2637          Opcode = NVPTX::STV_f32_v2_areg_64;
2638          break;
2639        case MVT::f64:
2640          Opcode = NVPTX::STV_f64_v2_areg_64;
2641          break;
2642        }
2643        break;
2644      case NVPTXISD::StoreV4:
2645        switch (EltVT.getSimpleVT().SimpleTy) {
2646        default:
2647          return false;
2648        case MVT::i8:
2649          Opcode = NVPTX::STV_i8_v4_areg_64;
2650          break;
2651        case MVT::i16:
2652          Opcode = NVPTX::STV_i16_v4_areg_64;
2653          break;
2654        case MVT::i32:
2655          Opcode = NVPTX::STV_i32_v4_areg_64;
2656          break;
2657        case MVT::f32:
2658          Opcode = NVPTX::STV_f32_v4_areg_64;
2659          break;
2660        }
2661        break;
2662      }
2663    } else {
2664      switch (N->getOpcode()) {
2665      default:
2666        return false;
2667      case NVPTXISD::StoreV2:
2668        switch (EltVT.getSimpleVT().SimpleTy) {
2669        default:
2670          return false;
2671        case MVT::i8:
2672          Opcode = NVPTX::STV_i8_v2_areg;
2673          break;
2674        case MVT::i16:
2675          Opcode = NVPTX::STV_i16_v2_areg;
2676          break;
2677        case MVT::i32:
2678          Opcode = NVPTX::STV_i32_v2_areg;
2679          break;
2680        case MVT::i64:
2681          Opcode = NVPTX::STV_i64_v2_areg;
2682          break;
2683        case MVT::f32:
2684          Opcode = NVPTX::STV_f32_v2_areg;
2685          break;
2686        case MVT::f64:
2687          Opcode = NVPTX::STV_f64_v2_areg;
2688          break;
2689        }
2690        break;
2691      case NVPTXISD::StoreV4:
2692        switch (EltVT.getSimpleVT().SimpleTy) {
2693        default:
2694          return false;
2695        case MVT::i8:
2696          Opcode = NVPTX::STV_i8_v4_areg;
2697          break;
2698        case MVT::i16:
2699          Opcode = NVPTX::STV_i16_v4_areg;
2700          break;
2701        case MVT::i32:
2702          Opcode = NVPTX::STV_i32_v4_areg;
2703          break;
2704        case MVT::f32:
2705          Opcode = NVPTX::STV_f32_v4_areg;
2706          break;
2707        }
2708        break;
2709      }
2710    }
2711    StOps.push_back(N2);
2712  }
2713
2714  StOps.push_back(Chain);
2715
2716  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
2717
2718  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2719  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2720  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2721
2722  ReplaceNode(N, ST);
2723  return true;
2724}
2725
2726bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2727  SDValue Chain = Node->getOperand(0);
2728  SDValue Offset = Node->getOperand(2);
2729  SDValue Flag = Node->getOperand(3);
2730  SDLoc DL(Node);
2731  MemSDNode *Mem = cast<MemSDNode>(Node);
2732
2733  unsigned VecSize;
2734  switch (Node->getOpcode()) {
2735  default:
2736    return false;
2737  case NVPTXISD::LoadParam:
2738    VecSize = 1;
2739    break;
2740  case NVPTXISD::LoadParamV2:
2741    VecSize = 2;
2742    break;
2743  case NVPTXISD::LoadParamV4:
2744    VecSize = 4;
2745    break;
2746  }
2747
2748  EVT EltVT = Node->getValueType(0);
2749  EVT MemVT = Mem->getMemoryVT();
2750
2751  unsigned Opc = 0;
2752
2753  switch (VecSize) {
2754  default:
2755    return false;
2756  case 1:
2757    switch (MemVT.getSimpleVT().SimpleTy) {
2758    default:
2759      return false;
2760    case MVT::i1:
2761      Opc = NVPTX::LoadParamMemI8;
2762      break;
2763    case MVT::i8:
2764      Opc = NVPTX::LoadParamMemI8;
2765      break;
2766    case MVT::i16:
2767      Opc = NVPTX::LoadParamMemI16;
2768      break;
2769    case MVT::i32:
2770      Opc = NVPTX::LoadParamMemI32;
2771      break;
2772    case MVT::i64:
2773      Opc = NVPTX::LoadParamMemI64;
2774      break;
2775    case MVT::f32:
2776      Opc = NVPTX::LoadParamMemF32;
2777      break;
2778    case MVT::f64:
2779      Opc = NVPTX::LoadParamMemF64;
2780      break;
2781    }
2782    break;
2783  case 2:
2784    switch (MemVT.getSimpleVT().SimpleTy) {
2785    default:
2786      return false;
2787    case MVT::i1:
2788      Opc = NVPTX::LoadParamMemV2I8;
2789      break;
2790    case MVT::i8:
2791      Opc = NVPTX::LoadParamMemV2I8;
2792      break;
2793    case MVT::i16:
2794      Opc = NVPTX::LoadParamMemV2I16;
2795      break;
2796    case MVT::i32:
2797      Opc = NVPTX::LoadParamMemV2I32;
2798      break;
2799    case MVT::i64:
2800      Opc = NVPTX::LoadParamMemV2I64;
2801      break;
2802    case MVT::f32:
2803      Opc = NVPTX::LoadParamMemV2F32;
2804      break;
2805    case MVT::f64:
2806      Opc = NVPTX::LoadParamMemV2F64;
2807      break;
2808    }
2809    break;
2810  case 4:
2811    switch (MemVT.getSimpleVT().SimpleTy) {
2812    default:
2813      return false;
2814    case MVT::i1:
2815      Opc = NVPTX::LoadParamMemV4I8;
2816      break;
2817    case MVT::i8:
2818      Opc = NVPTX::LoadParamMemV4I8;
2819      break;
2820    case MVT::i16:
2821      Opc = NVPTX::LoadParamMemV4I16;
2822      break;
2823    case MVT::i32:
2824      Opc = NVPTX::LoadParamMemV4I32;
2825      break;
2826    case MVT::f32:
2827      Opc = NVPTX::LoadParamMemV4F32;
2828      break;
2829    }
2830    break;
2831  }
2832
2833  SDVTList VTs;
2834  if (VecSize == 1) {
2835    VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2836  } else if (VecSize == 2) {
2837    VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2838  } else {
2839    EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2840    VTs = CurDAG->getVTList(EVTs);
2841  }
2842
2843  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2844
2845  SmallVector<SDValue, 2> Ops;
2846  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2847  Ops.push_back(Chain);
2848  Ops.push_back(Flag);
2849
2850  ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, VTs, Ops));
2851  return true;
2852}
2853
2854bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2855  SDLoc DL(N);
2856  SDValue Chain = N->getOperand(0);
2857  SDValue Offset = N->getOperand(1);
2858  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2859  MemSDNode *Mem = cast<MemSDNode>(N);
2860
2861  // How many elements do we have?
2862  unsigned NumElts = 1;
2863  switch (N->getOpcode()) {
2864  default:
2865    return false;
2866  case NVPTXISD::StoreRetval:
2867    NumElts = 1;
2868    break;
2869  case NVPTXISD::StoreRetvalV2:
2870    NumElts = 2;
2871    break;
2872  case NVPTXISD::StoreRetvalV4:
2873    NumElts = 4;
2874    break;
2875  }
2876
2877  // Build vector of operands
2878  SmallVector<SDValue, 6> Ops;
2879  for (unsigned i = 0; i < NumElts; ++i)
2880    Ops.push_back(N->getOperand(i + 2));
2881  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2882  Ops.push_back(Chain);
2883
2884  // Determine target opcode
2885  // If we have an i1, use an 8-bit store. The lowering code in
2886  // NVPTXISelLowering will have already emitted an upcast.
2887  unsigned Opcode = 0;
2888  switch (NumElts) {
2889  default:
2890    return false;
2891  case 1:
2892    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2893    default:
2894      return false;
2895    case MVT::i1:
2896      Opcode = NVPTX::StoreRetvalI8;
2897      break;
2898    case MVT::i8:
2899      Opcode = NVPTX::StoreRetvalI8;
2900      break;
2901    case MVT::i16:
2902      Opcode = NVPTX::StoreRetvalI16;
2903      break;
2904    case MVT::i32:
2905      Opcode = NVPTX::StoreRetvalI32;
2906      break;
2907    case MVT::i64:
2908      Opcode = NVPTX::StoreRetvalI64;
2909      break;
2910    case MVT::f32:
2911      Opcode = NVPTX::StoreRetvalF32;
2912      break;
2913    case MVT::f64:
2914      Opcode = NVPTX::StoreRetvalF64;
2915      break;
2916    }
2917    break;
2918  case 2:
2919    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2920    default:
2921      return false;
2922    case MVT::i1:
2923      Opcode = NVPTX::StoreRetvalV2I8;
2924      break;
2925    case MVT::i8:
2926      Opcode = NVPTX::StoreRetvalV2I8;
2927      break;
2928    case MVT::i16:
2929      Opcode = NVPTX::StoreRetvalV2I16;
2930      break;
2931    case MVT::i32:
2932      Opcode = NVPTX::StoreRetvalV2I32;
2933      break;
2934    case MVT::i64:
2935      Opcode = NVPTX::StoreRetvalV2I64;
2936      break;
2937    case MVT::f32:
2938      Opcode = NVPTX::StoreRetvalV2F32;
2939      break;
2940    case MVT::f64:
2941      Opcode = NVPTX::StoreRetvalV2F64;
2942      break;
2943    }
2944    break;
2945  case 4:
2946    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2947    default:
2948      return false;
2949    case MVT::i1:
2950      Opcode = NVPTX::StoreRetvalV4I8;
2951      break;
2952    case MVT::i8:
2953      Opcode = NVPTX::StoreRetvalV4I8;
2954      break;
2955    case MVT::i16:
2956      Opcode = NVPTX::StoreRetvalV4I16;
2957      break;
2958    case MVT::i32:
2959      Opcode = NVPTX::StoreRetvalV4I32;
2960      break;
2961    case MVT::f32:
2962      Opcode = NVPTX::StoreRetvalV4F32;
2963      break;
2964    }
2965    break;
2966  }
2967
2968  SDNode *Ret =
2969      CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2970  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2971  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2972  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2973
2974  ReplaceNode(N, Ret);
2975  return true;
2976}
2977
2978bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2979  SDLoc DL(N);
2980  SDValue Chain = N->getOperand(0);
2981  SDValue Param = N->getOperand(1);
2982  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2983  SDValue Offset = N->getOperand(2);
2984  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2985  MemSDNode *Mem = cast<MemSDNode>(N);
2986  SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2987
2988  // How many elements do we have?
2989  unsigned NumElts = 1;
2990  switch (N->getOpcode()) {
2991  default:
2992    return false;
2993  case NVPTXISD::StoreParamU32:
2994  case NVPTXISD::StoreParamS32:
2995  case NVPTXISD::StoreParam:
2996    NumElts = 1;
2997    break;
2998  case NVPTXISD::StoreParamV2:
2999    NumElts = 2;
3000    break;
3001  case NVPTXISD::StoreParamV4:
3002    NumElts = 4;
3003    break;
3004  }
3005
3006  // Build vector of operands
3007  SmallVector<SDValue, 8> Ops;
3008  for (unsigned i = 0; i < NumElts; ++i)
3009    Ops.push_back(N->getOperand(i + 3));
3010  Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
3011  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
3012  Ops.push_back(Chain);
3013  Ops.push_back(Flag);
3014
3015  // Determine target opcode
3016  // If we have an i1, use an 8-bit store. The lowering code in
3017  // NVPTXISelLowering will have already emitted an upcast.
3018  unsigned Opcode = 0;
3019  switch (N->getOpcode()) {
3020  default:
3021    switch (NumElts) {
3022    default:
3023      return false;
3024    case 1:
3025      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3026      default:
3027        return false;
3028      case MVT::i1:
3029        Opcode = NVPTX::StoreParamI8;
3030        break;
3031      case MVT::i8:
3032        Opcode = NVPTX::StoreParamI8;
3033        break;
3034      case MVT::i16:
3035        Opcode = NVPTX::StoreParamI16;
3036        break;
3037      case MVT::i32:
3038        Opcode = NVPTX::StoreParamI32;
3039        break;
3040      case MVT::i64:
3041        Opcode = NVPTX::StoreParamI64;
3042        break;
3043      case MVT::f32:
3044        Opcode = NVPTX::StoreParamF32;
3045        break;
3046      case MVT::f64:
3047        Opcode = NVPTX::StoreParamF64;
3048        break;
3049      }
3050      break;
3051    case 2:
3052      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3053      default:
3054        return false;
3055      case MVT::i1:
3056        Opcode = NVPTX::StoreParamV2I8;
3057        break;
3058      case MVT::i8:
3059        Opcode = NVPTX::StoreParamV2I8;
3060        break;
3061      case MVT::i16:
3062        Opcode = NVPTX::StoreParamV2I16;
3063        break;
3064      case MVT::i32:
3065        Opcode = NVPTX::StoreParamV2I32;
3066        break;
3067      case MVT::i64:
3068        Opcode = NVPTX::StoreParamV2I64;
3069        break;
3070      case MVT::f32:
3071        Opcode = NVPTX::StoreParamV2F32;
3072        break;
3073      case MVT::f64:
3074        Opcode = NVPTX::StoreParamV2F64;
3075        break;
3076      }
3077      break;
3078    case 4:
3079      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3080      default:
3081        return false;
3082      case MVT::i1:
3083        Opcode = NVPTX::StoreParamV4I8;
3084        break;
3085      case MVT::i8:
3086        Opcode = NVPTX::StoreParamV4I8;
3087        break;
3088      case MVT::i16:
3089        Opcode = NVPTX::StoreParamV4I16;
3090        break;
3091      case MVT::i32:
3092        Opcode = NVPTX::StoreParamV4I32;
3093        break;
3094      case MVT::f32:
3095        Opcode = NVPTX::StoreParamV4F32;
3096        break;
3097      }
3098      break;
3099    }
3100    break;
3101  // Special case: if we have a sign-extend/zero-extend node, insert the
3102  // conversion instruction first, and use that as the value operand to
3103  // the selected StoreParam node.
3104  case NVPTXISD::StoreParamU32: {
3105    Opcode = NVPTX::StoreParamI32;
3106    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3107                                                MVT::i32);
3108    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
3109                                         MVT::i32, Ops[0], CvtNone);
3110    Ops[0] = SDValue(Cvt, 0);
3111    break;
3112  }
3113  case NVPTXISD::StoreParamS32: {
3114    Opcode = NVPTX::StoreParamI32;
3115    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3116                                                MVT::i32);
3117    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3118                                         MVT::i32, Ops[0], CvtNone);
3119    Ops[0] = SDValue(Cvt, 0);
3120    break;
3121  }
3122  }
3123
3124  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3125  SDNode *Ret =
3126      CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
3127  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3128  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3129  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3130
3131  ReplaceNode(N, Ret);
3132  return true;
3133}
3134
3135bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
3136  SDValue Chain = N->getOperand(0);
3137  unsigned Opc = 0;
3138  SmallVector<SDValue, 8> Ops;
3139
3140  switch (N->getOpcode()) {
3141  default: return false;
3142  case NVPTXISD::Tex1DFloatS32:
3143    Opc = NVPTX::TEX_1D_F32_S32;
3144    break;
3145  case NVPTXISD::Tex1DFloatFloat:
3146    Opc = NVPTX::TEX_1D_F32_F32;
3147    break;
3148  case NVPTXISD::Tex1DFloatFloatLevel:
3149    Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3150    break;
3151  case NVPTXISD::Tex1DFloatFloatGrad:
3152    Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3153    break;
3154  case NVPTXISD::Tex1DS32S32:
3155    Opc = NVPTX::TEX_1D_S32_S32;
3156    break;
3157  case NVPTXISD::Tex1DS32Float:
3158    Opc = NVPTX::TEX_1D_S32_F32;
3159    break;
3160  case NVPTXISD::Tex1DS32FloatLevel:
3161    Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3162    break;
3163  case NVPTXISD::Tex1DS32FloatGrad:
3164    Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3165    break;
3166  case NVPTXISD::Tex1DU32S32:
3167    Opc = NVPTX::TEX_1D_U32_S32;
3168    break;
3169  case NVPTXISD::Tex1DU32Float:
3170    Opc = NVPTX::TEX_1D_U32_F32;
3171    break;
3172  case NVPTXISD::Tex1DU32FloatLevel:
3173    Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3174    break;
3175  case NVPTXISD::Tex1DU32FloatGrad:
3176    Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3177    break;
3178  case NVPTXISD::Tex1DArrayFloatS32:
3179    Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3180    break;
3181  case NVPTXISD::Tex1DArrayFloatFloat:
3182    Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3183    break;
3184  case NVPTXISD::Tex1DArrayFloatFloatLevel:
3185    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3186    break;
3187  case NVPTXISD::Tex1DArrayFloatFloatGrad:
3188    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3189    break;
3190  case NVPTXISD::Tex1DArrayS32S32:
3191    Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3192    break;
3193  case NVPTXISD::Tex1DArrayS32Float:
3194    Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3195    break;
3196  case NVPTXISD::Tex1DArrayS32FloatLevel:
3197    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3198    break;
3199  case NVPTXISD::Tex1DArrayS32FloatGrad:
3200    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3201    break;
3202  case NVPTXISD::Tex1DArrayU32S32:
3203    Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3204    break;
3205  case NVPTXISD::Tex1DArrayU32Float:
3206    Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3207    break;
3208  case NVPTXISD::Tex1DArrayU32FloatLevel:
3209    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3210    break;
3211  case NVPTXISD::Tex1DArrayU32FloatGrad:
3212    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3213    break;
3214  case NVPTXISD::Tex2DFloatS32:
3215    Opc = NVPTX::TEX_2D_F32_S32;
3216    break;
3217  case NVPTXISD::Tex2DFloatFloat:
3218    Opc = NVPTX::TEX_2D_F32_F32;
3219    break;
3220  case NVPTXISD::Tex2DFloatFloatLevel:
3221    Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3222    break;
3223  case NVPTXISD::Tex2DFloatFloatGrad:
3224    Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3225    break;
3226  case NVPTXISD::Tex2DS32S32:
3227    Opc = NVPTX::TEX_2D_S32_S32;
3228    break;
3229  case NVPTXISD::Tex2DS32Float:
3230    Opc = NVPTX::TEX_2D_S32_F32;
3231    break;
3232  case NVPTXISD::Tex2DS32FloatLevel:
3233    Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3234    break;
3235  case NVPTXISD::Tex2DS32FloatGrad:
3236    Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3237    break;
3238  case NVPTXISD::Tex2DU32S32:
3239    Opc = NVPTX::TEX_2D_U32_S32;
3240    break;
3241  case NVPTXISD::Tex2DU32Float:
3242    Opc = NVPTX::TEX_2D_U32_F32;
3243    break;
3244  case NVPTXISD::Tex2DU32FloatLevel:
3245    Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3246    break;
3247  case NVPTXISD::Tex2DU32FloatGrad:
3248    Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3249    break;
3250  case NVPTXISD::Tex2DArrayFloatS32:
3251    Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3252    break;
3253  case NVPTXISD::Tex2DArrayFloatFloat:
3254    Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3255    break;
3256  case NVPTXISD::Tex2DArrayFloatFloatLevel:
3257    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3258    break;
3259  case NVPTXISD::Tex2DArrayFloatFloatGrad:
3260    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3261    break;
3262  case NVPTXISD::Tex2DArrayS32S32:
3263    Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3264    break;
3265  case NVPTXISD::Tex2DArrayS32Float:
3266    Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3267    break;
3268  case NVPTXISD::Tex2DArrayS32FloatLevel:
3269    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3270    break;
3271  case NVPTXISD::Tex2DArrayS32FloatGrad:
3272    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3273    break;
3274  case NVPTXISD::Tex2DArrayU32S32:
3275    Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3276    break;
3277  case NVPTXISD::Tex2DArrayU32Float:
3278    Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3279    break;
3280  case NVPTXISD::Tex2DArrayU32FloatLevel:
3281    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3282    break;
3283  case NVPTXISD::Tex2DArrayU32FloatGrad:
3284    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3285    break;
3286  case NVPTXISD::Tex3DFloatS32:
3287    Opc = NVPTX::TEX_3D_F32_S32;
3288    break;
3289  case NVPTXISD::Tex3DFloatFloat:
3290    Opc = NVPTX::TEX_3D_F32_F32;
3291    break;
3292  case NVPTXISD::Tex3DFloatFloatLevel:
3293    Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3294    break;
3295  case NVPTXISD::Tex3DFloatFloatGrad:
3296    Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3297    break;
3298  case NVPTXISD::Tex3DS32S32:
3299    Opc = NVPTX::TEX_3D_S32_S32;
3300    break;
3301  case NVPTXISD::Tex3DS32Float:
3302    Opc = NVPTX::TEX_3D_S32_F32;
3303    break;
3304  case NVPTXISD::Tex3DS32FloatLevel:
3305    Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3306    break;
3307  case NVPTXISD::Tex3DS32FloatGrad:
3308    Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3309    break;
3310  case NVPTXISD::Tex3DU32S32:
3311    Opc = NVPTX::TEX_3D_U32_S32;
3312    break;
3313  case NVPTXISD::Tex3DU32Float:
3314    Opc = NVPTX::TEX_3D_U32_F32;
3315    break;
3316  case NVPTXISD::Tex3DU32FloatLevel:
3317    Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3318    break;
3319  case NVPTXISD::Tex3DU32FloatGrad:
3320    Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3321    break;
3322  case NVPTXISD::TexCubeFloatFloat:
3323    Opc = NVPTX::TEX_CUBE_F32_F32;
3324    break;
3325  case NVPTXISD::TexCubeFloatFloatLevel:
3326    Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3327    break;
3328  case NVPTXISD::TexCubeS32Float:
3329    Opc = NVPTX::TEX_CUBE_S32_F32;
3330    break;
3331  case NVPTXISD::TexCubeS32FloatLevel:
3332    Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3333    break;
3334  case NVPTXISD::TexCubeU32Float:
3335    Opc = NVPTX::TEX_CUBE_U32_F32;
3336    break;
3337  case NVPTXISD::TexCubeU32FloatLevel:
3338    Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3339    break;
3340  case NVPTXISD::TexCubeArrayFloatFloat:
3341    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3342    break;
3343  case NVPTXISD::TexCubeArrayFloatFloatLevel:
3344    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3345    break;
3346  case NVPTXISD::TexCubeArrayS32Float:
3347    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3348    break;
3349  case NVPTXISD::TexCubeArrayS32FloatLevel:
3350    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3351    break;
3352  case NVPTXISD::TexCubeArrayU32Float:
3353    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3354    break;
3355  case NVPTXISD::TexCubeArrayU32FloatLevel:
3356    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3357    break;
3358  case NVPTXISD::Tld4R2DFloatFloat:
3359    Opc = NVPTX::TLD4_R_2D_F32_F32;
3360    break;
3361  case NVPTXISD::Tld4G2DFloatFloat:
3362    Opc = NVPTX::TLD4_G_2D_F32_F32;
3363    break;
3364  case NVPTXISD::Tld4B2DFloatFloat:
3365    Opc = NVPTX::TLD4_B_2D_F32_F32;
3366    break;
3367  case NVPTXISD::Tld4A2DFloatFloat:
3368    Opc = NVPTX::TLD4_A_2D_F32_F32;
3369    break;
3370  case NVPTXISD::Tld4R2DS64Float:
3371    Opc = NVPTX::TLD4_R_2D_S32_F32;
3372    break;
3373  case NVPTXISD::Tld4G2DS64Float:
3374    Opc = NVPTX::TLD4_G_2D_S32_F32;
3375    break;
3376  case NVPTXISD::Tld4B2DS64Float:
3377    Opc = NVPTX::TLD4_B_2D_S32_F32;
3378    break;
3379  case NVPTXISD::Tld4A2DS64Float:
3380    Opc = NVPTX::TLD4_A_2D_S32_F32;
3381    break;
3382  case NVPTXISD::Tld4R2DU64Float:
3383    Opc = NVPTX::TLD4_R_2D_U32_F32;
3384    break;
3385  case NVPTXISD::Tld4G2DU64Float:
3386    Opc = NVPTX::TLD4_G_2D_U32_F32;
3387    break;
3388  case NVPTXISD::Tld4B2DU64Float:
3389    Opc = NVPTX::TLD4_B_2D_U32_F32;
3390    break;
3391  case NVPTXISD::Tld4A2DU64Float:
3392    Opc = NVPTX::TLD4_A_2D_U32_F32;
3393    break;
3394  case NVPTXISD::TexUnified1DFloatS32:
3395    Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3396    break;
3397  case NVPTXISD::TexUnified1DFloatFloat:
3398    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3399    break;
3400  case NVPTXISD::TexUnified1DFloatFloatLevel:
3401    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3402    break;
3403  case NVPTXISD::TexUnified1DFloatFloatGrad:
3404    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3405    break;
3406  case NVPTXISD::TexUnified1DS32S32:
3407    Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3408    break;
3409  case NVPTXISD::TexUnified1DS32Float:
3410    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3411    break;
3412  case NVPTXISD::TexUnified1DS32FloatLevel:
3413    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3414    break;
3415  case NVPTXISD::TexUnified1DS32FloatGrad:
3416    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3417    break;
3418  case NVPTXISD::TexUnified1DU32S32:
3419    Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3420    break;
3421  case NVPTXISD::TexUnified1DU32Float:
3422    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3423    break;
3424  case NVPTXISD::TexUnified1DU32FloatLevel:
3425    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3426    break;
3427  case NVPTXISD::TexUnified1DU32FloatGrad:
3428    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3429    break;
3430  case NVPTXISD::TexUnified1DArrayFloatS32:
3431    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3432    break;
3433  case NVPTXISD::TexUnified1DArrayFloatFloat:
3434    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3435    break;
3436  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3437    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3438    break;
3439  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3440    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3441    break;
3442  case NVPTXISD::TexUnified1DArrayS32S32:
3443    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3444    break;
3445  case NVPTXISD::TexUnified1DArrayS32Float:
3446    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3447    break;
3448  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3449    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3450    break;
3451  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3452    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3453    break;
3454  case NVPTXISD::TexUnified1DArrayU32S32:
3455    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3456    break;
3457  case NVPTXISD::TexUnified1DArrayU32Float:
3458    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3459    break;
3460  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3461    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3462    break;
3463  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3464    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3465    break;
3466  case NVPTXISD::TexUnified2DFloatS32:
3467    Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3468    break;
3469  case NVPTXISD::TexUnified2DFloatFloat:
3470    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3471    break;
3472  case NVPTXISD::TexUnified2DFloatFloatLevel:
3473    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3474    break;
3475  case NVPTXISD::TexUnified2DFloatFloatGrad:
3476    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3477    break;
3478  case NVPTXISD::TexUnified2DS32S32:
3479    Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3480    break;
3481  case NVPTXISD::TexUnified2DS32Float:
3482    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3483    break;
3484  case NVPTXISD::TexUnified2DS32FloatLevel:
3485    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3486    break;
3487  case NVPTXISD::TexUnified2DS32FloatGrad:
3488    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3489    break;
3490  case NVPTXISD::TexUnified2DU32S32:
3491    Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3492    break;
3493  case NVPTXISD::TexUnified2DU32Float:
3494    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3495    break;
3496  case NVPTXISD::TexUnified2DU32FloatLevel:
3497    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3498    break;
3499  case NVPTXISD::TexUnified2DU32FloatGrad:
3500    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3501    break;
3502  case NVPTXISD::TexUnified2DArrayFloatS32:
3503    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3504    break;
3505  case NVPTXISD::TexUnified2DArrayFloatFloat:
3506    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3507    break;
3508  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3509    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3510    break;
3511  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3512    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3513    break;
3514  case NVPTXISD::TexUnified2DArrayS32S32:
3515    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3516    break;
3517  case NVPTXISD::TexUnified2DArrayS32Float:
3518    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3519    break;
3520  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3521    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3522    break;
3523  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3524    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3525    break;
3526  case NVPTXISD::TexUnified2DArrayU32S32:
3527    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3528    break;
3529  case NVPTXISD::TexUnified2DArrayU32Float:
3530    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3531    break;
3532  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3533    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3534    break;
3535  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3536    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3537    break;
3538  case NVPTXISD::TexUnified3DFloatS32:
3539    Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3540    break;
3541  case NVPTXISD::TexUnified3DFloatFloat:
3542    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3543    break;
3544  case NVPTXISD::TexUnified3DFloatFloatLevel:
3545    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3546    break;
3547  case NVPTXISD::TexUnified3DFloatFloatGrad:
3548    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3549    break;
3550  case NVPTXISD::TexUnified3DS32S32:
3551    Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3552    break;
3553  case NVPTXISD::TexUnified3DS32Float:
3554    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3555    break;
3556  case NVPTXISD::TexUnified3DS32FloatLevel:
3557    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3558    break;
3559  case NVPTXISD::TexUnified3DS32FloatGrad:
3560    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3561    break;
3562  case NVPTXISD::TexUnified3DU32S32:
3563    Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3564    break;
3565  case NVPTXISD::TexUnified3DU32Float:
3566    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3567    break;
3568  case NVPTXISD::TexUnified3DU32FloatLevel:
3569    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3570    break;
3571  case NVPTXISD::TexUnified3DU32FloatGrad:
3572    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3573    break;
3574  case NVPTXISD::TexUnifiedCubeFloatFloat:
3575    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3576    break;
3577  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3578    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3579    break;
3580  case NVPTXISD::TexUnifiedCubeS32Float:
3581    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3582    break;
3583  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3584    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3585    break;
3586  case NVPTXISD::TexUnifiedCubeU32Float:
3587    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3588    break;
3589  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3590    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3591    break;
3592  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3593    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3594    break;
3595  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3596    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3597    break;
3598  case NVPTXISD::TexUnifiedCubeArrayS32Float:
3599    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3600    break;
3601  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3602    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3603    break;
3604  case NVPTXISD::TexUnifiedCubeArrayU32Float:
3605    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3606    break;
3607  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3608    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3609    break;
3610  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3611    Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3612    break;
3613  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3614    Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3615    break;
3616  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3617    Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3618    break;
3619  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3620    Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3621    break;
3622  case NVPTXISD::Tld4UnifiedR2DS64Float:
3623    Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3624    break;
3625  case NVPTXISD::Tld4UnifiedG2DS64Float:
3626    Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3627    break;
3628  case NVPTXISD::Tld4UnifiedB2DS64Float:
3629    Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3630    break;
3631  case NVPTXISD::Tld4UnifiedA2DS64Float:
3632    Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3633    break;
3634  case NVPTXISD::Tld4UnifiedR2DU64Float:
3635    Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3636    break;
3637  case NVPTXISD::Tld4UnifiedG2DU64Float:
3638    Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3639    break;
3640  case NVPTXISD::Tld4UnifiedB2DU64Float:
3641    Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3642    break;
3643  case NVPTXISD::Tld4UnifiedA2DU64Float:
3644    Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3645    break;
3646  }
3647
3648  // Copy over operands
3649  for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3650    Ops.push_back(N->getOperand(i));
3651  }
3652
3653  Ops.push_back(Chain);
3654  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3655  return true;
3656}
3657
3658bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
3659  SDValue Chain = N->getOperand(0);
3660  SDValue TexHandle = N->getOperand(1);
3661  unsigned Opc = 0;
3662  SmallVector<SDValue, 8> Ops;
3663  switch (N->getOpcode()) {
3664  default: return false;
3665  case NVPTXISD::Suld1DI8Clamp:
3666    Opc = NVPTX::SULD_1D_I8_CLAMP;
3667    Ops.push_back(TexHandle);
3668    Ops.push_back(N->getOperand(2));
3669    Ops.push_back(Chain);
3670    break;
3671  case NVPTXISD::Suld1DI16Clamp:
3672    Opc = NVPTX::SULD_1D_I16_CLAMP;
3673    Ops.push_back(TexHandle);
3674    Ops.push_back(N->getOperand(2));
3675    Ops.push_back(Chain);
3676    break;
3677  case NVPTXISD::Suld1DI32Clamp:
3678    Opc = NVPTX::SULD_1D_I32_CLAMP;
3679    Ops.push_back(TexHandle);
3680    Ops.push_back(N->getOperand(2));
3681    Ops.push_back(Chain);
3682    break;
3683  case NVPTXISD::Suld1DI64Clamp:
3684    Opc = NVPTX::SULD_1D_I64_CLAMP;
3685    Ops.push_back(TexHandle);
3686    Ops.push_back(N->getOperand(2));
3687    Ops.push_back(Chain);
3688    break;
3689  case NVPTXISD::Suld1DV2I8Clamp:
3690    Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3691    Ops.push_back(TexHandle);
3692    Ops.push_back(N->getOperand(2));
3693    Ops.push_back(Chain);
3694    break;
3695  case NVPTXISD::Suld1DV2I16Clamp:
3696    Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3697    Ops.push_back(TexHandle);
3698    Ops.push_back(N->getOperand(2));
3699    Ops.push_back(Chain);
3700    break;
3701  case NVPTXISD::Suld1DV2I32Clamp:
3702    Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3703    Ops.push_back(TexHandle);
3704    Ops.push_back(N->getOperand(2));
3705    Ops.push_back(Chain);
3706    break;
3707  case NVPTXISD::Suld1DV2I64Clamp:
3708    Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3709    Ops.push_back(TexHandle);
3710    Ops.push_back(N->getOperand(2));
3711    Ops.push_back(Chain);
3712    break;
3713  case NVPTXISD::Suld1DV4I8Clamp:
3714    Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3715    Ops.push_back(TexHandle);
3716    Ops.push_back(N->getOperand(2));
3717    Ops.push_back(Chain);
3718    break;
3719  case NVPTXISD::Suld1DV4I16Clamp:
3720    Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3721    Ops.push_back(TexHandle);
3722    Ops.push_back(N->getOperand(2));
3723    Ops.push_back(Chain);
3724    break;
3725  case NVPTXISD::Suld1DV4I32Clamp:
3726    Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3727    Ops.push_back(TexHandle);
3728    Ops.push_back(N->getOperand(2));
3729    Ops.push_back(Chain);
3730    break;
3731  case NVPTXISD::Suld1DArrayI8Clamp:
3732    Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3733    Ops.push_back(TexHandle);
3734    Ops.push_back(N->getOperand(2));
3735    Ops.push_back(N->getOperand(3));
3736    Ops.push_back(Chain);
3737    break;
3738  case NVPTXISD::Suld1DArrayI16Clamp:
3739    Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3740    Ops.push_back(TexHandle);
3741    Ops.push_back(N->getOperand(2));
3742    Ops.push_back(N->getOperand(3));
3743    Ops.push_back(Chain);
3744    break;
3745  case NVPTXISD::Suld1DArrayI32Clamp:
3746    Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3747    Ops.push_back(TexHandle);
3748    Ops.push_back(N->getOperand(2));
3749    Ops.push_back(N->getOperand(3));
3750    Ops.push_back(Chain);
3751    break;
3752  case NVPTXISD::Suld1DArrayI64Clamp:
3753    Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3754    Ops.push_back(TexHandle);
3755    Ops.push_back(N->getOperand(2));
3756    Ops.push_back(N->getOperand(3));
3757    Ops.push_back(Chain);
3758    break;
3759  case NVPTXISD::Suld1DArrayV2I8Clamp:
3760    Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3761    Ops.push_back(TexHandle);
3762    Ops.push_back(N->getOperand(2));
3763    Ops.push_back(N->getOperand(3));
3764    Ops.push_back(Chain);
3765    break;
3766  case NVPTXISD::Suld1DArrayV2I16Clamp:
3767    Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3768    Ops.push_back(TexHandle);
3769    Ops.push_back(N->getOperand(2));
3770    Ops.push_back(N->getOperand(3));
3771    Ops.push_back(Chain);
3772    break;
3773  case NVPTXISD::Suld1DArrayV2I32Clamp:
3774    Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3775    Ops.push_back(TexHandle);
3776    Ops.push_back(N->getOperand(2));
3777    Ops.push_back(N->getOperand(3));
3778    Ops.push_back(Chain);
3779    break;
3780  case NVPTXISD::Suld1DArrayV2I64Clamp:
3781    Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3782    Ops.push_back(TexHandle);
3783    Ops.push_back(N->getOperand(2));
3784    Ops.push_back(N->getOperand(3));
3785    Ops.push_back(Chain);
3786    break;
3787  case NVPTXISD::Suld1DArrayV4I8Clamp:
3788    Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3789    Ops.push_back(TexHandle);
3790    Ops.push_back(N->getOperand(2));
3791    Ops.push_back(N->getOperand(3));
3792    Ops.push_back(Chain);
3793    break;
3794  case NVPTXISD::Suld1DArrayV4I16Clamp:
3795    Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3796    Ops.push_back(TexHandle);
3797    Ops.push_back(N->getOperand(2));
3798    Ops.push_back(N->getOperand(3));
3799    Ops.push_back(Chain);
3800    break;
3801  case NVPTXISD::Suld1DArrayV4I32Clamp:
3802    Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3803    Ops.push_back(TexHandle);
3804    Ops.push_back(N->getOperand(2));
3805    Ops.push_back(N->getOperand(3));
3806    Ops.push_back(Chain);
3807    break;
3808  case NVPTXISD::Suld2DI8Clamp:
3809    Opc = NVPTX::SULD_2D_I8_CLAMP;
3810    Ops.push_back(TexHandle);
3811    Ops.push_back(N->getOperand(2));
3812    Ops.push_back(N->getOperand(3));
3813    Ops.push_back(Chain);
3814    break;
3815  case NVPTXISD::Suld2DI16Clamp:
3816    Opc = NVPTX::SULD_2D_I16_CLAMP;
3817    Ops.push_back(TexHandle);
3818    Ops.push_back(N->getOperand(2));
3819    Ops.push_back(N->getOperand(3));
3820    Ops.push_back(Chain);
3821    break;
3822  case NVPTXISD::Suld2DI32Clamp:
3823    Opc = NVPTX::SULD_2D_I32_CLAMP;
3824    Ops.push_back(TexHandle);
3825    Ops.push_back(N->getOperand(2));
3826    Ops.push_back(N->getOperand(3));
3827    Ops.push_back(Chain);
3828    break;
3829  case NVPTXISD::Suld2DI64Clamp:
3830    Opc = NVPTX::SULD_2D_I64_CLAMP;
3831    Ops.push_back(TexHandle);
3832    Ops.push_back(N->getOperand(2));
3833    Ops.push_back(N->getOperand(3));
3834    Ops.push_back(Chain);
3835    break;
3836  case NVPTXISD::Suld2DV2I8Clamp:
3837    Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3838    Ops.push_back(TexHandle);
3839    Ops.push_back(N->getOperand(2));
3840    Ops.push_back(N->getOperand(3));
3841    Ops.push_back(Chain);
3842    break;
3843  case NVPTXISD::Suld2DV2I16Clamp:
3844    Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3845    Ops.push_back(TexHandle);
3846    Ops.push_back(N->getOperand(2));
3847    Ops.push_back(N->getOperand(3));
3848    Ops.push_back(Chain);
3849    break;
3850  case NVPTXISD::Suld2DV2I32Clamp:
3851    Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3852    Ops.push_back(TexHandle);
3853    Ops.push_back(N->getOperand(2));
3854    Ops.push_back(N->getOperand(3));
3855    Ops.push_back(Chain);
3856    break;
3857  case NVPTXISD::Suld2DV2I64Clamp:
3858    Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3859    Ops.push_back(TexHandle);
3860    Ops.push_back(N->getOperand(2));
3861    Ops.push_back(N->getOperand(3));
3862    Ops.push_back(Chain);
3863    break;
3864  case NVPTXISD::Suld2DV4I8Clamp:
3865    Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3866    Ops.push_back(TexHandle);
3867    Ops.push_back(N->getOperand(2));
3868    Ops.push_back(N->getOperand(3));
3869    Ops.push_back(Chain);
3870    break;
3871  case NVPTXISD::Suld2DV4I16Clamp:
3872    Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3873    Ops.push_back(TexHandle);
3874    Ops.push_back(N->getOperand(2));
3875    Ops.push_back(N->getOperand(3));
3876    Ops.push_back(Chain);
3877    break;
3878  case NVPTXISD::Suld2DV4I32Clamp:
3879    Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3880    Ops.push_back(TexHandle);
3881    Ops.push_back(N->getOperand(2));
3882    Ops.push_back(N->getOperand(3));
3883    Ops.push_back(Chain);
3884    break;
3885  case NVPTXISD::Suld2DArrayI8Clamp:
3886    Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3887    Ops.push_back(TexHandle);
3888    Ops.push_back(N->getOperand(2));
3889    Ops.push_back(N->getOperand(3));
3890    Ops.push_back(N->getOperand(4));
3891    Ops.push_back(Chain);
3892    break;
3893  case NVPTXISD::Suld2DArrayI16Clamp:
3894    Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3895    Ops.push_back(TexHandle);
3896    Ops.push_back(N->getOperand(2));
3897    Ops.push_back(N->getOperand(3));
3898    Ops.push_back(N->getOperand(4));
3899    Ops.push_back(Chain);
3900    break;
3901  case NVPTXISD::Suld2DArrayI32Clamp:
3902    Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3903    Ops.push_back(TexHandle);
3904    Ops.push_back(N->getOperand(2));
3905    Ops.push_back(N->getOperand(3));
3906    Ops.push_back(N->getOperand(4));
3907    Ops.push_back(Chain);
3908    break;
3909  case NVPTXISD::Suld2DArrayI64Clamp:
3910    Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3911    Ops.push_back(TexHandle);
3912    Ops.push_back(N->getOperand(2));
3913    Ops.push_back(N->getOperand(3));
3914    Ops.push_back(N->getOperand(4));
3915    Ops.push_back(Chain);
3916    break;
3917  case NVPTXISD::Suld2DArrayV2I8Clamp:
3918    Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3919    Ops.push_back(TexHandle);
3920    Ops.push_back(N->getOperand(2));
3921    Ops.push_back(N->getOperand(3));
3922    Ops.push_back(N->getOperand(4));
3923    Ops.push_back(Chain);
3924    break;
3925  case NVPTXISD::Suld2DArrayV2I16Clamp:
3926    Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3927    Ops.push_back(TexHandle);
3928    Ops.push_back(N->getOperand(2));
3929    Ops.push_back(N->getOperand(3));
3930    Ops.push_back(N->getOperand(4));
3931    Ops.push_back(Chain);
3932    break;
3933  case NVPTXISD::Suld2DArrayV2I32Clamp:
3934    Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3935    Ops.push_back(TexHandle);
3936    Ops.push_back(N->getOperand(2));
3937    Ops.push_back(N->getOperand(3));
3938    Ops.push_back(N->getOperand(4));
3939    Ops.push_back(Chain);
3940    break;
3941  case NVPTXISD::Suld2DArrayV2I64Clamp:
3942    Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3943    Ops.push_back(TexHandle);
3944    Ops.push_back(N->getOperand(2));
3945    Ops.push_back(N->getOperand(3));
3946    Ops.push_back(N->getOperand(4));
3947    Ops.push_back(Chain);
3948    break;
3949  case NVPTXISD::Suld2DArrayV4I8Clamp:
3950    Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3951    Ops.push_back(TexHandle);
3952    Ops.push_back(N->getOperand(2));
3953    Ops.push_back(N->getOperand(3));
3954    Ops.push_back(N->getOperand(4));
3955    Ops.push_back(Chain);
3956    break;
3957  case NVPTXISD::Suld2DArrayV4I16Clamp:
3958    Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3959    Ops.push_back(TexHandle);
3960    Ops.push_back(N->getOperand(2));
3961    Ops.push_back(N->getOperand(3));
3962    Ops.push_back(N->getOperand(4));
3963    Ops.push_back(Chain);
3964    break;
3965  case NVPTXISD::Suld2DArrayV4I32Clamp:
3966    Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3967    Ops.push_back(TexHandle);
3968    Ops.push_back(N->getOperand(2));
3969    Ops.push_back(N->getOperand(3));
3970    Ops.push_back(N->getOperand(4));
3971    Ops.push_back(Chain);
3972    break;
3973  case NVPTXISD::Suld3DI8Clamp:
3974    Opc = NVPTX::SULD_3D_I8_CLAMP;
3975    Ops.push_back(TexHandle);
3976    Ops.push_back(N->getOperand(2));
3977    Ops.push_back(N->getOperand(3));
3978    Ops.push_back(N->getOperand(4));
3979    Ops.push_back(Chain);
3980    break;
3981  case NVPTXISD::Suld3DI16Clamp:
3982    Opc = NVPTX::SULD_3D_I16_CLAMP;
3983    Ops.push_back(TexHandle);
3984    Ops.push_back(N->getOperand(2));
3985    Ops.push_back(N->getOperand(3));
3986    Ops.push_back(N->getOperand(4));
3987    Ops.push_back(Chain);
3988    break;
3989  case NVPTXISD::Suld3DI32Clamp:
3990    Opc = NVPTX::SULD_3D_I32_CLAMP;
3991    Ops.push_back(TexHandle);
3992    Ops.push_back(N->getOperand(2));
3993    Ops.push_back(N->getOperand(3));
3994    Ops.push_back(N->getOperand(4));
3995    Ops.push_back(Chain);
3996    break;
3997  case NVPTXISD::Suld3DI64Clamp:
3998    Opc = NVPTX::SULD_3D_I64_CLAMP;
3999    Ops.push_back(TexHandle);
4000    Ops.push_back(N->getOperand(2));
4001    Ops.push_back(N->getOperand(3));
4002    Ops.push_back(N->getOperand(4));
4003    Ops.push_back(Chain);
4004    break;
4005  case NVPTXISD::Suld3DV2I8Clamp:
4006    Opc = NVPTX::SULD_3D_V2I8_CLAMP;
4007    Ops.push_back(TexHandle);
4008    Ops.push_back(N->getOperand(2));
4009    Ops.push_back(N->getOperand(3));
4010    Ops.push_back(N->getOperand(4));
4011    Ops.push_back(Chain);
4012    break;
4013  case NVPTXISD::Suld3DV2I16Clamp:
4014    Opc = NVPTX::SULD_3D_V2I16_CLAMP;
4015    Ops.push_back(TexHandle);
4016    Ops.push_back(N->getOperand(2));
4017    Ops.push_back(N->getOperand(3));
4018    Ops.push_back(N->getOperand(4));
4019    Ops.push_back(Chain);
4020    break;
4021  case NVPTXISD::Suld3DV2I32Clamp:
4022    Opc = NVPTX::SULD_3D_V2I32_CLAMP;
4023    Ops.push_back(TexHandle);
4024    Ops.push_back(N->getOperand(2));
4025    Ops.push_back(N->getOperand(3));
4026    Ops.push_back(N->getOperand(4));
4027    Ops.push_back(Chain);
4028    break;
4029  case NVPTXISD::Suld3DV2I64Clamp:
4030    Opc = NVPTX::SULD_3D_V2I64_CLAMP;
4031    Ops.push_back(TexHandle);
4032    Ops.push_back(N->getOperand(2));
4033    Ops.push_back(N->getOperand(3));
4034    Ops.push_back(N->getOperand(4));
4035    Ops.push_back(Chain);
4036    break;
4037  case NVPTXISD::Suld3DV4I8Clamp:
4038    Opc = NVPTX::SULD_3D_V4I8_CLAMP;
4039    Ops.push_back(TexHandle);
4040    Ops.push_back(N->getOperand(2));
4041    Ops.push_back(N->getOperand(3));
4042    Ops.push_back(N->getOperand(4));
4043    Ops.push_back(Chain);
4044    break;
4045  case NVPTXISD::Suld3DV4I16Clamp:
4046    Opc = NVPTX::SULD_3D_V4I16_CLAMP;
4047    Ops.push_back(TexHandle);
4048    Ops.push_back(N->getOperand(2));
4049    Ops.push_back(N->getOperand(3));
4050    Ops.push_back(N->getOperand(4));
4051    Ops.push_back(Chain);
4052    break;
4053  case NVPTXISD::Suld3DV4I32Clamp:
4054    Opc = NVPTX::SULD_3D_V4I32_CLAMP;
4055    Ops.push_back(TexHandle);
4056    Ops.push_back(N->getOperand(2));
4057    Ops.push_back(N->getOperand(3));
4058    Ops.push_back(N->getOperand(4));
4059    Ops.push_back(Chain);
4060    break;
4061  case NVPTXISD::Suld1DI8Trap:
4062    Opc = NVPTX::SULD_1D_I8_TRAP;
4063    Ops.push_back(TexHandle);
4064    Ops.push_back(N->getOperand(2));
4065    Ops.push_back(Chain);
4066    break;
4067  case NVPTXISD::Suld1DI16Trap:
4068    Opc = NVPTX::SULD_1D_I16_TRAP;
4069    Ops.push_back(TexHandle);
4070    Ops.push_back(N->getOperand(2));
4071    Ops.push_back(Chain);
4072    break;
4073  case NVPTXISD::Suld1DI32Trap:
4074    Opc = NVPTX::SULD_1D_I32_TRAP;
4075    Ops.push_back(TexHandle);
4076    Ops.push_back(N->getOperand(2));
4077    Ops.push_back(Chain);
4078    break;
4079  case NVPTXISD::Suld1DI64Trap:
4080    Opc = NVPTX::SULD_1D_I64_TRAP;
4081    Ops.push_back(TexHandle);
4082    Ops.push_back(N->getOperand(2));
4083    Ops.push_back(Chain);
4084    break;
4085  case NVPTXISD::Suld1DV2I8Trap:
4086    Opc = NVPTX::SULD_1D_V2I8_TRAP;
4087    Ops.push_back(TexHandle);
4088    Ops.push_back(N->getOperand(2));
4089    Ops.push_back(Chain);
4090    break;
4091  case NVPTXISD::Suld1DV2I16Trap:
4092    Opc = NVPTX::SULD_1D_V2I16_TRAP;
4093    Ops.push_back(TexHandle);
4094    Ops.push_back(N->getOperand(2));
4095    Ops.push_back(Chain);
4096    break;
4097  case NVPTXISD::Suld1DV2I32Trap:
4098    Opc = NVPTX::SULD_1D_V2I32_TRAP;
4099    Ops.push_back(TexHandle);
4100    Ops.push_back(N->getOperand(2));
4101    Ops.push_back(Chain);
4102    break;
4103  case NVPTXISD::Suld1DV2I64Trap:
4104    Opc = NVPTX::SULD_1D_V2I64_TRAP;
4105    Ops.push_back(TexHandle);
4106    Ops.push_back(N->getOperand(2));
4107    Ops.push_back(Chain);
4108    break;
4109  case NVPTXISD::Suld1DV4I8Trap:
4110    Opc = NVPTX::SULD_1D_V4I8_TRAP;
4111    Ops.push_back(TexHandle);
4112    Ops.push_back(N->getOperand(2));
4113    Ops.push_back(Chain);
4114    break;
4115  case NVPTXISD::Suld1DV4I16Trap:
4116    Opc = NVPTX::SULD_1D_V4I16_TRAP;
4117    Ops.push_back(TexHandle);
4118    Ops.push_back(N->getOperand(2));
4119    Ops.push_back(Chain);
4120    break;
4121  case NVPTXISD::Suld1DV4I32Trap:
4122    Opc = NVPTX::SULD_1D_V4I32_TRAP;
4123    Ops.push_back(TexHandle);
4124    Ops.push_back(N->getOperand(2));
4125    Ops.push_back(Chain);
4126    break;
4127  case NVPTXISD::Suld1DArrayI8Trap:
4128    Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4129    Ops.push_back(TexHandle);
4130    Ops.push_back(N->getOperand(2));
4131    Ops.push_back(N->getOperand(3));
4132    Ops.push_back(Chain);
4133    break;
4134  case NVPTXISD::Suld1DArrayI16Trap:
4135    Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4136    Ops.push_back(TexHandle);
4137    Ops.push_back(N->getOperand(2));
4138    Ops.push_back(N->getOperand(3));
4139    Ops.push_back(Chain);
4140    break;
4141  case NVPTXISD::Suld1DArrayI32Trap:
4142    Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4143    Ops.push_back(TexHandle);
4144    Ops.push_back(N->getOperand(2));
4145    Ops.push_back(N->getOperand(3));
4146    Ops.push_back(Chain);
4147    break;
4148  case NVPTXISD::Suld1DArrayI64Trap:
4149    Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4150    Ops.push_back(TexHandle);
4151    Ops.push_back(N->getOperand(2));
4152    Ops.push_back(N->getOperand(3));
4153    Ops.push_back(Chain);
4154    break;
4155  case NVPTXISD::Suld1DArrayV2I8Trap:
4156    Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4157    Ops.push_back(TexHandle);
4158    Ops.push_back(N->getOperand(2));
4159    Ops.push_back(N->getOperand(3));
4160    Ops.push_back(Chain);
4161    break;
4162  case NVPTXISD::Suld1DArrayV2I16Trap:
4163    Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4164    Ops.push_back(TexHandle);
4165    Ops.push_back(N->getOperand(2));
4166    Ops.push_back(N->getOperand(3));
4167    Ops.push_back(Chain);
4168    break;
4169  case NVPTXISD::Suld1DArrayV2I32Trap:
4170    Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4171    Ops.push_back(TexHandle);
4172    Ops.push_back(N->getOperand(2));
4173    Ops.push_back(N->getOperand(3));
4174    Ops.push_back(Chain);
4175    break;
4176  case NVPTXISD::Suld1DArrayV2I64Trap:
4177    Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4178    Ops.push_back(TexHandle);
4179    Ops.push_back(N->getOperand(2));
4180    Ops.push_back(N->getOperand(3));
4181    Ops.push_back(Chain);
4182    break;
4183  case NVPTXISD::Suld1DArrayV4I8Trap:
4184    Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4185    Ops.push_back(TexHandle);
4186    Ops.push_back(N->getOperand(2));
4187    Ops.push_back(N->getOperand(3));
4188    Ops.push_back(Chain);
4189    break;
4190  case NVPTXISD::Suld1DArrayV4I16Trap:
4191    Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4192    Ops.push_back(TexHandle);
4193    Ops.push_back(N->getOperand(2));
4194    Ops.push_back(N->getOperand(3));
4195    Ops.push_back(Chain);
4196    break;
4197  case NVPTXISD::Suld1DArrayV4I32Trap:
4198    Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4199    Ops.push_back(TexHandle);
4200    Ops.push_back(N->getOperand(2));
4201    Ops.push_back(N->getOperand(3));
4202    Ops.push_back(Chain);
4203    break;
4204  case NVPTXISD::Suld2DI8Trap:
4205    Opc = NVPTX::SULD_2D_I8_TRAP;
4206    Ops.push_back(TexHandle);
4207    Ops.push_back(N->getOperand(2));
4208    Ops.push_back(N->getOperand(3));
4209    Ops.push_back(Chain);
4210    break;
4211  case NVPTXISD::Suld2DI16Trap:
4212    Opc = NVPTX::SULD_2D_I16_TRAP;
4213    Ops.push_back(TexHandle);
4214    Ops.push_back(N->getOperand(2));
4215    Ops.push_back(N->getOperand(3));
4216    Ops.push_back(Chain);
4217    break;
4218  case NVPTXISD::Suld2DI32Trap:
4219    Opc = NVPTX::SULD_2D_I32_TRAP;
4220    Ops.push_back(TexHandle);
4221    Ops.push_back(N->getOperand(2));
4222    Ops.push_back(N->getOperand(3));
4223    Ops.push_back(Chain);
4224    break;
4225  case NVPTXISD::Suld2DI64Trap:
4226    Opc = NVPTX::SULD_2D_I64_TRAP;
4227    Ops.push_back(TexHandle);
4228    Ops.push_back(N->getOperand(2));
4229    Ops.push_back(N->getOperand(3));
4230    Ops.push_back(Chain);
4231    break;
4232  case NVPTXISD::Suld2DV2I8Trap:
4233    Opc = NVPTX::SULD_2D_V2I8_TRAP;
4234    Ops.push_back(TexHandle);
4235    Ops.push_back(N->getOperand(2));
4236    Ops.push_back(N->getOperand(3));
4237    Ops.push_back(Chain);
4238    break;
4239  case NVPTXISD::Suld2DV2I16Trap:
4240    Opc = NVPTX::SULD_2D_V2I16_TRAP;
4241    Ops.push_back(TexHandle);
4242    Ops.push_back(N->getOperand(2));
4243    Ops.push_back(N->getOperand(3));
4244    Ops.push_back(Chain);
4245    break;
4246  case NVPTXISD::Suld2DV2I32Trap:
4247    Opc = NVPTX::SULD_2D_V2I32_TRAP;
4248    Ops.push_back(TexHandle);
4249    Ops.push_back(N->getOperand(2));
4250    Ops.push_back(N->getOperand(3));
4251    Ops.push_back(Chain);
4252    break;
4253  case NVPTXISD::Suld2DV2I64Trap:
4254    Opc = NVPTX::SULD_2D_V2I64_TRAP;
4255    Ops.push_back(TexHandle);
4256    Ops.push_back(N->getOperand(2));
4257    Ops.push_back(N->getOperand(3));
4258    Ops.push_back(Chain);
4259    break;
4260  case NVPTXISD::Suld2DV4I8Trap:
4261    Opc = NVPTX::SULD_2D_V4I8_TRAP;
4262    Ops.push_back(TexHandle);
4263    Ops.push_back(N->getOperand(2));
4264    Ops.push_back(N->getOperand(3));
4265    Ops.push_back(Chain);
4266    break;
4267  case NVPTXISD::Suld2DV4I16Trap:
4268    Opc = NVPTX::SULD_2D_V4I16_TRAP;
4269    Ops.push_back(TexHandle);
4270    Ops.push_back(N->getOperand(2));
4271    Ops.push_back(N->getOperand(3));
4272    Ops.push_back(Chain);
4273    break;
4274  case NVPTXISD::Suld2DV4I32Trap:
4275    Opc = NVPTX::SULD_2D_V4I32_TRAP;
4276    Ops.push_back(TexHandle);
4277    Ops.push_back(N->getOperand(2));
4278    Ops.push_back(N->getOperand(3));
4279    Ops.push_back(Chain);
4280    break;
4281  case NVPTXISD::Suld2DArrayI8Trap:
4282    Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4283    Ops.push_back(TexHandle);
4284    Ops.push_back(N->getOperand(2));
4285    Ops.push_back(N->getOperand(3));
4286    Ops.push_back(N->getOperand(4));
4287    Ops.push_back(Chain);
4288    break;
4289  case NVPTXISD::Suld2DArrayI16Trap:
4290    Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4291    Ops.push_back(TexHandle);
4292    Ops.push_back(N->getOperand(2));
4293    Ops.push_back(N->getOperand(3));
4294    Ops.push_back(N->getOperand(4));
4295    Ops.push_back(Chain);
4296    break;
4297  case NVPTXISD::Suld2DArrayI32Trap:
4298    Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4299    Ops.push_back(TexHandle);
4300    Ops.push_back(N->getOperand(2));
4301    Ops.push_back(N->getOperand(3));
4302    Ops.push_back(N->getOperand(4));
4303    Ops.push_back(Chain);
4304    break;
4305  case NVPTXISD::Suld2DArrayI64Trap:
4306    Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4307    Ops.push_back(TexHandle);
4308    Ops.push_back(N->getOperand(2));
4309    Ops.push_back(N->getOperand(3));
4310    Ops.push_back(N->getOperand(4));
4311    Ops.push_back(Chain);
4312    break;
4313  case NVPTXISD::Suld2DArrayV2I8Trap:
4314    Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4315    Ops.push_back(TexHandle);
4316    Ops.push_back(N->getOperand(2));
4317    Ops.push_back(N->getOperand(3));
4318    Ops.push_back(N->getOperand(4));
4319    Ops.push_back(Chain);
4320    break;
4321  case NVPTXISD::Suld2DArrayV2I16Trap:
4322    Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4323    Ops.push_back(TexHandle);
4324    Ops.push_back(N->getOperand(2));
4325    Ops.push_back(N->getOperand(3));
4326    Ops.push_back(N->getOperand(4));
4327    Ops.push_back(Chain);
4328    break;
4329  case NVPTXISD::Suld2DArrayV2I32Trap:
4330    Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4331    Ops.push_back(TexHandle);
4332    Ops.push_back(N->getOperand(2));
4333    Ops.push_back(N->getOperand(3));
4334    Ops.push_back(N->getOperand(4));
4335    Ops.push_back(Chain);
4336    break;
4337  case NVPTXISD::Suld2DArrayV2I64Trap:
4338    Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4339    Ops.push_back(TexHandle);
4340    Ops.push_back(N->getOperand(2));
4341    Ops.push_back(N->getOperand(3));
4342    Ops.push_back(N->getOperand(4));
4343    Ops.push_back(Chain);
4344    break;
4345  case NVPTXISD::Suld2DArrayV4I8Trap:
4346    Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4347    Ops.push_back(TexHandle);
4348    Ops.push_back(N->getOperand(2));
4349    Ops.push_back(N->getOperand(3));
4350    Ops.push_back(N->getOperand(4));
4351    Ops.push_back(Chain);
4352    break;
4353  case NVPTXISD::Suld2DArrayV4I16Trap:
4354    Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4355    Ops.push_back(TexHandle);
4356    Ops.push_back(N->getOperand(2));
4357    Ops.push_back(N->getOperand(3));
4358    Ops.push_back(N->getOperand(4));
4359    Ops.push_back(Chain);
4360    break;
4361  case NVPTXISD::Suld2DArrayV4I32Trap:
4362    Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4363    Ops.push_back(TexHandle);
4364    Ops.push_back(N->getOperand(2));
4365    Ops.push_back(N->getOperand(3));
4366    Ops.push_back(N->getOperand(4));
4367    Ops.push_back(Chain);
4368    break;
4369  case NVPTXISD::Suld3DI8Trap:
4370    Opc = NVPTX::SULD_3D_I8_TRAP;
4371    Ops.push_back(TexHandle);
4372    Ops.push_back(N->getOperand(2));
4373    Ops.push_back(N->getOperand(3));
4374    Ops.push_back(N->getOperand(4));
4375    Ops.push_back(Chain);
4376    break;
4377  case NVPTXISD::Suld3DI16Trap:
4378    Opc = NVPTX::SULD_3D_I16_TRAP;
4379    Ops.push_back(TexHandle);
4380    Ops.push_back(N->getOperand(2));
4381    Ops.push_back(N->getOperand(3));
4382    Ops.push_back(N->getOperand(4));
4383    Ops.push_back(Chain);
4384    break;
4385  case NVPTXISD::Suld3DI32Trap:
4386    Opc = NVPTX::SULD_3D_I32_TRAP;
4387    Ops.push_back(TexHandle);
4388    Ops.push_back(N->getOperand(2));
4389    Ops.push_back(N->getOperand(3));
4390    Ops.push_back(N->getOperand(4));
4391    Ops.push_back(Chain);
4392    break;
4393  case NVPTXISD::Suld3DI64Trap:
4394    Opc = NVPTX::SULD_3D_I64_TRAP;
4395    Ops.push_back(TexHandle);
4396    Ops.push_back(N->getOperand(2));
4397    Ops.push_back(N->getOperand(3));
4398    Ops.push_back(N->getOperand(4));
4399    Ops.push_back(Chain);
4400    break;
4401  case NVPTXISD::Suld3DV2I8Trap:
4402    Opc = NVPTX::SULD_3D_V2I8_TRAP;
4403    Ops.push_back(TexHandle);
4404    Ops.push_back(N->getOperand(2));
4405    Ops.push_back(N->getOperand(3));
4406    Ops.push_back(N->getOperand(4));
4407    Ops.push_back(Chain);
4408    break;
4409  case NVPTXISD::Suld3DV2I16Trap:
4410    Opc = NVPTX::SULD_3D_V2I16_TRAP;
4411    Ops.push_back(TexHandle);
4412    Ops.push_back(N->getOperand(2));
4413    Ops.push_back(N->getOperand(3));
4414    Ops.push_back(N->getOperand(4));
4415    Ops.push_back(Chain);
4416    break;
4417  case NVPTXISD::Suld3DV2I32Trap:
4418    Opc = NVPTX::SULD_3D_V2I32_TRAP;
4419    Ops.push_back(TexHandle);
4420    Ops.push_back(N->getOperand(2));
4421    Ops.push_back(N->getOperand(3));
4422    Ops.push_back(N->getOperand(4));
4423    Ops.push_back(Chain);
4424    break;
4425  case NVPTXISD::Suld3DV2I64Trap:
4426    Opc = NVPTX::SULD_3D_V2I64_TRAP;
4427    Ops.push_back(TexHandle);
4428    Ops.push_back(N->getOperand(2));
4429    Ops.push_back(N->getOperand(3));
4430    Ops.push_back(N->getOperand(4));
4431    Ops.push_back(Chain);
4432    break;
4433  case NVPTXISD::Suld3DV4I8Trap:
4434    Opc = NVPTX::SULD_3D_V4I8_TRAP;
4435    Ops.push_back(TexHandle);
4436    Ops.push_back(N->getOperand(2));
4437    Ops.push_back(N->getOperand(3));
4438    Ops.push_back(N->getOperand(4));
4439    Ops.push_back(Chain);
4440    break;
4441  case NVPTXISD::Suld3DV4I16Trap:
4442    Opc = NVPTX::SULD_3D_V4I16_TRAP;
4443    Ops.push_back(TexHandle);
4444    Ops.push_back(N->getOperand(2));
4445    Ops.push_back(N->getOperand(3));
4446    Ops.push_back(N->getOperand(4));
4447    Ops.push_back(Chain);
4448    break;
4449  case NVPTXISD::Suld3DV4I32Trap:
4450    Opc = NVPTX::SULD_3D_V4I32_TRAP;
4451    Ops.push_back(TexHandle);
4452    Ops.push_back(N->getOperand(2));
4453    Ops.push_back(N->getOperand(3));
4454    Ops.push_back(N->getOperand(4));
4455    Ops.push_back(Chain);
4456    break;
4457  case NVPTXISD::Suld1DI8Zero:
4458    Opc = NVPTX::SULD_1D_I8_ZERO;
4459    Ops.push_back(TexHandle);
4460    Ops.push_back(N->getOperand(2));
4461    Ops.push_back(Chain);
4462    break;
4463  case NVPTXISD::Suld1DI16Zero:
4464    Opc = NVPTX::SULD_1D_I16_ZERO;
4465    Ops.push_back(TexHandle);
4466    Ops.push_back(N->getOperand(2));
4467    Ops.push_back(Chain);
4468    break;
4469  case NVPTXISD::Suld1DI32Zero:
4470    Opc = NVPTX::SULD_1D_I32_ZERO;
4471    Ops.push_back(TexHandle);
4472    Ops.push_back(N->getOperand(2));
4473    Ops.push_back(Chain);
4474    break;
4475  case NVPTXISD::Suld1DI64Zero:
4476    Opc = NVPTX::SULD_1D_I64_ZERO;
4477    Ops.push_back(TexHandle);
4478    Ops.push_back(N->getOperand(2));
4479    Ops.push_back(Chain);
4480    break;
4481  case NVPTXISD::Suld1DV2I8Zero:
4482    Opc = NVPTX::SULD_1D_V2I8_ZERO;
4483    Ops.push_back(TexHandle);
4484    Ops.push_back(N->getOperand(2));
4485    Ops.push_back(Chain);
4486    break;
4487  case NVPTXISD::Suld1DV2I16Zero:
4488    Opc = NVPTX::SULD_1D_V2I16_ZERO;
4489    Ops.push_back(TexHandle);
4490    Ops.push_back(N->getOperand(2));
4491    Ops.push_back(Chain);
4492    break;
4493  case NVPTXISD::Suld1DV2I32Zero:
4494    Opc = NVPTX::SULD_1D_V2I32_ZERO;
4495    Ops.push_back(TexHandle);
4496    Ops.push_back(N->getOperand(2));
4497    Ops.push_back(Chain);
4498    break;
4499  case NVPTXISD::Suld1DV2I64Zero:
4500    Opc = NVPTX::SULD_1D_V2I64_ZERO;
4501    Ops.push_back(TexHandle);
4502    Ops.push_back(N->getOperand(2));
4503    Ops.push_back(Chain);
4504    break;
4505  case NVPTXISD::Suld1DV4I8Zero:
4506    Opc = NVPTX::SULD_1D_V4I8_ZERO;
4507    Ops.push_back(TexHandle);
4508    Ops.push_back(N->getOperand(2));
4509    Ops.push_back(Chain);
4510    break;
4511  case NVPTXISD::Suld1DV4I16Zero:
4512    Opc = NVPTX::SULD_1D_V4I16_ZERO;
4513    Ops.push_back(TexHandle);
4514    Ops.push_back(N->getOperand(2));
4515    Ops.push_back(Chain);
4516    break;
4517  case NVPTXISD::Suld1DV4I32Zero:
4518    Opc = NVPTX::SULD_1D_V4I32_ZERO;
4519    Ops.push_back(TexHandle);
4520    Ops.push_back(N->getOperand(2));
4521    Ops.push_back(Chain);
4522    break;
4523  case NVPTXISD::Suld1DArrayI8Zero:
4524    Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4525    Ops.push_back(TexHandle);
4526    Ops.push_back(N->getOperand(2));
4527    Ops.push_back(N->getOperand(3));
4528    Ops.push_back(Chain);
4529    break;
4530  case NVPTXISD::Suld1DArrayI16Zero:
4531    Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4532    Ops.push_back(TexHandle);
4533    Ops.push_back(N->getOperand(2));
4534    Ops.push_back(N->getOperand(3));
4535    Ops.push_back(Chain);
4536    break;
4537  case NVPTXISD::Suld1DArrayI32Zero:
4538    Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4539    Ops.push_back(TexHandle);
4540    Ops.push_back(N->getOperand(2));
4541    Ops.push_back(N->getOperand(3));
4542    Ops.push_back(Chain);
4543    break;
4544  case NVPTXISD::Suld1DArrayI64Zero:
4545    Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4546    Ops.push_back(TexHandle);
4547    Ops.push_back(N->getOperand(2));
4548    Ops.push_back(N->getOperand(3));
4549    Ops.push_back(Chain);
4550    break;
4551  case NVPTXISD::Suld1DArrayV2I8Zero:
4552    Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4553    Ops.push_back(TexHandle);
4554    Ops.push_back(N->getOperand(2));
4555    Ops.push_back(N->getOperand(3));
4556    Ops.push_back(Chain);
4557    break;
4558  case NVPTXISD::Suld1DArrayV2I16Zero:
4559    Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4560    Ops.push_back(TexHandle);
4561    Ops.push_back(N->getOperand(2));
4562    Ops.push_back(N->getOperand(3));
4563    Ops.push_back(Chain);
4564    break;
4565  case NVPTXISD::Suld1DArrayV2I32Zero:
4566    Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4567    Ops.push_back(TexHandle);
4568    Ops.push_back(N->getOperand(2));
4569    Ops.push_back(N->getOperand(3));
4570    Ops.push_back(Chain);
4571    break;
4572  case NVPTXISD::Suld1DArrayV2I64Zero:
4573    Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4574    Ops.push_back(TexHandle);
4575    Ops.push_back(N->getOperand(2));
4576    Ops.push_back(N->getOperand(3));
4577    Ops.push_back(Chain);
4578    break;
4579  case NVPTXISD::Suld1DArrayV4I8Zero:
4580    Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4581    Ops.push_back(TexHandle);
4582    Ops.push_back(N->getOperand(2));
4583    Ops.push_back(N->getOperand(3));
4584    Ops.push_back(Chain);
4585    break;
4586  case NVPTXISD::Suld1DArrayV4I16Zero:
4587    Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4588    Ops.push_back(TexHandle);
4589    Ops.push_back(N->getOperand(2));
4590    Ops.push_back(N->getOperand(3));
4591    Ops.push_back(Chain);
4592    break;
4593  case NVPTXISD::Suld1DArrayV4I32Zero:
4594    Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4595    Ops.push_back(TexHandle);
4596    Ops.push_back(N->getOperand(2));
4597    Ops.push_back(N->getOperand(3));
4598    Ops.push_back(Chain);
4599    break;
4600  case NVPTXISD::Suld2DI8Zero:
4601    Opc = NVPTX::SULD_2D_I8_ZERO;
4602    Ops.push_back(TexHandle);
4603    Ops.push_back(N->getOperand(2));
4604    Ops.push_back(N->getOperand(3));
4605    Ops.push_back(Chain);
4606    break;
4607  case NVPTXISD::Suld2DI16Zero:
4608    Opc = NVPTX::SULD_2D_I16_ZERO;
4609    Ops.push_back(TexHandle);
4610    Ops.push_back(N->getOperand(2));
4611    Ops.push_back(N->getOperand(3));
4612    Ops.push_back(Chain);
4613    break;
4614  case NVPTXISD::Suld2DI32Zero:
4615    Opc = NVPTX::SULD_2D_I32_ZERO;
4616    Ops.push_back(TexHandle);
4617    Ops.push_back(N->getOperand(2));
4618    Ops.push_back(N->getOperand(3));
4619    Ops.push_back(Chain);
4620    break;
4621  case NVPTXISD::Suld2DI64Zero:
4622    Opc = NVPTX::SULD_2D_I64_ZERO;
4623    Ops.push_back(TexHandle);
4624    Ops.push_back(N->getOperand(2));
4625    Ops.push_back(N->getOperand(3));
4626    Ops.push_back(Chain);
4627    break;
4628  case NVPTXISD::Suld2DV2I8Zero:
4629    Opc = NVPTX::SULD_2D_V2I8_ZERO;
4630    Ops.push_back(TexHandle);
4631    Ops.push_back(N->getOperand(2));
4632    Ops.push_back(N->getOperand(3));
4633    Ops.push_back(Chain);
4634    break;
4635  case NVPTXISD::Suld2DV2I16Zero:
4636    Opc = NVPTX::SULD_2D_V2I16_ZERO;
4637    Ops.push_back(TexHandle);
4638    Ops.push_back(N->getOperand(2));
4639    Ops.push_back(N->getOperand(3));
4640    Ops.push_back(Chain);
4641    break;
4642  case NVPTXISD::Suld2DV2I32Zero:
4643    Opc = NVPTX::SULD_2D_V2I32_ZERO;
4644    Ops.push_back(TexHandle);
4645    Ops.push_back(N->getOperand(2));
4646    Ops.push_back(N->getOperand(3));
4647    Ops.push_back(Chain);
4648    break;
4649  case NVPTXISD::Suld2DV2I64Zero:
4650    Opc = NVPTX::SULD_2D_V2I64_ZERO;
4651    Ops.push_back(TexHandle);
4652    Ops.push_back(N->getOperand(2));
4653    Ops.push_back(N->getOperand(3));
4654    Ops.push_back(Chain);
4655    break;
4656  case NVPTXISD::Suld2DV4I8Zero:
4657    Opc = NVPTX::SULD_2D_V4I8_ZERO;
4658    Ops.push_back(TexHandle);
4659    Ops.push_back(N->getOperand(2));
4660    Ops.push_back(N->getOperand(3));
4661    Ops.push_back(Chain);
4662    break;
4663  case NVPTXISD::Suld2DV4I16Zero:
4664    Opc = NVPTX::SULD_2D_V4I16_ZERO;
4665    Ops.push_back(TexHandle);
4666    Ops.push_back(N->getOperand(2));
4667    Ops.push_back(N->getOperand(3));
4668    Ops.push_back(Chain);
4669    break;
4670  case NVPTXISD::Suld2DV4I32Zero:
4671    Opc = NVPTX::SULD_2D_V4I32_ZERO;
4672    Ops.push_back(TexHandle);
4673    Ops.push_back(N->getOperand(2));
4674    Ops.push_back(N->getOperand(3));
4675    Ops.push_back(Chain);
4676    break;
4677  case NVPTXISD::Suld2DArrayI8Zero:
4678    Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4679    Ops.push_back(TexHandle);
4680    Ops.push_back(N->getOperand(2));
4681    Ops.push_back(N->getOperand(3));
4682    Ops.push_back(N->getOperand(4));
4683    Ops.push_back(Chain);
4684    break;
4685  case NVPTXISD::Suld2DArrayI16Zero:
4686    Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4687    Ops.push_back(TexHandle);
4688    Ops.push_back(N->getOperand(2));
4689    Ops.push_back(N->getOperand(3));
4690    Ops.push_back(N->getOperand(4));
4691    Ops.push_back(Chain);
4692    break;
4693  case NVPTXISD::Suld2DArrayI32Zero:
4694    Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4695    Ops.push_back(TexHandle);
4696    Ops.push_back(N->getOperand(2));
4697    Ops.push_back(N->getOperand(3));
4698    Ops.push_back(N->getOperand(4));
4699    Ops.push_back(Chain);
4700    break;
4701  case NVPTXISD::Suld2DArrayI64Zero:
4702    Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4703    Ops.push_back(TexHandle);
4704    Ops.push_back(N->getOperand(2));
4705    Ops.push_back(N->getOperand(3));
4706    Ops.push_back(N->getOperand(4));
4707    Ops.push_back(Chain);
4708    break;
4709  case NVPTXISD::Suld2DArrayV2I8Zero:
4710    Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4711    Ops.push_back(TexHandle);
4712    Ops.push_back(N->getOperand(2));
4713    Ops.push_back(N->getOperand(3));
4714    Ops.push_back(N->getOperand(4));
4715    Ops.push_back(Chain);
4716    break;
4717  case NVPTXISD::Suld2DArrayV2I16Zero:
4718    Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4719    Ops.push_back(TexHandle);
4720    Ops.push_back(N->getOperand(2));
4721    Ops.push_back(N->getOperand(3));
4722    Ops.push_back(N->getOperand(4));
4723    Ops.push_back(Chain);
4724    break;
4725  case NVPTXISD::Suld2DArrayV2I32Zero:
4726    Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4727    Ops.push_back(TexHandle);
4728    Ops.push_back(N->getOperand(2));
4729    Ops.push_back(N->getOperand(3));
4730    Ops.push_back(N->getOperand(4));
4731    Ops.push_back(Chain);
4732    break;
4733  case NVPTXISD::Suld2DArrayV2I64Zero:
4734    Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4735    Ops.push_back(TexHandle);
4736    Ops.push_back(N->getOperand(2));
4737    Ops.push_back(N->getOperand(3));
4738    Ops.push_back(N->getOperand(4));
4739    Ops.push_back(Chain);
4740    break;
4741  case NVPTXISD::Suld2DArrayV4I8Zero:
4742    Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4743    Ops.push_back(TexHandle);
4744    Ops.push_back(N->getOperand(2));
4745    Ops.push_back(N->getOperand(3));
4746    Ops.push_back(N->getOperand(4));
4747    Ops.push_back(Chain);
4748    break;
4749  case NVPTXISD::Suld2DArrayV4I16Zero:
4750    Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4751    Ops.push_back(TexHandle);
4752    Ops.push_back(N->getOperand(2));
4753    Ops.push_back(N->getOperand(3));
4754    Ops.push_back(N->getOperand(4));
4755    Ops.push_back(Chain);
4756    break;
4757  case NVPTXISD::Suld2DArrayV4I32Zero:
4758    Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4759    Ops.push_back(TexHandle);
4760    Ops.push_back(N->getOperand(2));
4761    Ops.push_back(N->getOperand(3));
4762    Ops.push_back(N->getOperand(4));
4763    Ops.push_back(Chain);
4764    break;
4765  case NVPTXISD::Suld3DI8Zero:
4766    Opc = NVPTX::SULD_3D_I8_ZERO;
4767    Ops.push_back(TexHandle);
4768    Ops.push_back(N->getOperand(2));
4769    Ops.push_back(N->getOperand(3));
4770    Ops.push_back(N->getOperand(4));
4771    Ops.push_back(Chain);
4772    break;
4773  case NVPTXISD::Suld3DI16Zero:
4774    Opc = NVPTX::SULD_3D_I16_ZERO;
4775    Ops.push_back(TexHandle);
4776    Ops.push_back(N->getOperand(2));
4777    Ops.push_back(N->getOperand(3));
4778    Ops.push_back(N->getOperand(4));
4779    Ops.push_back(Chain);
4780    break;
4781  case NVPTXISD::Suld3DI32Zero:
4782    Opc = NVPTX::SULD_3D_I32_ZERO;
4783    Ops.push_back(TexHandle);
4784    Ops.push_back(N->getOperand(2));
4785    Ops.push_back(N->getOperand(3));
4786    Ops.push_back(N->getOperand(4));
4787    Ops.push_back(Chain);
4788    break;
4789  case NVPTXISD::Suld3DI64Zero:
4790    Opc = NVPTX::SULD_3D_I64_ZERO;
4791    Ops.push_back(TexHandle);
4792    Ops.push_back(N->getOperand(2));
4793    Ops.push_back(N->getOperand(3));
4794    Ops.push_back(N->getOperand(4));
4795    Ops.push_back(Chain);
4796    break;
4797  case NVPTXISD::Suld3DV2I8Zero:
4798    Opc = NVPTX::SULD_3D_V2I8_ZERO;
4799    Ops.push_back(TexHandle);
4800    Ops.push_back(N->getOperand(2));
4801    Ops.push_back(N->getOperand(3));
4802    Ops.push_back(N->getOperand(4));
4803    Ops.push_back(Chain);
4804    break;
4805  case NVPTXISD::Suld3DV2I16Zero:
4806    Opc = NVPTX::SULD_3D_V2I16_ZERO;
4807    Ops.push_back(TexHandle);
4808    Ops.push_back(N->getOperand(2));
4809    Ops.push_back(N->getOperand(3));
4810    Ops.push_back(N->getOperand(4));
4811    Ops.push_back(Chain);
4812    break;
4813  case NVPTXISD::Suld3DV2I32Zero:
4814    Opc = NVPTX::SULD_3D_V2I32_ZERO;
4815    Ops.push_back(TexHandle);
4816    Ops.push_back(N->getOperand(2));
4817    Ops.push_back(N->getOperand(3));
4818    Ops.push_back(N->getOperand(4));
4819    Ops.push_back(Chain);
4820    break;
4821  case NVPTXISD::Suld3DV2I64Zero:
4822    Opc = NVPTX::SULD_3D_V2I64_ZERO;
4823    Ops.push_back(TexHandle);
4824    Ops.push_back(N->getOperand(2));
4825    Ops.push_back(N->getOperand(3));
4826    Ops.push_back(N->getOperand(4));
4827    Ops.push_back(Chain);
4828    break;
4829  case NVPTXISD::Suld3DV4I8Zero:
4830    Opc = NVPTX::SULD_3D_V4I8_ZERO;
4831    Ops.push_back(TexHandle);
4832    Ops.push_back(N->getOperand(2));
4833    Ops.push_back(N->getOperand(3));
4834    Ops.push_back(N->getOperand(4));
4835    Ops.push_back(Chain);
4836    break;
4837  case NVPTXISD::Suld3DV4I16Zero:
4838    Opc = NVPTX::SULD_3D_V4I16_ZERO;
4839    Ops.push_back(TexHandle);
4840    Ops.push_back(N->getOperand(2));
4841    Ops.push_back(N->getOperand(3));
4842    Ops.push_back(N->getOperand(4));
4843    Ops.push_back(Chain);
4844    break;
4845  case NVPTXISD::Suld3DV4I32Zero:
4846    Opc = NVPTX::SULD_3D_V4I32_ZERO;
4847    Ops.push_back(TexHandle);
4848    Ops.push_back(N->getOperand(2));
4849    Ops.push_back(N->getOperand(3));
4850    Ops.push_back(N->getOperand(4));
4851    Ops.push_back(Chain);
4852    break;
4853  }
4854  ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
4855  return true;
4856}
4857
4858
4859/// SelectBFE - Look for instruction sequences that can be made more efficient
4860/// by using the 'bfe' (bit-field extract) PTX instruction
4861bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
4862  SDLoc DL(N);
4863  SDValue LHS = N->getOperand(0);
4864  SDValue RHS = N->getOperand(1);
4865  SDValue Len;
4866  SDValue Start;
4867  SDValue Val;
4868  bool IsSigned = false;
4869
4870  if (N->getOpcode() == ISD::AND) {
4871    // Canonicalize the operands
4872    // We want 'and %val, %mask'
4873    if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4874      std::swap(LHS, RHS);
4875    }
4876
4877    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4878    if (!Mask) {
4879      // We need a constant mask on the RHS of the AND
4880      return false;
4881    }
4882
4883    // Extract the mask bits
4884    uint64_t MaskVal = Mask->getZExtValue();
4885    if (!isMask_64(MaskVal)) {
4886      // We *could* handle shifted masks here, but doing so would require an
4887      // 'and' operation to fix up the low-order bits so we would trade
4888      // shr+and for bfe+and, which has the same throughput
4889      return false;
4890    }
4891
4892    // How many bits are in our mask?
4893    uint64_t NumBits = countTrailingOnes(MaskVal);
4894    Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4895
4896    if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4897      // We have a 'srl/and' pair, extract the effective start bit and length
4898      Val = LHS.getNode()->getOperand(0);
4899      Start = LHS.getNode()->getOperand(1);
4900      ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4901      if (StartConst) {
4902        uint64_t StartVal = StartConst->getZExtValue();
4903        // How many "good" bits do we have left?  "good" is defined here as bits
4904        // that exist in the original value, not shifted in.
4905        uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4906        if (NumBits > GoodBits) {
4907          // Do not handle the case where bits have been shifted in. In theory
4908          // we could handle this, but the cost is likely higher than just
4909          // emitting the srl/and pair.
4910          return false;
4911        }
4912        Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
4913      } else {
4914        // Do not handle the case where the shift amount (can be zero if no srl
4915        // was found) is not constant. We could handle this case, but it would
4916        // require run-time logic that would be more expensive than just
4917        // emitting the srl/and pair.
4918        return false;
4919      }
4920    } else {
4921      // Do not handle the case where the LHS of the and is not a shift. While
4922      // it would be trivial to handle this case, it would just transform
4923      // 'and' -> 'bfe', but 'and' has higher-throughput.
4924      return false;
4925    }
4926  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4927    if (LHS->getOpcode() == ISD::AND) {
4928      ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4929      if (!ShiftCnst) {
4930        // Shift amount must be constant
4931        return false;
4932      }
4933
4934      uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4935
4936      SDValue AndLHS = LHS->getOperand(0);
4937      SDValue AndRHS = LHS->getOperand(1);
4938
4939      // Canonicalize the AND to have the mask on the RHS
4940      if (isa<ConstantSDNode>(AndLHS)) {
4941        std::swap(AndLHS, AndRHS);
4942      }
4943
4944      ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4945      if (!MaskCnst) {
4946        // Mask must be constant
4947        return false;
4948      }
4949
4950      uint64_t MaskVal = MaskCnst->getZExtValue();
4951      uint64_t NumZeros;
4952      uint64_t NumBits;
4953      if (isMask_64(MaskVal)) {
4954        NumZeros = 0;
4955        // The number of bits in the result bitfield will be the number of
4956        // trailing ones (the AND) minus the number of bits we shift off
4957        NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4958      } else if (isShiftedMask_64(MaskVal)) {
4959        NumZeros = countTrailingZeros(MaskVal);
4960        unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4961        // The number of bits in the result bitfield will be the number of
4962        // trailing zeros plus the number of set bits in the mask minus the
4963        // number of bits we shift off
4964        NumBits = NumZeros + NumOnes - ShiftAmt;
4965      } else {
4966        // This is not a mask we can handle
4967        return false;
4968      }
4969
4970      if (ShiftAmt < NumZeros) {
4971        // Handling this case would require extra logic that would make this
4972        // transformation non-profitable
4973        return false;
4974      }
4975
4976      Val = AndLHS;
4977      Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4978      Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4979    } else if (LHS->getOpcode() == ISD::SHL) {
4980      // Here, we have a pattern like:
4981      //
4982      // (sra (shl val, NN), MM)
4983      // or
4984      // (srl (shl val, NN), MM)
4985      //
4986      // If MM >= NN, we can efficiently optimize this with bfe
4987      Val = LHS->getOperand(0);
4988
4989      SDValue ShlRHS = LHS->getOperand(1);
4990      ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4991      if (!ShlCnst) {
4992        // Shift amount must be constant
4993        return false;
4994      }
4995      uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4996
4997      SDValue ShrRHS = RHS;
4998      ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4999      if (!ShrCnst) {
5000        // Shift amount must be constant
5001        return false;
5002      }
5003      uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
5004
5005      // To avoid extra codegen and be profitable, we need Outer >= Inner
5006      if (OuterShiftAmt < InnerShiftAmt) {
5007        return false;
5008      }
5009
5010      // If the outer shift is more than the type size, we have no bitfield to
5011      // extract (since we also check that the inner shift is <= the outer shift
5012      // then this also implies that the inner shift is < the type size)
5013      if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
5014        return false;
5015      }
5016
5017      Start =
5018        CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
5019      Len =
5020        CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
5021                                  OuterShiftAmt, DL, MVT::i32);
5022
5023      if (N->getOpcode() == ISD::SRA) {
5024        // If we have a arithmetic right shift, we need to use the signed bfe
5025        // variant
5026        IsSigned = true;
5027      }
5028    } else {
5029      // No can do...
5030      return false;
5031    }
5032  } else {
5033    // No can do...
5034    return false;
5035  }
5036
5037
5038  unsigned Opc;
5039  // For the BFE operations we form here from "and" and "srl", always use the
5040  // unsigned variants.
5041  if (Val.getValueType() == MVT::i32) {
5042    if (IsSigned) {
5043      Opc = NVPTX::BFE_S32rii;
5044    } else {
5045      Opc = NVPTX::BFE_U32rii;
5046    }
5047  } else if (Val.getValueType() == MVT::i64) {
5048    if (IsSigned) {
5049      Opc = NVPTX::BFE_S64rii;
5050    } else {
5051      Opc = NVPTX::BFE_U64rii;
5052    }
5053  } else {
5054    // We cannot handle this type
5055    return false;
5056  }
5057
5058  SDValue Ops[] = {
5059    Val, Start, Len
5060  };
5061
5062  ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
5063  return true;
5064}
5065
5066// SelectDirectAddr - Match a direct address for DAG.
5067// A direct address could be a globaladdress or externalsymbol.
5068bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
5069  // Return true if TGA or ES.
5070  if (N.getOpcode() == ISD::TargetGlobalAddress ||
5071      N.getOpcode() == ISD::TargetExternalSymbol) {
5072    Address = N;
5073    return true;
5074  }
5075  if (N.getOpcode() == NVPTXISD::Wrapper) {
5076    Address = N.getOperand(0);
5077    return true;
5078  }
5079  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
5080    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
5081    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
5082      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
5083        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
5084  }
5085  return false;
5086}
5087
5088// symbol+offset
5089bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
5090    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5091  if (Addr.getOpcode() == ISD::ADD) {
5092    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5093      SDValue base = Addr.getOperand(0);
5094      if (SelectDirectAddr(base, Base)) {
5095        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5096                                           mvt);
5097        return true;
5098      }
5099    }
5100  }
5101  return false;
5102}
5103
5104// symbol+offset
5105bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
5106                                     SDValue &Base, SDValue &Offset) {
5107  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
5108}
5109
5110// symbol+offset
5111bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
5112                                       SDValue &Base, SDValue &Offset) {
5113  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
5114}
5115
5116// register+offset
5117bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5118    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5119  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5120    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5121    Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
5122    return true;
5123  }
5124  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5125      Addr.getOpcode() == ISD::TargetGlobalAddress)
5126    return false; // direct calls.
5127
5128  if (Addr.getOpcode() == ISD::ADD) {
5129    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5130      return false;
5131    }
5132    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5133      if (FrameIndexSDNode *FIN =
5134              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5135        // Constant offset from frame ref.
5136        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5137      else
5138        Base = Addr.getOperand(0);
5139      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5140                                         mvt);
5141      return true;
5142    }
5143  }
5144  return false;
5145}
5146
5147// register+offset
5148bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5149                                     SDValue &Base, SDValue &Offset) {
5150  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
5151}
5152
5153// register+offset
5154bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5155                                       SDValue &Base, SDValue &Offset) {
5156  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
5157}
5158
5159bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5160                                                 unsigned int spN) const {
5161  const Value *Src = nullptr;
5162  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
5163    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5164      return true;
5165    Src = mN->getMemOperand()->getValue();
5166  }
5167  if (!Src)
5168    return false;
5169  if (auto *PT = dyn_cast<PointerType>(Src->getType()))
5170    return (PT->getAddressSpace() == spN);
5171  return false;
5172}
5173
5174/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5175/// inline asm expressions.
5176bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5177    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
5178  SDValue Op0, Op1;
5179  switch (ConstraintID) {
5180  default:
5181    return true;
5182  case InlineAsm::Constraint_m: // memory
5183    if (SelectDirectAddr(Op, Op0)) {
5184      OutOps.push_back(Op0);
5185      OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
5186      return false;
5187    }
5188    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5189      OutOps.push_back(Op0);
5190      OutOps.push_back(Op1);
5191      return false;
5192    }
5193    break;
5194  }
5195  return true;
5196}
5197
5198/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
5199/// conversion from \p SrcTy to \p DestTy.
5200unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
5201                                             bool IsSigned) {
5202  switch (SrcTy.SimpleTy) {
5203  default:
5204    llvm_unreachable("Unhandled source type");
5205  case MVT::i8:
5206    switch (DestTy.SimpleTy) {
5207    default:
5208      llvm_unreachable("Unhandled dest type");
5209    case MVT::i16:
5210      return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
5211    case MVT::i32:
5212      return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
5213    case MVT::i64:
5214      return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
5215    }
5216  case MVT::i16:
5217    switch (DestTy.SimpleTy) {
5218    default:
5219      llvm_unreachable("Unhandled dest type");
5220    case MVT::i8:
5221      return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
5222    case MVT::i32:
5223      return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
5224    case MVT::i64:
5225      return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
5226    }
5227  case MVT::i32:
5228    switch (DestTy.SimpleTy) {
5229    default:
5230      llvm_unreachable("Unhandled dest type");
5231    case MVT::i8:
5232      return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
5233    case MVT::i16:
5234      return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
5235    case MVT::i64:
5236      return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
5237    }
5238  case MVT::i64:
5239    switch (DestTy.SimpleTy) {
5240    default:
5241      llvm_unreachable("Unhandled dest type");
5242    case MVT::i8:
5243      return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
5244    case MVT::i16:
5245      return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
5246    case MVT::i32:
5247      return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
5248    }
5249  }
5250}
5251