//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
16#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
17
18#include "NVPTX.h"
19#include "llvm/CodeGen/SelectionDAG.h"
20#include "llvm/Target/TargetLowering.h"
21
22namespace llvm {
23namespace NVPTXISD {
24enum NodeType : unsigned {
25  // Start the numbering from where ISD NodeType finishes.
26  FIRST_NUMBER = ISD::BUILTIN_OP_END,
27  Wrapper,
28  CALL,
29  RET_FLAG,
30  LOAD_PARAM,
31  DeclareParam,
32  DeclareScalarParam,
33  DeclareRetParam,
34  DeclareRet,
35  DeclareScalarRet,
36  PrintCall,
37  PrintConvergentCall,
38  PrintCallUni,
39  PrintConvergentCallUni,
40  CallArgBegin,
41  CallArg,
42  LastCallArg,
43  CallArgEnd,
44  CallVoid,
45  CallVal,
46  CallSymbol,
47  Prototype,
48  MoveParam,
49  PseudoUseParam,
50  RETURN,
51  CallSeqBegin,
52  CallSeqEnd,
53  CallPrototype,
54  FUN_SHFL_CLAMP,
55  FUN_SHFR_CLAMP,
56  MUL_WIDE_SIGNED,
57  MUL_WIDE_UNSIGNED,
58  IMAD,
59  Dummy,
60
61  LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
62  LoadV4,
63  LDGV2, // LDG.v2
64  LDGV4, // LDG.v4
65  LDUV2, // LDU.v2
66  LDUV4, // LDU.v4
67  StoreV2,
68  StoreV4,
69  LoadParam,
70  LoadParamV2,
71  LoadParamV4,
72  StoreParam,
73  StoreParamV2,
74  StoreParamV4,
75  StoreParamS32, // to sext and store a <32bit value, not used currently
76  StoreParamU32, // to zext and store a <32bit value, not used currently
77  StoreRetval,
78  StoreRetvalV2,
79  StoreRetvalV4,
80
81  // Texture intrinsics
82  Tex1DFloatS32,
83  Tex1DFloatFloat,
84  Tex1DFloatFloatLevel,
85  Tex1DFloatFloatGrad,
86  Tex1DS32S32,
87  Tex1DS32Float,
88  Tex1DS32FloatLevel,
89  Tex1DS32FloatGrad,
90  Tex1DU32S32,
91  Tex1DU32Float,
92  Tex1DU32FloatLevel,
93  Tex1DU32FloatGrad,
94  Tex1DArrayFloatS32,
95  Tex1DArrayFloatFloat,
96  Tex1DArrayFloatFloatLevel,
97  Tex1DArrayFloatFloatGrad,
98  Tex1DArrayS32S32,
99  Tex1DArrayS32Float,
100  Tex1DArrayS32FloatLevel,
101  Tex1DArrayS32FloatGrad,
102  Tex1DArrayU32S32,
103  Tex1DArrayU32Float,
104  Tex1DArrayU32FloatLevel,
105  Tex1DArrayU32FloatGrad,
106  Tex2DFloatS32,
107  Tex2DFloatFloat,
108  Tex2DFloatFloatLevel,
109  Tex2DFloatFloatGrad,
110  Tex2DS32S32,
111  Tex2DS32Float,
112  Tex2DS32FloatLevel,
113  Tex2DS32FloatGrad,
114  Tex2DU32S32,
115  Tex2DU32Float,
116  Tex2DU32FloatLevel,
117  Tex2DU32FloatGrad,
118  Tex2DArrayFloatS32,
119  Tex2DArrayFloatFloat,
120  Tex2DArrayFloatFloatLevel,
121  Tex2DArrayFloatFloatGrad,
122  Tex2DArrayS32S32,
123  Tex2DArrayS32Float,
124  Tex2DArrayS32FloatLevel,
125  Tex2DArrayS32FloatGrad,
126  Tex2DArrayU32S32,
127  Tex2DArrayU32Float,
128  Tex2DArrayU32FloatLevel,
129  Tex2DArrayU32FloatGrad,
130  Tex3DFloatS32,
131  Tex3DFloatFloat,
132  Tex3DFloatFloatLevel,
133  Tex3DFloatFloatGrad,
134  Tex3DS32S32,
135  Tex3DS32Float,
136  Tex3DS32FloatLevel,
137  Tex3DS32FloatGrad,
138  Tex3DU32S32,
139  Tex3DU32Float,
140  Tex3DU32FloatLevel,
141  Tex3DU32FloatGrad,
142  TexCubeFloatFloat,
143  TexCubeFloatFloatLevel,
144  TexCubeS32Float,
145  TexCubeS32FloatLevel,
146  TexCubeU32Float,
147  TexCubeU32FloatLevel,
148  TexCubeArrayFloatFloat,
149  TexCubeArrayFloatFloatLevel,
150  TexCubeArrayS32Float,
151  TexCubeArrayS32FloatLevel,
152  TexCubeArrayU32Float,
153  TexCubeArrayU32FloatLevel,
154  Tld4R2DFloatFloat,
155  Tld4G2DFloatFloat,
156  Tld4B2DFloatFloat,
157  Tld4A2DFloatFloat,
158  Tld4R2DS64Float,
159  Tld4G2DS64Float,
160  Tld4B2DS64Float,
161  Tld4A2DS64Float,
162  Tld4R2DU64Float,
163  Tld4G2DU64Float,
164  Tld4B2DU64Float,
165  Tld4A2DU64Float,
166  TexUnified1DFloatS32,
167  TexUnified1DFloatFloat,
168  TexUnified1DFloatFloatLevel,
169  TexUnified1DFloatFloatGrad,
170  TexUnified1DS32S32,
171  TexUnified1DS32Float,
172  TexUnified1DS32FloatLevel,
173  TexUnified1DS32FloatGrad,
174  TexUnified1DU32S32,
175  TexUnified1DU32Float,
176  TexUnified1DU32FloatLevel,
177  TexUnified1DU32FloatGrad,
178  TexUnified1DArrayFloatS32,
179  TexUnified1DArrayFloatFloat,
180  TexUnified1DArrayFloatFloatLevel,
181  TexUnified1DArrayFloatFloatGrad,
182  TexUnified1DArrayS32S32,
183  TexUnified1DArrayS32Float,
184  TexUnified1DArrayS32FloatLevel,
185  TexUnified1DArrayS32FloatGrad,
186  TexUnified1DArrayU32S32,
187  TexUnified1DArrayU32Float,
188  TexUnified1DArrayU32FloatLevel,
189  TexUnified1DArrayU32FloatGrad,
190  TexUnified2DFloatS32,
191  TexUnified2DFloatFloat,
192  TexUnified2DFloatFloatLevel,
193  TexUnified2DFloatFloatGrad,
194  TexUnified2DS32S32,
195  TexUnified2DS32Float,
196  TexUnified2DS32FloatLevel,
197  TexUnified2DS32FloatGrad,
198  TexUnified2DU32S32,
199  TexUnified2DU32Float,
200  TexUnified2DU32FloatLevel,
201  TexUnified2DU32FloatGrad,
202  TexUnified2DArrayFloatS32,
203  TexUnified2DArrayFloatFloat,
204  TexUnified2DArrayFloatFloatLevel,
205  TexUnified2DArrayFloatFloatGrad,
206  TexUnified2DArrayS32S32,
207  TexUnified2DArrayS32Float,
208  TexUnified2DArrayS32FloatLevel,
209  TexUnified2DArrayS32FloatGrad,
210  TexUnified2DArrayU32S32,
211  TexUnified2DArrayU32Float,
212  TexUnified2DArrayU32FloatLevel,
213  TexUnified2DArrayU32FloatGrad,
214  TexUnified3DFloatS32,
215  TexUnified3DFloatFloat,
216  TexUnified3DFloatFloatLevel,
217  TexUnified3DFloatFloatGrad,
218  TexUnified3DS32S32,
219  TexUnified3DS32Float,
220  TexUnified3DS32FloatLevel,
221  TexUnified3DS32FloatGrad,
222  TexUnified3DU32S32,
223  TexUnified3DU32Float,
224  TexUnified3DU32FloatLevel,
225  TexUnified3DU32FloatGrad,
226  TexUnifiedCubeFloatFloat,
227  TexUnifiedCubeFloatFloatLevel,
228  TexUnifiedCubeS32Float,
229  TexUnifiedCubeS32FloatLevel,
230  TexUnifiedCubeU32Float,
231  TexUnifiedCubeU32FloatLevel,
232  TexUnifiedCubeArrayFloatFloat,
233  TexUnifiedCubeArrayFloatFloatLevel,
234  TexUnifiedCubeArrayS32Float,
235  TexUnifiedCubeArrayS32FloatLevel,
236  TexUnifiedCubeArrayU32Float,
237  TexUnifiedCubeArrayU32FloatLevel,
238  Tld4UnifiedR2DFloatFloat,
239  Tld4UnifiedG2DFloatFloat,
240  Tld4UnifiedB2DFloatFloat,
241  Tld4UnifiedA2DFloatFloat,
242  Tld4UnifiedR2DS64Float,
243  Tld4UnifiedG2DS64Float,
244  Tld4UnifiedB2DS64Float,
245  Tld4UnifiedA2DS64Float,
246  Tld4UnifiedR2DU64Float,
247  Tld4UnifiedG2DU64Float,
248  Tld4UnifiedB2DU64Float,
249  Tld4UnifiedA2DU64Float,
250
251  // Surface intrinsics
252  Suld1DI8Clamp,
253  Suld1DI16Clamp,
254  Suld1DI32Clamp,
255  Suld1DI64Clamp,
256  Suld1DV2I8Clamp,
257  Suld1DV2I16Clamp,
258  Suld1DV2I32Clamp,
259  Suld1DV2I64Clamp,
260  Suld1DV4I8Clamp,
261  Suld1DV4I16Clamp,
262  Suld1DV4I32Clamp,
263
264  Suld1DArrayI8Clamp,
265  Suld1DArrayI16Clamp,
266  Suld1DArrayI32Clamp,
267  Suld1DArrayI64Clamp,
268  Suld1DArrayV2I8Clamp,
269  Suld1DArrayV2I16Clamp,
270  Suld1DArrayV2I32Clamp,
271  Suld1DArrayV2I64Clamp,
272  Suld1DArrayV4I8Clamp,
273  Suld1DArrayV4I16Clamp,
274  Suld1DArrayV4I32Clamp,
275
276  Suld2DI8Clamp,
277  Suld2DI16Clamp,
278  Suld2DI32Clamp,
279  Suld2DI64Clamp,
280  Suld2DV2I8Clamp,
281  Suld2DV2I16Clamp,
282  Suld2DV2I32Clamp,
283  Suld2DV2I64Clamp,
284  Suld2DV4I8Clamp,
285  Suld2DV4I16Clamp,
286  Suld2DV4I32Clamp,
287
288  Suld2DArrayI8Clamp,
289  Suld2DArrayI16Clamp,
290  Suld2DArrayI32Clamp,
291  Suld2DArrayI64Clamp,
292  Suld2DArrayV2I8Clamp,
293  Suld2DArrayV2I16Clamp,
294  Suld2DArrayV2I32Clamp,
295  Suld2DArrayV2I64Clamp,
296  Suld2DArrayV4I8Clamp,
297  Suld2DArrayV4I16Clamp,
298  Suld2DArrayV4I32Clamp,
299
300  Suld3DI8Clamp,
301  Suld3DI16Clamp,
302  Suld3DI32Clamp,
303  Suld3DI64Clamp,
304  Suld3DV2I8Clamp,
305  Suld3DV2I16Clamp,
306  Suld3DV2I32Clamp,
307  Suld3DV2I64Clamp,
308  Suld3DV4I8Clamp,
309  Suld3DV4I16Clamp,
310  Suld3DV4I32Clamp,
311
312  Suld1DI8Trap,
313  Suld1DI16Trap,
314  Suld1DI32Trap,
315  Suld1DI64Trap,
316  Suld1DV2I8Trap,
317  Suld1DV2I16Trap,
318  Suld1DV2I32Trap,
319  Suld1DV2I64Trap,
320  Suld1DV4I8Trap,
321  Suld1DV4I16Trap,
322  Suld1DV4I32Trap,
323
324  Suld1DArrayI8Trap,
325  Suld1DArrayI16Trap,
326  Suld1DArrayI32Trap,
327  Suld1DArrayI64Trap,
328  Suld1DArrayV2I8Trap,
329  Suld1DArrayV2I16Trap,
330  Suld1DArrayV2I32Trap,
331  Suld1DArrayV2I64Trap,
332  Suld1DArrayV4I8Trap,
333  Suld1DArrayV4I16Trap,
334  Suld1DArrayV4I32Trap,
335
336  Suld2DI8Trap,
337  Suld2DI16Trap,
338  Suld2DI32Trap,
339  Suld2DI64Trap,
340  Suld2DV2I8Trap,
341  Suld2DV2I16Trap,
342  Suld2DV2I32Trap,
343  Suld2DV2I64Trap,
344  Suld2DV4I8Trap,
345  Suld2DV4I16Trap,
346  Suld2DV4I32Trap,
347
348  Suld2DArrayI8Trap,
349  Suld2DArrayI16Trap,
350  Suld2DArrayI32Trap,
351  Suld2DArrayI64Trap,
352  Suld2DArrayV2I8Trap,
353  Suld2DArrayV2I16Trap,
354  Suld2DArrayV2I32Trap,
355  Suld2DArrayV2I64Trap,
356  Suld2DArrayV4I8Trap,
357  Suld2DArrayV4I16Trap,
358  Suld2DArrayV4I32Trap,
359
360  Suld3DI8Trap,
361  Suld3DI16Trap,
362  Suld3DI32Trap,
363  Suld3DI64Trap,
364  Suld3DV2I8Trap,
365  Suld3DV2I16Trap,
366  Suld3DV2I32Trap,
367  Suld3DV2I64Trap,
368  Suld3DV4I8Trap,
369  Suld3DV4I16Trap,
370  Suld3DV4I32Trap,
371
372  Suld1DI8Zero,
373  Suld1DI16Zero,
374  Suld1DI32Zero,
375  Suld1DI64Zero,
376  Suld1DV2I8Zero,
377  Suld1DV2I16Zero,
378  Suld1DV2I32Zero,
379  Suld1DV2I64Zero,
380  Suld1DV4I8Zero,
381  Suld1DV4I16Zero,
382  Suld1DV4I32Zero,
383
384  Suld1DArrayI8Zero,
385  Suld1DArrayI16Zero,
386  Suld1DArrayI32Zero,
387  Suld1DArrayI64Zero,
388  Suld1DArrayV2I8Zero,
389  Suld1DArrayV2I16Zero,
390  Suld1DArrayV2I32Zero,
391  Suld1DArrayV2I64Zero,
392  Suld1DArrayV4I8Zero,
393  Suld1DArrayV4I16Zero,
394  Suld1DArrayV4I32Zero,
395
396  Suld2DI8Zero,
397  Suld2DI16Zero,
398  Suld2DI32Zero,
399  Suld2DI64Zero,
400  Suld2DV2I8Zero,
401  Suld2DV2I16Zero,
402  Suld2DV2I32Zero,
403  Suld2DV2I64Zero,
404  Suld2DV4I8Zero,
405  Suld2DV4I16Zero,
406  Suld2DV4I32Zero,
407
408  Suld2DArrayI8Zero,
409  Suld2DArrayI16Zero,
410  Suld2DArrayI32Zero,
411  Suld2DArrayI64Zero,
412  Suld2DArrayV2I8Zero,
413  Suld2DArrayV2I16Zero,
414  Suld2DArrayV2I32Zero,
415  Suld2DArrayV2I64Zero,
416  Suld2DArrayV4I8Zero,
417  Suld2DArrayV4I16Zero,
418  Suld2DArrayV4I32Zero,
419
420  Suld3DI8Zero,
421  Suld3DI16Zero,
422  Suld3DI32Zero,
423  Suld3DI64Zero,
424  Suld3DV2I8Zero,
425  Suld3DV2I16Zero,
426  Suld3DV2I32Zero,
427  Suld3DV2I64Zero,
428  Suld3DV4I8Zero,
429  Suld3DV4I16Zero,
430  Suld3DV4I32Zero
431};
432}
433
434class NVPTXSubtarget;
435
436//===--------------------------------------------------------------------===//
437// TargetLowering Implementation
438//===--------------------------------------------------------------------===//
439class NVPTXTargetLowering : public TargetLowering {
440public:
441  explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
442                               const NVPTXSubtarget &STI);
443  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
444
445  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
446
447  const char *getTargetNodeName(unsigned Opcode) const override;
448
449  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
450                          unsigned Intrinsic) const override;
451
452  /// isLegalAddressingMode - Return true if the addressing mode represented
453  /// by AM is legal for this target, for a load/store of the specified type
454  /// Used to guide target specific optimizations, like loop strength
455  /// reduction (LoopStrengthReduce.cpp) and memory optimization for
456  /// address mode (CodeGenPrepare.cpp)
457  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
458                             unsigned AS) const override;
459
460  bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
461    // Truncating 64-bit to 32-bit is free in SASS.
462    if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
463      return false;
464    return SrcTy->getPrimitiveSizeInBits() == 64 &&
465           DstTy->getPrimitiveSizeInBits() == 32;
466  }
467
468  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
469                         EVT VT) const override {
470    if (VT.isVector())
471      return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
472    return MVT::i1;
473  }
474
475  ConstraintType getConstraintType(StringRef Constraint) const override;
476  std::pair<unsigned, const TargetRegisterClass *>
477  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
478                               StringRef Constraint, MVT VT) const override;
479
480  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
481                               bool isVarArg,
482                               const SmallVectorImpl<ISD::InputArg> &Ins,
483                               const SDLoc &dl, SelectionDAG &DAG,
484                               SmallVectorImpl<SDValue> &InVals) const override;
485
486  SDValue LowerCall(CallLoweringInfo &CLI,
487                    SmallVectorImpl<SDValue> &InVals) const override;
488
489  std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
490                           const SmallVectorImpl<ISD::OutputArg> &,
491                           unsigned retAlignment,
492                           const ImmutableCallSite *CS) const;
493
494  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
495                      const SmallVectorImpl<ISD::OutputArg> &Outs,
496                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
497                      SelectionDAG &DAG) const override;
498
499  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
500                                    std::vector<SDValue> &Ops,
501                                    SelectionDAG &DAG) const override;
502
503  const NVPTXTargetMachine *nvTM;
504
505  // PTX always uses 32-bit shift amounts
506  MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
507    return MVT::i32;
508  }
509
510  TargetLoweringBase::LegalizeTypeAction
511  getPreferredVectorAction(EVT VT) const override;
512
513  bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
514
515  bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
516
517  bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
518
519private:
520  const NVPTXSubtarget &STI; // cache the subtarget here
521  SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
522
523  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
524
525  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
526  SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
527
528  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
529  SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
530  SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
531
532  SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
533  SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
534
535  SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
536
537  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
538                          SelectionDAG &DAG) const override;
539  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
540
541  unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS,
542                                Type *Ty, unsigned Idx) const;
543};
544} // namespace llvm
545
546#endif
547