1//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that NVPTX uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
16#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
17
18#include "NVPTX.h"
19#include "llvm/CodeGen/SelectionDAG.h"
20#include "llvm/Target/TargetLowering.h"
21
22namespace llvm {
namespace NVPTXISD {
/// Target-specific SelectionDAG opcodes for NVPTX.
///
/// The enumerators form two consecutively numbered runs: the first starts at
/// ISD::BUILTIN_OP_END (FIRST_NUMBER .. Dummy), the second restarts at
/// ISD::FIRST_TARGET_MEMORY_OPCODE (LoadV2 .. Suld3DV4I32Zero). Because every
/// value is implied by its position, inserting or reordering entries changes
/// the numeric value of everything that follows within the same run.
enum NodeType {
  // Start the numbering from where ISD NodeType finishes.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  Wrapper,
  CALL,
  RET_FLAG,
  LOAD_PARAM,
  DeclareParam,
  DeclareScalarParam,
  DeclareRetParam,
  DeclareRet,
  DeclareScalarRet,
  PrintCall,
  PrintCallUni,
  CallArgBegin,
  CallArg,
  LastCallArg,
  CallArgEnd,
  CallVoid,
  CallVal,
  CallSymbol,
  Prototype,
  MoveParam,
  PseudoUseParam,
  RETURN,
  CallSeqBegin,
  CallSeqEnd,
  CallPrototype,
  FUN_SHFL_CLAMP,
  FUN_SHFR_CLAMP,
  MUL_WIDE_SIGNED,
  MUL_WIDE_UNSIGNED,
  IMAD,
  Dummy,

  // Numbering restarts here at ISD::FIRST_TARGET_MEMORY_OPCODE; everything
  // below continues from that value.
  // NOTE(review): presumably this range is reserved for nodes that access
  // memory -- confirm against the ISD opcode documentation.
  LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LoadV4,
  LDGV2, // LDG.v2
  LDGV4, // LDG.v4
  LDUV2, // LDU.v2
  LDUV4, // LDU.v4
  StoreV2,
  StoreV4,
  LoadParam,
  LoadParamV2,
  LoadParamV4,
  StoreParam,
  StoreParamV2,
  StoreParamV4,
  StoreParamS32, // to sext and store a <32bit value, not used currently
  StoreParamU32, // to zext and store a <32bit value, not used currently
  StoreRetval,
  StoreRetvalV2,
  StoreRetvalV4,

  // Texture intrinsics
  // NOTE(review): names appear to follow
  //   Tex[Unified]<geometry><result type><coordinate type>[Level|Grad]
  // and Tld4[Unified]<R|G|B|A>2D<result type><coordinate type>; this is
  // inferred from the identifiers only -- confirm against the instruction
  // definitions before relying on it.
  Tex1DFloatS32,
  Tex1DFloatFloat,
  Tex1DFloatFloatLevel,
  Tex1DFloatFloatGrad,
  Tex1DS32S32,
  Tex1DS32Float,
  Tex1DS32FloatLevel,
  Tex1DS32FloatGrad,
  Tex1DU32S32,
  Tex1DU32Float,
  Tex1DU32FloatLevel,
  Tex1DU32FloatGrad,
  Tex1DArrayFloatS32,
  Tex1DArrayFloatFloat,
  Tex1DArrayFloatFloatLevel,
  Tex1DArrayFloatFloatGrad,
  Tex1DArrayS32S32,
  Tex1DArrayS32Float,
  Tex1DArrayS32FloatLevel,
  Tex1DArrayS32FloatGrad,
  Tex1DArrayU32S32,
  Tex1DArrayU32Float,
  Tex1DArrayU32FloatLevel,
  Tex1DArrayU32FloatGrad,
  Tex2DFloatS32,
  Tex2DFloatFloat,
  Tex2DFloatFloatLevel,
  Tex2DFloatFloatGrad,
  Tex2DS32S32,
  Tex2DS32Float,
  Tex2DS32FloatLevel,
  Tex2DS32FloatGrad,
  Tex2DU32S32,
  Tex2DU32Float,
  Tex2DU32FloatLevel,
  Tex2DU32FloatGrad,
  Tex2DArrayFloatS32,
  Tex2DArrayFloatFloat,
  Tex2DArrayFloatFloatLevel,
  Tex2DArrayFloatFloatGrad,
  Tex2DArrayS32S32,
  Tex2DArrayS32Float,
  Tex2DArrayS32FloatLevel,
  Tex2DArrayS32FloatGrad,
  Tex2DArrayU32S32,
  Tex2DArrayU32Float,
  Tex2DArrayU32FloatLevel,
  Tex2DArrayU32FloatGrad,
  Tex3DFloatS32,
  Tex3DFloatFloat,
  Tex3DFloatFloatLevel,
  Tex3DFloatFloatGrad,
  Tex3DS32S32,
  Tex3DS32Float,
  Tex3DS32FloatLevel,
  Tex3DS32FloatGrad,
  Tex3DU32S32,
  Tex3DU32Float,
  Tex3DU32FloatLevel,
  Tex3DU32FloatGrad,
  TexCubeFloatFloat,
  TexCubeFloatFloatLevel,
  TexCubeS32Float,
  TexCubeS32FloatLevel,
  TexCubeU32Float,
  TexCubeU32FloatLevel,
  TexCubeArrayFloatFloat,
  TexCubeArrayFloatFloatLevel,
  TexCubeArrayS32Float,
  TexCubeArrayS32FloatLevel,
  TexCubeArrayU32Float,
  TexCubeArrayU32FloatLevel,
  Tld4R2DFloatFloat,
  Tld4G2DFloatFloat,
  Tld4B2DFloatFloat,
  Tld4A2DFloatFloat,
  Tld4R2DS64Float,
  Tld4G2DS64Float,
  Tld4B2DS64Float,
  Tld4A2DS64Float,
  Tld4R2DU64Float,
  Tld4G2DU64Float,
  Tld4B2DU64Float,
  Tld4A2DU64Float,
  // "Unified" counterparts of the texture opcodes above, in the same order.
  TexUnified1DFloatS32,
  TexUnified1DFloatFloat,
  TexUnified1DFloatFloatLevel,
  TexUnified1DFloatFloatGrad,
  TexUnified1DS32S32,
  TexUnified1DS32Float,
  TexUnified1DS32FloatLevel,
  TexUnified1DS32FloatGrad,
  TexUnified1DU32S32,
  TexUnified1DU32Float,
  TexUnified1DU32FloatLevel,
  TexUnified1DU32FloatGrad,
  TexUnified1DArrayFloatS32,
  TexUnified1DArrayFloatFloat,
  TexUnified1DArrayFloatFloatLevel,
  TexUnified1DArrayFloatFloatGrad,
  TexUnified1DArrayS32S32,
  TexUnified1DArrayS32Float,
  TexUnified1DArrayS32FloatLevel,
  TexUnified1DArrayS32FloatGrad,
  TexUnified1DArrayU32S32,
  TexUnified1DArrayU32Float,
  TexUnified1DArrayU32FloatLevel,
  TexUnified1DArrayU32FloatGrad,
  TexUnified2DFloatS32,
  TexUnified2DFloatFloat,
  TexUnified2DFloatFloatLevel,
  TexUnified2DFloatFloatGrad,
  TexUnified2DS32S32,
  TexUnified2DS32Float,
  TexUnified2DS32FloatLevel,
  TexUnified2DS32FloatGrad,
  TexUnified2DU32S32,
  TexUnified2DU32Float,
  TexUnified2DU32FloatLevel,
  TexUnified2DU32FloatGrad,
  TexUnified2DArrayFloatS32,
  TexUnified2DArrayFloatFloat,
  TexUnified2DArrayFloatFloatLevel,
  TexUnified2DArrayFloatFloatGrad,
  TexUnified2DArrayS32S32,
  TexUnified2DArrayS32Float,
  TexUnified2DArrayS32FloatLevel,
  TexUnified2DArrayS32FloatGrad,
  TexUnified2DArrayU32S32,
  TexUnified2DArrayU32Float,
  TexUnified2DArrayU32FloatLevel,
  TexUnified2DArrayU32FloatGrad,
  TexUnified3DFloatS32,
  TexUnified3DFloatFloat,
  TexUnified3DFloatFloatLevel,
  TexUnified3DFloatFloatGrad,
  TexUnified3DS32S32,
  TexUnified3DS32Float,
  TexUnified3DS32FloatLevel,
  TexUnified3DS32FloatGrad,
  TexUnified3DU32S32,
  TexUnified3DU32Float,
  TexUnified3DU32FloatLevel,
  TexUnified3DU32FloatGrad,
  TexUnifiedCubeFloatFloat,
  TexUnifiedCubeFloatFloatLevel,
  TexUnifiedCubeS32Float,
  TexUnifiedCubeS32FloatLevel,
  TexUnifiedCubeU32Float,
  TexUnifiedCubeU32FloatLevel,
  TexUnifiedCubeArrayFloatFloat,
  TexUnifiedCubeArrayFloatFloatLevel,
  TexUnifiedCubeArrayS32Float,
  TexUnifiedCubeArrayS32FloatLevel,
  TexUnifiedCubeArrayU32Float,
  TexUnifiedCubeArrayU32FloatLevel,
  Tld4UnifiedR2DFloatFloat,
  Tld4UnifiedG2DFloatFloat,
  Tld4UnifiedB2DFloatFloat,
  Tld4UnifiedA2DFloatFloat,
  Tld4UnifiedR2DS64Float,
  Tld4UnifiedG2DS64Float,
  Tld4UnifiedB2DS64Float,
  Tld4UnifiedA2DS64Float,
  Tld4UnifiedR2DU64Float,
  Tld4UnifiedG2DU64Float,
  Tld4UnifiedB2DU64Float,
  Tld4UnifiedA2DU64Float,

  // Surface intrinsics
  // Three suffix families follow (Clamp, Trap, Zero). Each family lists the
  // same five geometries (1D, 1DArray, 2D, 2DArray, 3D) with the same eleven
  // element forms (I8..I64, V2I8..V2I64, V4I8..V4I32; there is no V4I64).
  // NOTE(review): the suffix presumably selects the out-of-range handling
  // mode of the surface load -- confirm against the PTX ISA.

  // Clamp family.
  Suld1DI8Clamp,
  Suld1DI16Clamp,
  Suld1DI32Clamp,
  Suld1DI64Clamp,
  Suld1DV2I8Clamp,
  Suld1DV2I16Clamp,
  Suld1DV2I32Clamp,
  Suld1DV2I64Clamp,
  Suld1DV4I8Clamp,
  Suld1DV4I16Clamp,
  Suld1DV4I32Clamp,

  Suld1DArrayI8Clamp,
  Suld1DArrayI16Clamp,
  Suld1DArrayI32Clamp,
  Suld1DArrayI64Clamp,
  Suld1DArrayV2I8Clamp,
  Suld1DArrayV2I16Clamp,
  Suld1DArrayV2I32Clamp,
  Suld1DArrayV2I64Clamp,
  Suld1DArrayV4I8Clamp,
  Suld1DArrayV4I16Clamp,
  Suld1DArrayV4I32Clamp,

  Suld2DI8Clamp,
  Suld2DI16Clamp,
  Suld2DI32Clamp,
  Suld2DI64Clamp,
  Suld2DV2I8Clamp,
  Suld2DV2I16Clamp,
  Suld2DV2I32Clamp,
  Suld2DV2I64Clamp,
  Suld2DV4I8Clamp,
  Suld2DV4I16Clamp,
  Suld2DV4I32Clamp,

  Suld2DArrayI8Clamp,
  Suld2DArrayI16Clamp,
  Suld2DArrayI32Clamp,
  Suld2DArrayI64Clamp,
  Suld2DArrayV2I8Clamp,
  Suld2DArrayV2I16Clamp,
  Suld2DArrayV2I32Clamp,
  Suld2DArrayV2I64Clamp,
  Suld2DArrayV4I8Clamp,
  Suld2DArrayV4I16Clamp,
  Suld2DArrayV4I32Clamp,

  Suld3DI8Clamp,
  Suld3DI16Clamp,
  Suld3DI32Clamp,
  Suld3DI64Clamp,
  Suld3DV2I8Clamp,
  Suld3DV2I16Clamp,
  Suld3DV2I32Clamp,
  Suld3DV2I64Clamp,
  Suld3DV4I8Clamp,
  Suld3DV4I16Clamp,
  Suld3DV4I32Clamp,

  // Trap family.
  Suld1DI8Trap,
  Suld1DI16Trap,
  Suld1DI32Trap,
  Suld1DI64Trap,
  Suld1DV2I8Trap,
  Suld1DV2I16Trap,
  Suld1DV2I32Trap,
  Suld1DV2I64Trap,
  Suld1DV4I8Trap,
  Suld1DV4I16Trap,
  Suld1DV4I32Trap,

  Suld1DArrayI8Trap,
  Suld1DArrayI16Trap,
  Suld1DArrayI32Trap,
  Suld1DArrayI64Trap,
  Suld1DArrayV2I8Trap,
  Suld1DArrayV2I16Trap,
  Suld1DArrayV2I32Trap,
  Suld1DArrayV2I64Trap,
  Suld1DArrayV4I8Trap,
  Suld1DArrayV4I16Trap,
  Suld1DArrayV4I32Trap,

  Suld2DI8Trap,
  Suld2DI16Trap,
  Suld2DI32Trap,
  Suld2DI64Trap,
  Suld2DV2I8Trap,
  Suld2DV2I16Trap,
  Suld2DV2I32Trap,
  Suld2DV2I64Trap,
  Suld2DV4I8Trap,
  Suld2DV4I16Trap,
  Suld2DV4I32Trap,

  Suld2DArrayI8Trap,
  Suld2DArrayI16Trap,
  Suld2DArrayI32Trap,
  Suld2DArrayI64Trap,
  Suld2DArrayV2I8Trap,
  Suld2DArrayV2I16Trap,
  Suld2DArrayV2I32Trap,
  Suld2DArrayV2I64Trap,
  Suld2DArrayV4I8Trap,
  Suld2DArrayV4I16Trap,
  Suld2DArrayV4I32Trap,

  Suld3DI8Trap,
  Suld3DI16Trap,
  Suld3DI32Trap,
  Suld3DI64Trap,
  Suld3DV2I8Trap,
  Suld3DV2I16Trap,
  Suld3DV2I32Trap,
  Suld3DV2I64Trap,
  Suld3DV4I8Trap,
  Suld3DV4I16Trap,
  Suld3DV4I32Trap,

  // Zero family.
  Suld1DI8Zero,
  Suld1DI16Zero,
  Suld1DI32Zero,
  Suld1DI64Zero,
  Suld1DV2I8Zero,
  Suld1DV2I16Zero,
  Suld1DV2I32Zero,
  Suld1DV2I64Zero,
  Suld1DV4I8Zero,
  Suld1DV4I16Zero,
  Suld1DV4I32Zero,

  Suld1DArrayI8Zero,
  Suld1DArrayI16Zero,
  Suld1DArrayI32Zero,
  Suld1DArrayI64Zero,
  Suld1DArrayV2I8Zero,
  Suld1DArrayV2I16Zero,
  Suld1DArrayV2I32Zero,
  Suld1DArrayV2I64Zero,
  Suld1DArrayV4I8Zero,
  Suld1DArrayV4I16Zero,
  Suld1DArrayV4I32Zero,

  Suld2DI8Zero,
  Suld2DI16Zero,
  Suld2DI32Zero,
  Suld2DI64Zero,
  Suld2DV2I8Zero,
  Suld2DV2I16Zero,
  Suld2DV2I32Zero,
  Suld2DV2I64Zero,
  Suld2DV4I8Zero,
  Suld2DV4I16Zero,
  Suld2DV4I32Zero,

  Suld2DArrayI8Zero,
  Suld2DArrayI16Zero,
  Suld2DArrayI32Zero,
  Suld2DArrayI64Zero,
  Suld2DArrayV2I8Zero,
  Suld2DArrayV2I16Zero,
  Suld2DArrayV2I32Zero,
  Suld2DArrayV2I64Zero,
  Suld2DArrayV4I8Zero,
  Suld2DArrayV4I16Zero,
  Suld2DArrayV4I32Zero,

  Suld3DI8Zero,
  Suld3DI16Zero,
  Suld3DI32Zero,
  Suld3DI64Zero,
  Suld3DV2I8Zero,
  Suld3DV2I16Zero,
  Suld3DV2I32Zero,
  Suld3DV2I64Zero,
  Suld3DV4I8Zero,
  Suld3DV4I16Zero,
  Suld3DV4I32Zero
};
} // namespace NVPTXISD
431
// Forward declaration: this header only names NVPTXSubtarget through
// references, so the complete type is not needed here.
class NVPTXSubtarget;
433
434//===--------------------------------------------------------------------===//
435// TargetLowering Implementation
436//===--------------------------------------------------------------------===//
/// NVPTX implementation of the TargetLowering interface.
///
/// Most members are declared here and defined out of line; only the small
/// inline hooks below have bodies visible in this header.
class NVPTXTargetLowering : public TargetLowering {
public:
  /// Construct the lowering object from the target machine \p TM and the
  /// subtarget \p STI, both passed by const reference.
  explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
                               const NVPTXSubtarget &STI);
  /// TargetLowering override; defined out of line.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  /// Two overloads: one takes the operand as an SDValue, the other takes the
  /// GlobalValue and an Offset explicitly. Neither is an override.
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
                             SelectionDAG &DAG) const;

  /// TargetLowering override returning a C string for \p Opcode.
  /// NOTE(review): presumably maps NVPTXISD::NodeType values to printable
  /// names -- confirm in the implementation file.
  const char *getTargetNodeName(unsigned Opcode) const override;

  /// NVPTX-specific query (not an override) on whether \p VT is supported in
  /// an intrinsic; the exact criteria live in the implementation file.
  bool isTypeSupportedInIntrinsic(MVT VT) const;

  /// TargetLowering override; fills \p Info for the call \p I to intrinsic
  /// number \p Intrinsic. Defined out of line.
  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          unsigned Intrinsic) const override;

  /// isLegalAddressingMode - Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type.
  /// Used to guide target specific optimizations, like loop strength
  /// reduction (LoopStrengthReduce.cpp) and memory optimization for
  /// address mode (CodeGenPrepare.cpp)
  bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;

  /// getFunctionAlignment - Return the Log2 alignment of this function.
  /// NOTE(review): declared without `override`, unlike the neighbouring
  /// hooks -- confirm whether a base-class virtual is intended here.
  unsigned getFunctionAlignment(const Function *F) const;

  /// Result type of a SETCC on \p VT: plain i1 for non-vector types, and a
  /// vector of i1 with the same number of elements for vector types.
  EVT getSetCCResultType(LLVMContext &Ctx, EVT VT) const override {
    if (VT.isVector())
      return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
    return MVT::i1;
  }

  /// Inline-assembly constraint hooks (TargetLowering overrides); both are
  /// defined out of line.
  ConstraintType
  getConstraintType(const std::string &Constraint) const override;
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               const std::string &Constraint,
                               MVT VT) const override;

  /// TargetLowering override for incoming arguments; results are appended to
  /// \p InVals. Defined out of line.
  SDValue LowerFormalArguments(
      SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
      const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
      SmallVectorImpl<SDValue> &InVals) const override;

  /// TargetLowering override for outgoing calls. Defined out of line.
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  /// Builds a std::string from the return type, argument list, output
  /// arguments, return alignment and (optional, pointer) call site.
  /// NOTE(review): presumably the textual PTX call prototype -- confirm in
  /// the implementation file.
  std::string getPrototype(Type *, const ArgListTy &,
                           const SmallVectorImpl<ISD::OutputArg> &,
                           unsigned retAlignment,
                           const ImmutableCallSite *CS) const;

  /// TargetLowering override for function returns. Defined out of line.
  SDValue
  LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
              const SmallVectorImpl<ISD::OutputArg> &Outs,
              const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
              SelectionDAG &DAG) const override;

  /// TargetLowering override for inline-asm operands; lowered operands are
  /// returned through \p Ops. Defined out of line.
  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  /// Always returns InlineAsm::Constraint_m; \p ConstraintCode is ignored
  /// (see the FIXME in the body).
  unsigned getInlineAsmMemConstraint(
      const std::string &ConstraintCode) const override {
    // FIXME: Map different constraints differently.
    return InlineAsm::Constraint_m;
  }

  /// Public, non-owning pointer to the target machine.
  /// NOTE(review): presumably initialised by the constructor from its TM
  /// argument -- confirm; being public and mutable, it can be reassigned by
  /// any client.
  const NVPTXTargetMachine *nvTM;

  // PTX always uses 32-bit shift amounts
  MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }

  /// TargetLowering override selecting the legalization action for vector
  /// type \p VT. Defined out of line.
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(EVT VT) const override;

  /// NVPTX-specific query (not an override) taking the function and the
  /// optimization level; the decision logic is in the implementation file.
  bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;

  /// Unconditionally true for every type.
  bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }

  /// Unconditionally true for every type; \p VT is not inspected.
  bool enableAggressiveFMAFusion(EVT VT) const override { return true; }

private:
  const NVPTXSubtarget &STI; // cache the subtarget here

  /// Symbol helpers. getExtSymb defaults its EVT argument to MVT::i32.
  /// NOTE(review): getParamHelpSymbol is the only one of the three that is
  /// not const-qualified -- confirm whether that is intentional; changing it
  /// requires updating the out-of-line definition as well.
  SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
                     EVT = MVT::i32) const;
  SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
  SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);

  // Per-operation lowering helpers, all defined out of line.
  // NOTE(review): presumably dispatched from LowerOperation -- confirm.
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;

  /// TargetLowering overrides kept private in this class: custom result
  /// replacement for node \p N and the target DAG-combine entry point.
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// Returns an alignment for argument \p Idx of type \p Ty given the callee
  /// and (pointer, possibly null) call site; units and lookup rules are
  /// defined in the implementation file.
  unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS,
                                Type *Ty, unsigned Idx) const;
};
549} // namespace llvm
550
551#endif
552