//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Interface definition of the TargetLowering class that is common
/// to all AMD GPUs.
//
//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
17#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
18
19#include "llvm/Target/TargetLowering.h"
20
21namespace llvm {
22
// Forward declarations. AMDGPUMachineFunction and AMDGPUSubtarget are only
// named through pointers or references below, so their full definitions are
// not required here.
// NOTE(review): MachineRegisterInfo is not referenced in this header; it is
// kept in case files that include this header rely on the declaration.
class AMDGPUMachineFunction;
class AMDGPUSubtarget;
class MachineRegisterInfo;
26
27class AMDGPUTargetLowering : public TargetLowering {
28protected:
29  const AMDGPUSubtarget *Subtarget;
30
31  SDValue LowerConstantInitializer(const Constant* Init, const GlobalValue *GV,
32                                   const SDValue &InitPtr,
33                                   SDValue Chain,
34                                   SelectionDAG &DAG) const;
35  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
36  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
37  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
38  /// \brief Lower vector stores by merging the vector elements into an integer
39  /// of the same bitwidth.
40  SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
41  /// \brief Split a vector store into multiple scalar stores.
42  /// \returns The resulting chain.
43
44  SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
45  SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
46  SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
47  SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
48  SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
49
50  SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
51  SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
52  SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
53  SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
54
55  SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
56
57  SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const;
58  SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
59  SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
60  SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
61
62  SDValue LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, bool Signed) const;
63  SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
64  SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
65
66  SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
67
68protected:
69  bool shouldCombineMemoryType(EVT VT) const;
70  SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
71  SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
72  SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
73  SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
74  SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
75  SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
76  SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
77  SDValue performCtlzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
78                             SDValue RHS, DAGCombinerInfo &DCI) const;
79  SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
80
81  static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
82  static EVT getEquivalentBitType(LLVMContext &Context, EVT VT);
83
84  virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
85                                     SelectionDAG &DAG) const;
86
87  /// Return 64-bit value Op as two 32-bit integers.
88  std::pair<SDValue, SDValue> split64BitValue(SDValue Op,
89                                              SelectionDAG &DAG) const;
90  SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const;
91  SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const;
92
93  /// \brief Split a vector load into 2 loads of half the vector.
94  SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;
95
96  /// \brief Split a vector store into 2 stores of half the vector.
97  SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
98
99  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
100  SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
101  SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
102  SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
103  void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
104                                    SmallVectorImpl<SDValue> &Results) const;
105  /// The SelectionDAGBuilder will automatically promote function arguments
106  /// with illegal types.  However, this does not work for the AMDGPU targets
107  /// since the function arguments are stored in memory as these illegal types.
108  /// In order to handle this properly we need to get the origianl types sizes
109  /// from the LLVM IR Function and fixup the ISD:InputArg values before
110  /// passing them to AnalyzeFormalArguments()
111  void getOriginalFunctionArgs(SelectionDAG &DAG,
112                               const Function *F,
113                               const SmallVectorImpl<ISD::InputArg> &Ins,
114                               SmallVectorImpl<ISD::InputArg> &OrigIns) const;
115  void AnalyzeFormalArguments(CCState &State,
116                              const SmallVectorImpl<ISD::InputArg> &Ins) const;
117  void AnalyzeReturn(CCState &State,
118                     const SmallVectorImpl<ISD::OutputArg> &Outs) const;
119
120public:
121  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
122
123  bool isFAbsFree(EVT VT) const override;
124  bool isFNegFree(EVT VT) const override;
125  bool isTruncateFree(EVT Src, EVT Dest) const override;
126  bool isTruncateFree(Type *Src, Type *Dest) const override;
127
128  bool isZExtFree(Type *Src, Type *Dest) const override;
129  bool isZExtFree(EVT Src, EVT Dest) const override;
130  bool isZExtFree(SDValue Val, EVT VT2) const override;
131
132  bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
133
134  MVT getVectorIdxTy(const DataLayout &) const override;
135  bool isSelectSupported(SelectSupportKind) const override;
136
137  bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
138  bool ShouldShrinkFPConstant(EVT VT) const override;
139  bool shouldReduceLoadWidth(SDNode *Load,
140                             ISD::LoadExtType ExtType,
141                             EVT ExtVT) const override;
142
143  bool isLoadBitCastBeneficial(EVT, EVT) const final;
144
145  bool storeOfVectorConstantIsCheap(EVT MemVT,
146                                    unsigned NumElem,
147                                    unsigned AS) const override;
148  bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override;
149  bool isCheapToSpeculateCttz() const override;
150  bool isCheapToSpeculateCtlz() const override;
151
152  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
153                      const SmallVectorImpl<ISD::OutputArg> &Outs,
154                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
155                      SelectionDAG &DAG) const override;
156  SDValue LowerCall(CallLoweringInfo &CLI,
157                    SmallVectorImpl<SDValue> &InVals) const override;
158
159  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op,
160                                  SelectionDAG &DAG) const;
161
162  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
163  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
164  void ReplaceNodeResults(SDNode * N,
165                          SmallVectorImpl<SDValue> &Results,
166                          SelectionDAG &DAG) const override;
167
168  SDValue CombineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
169                               SDValue RHS, SDValue True, SDValue False,
170                               SDValue CC, DAGCombinerInfo &DCI) const;
171
172  const char* getTargetNodeName(unsigned Opcode) const override;
173
174  SDValue getRsqrtEstimate(SDValue Operand,
175                           DAGCombinerInfo &DCI,
176                           unsigned &RefinementSteps,
177                           bool &UseOneConstNR) const override;
178  SDValue getRecipEstimate(SDValue Operand,
179                           DAGCombinerInfo &DCI,
180                           unsigned &RefinementSteps) const override;
181
182  virtual SDNode *PostISelFolding(MachineSDNode *N,
183                                  SelectionDAG &DAG) const = 0;
184
185  /// \brief Determine which of the bits specified in \p Mask are known to be
186  /// either zero or one and return them in the \p KnownZero and \p KnownOne
187  /// bitsets.
188  void computeKnownBitsForTargetNode(const SDValue Op,
189                                     APInt &KnownZero,
190                                     APInt &KnownOne,
191                                     const SelectionDAG &DAG,
192                                     unsigned Depth = 0) const override;
193
194  unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG,
195                                           unsigned Depth = 0) const override;
196
197  /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
198  /// MachineFunction.
199  ///
200  /// \returns a RegisterSDNode representing Reg.
201  virtual SDValue CreateLiveInRegister(SelectionDAG &DAG,
202                                       const TargetRegisterClass *RC,
203                                       unsigned Reg, EVT VT) const;
204
205  enum ImplicitParameter {
206    FIRST_IMPLICIT,
207    GRID_DIM = FIRST_IMPLICIT,
208    GRID_OFFSET,
209  };
210
211  /// \brief Helper function that returns the byte offset of the given
212  /// type of implicit parameter.
213  uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
214                                      const ImplicitParameter Param) const;
215};
216
217namespace AMDGPUISD {
218
219enum NodeType : unsigned {
220  // AMDIL ISD Opcodes
221  FIRST_NUMBER = ISD::BUILTIN_OP_END,
222  CALL,        // Function call based on a single integer
223  UMUL,        // 32bit unsigned multiplication
224  BRANCH_COND,
225  // End AMDIL ISD Opcodes
226  ENDPGM,
227  RETURN,
228  DWORDADDR,
229  FRACT,
230  CLAMP,
231
232  // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
233  // Denormals handled on some parts.
234  COS_HW,
235  SIN_HW,
236  FMAX_LEGACY,
237  FMIN_LEGACY,
238  FMAX3,
239  SMAX3,
240  UMAX3,
241  FMIN3,
242  SMIN3,
243  UMIN3,
244  FMED3,
245  SMED3,
246  UMED3,
247  URECIP,
248  DIV_SCALE,
249  DIV_FMAS,
250  DIV_FIXUP,
251  TRIG_PREOP, // 1 ULP max error for f64
252
253  // RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
254  //            For f64, max error 2^29 ULP, handles denormals.
255  RCP,
256  RSQ,
257  RSQ_LEGACY,
258  RSQ_CLAMP,
259  LDEXP,
260  FP_CLASS,
261  DOT4,
262  CARRY,
263  BORROW,
264  BFE_U32, // Extract range of bits with zero extension to 32-bits.
265  BFE_I32, // Extract range of bits with sign extension to 32-bits.
266  BFI, // (src0 & src1) | (~src0 & src2)
267  BFM, // Insert a range of bits into a 32-bit word.
268  FFBH_U32, // ctlz with -1 if input is zero.
269  MUL_U24,
270  MUL_I24,
271  MAD_U24,
272  MAD_I24,
273  TEXTURE_FETCH,
274  EXPORT,
275  CONST_ADDRESS,
276  REGISTER_LOAD,
277  REGISTER_STORE,
278  LOAD_INPUT,
279  SAMPLE,
280  SAMPLEB,
281  SAMPLED,
282  SAMPLEL,
283
284  // These cvt_f32_ubyte* nodes need to remain consecutive and in order.
285  CVT_F32_UBYTE0,
286  CVT_F32_UBYTE1,
287  CVT_F32_UBYTE2,
288  CVT_F32_UBYTE3,
289  /// This node is for VLIW targets and it is used to represent a vector
290  /// that is stored in consecutive registers with the same channel.
291  /// For example:
292  ///   |X  |Y|Z|W|
293  /// T0|v.x| | | |
294  /// T1|v.y| | | |
295  /// T2|v.z| | | |
296  /// T3|v.w| | | |
297  BUILD_VERTICAL_VECTOR,
298  /// Pointer to the start of the shader's constant data.
299  CONST_DATA_PTR,
300  SENDMSG,
301  INTERP_MOV,
302  INTERP_P1,
303  INTERP_P2,
304  PC_ADD_REL_OFFSET,
305  FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
306  STORE_MSKOR,
307  LOAD_CONSTANT,
308  TBUFFER_STORE_FORMAT,
309  ATOMIC_CMP_SWAP,
310  ATOMIC_INC,
311  ATOMIC_DEC,
312  LAST_AMDGPU_ISD_NUMBER
313};
314
315
316} // End namespace AMDGPUISD
317
318} // End namespace llvm
319
320#endif
321