//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the PTX instructions in TableGen format.
//
//===----------------------------------------------------------------------===//
13c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant
include "NVPTXInstrFormats.td"

// NOP: takes no operands, produces no results, prints an empty assembly
// string and has no selection pattern.
def NOP : NVPTXInst<(outs),
                    (ins),
                    "",
                    []>;
18c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant
// Vector-specific instruction properties.  Each record wraps the integer tag
// handed to VecInstTypeEnum; tags 1-7 are consecutive and isVecOther uses 15.
def isVecLD : VecInstTypeEnum<1>;
def isVecST : VecInstTypeEnum<2>;
def isVecBuild : VecInstTypeEnum<3>;
def isVecShuffle : VecInstTypeEnum<4>;
def isVecExtract : VecInstTypeEnum<5>;
def isVecInsert : VecInstTypeEnum<6>;
def isVecDest : VecInstTypeEnum<7>;
def isVecOther : VecInstTypeEnum<15>;
28c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant
//===----------------------------------------------------------------------===//
// NVPTX Operand Definitions.
//===----------------------------------------------------------------------===//

// Branch-target operand, typed as OtherVT.
def brtarget : Operand<OtherVT>;
34c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant
// CVT conversion modes.
// These values must match the enum in NVPTX.h.
//
// Base modes occupy 0x0-0x8.
def CvtNONE : PatLeaf<(i32 0x0)>;
def CvtRNI : PatLeaf<(i32 0x1)>;
def CvtRZI : PatLeaf<(i32 0x2)>;
def CvtRMI : PatLeaf<(i32 0x3)>;
def CvtRPI : PatLeaf<(i32 0x4)>;
def CvtRN : PatLeaf<(i32 0x5)>;
def CvtRZ : PatLeaf<(i32 0x6)>;
def CvtRM : PatLeaf<(i32 0x7)>;
def CvtRP : PatLeaf<(i32 0x8)>;

// Each _FTZ value is the corresponding base value plus 0x10.
def CvtNONE_FTZ : PatLeaf<(i32 0x10)>;
def CvtRNI_FTZ : PatLeaf<(i32 0x11)>;
def CvtRZI_FTZ : PatLeaf<(i32 0x12)>;
def CvtRMI_FTZ : PatLeaf<(i32 0x13)>;
def CvtRPI_FTZ : PatLeaf<(i32 0x14)>;
def CvtRN_FTZ : PatLeaf<(i32 0x15)>;
def CvtRZ_FTZ : PatLeaf<(i32 0x16)>;
def CvtRM_FTZ : PatLeaf<(i32 0x17)>;
def CvtRP_FTZ : PatLeaf<(i32 0x18)>;

// Saturating modes: 0x20, and 0x30 for the _FTZ form.
def CvtSAT : PatLeaf<(i32 0x20)>;
def CvtSAT_FTZ : PatLeaf<(i32 0x30)>;

// i32 operand carrying one of the values above; printed by printCvtMode.
def CvtMode : Operand<i32> {
  let PrintMethod = "printCvtMode";
}
63
// Compare modes.
// These values must match the enum in NVPTX.h.
//
// Base modes are numbered 0-17.
def CmpEQ : PatLeaf<(i32 0)>;
def CmpNE : PatLeaf<(i32 1)>;
def CmpLT : PatLeaf<(i32 2)>;
def CmpLE : PatLeaf<(i32 3)>;
def CmpGT : PatLeaf<(i32 4)>;
def CmpGE : PatLeaf<(i32 5)>;
def CmpLO : PatLeaf<(i32 6)>;
def CmpLS : PatLeaf<(i32 7)>;
def CmpHI : PatLeaf<(i32 8)>;
def CmpHS : PatLeaf<(i32 9)>;
def CmpEQU : PatLeaf<(i32 10)>;
def CmpNEU : PatLeaf<(i32 11)>;
def CmpLTU : PatLeaf<(i32 12)>;
def CmpLEU : PatLeaf<(i32 13)>;
def CmpGTU : PatLeaf<(i32 14)>;
def CmpGEU : PatLeaf<(i32 15)>;
def CmpNUM : PatLeaf<(i32 16)>;
def CmpNAN : PatLeaf<(i32 17)>;

// Each _FTZ value is the corresponding base value plus 0x100.
def CmpEQ_FTZ : PatLeaf<(i32 0x100)>;
def CmpNE_FTZ : PatLeaf<(i32 0x101)>;
def CmpLT_FTZ : PatLeaf<(i32 0x102)>;
def CmpLE_FTZ : PatLeaf<(i32 0x103)>;
def CmpGT_FTZ : PatLeaf<(i32 0x104)>;
def CmpGE_FTZ : PatLeaf<(i32 0x105)>;
def CmpLO_FTZ : PatLeaf<(i32 0x106)>;
def CmpLS_FTZ : PatLeaf<(i32 0x107)>;
def CmpHI_FTZ : PatLeaf<(i32 0x108)>;
def CmpHS_FTZ : PatLeaf<(i32 0x109)>;
def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>;
def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>;
def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>;
def CmpLEU_FTZ : PatLeaf<(i32 0x10D)>;
def CmpGTU_FTZ : PatLeaf<(i32 0x10E)>;
def CmpGEU_FTZ : PatLeaf<(i32 0x10F)>;
def CmpNUM_FTZ : PatLeaf<(i32 0x110)>;
def CmpNAN_FTZ : PatLeaf<(i32 0x111)>;

// i32 operand carrying one of the values above; printed by printCmpMode.
def CmpMode : Operand<i32> {
  let PrintMethod = "printCmpMode";
}
107
// f32 immediate operands.  Each matches an f32 fpimm; the attached transform
// does not read the matched node and always returns the fixed target FP
// constant named by the record (0.0 or 1.0).
def F32ConstZero : Operand<f32>,
                   PatLeaf<(f32 fpimm)>,
                   SDNodeXForm<fpimm, [{
  return CurDAG->getTargetConstantFP(0.0, MVT::f32);
}]>;

def F32ConstOne : Operand<f32>,
                  PatLeaf<(f32 fpimm)>,
                  SDNodeXForm<fpimm, [{
  return CurDAG->getTargetConstantFP(1.0, MVT::f32);
}]>;
114
115//===----------------------------------------------------------------------===//
116// NVPTX Instruction Predicate Definitions
117//===----------------------------------------------------------------------===//
118
119
120def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
121def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
122def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
123def useAtomRedG32forGen32 :
124  Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
125def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
126def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
127def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
128def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
129def useAtomRedG64forGen64 :
130  Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
131def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
132def hasVote : Predicate<"Subtarget.hasVote()">;
133def hasDouble : Predicate<"Subtarget.hasDouble()">;
134def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
135def hasLDG : Predicate<"Subtarget.hasLDG()">;
136def hasLDU : Predicate<"Subtarget.hasLDU()">;
137def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
138
139def doF32FTZ : Predicate<"useF32FTZ()">;
140def doNoF32FTZ : Predicate<"!useF32FTZ()">;
141
142def doFMAF32      : Predicate<"doFMAF32">;
143def doFMAF32_ftz  : Predicate<"(doFMAF32 && useF32FTZ())">;
144def doFMAF32AGG      : Predicate<"doFMAF32AGG">;
145def doFMAF32AGG_ftz  : Predicate<"(doFMAF32AGG && useF32FTZ())">;
146def doFMAF64      : Predicate<"doFMAF64">;
147def doFMAF64AGG      : Predicate<"doFMAF64AGG">;
148
149def doMulWide      : Predicate<"doMulWide">;
150
151def allowFMA : Predicate<"allowFMA">;
152def allowFMA_ftz : Predicate<"(allowFMA && useF32FTZ())">;
153
154def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
155def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
156
157def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
158def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
159
160def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
161
162def true : Predicate<"1">;
163
164
165//===----------------------------------------------------------------------===//
166// Some Common Instruction Class Templates
167//===----------------------------------------------------------------------===//
168
169multiclass I3<string OpcStr, SDNode OpNode> {
170  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
171                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
172                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
173                       Int64Regs:$b))]>;
174  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
175                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
176                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
177  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
178                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
179                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
180                       Int32Regs:$b))]>;
181  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
182                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
183                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
184  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
185                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
186                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
187                       Int16Regs:$b))]>;
188  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
189                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
190                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
191}
192
// ADD_SUB_INT_32 - 32-bit-only two-source integer template.
//
// Emits rr and ri forms that print OpcStr followed by ".s32" and select
// OpNode on Int32Regs.
multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
  def i32rr :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
              !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
              [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
              !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
              [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
}
203
// F3 - two-source floating-point template without a rounding modifier.
//
// Emits rr and ri forms for f64, f32 with ".ftz", and plain f32.  Every form
// is guarded by allowFMA; the ".ftz" forms use allowFMA_ftz instead.
multiclass F3<string OpcStr, SDNode OpNode> {
  // f64.
  def f64rr :
    NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, Float64Regs:$b),
              !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
              [(set Float64Regs:$dst,
                 (OpNode Float64Regs:$a, Float64Regs:$b))]>,
    Requires<[allowFMA]>;
  def f64ri :
    NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
              !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
              [(set Float64Regs:$dst,
                 (OpNode Float64Regs:$a, fpimm:$b))]>,
    Requires<[allowFMA]>;

  // f32, flush-to-zero.
  def f32rr_ftz :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
              !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[allowFMA_ftz]>;
  def f32ri_ftz :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
              !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, fpimm:$b))]>,
    Requires<[allowFMA_ftz]>;

  // f32.
  def f32rr :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
              !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[allowFMA]>;
  def f32ri :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
              !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, fpimm:$b))]>,
    Requires<[allowFMA]>;
}
242
// F3_rn - two-source floating-point template that prints ".rn".
//
// Same six forms as F3, but the assembly string carries ".rn" and only the
// ".ftz" forms are predicated (on doF32FTZ); the rest are unconditional.
multiclass F3_rn<string OpcStr, SDNode OpNode> {
  // f64.
  def f64rr :
    NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, Float64Regs:$b),
              !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
              [(set Float64Regs:$dst,
                 (OpNode Float64Regs:$a, Float64Regs:$b))]>;
  def f64ri :
    NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
              !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
              [(set Float64Regs:$dst,
                 (OpNode Float64Regs:$a, fpimm:$b))]>;

  // f32, flush-to-zero.
  def f32rr_ftz :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
              !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[doF32FTZ]>;
  def f32ri_ftz :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
              !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, fpimm:$b))]>,
    Requires<[doF32FTZ]>;

  // f32.
  def f32rr :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
              !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, Float32Regs:$b))]>;
  def f32ri :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
              !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, fpimm:$b))]>;
}
277
// F2 - single-source floating-point template.
//
// Emits an f64 form, an f32 ".ftz" form guarded by doF32FTZ, and a plain f32
// form.
multiclass F2<string OpcStr, SDNode OpNode> {
  def f64 :
    NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
              !strconcat(OpcStr, ".f64 \t$dst, $a;"),
              [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
  def f32_ftz :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
              !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
    Requires<[doF32FTZ]>;
  def f32 :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
              !strconcat(OpcStr, ".f32 \t$dst, $a;"),
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
}
290
291//===----------------------------------------------------------------------===//
292// NVPTX Instructions.
293//===----------------------------------------------------------------------===//
294
295//-----------------------------------
296// General Type Conversion
297//-----------------------------------
298
299let neverHasSideEffects = 1 in {
300// Generate a cvt to the given type from all possible types.
301// Each instance takes a CvtMode immediate that defines the conversion mode to
302// use.  It can be CvtNONE to omit a conversion mode.
303multiclass CVT_FROM_ALL<string FromName, RegisterClass RC> {
304  def _s16 : NVPTXInst<(outs RC:$dst),
305                       (ins Int16Regs:$src, CvtMode:$mode),
306                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
307                       FromName, ".s16\t$dst, $src;"),
308                       []>;
309  def _u16 : NVPTXInst<(outs RC:$dst),
310                       (ins Int16Regs:$src, CvtMode:$mode),
311                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
312                       FromName, ".u16\t$dst, $src;"),
313                       []>;
314  def _f16 : NVPTXInst<(outs RC:$dst),
315                       (ins Int16Regs:$src, CvtMode:$mode),
316                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
317                       FromName, ".f16\t$dst, $src;"),
318                       []>;
319  def _s32 : NVPTXInst<(outs RC:$dst),
320                       (ins Int32Regs:$src, CvtMode:$mode),
321                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
322                       FromName, ".s32\t$dst, $src;"),
323                       []>;
324  def _u32 : NVPTXInst<(outs RC:$dst),
325                       (ins Int32Regs:$src, CvtMode:$mode),
326                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
327                       FromName, ".u32\t$dst, $src;"),
328                       []>;
329  def _s64 : NVPTXInst<(outs RC:$dst),
330                       (ins Int64Regs:$src, CvtMode:$mode),
331                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
332                       FromName, ".s64\t$dst, $src;"),
333                       []>;
334  def _u64 : NVPTXInst<(outs RC:$dst),
335                       (ins Int64Regs:$src, CvtMode:$mode),
336                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
337                       FromName, ".u64\t$dst, $src;"),
338                       []>;
339  def _f32 : NVPTXInst<(outs RC:$dst),
340                       (ins Float32Regs:$src, CvtMode:$mode),
341                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
342                       FromName, ".f32\t$dst, $src;"),
343                       []>;
344  def _f64 : NVPTXInst<(outs RC:$dst),
345                       (ins Float64Regs:$src, CvtMode:$mode),
346                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
347                       FromName, ".f64\t$dst, $src;"),
348                       []>;
349}
350
351// Generate a cvt to all possible types.
352defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
353defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
354defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
355defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
356defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
357defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
358defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
359defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
360defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
361
362// This set of cvt is different from the above. The type of the source
363// and target are the same.
364//
365def CVT_INREG_s16_s8 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
366                        "cvt.s16.s8 \t$dst, $src;", []>;
367def CVT_INREG_s32_s8 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
368                        "cvt.s32.s8 \t$dst, $src;", []>;
369def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
370                        "cvt.s32.s16 \t$dst, $src;", []>;
371def CVT_INREG_s64_s8 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
372                        "cvt.s64.s8 \t$dst, $src;", []>;
373def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
374                        "cvt.s64.s16 \t$dst, $src;", []>;
375def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
376                        "cvt.s64.s32 \t$dst, $src;", []>;
377}
378
//-----------------------------------
// Integer Arithmetic
//-----------------------------------

// ADD_SUB_i1 - i1 add/sub template.
//
// Both forms print "xor.pred" regardless of OpNode, so the add and sub
// instantiations emit the same instruction text.
multiclass ADD_SUB_i1<SDNode OpNode> {
  def _rr :
    NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
              "xor.pred \t$dst, $a, $b;",
              [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
  def _ri :
    NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
              "xor.pred \t$dst, $a, $b;",
              [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
}

defm ADD_i1 : ADD_SUB_i1<add>;
defm SUB_i1 : ADD_SUB_i1<sub>;
394
395
// Plain add/sub at 16/32/64 bits.
defm ADD : I3<"add.s", add>;
defm SUB : I3<"sub.s", sub>;

// 32-bit add/sub selected from addc/subc; printed with ".cc".
defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;

// 32-bit add/sub selected from adde/sube; printed as addc.cc/subc.cc.
defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
404
// Immediate predicates and transforms for the mul.wide PTX instruction.

// Matches an immediate that fits in 32 bits as a signed value.
def SInt32Const : PatLeaf<(imm), [{
  return N->getAPIntValue().isSignedIntN(32);
}]>;
412
// Matches an immediate that fits in 32 bits as an unsigned value.
def UInt32Const : PatLeaf<(imm), [{
  return N->getAPIntValue().isIntN(32);
}]>;
419
// Matches an immediate that fits in 16 bits as a signed value.
def SInt16Const : PatLeaf<(imm), [{
  return N->getAPIntValue().isSignedIntN(16);
}]>;
426
// Matches an immediate that fits in 16 bits as an unsigned value.
def UInt16Const : PatLeaf<(imm), [{
  return N->getAPIntValue().isIntN(16);
}]>;
433
// Shift-amount predicate for rewriting a 32-bit shift-left as mul.wide.
//
// Accepts only 0 <= v < 31.  The rewrite multiplies by the 32-bit constant
// (1 << v); for v == 31 that constant is 0x80000000, which is negative when
// read as a signed 32-bit immediate, so a signed mul.wide would produce the
// wrong sign.  Shift amounts of 31 are therefore rejected and fall back to
// the ordinary shift.  (The record keeps its original name for its users.)
def Int5Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.sge(0) && v.slt(31);
}]>;
442
// Shift-amount predicate for rewriting a 16-bit shift-left as mul.wide.
//
// Accepts only 0 <= v < 15.  The rewrite multiplies by the 16-bit constant
// (1 << v); for v == 15 that constant is 0x8000, which is negative when read
// as a signed 16-bit immediate, so a signed mul.wide would produce the wrong
// sign.  Shift amounts of 15 are therefore rejected and fall back to the
// ordinary shift.  (The record keeps its original name for its users.)
def Int4Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.sge(0) && v.slt(15);
}]>;
451
// Turns a shift amount N into the i32 target constant (1 << N), computed in
// a 32-bit APInt.
def SHL2MUL32 : SDNodeXForm<imm, [{
  const APInt &ShAmt = N->getAPIntValue();
  APInt One(32, 1);
  return CurDAG->getTargetConstant(One.shl(ShAmt), MVT::i32);
}]>;
457
// Turns a shift amount N into the i16 target constant (1 << N), computed in
// a 16-bit APInt.
def SHL2MUL16 : SDNodeXForm<imm, [{
  const APInt &ShAmt = N->getAPIntValue();
  APInt One(16, 1);
  return CurDAG->getTargetConstant(One.shl(ShAmt), MVT::i16);
}]>;
463
// mul.wide instructions.  None carries a selection pattern of its own; they
// are produced by the standalone Pat records in this file.

// 32-bit sources, 64-bit result, signed.
def MULWIDES64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
            "mul.wide.s32 \t$dst, $a, $b;", []>;
def MULWIDES64Imm :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
            "mul.wide.s32 \t$dst, $a, $b;", []>;

// 32-bit sources, 64-bit result, unsigned.
def MULWIDEU64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
            "mul.wide.u32 \t$dst, $a, $b;", []>;
def MULWIDEU64Imm :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
            "mul.wide.u32 \t$dst, $a, $b;", []>;

// 16-bit sources, 32-bit result, signed.
def MULWIDES32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
            "mul.wide.s16 \t$dst, $a, $b;", []>;
def MULWIDES32Imm :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
            "mul.wide.s16 \t$dst, $a, $b;", []>;

// 16-bit sources, 32-bit result, unsigned.
def MULWIDEU32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
            "mul.wide.u16 \t$dst, $a, $b;", []>;
def MULWIDEU32Imm :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
            "mul.wide.u16 \t$dst, $a, $b;", []>;
491
// Shift-left of an extended value by a constant accepted by Int5Const or
// Int4Const is selected as mul.wide by the constant that SHL2MUL32/SHL2MUL16
// derives from the shift amount.  All four require doMulWide.

// 32-bit source.
def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
          (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
      Requires<[doMulWide]>;
def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
          (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
      Requires<[doMulWide]>;

// 16-bit source.
def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
          (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
      Requires<[doMulWide]>;
def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
          (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
      Requires<[doMulWide]>;
505
// A multiply whose operands are both extended from the narrower type, or one
// extended operand and an immediate that fits the narrower type, is selected
// as mul.wide.  All patterns require doMulWide.

// Signed, 32 -> 64.
def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
          (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
      Requires<[doMulWide]>;

// Unsigned, 32 -> 64.
def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
          (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
      Requires<[doMulWide]>;

// Signed, 16 -> 32.
def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
          (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
          (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
      Requires<[doMulWide]>;

// Unsigned, 16 -> 32.
def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
          (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
      Requires<[doMulWide]>;
530
// Low half of the product.
defm MULT : I3<"mul.lo.s", mul>;

// High half of the product, signed and unsigned.
defm MULTHS : I3<"mul.hi.s", mulhs>;
defm MULTHU : I3<"mul.hi.u", mulhu>;

// Division, signed and unsigned.
defm SDIV : I3<"div.s", sdiv>;
defm UDIV : I3<"div.u", udiv>;

// Remainder.  The ri version of SREM will not be selected, as
// DAGCombiner::visitSREM will lower it.
defm SREM : I3<"rem.s", srem>;
// The ri version of UREM will not be selected, as DAGCombiner::visitUREM
// will lower it.
defm UREM : I3<"rem.u", urem>;
543
// 16-bit multiply-add: selects (add (mul a, b), c) as mad.lo.s16.  The
// three-letter suffix gives the kind of a, b and c (r = register,
// i = immediate).
def MAD16rrr :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
            "mad.lo.s16 \t$dst, $a, $b, $c;",
            [(set Int16Regs:$dst,
               (add (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
def MAD16rri :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
            "mad.lo.s16 \t$dst, $a, $b, $c;",
            [(set Int16Regs:$dst,
               (add (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
def MAD16rir :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
            "mad.lo.s16 \t$dst, $a, $b, $c;",
            [(set Int16Regs:$dst,
               (add (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
def MAD16rii :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
            "mad.lo.s16 \t$dst, $a, $b, $c;",
            [(set Int16Regs:$dst,
               (add (mul Int16Regs:$a, imm:$b), imm:$c))]>;
564
// 32-bit multiply-add: selects (add (mul a, b), c) as mad.lo.s32.  The
// three-letter suffix gives the kind of a, b and c (r = register,
// i = immediate).
def MAD32rrr :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
            "mad.lo.s32 \t$dst, $a, $b, $c;",
            [(set Int32Regs:$dst,
               (add (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
def MAD32rri :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
            "mad.lo.s32 \t$dst, $a, $b, $c;",
            [(set Int32Regs:$dst,
               (add (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
def MAD32rir :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
            "mad.lo.s32 \t$dst, $a, $b, $c;",
            [(set Int32Regs:$dst,
               (add (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
def MAD32rii :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
            "mad.lo.s32 \t$dst, $a, $b, $c;",
            [(set Int32Regs:$dst,
               (add (mul Int32Regs:$a, imm:$b), imm:$c))]>;
585
// 64-bit multiply-add: selects (add (mul a, b), c) as mad.lo.s64.  The
// three-letter suffix gives the kind of a, b and c (r = register,
// i = immediate).
def MAD64rrr :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
            "mad.lo.s64 \t$dst, $a, $b, $c;",
            [(set Int64Regs:$dst,
               (add (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
def MAD64rri :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
            "mad.lo.s64 \t$dst, $a, $b, $c;",
            [(set Int64Regs:$dst,
               (add (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
def MAD64rir :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
            "mad.lo.s64 \t$dst, $a, $b, $c;",
            [(set Int64Regs:$dst,
               (add (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
def MAD64rii :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
            "mad.lo.s64 \t$dst, $a, $b, $c;",
            [(set Int64Regs:$dst,
               (add (mul Int64Regs:$a, imm:$b), imm:$c))]>;
606
607
// Integer negation at 16, 32 and 64 bits; each selects ineg on its register
// class.
def INEG16 :
  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
            "neg.s16 \t$dst, $src;",
            [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
def INEG32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
            "neg.s32 \t$dst, $src;",
            [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
def INEG64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
            "neg.s64 \t$dst, $src;",
            [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
617
//-----------------------------------
// Floating Point Arithmetic
//-----------------------------------

// Matches the single-precision constant 1.0f.  Immediates whose semantics
// are not IEEE single are rejected before the value is read.
def FloatConst1 : PatLeaf<(fpimm), [{
  const llvm::APFloat &Val = N->getValueAPF();
  if (&Val.getSemantics() != &llvm::APFloat::IEEEsingle)
    return false;
  return Val.convertToFloat() == 1.0f;
}]>;
// Constant (double)1.0
def DoubleConst1 : PatLeaf<(fpimm), [{
  // Reject immediates whose semantics are not IEEE double before reading
  // the value, then compare against 1.0.
  const llvm::APFloat &Val = N->getValueAPF();
  if (&Val.getSemantics() != &llvm::APFloat::IEEEdouble)
    return false;
  return Val.convertToDouble() == 1.0;
}]>;
636
// add/sub/mul without a rounding modifier (F3 forms, guarded by allowFMA).
defm FADD : F3<"add", fadd>;
defm FSUB : F3<"sub", fsub>;
defm FMUL : F3<"mul", fmul>;

// add/sub/mul printed with ".rn" (F3_rn forms).
defm FADD_rn : F3_rn<"add", fadd>;
defm FSUB_rn : F3_rn<"sub", fsub>;
defm FMUL_rn : F3_rn<"mul", fmul>;

// Single-source operations.
defm FABS : F2<"abs", fabs>;
defm FNEG : F2<"neg", fneg>;
defm FSQRT : F2<"sqrt.rn", fsqrt>;
648
649//
650// F64 division
651//
// FDIV641r: 1.0 / $b. The dividend must match DoubleConst1; it is kept as an
// input operand but is not printed -- only rcp.rn.f64 on $b is emitted.
652def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
653                      (ins f64imm:$a, Float64Regs:$b),
654                      "rcp.rn.f64 \t$dst, $b;",
655                      [(set Float64Regs:$dst,
656                        (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
// FDIV64rr: register / register, printed as div.rn.f64.
657def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
658                      (ins Float64Regs:$a, Float64Regs:$b),
659                      "div.rn.f64 \t$dst, $a, $b;",
660                      [(set Float64Regs:$dst,
661                        (fdiv Float64Regs:$a, Float64Regs:$b))]>;
// FDIV64ri: register / immediate, printed as div.rn.f64.
662def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
663                      (ins Float64Regs:$a, f64imm:$b),
664                      "div.rn.f64 \t$dst, $a, $b;",
665                      [(set Float64Regs:$dst,
666                        (fdiv Float64Regs:$a, fpimm:$b))]>;
667
668//
669// F32 Approximate reciprocal
670//
// 1.0f / $b printed as rcp.approx[.ftz].f32. The constant dividend
// (FloatConst1) is an input operand but is not printed.
// The _ftz record is guarded by do_DIVF32_APPROX and doF32FTZ.
671def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
672                      (ins f32imm:$a, Float32Regs:$b),
673                      "rcp.approx.ftz.f32 \t$dst, $b;",
674                      [(set Float32Regs:$dst,
675                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
676                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
// Same DAG pattern without the .ftz suffix; guarded by do_DIVF32_APPROX only.
677def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
678                        (ins f32imm:$a, Float32Regs:$b),
679                       "rcp.approx.f32 \t$dst, $b;",
680                      [(set Float32Regs:$dst,
681                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
682                      Requires<[do_DIVF32_APPROX]>;
683//
684// F32 Approximate division
685//
// div.approx[.ftz].f32 in register/register (rr) and register/immediate (ri)
// forms. All four records require do_DIVF32_APPROX; the _ftz ones also
// require doF32FTZ.
686def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
687                      (ins Float32Regs:$a, Float32Regs:$b),
688                      "div.approx.ftz.f32 \t$dst, $a, $b;",
689                      [(set Float32Regs:$dst,
690                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
691                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
692def FDIV32approxri_ftz : NVPTXInst<(outs Float32Regs:$dst),
693                      (ins Float32Regs:$a, f32imm:$b),
694                      "div.approx.ftz.f32 \t$dst, $a, $b;",
695                      [(set Float32Regs:$dst,
696                        (fdiv Float32Regs:$a, fpimm:$b))]>,
697                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
// Non-FTZ variants.
698def FDIV32approxrr     : NVPTXInst<(outs Float32Regs:$dst),
699                      (ins Float32Regs:$a, Float32Regs:$b),
700                      "div.approx.f32 \t$dst, $a, $b;",
701                      [(set Float32Regs:$dst,
702                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
703                      Requires<[do_DIVF32_APPROX]>;
704def FDIV32approxri : NVPTXInst<(outs Float32Regs:$dst),
705                      (ins Float32Regs:$a, f32imm:$b),
706                      "div.approx.f32 \t$dst, $a, $b;",
707                      [(set Float32Regs:$dst,
708                        (fdiv Float32Regs:$a, fpimm:$b))]>,
709                      Requires<[do_DIVF32_APPROX]>;
710//
711// F32 Semi-accurate reciprocal
712//
713// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
714//
// 1.0f / $b under the do_DIVF32_FULL predicate. These print the same
// rcp.approx[.ftz].f32 text as FDIV321r[_ftz]; only the guarding predicate
// differs. The constant dividend is not printed.
715def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
716                      (ins f32imm:$a, Float32Regs:$b),
717                      "rcp.approx.ftz.f32 \t$dst, $b;",
718                      [(set Float32Regs:$dst,
719                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
720                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
721def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
722                      (ins f32imm:$a, Float32Regs:$b),
723                      "rcp.approx.f32 \t$dst, $b;",
724                      [(set Float32Regs:$dst,
725                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
726                      Requires<[do_DIVF32_FULL]>;
727//
728// F32 Semi-accurate division
729//
// div.full[.ftz].f32 in register/register (rr) and register/immediate (ri)
// forms. All four records require do_DIVF32_FULL; the _ftz ones also require
// doF32FTZ.
730def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
731                      (ins Float32Regs:$a, Float32Regs:$b),
732                      "div.full.ftz.f32 \t$dst, $a, $b;",
733                      [(set Float32Regs:$dst,
734                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
735                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
736def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
737                      (ins Float32Regs:$a, f32imm:$b),
738                      "div.full.ftz.f32 \t$dst, $a, $b;",
739                      [(set Float32Regs:$dst,
740                        (fdiv Float32Regs:$a, fpimm:$b))]>,
741                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
// Non-FTZ variants.
742def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
743                      (ins Float32Regs:$a, Float32Regs:$b),
744                      "div.full.f32 \t$dst, $a, $b;",
745                      [(set Float32Regs:$dst,
746                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
747                      Requires<[do_DIVF32_FULL]>;
748def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
749                      (ins Float32Regs:$a, f32imm:$b),
750                      "div.full.f32 \t$dst, $a, $b;",
751                      [(set Float32Regs:$dst,
752                        (fdiv Float32Regs:$a, fpimm:$b))]>,
753                      Requires<[do_DIVF32_FULL]>;
754//
755// F32 Accurate reciprocal
756//
// 1.0f / $b printed as rcp.rn[.ftz].f32. Both records require reqPTX20; the
// _ftz one also requires doF32FTZ. The constant dividend is not printed.
757def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
758                        (ins f32imm:$a, Float32Regs:$b),
759                       "rcp.rn.ftz.f32 \t$dst, $b;",
760                      [(set Float32Regs:$dst,
761                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
762                      Requires<[reqPTX20, doF32FTZ]>;
763def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
764                      (ins f32imm:$a, Float32Regs:$b),
765                       "rcp.rn.f32 \t$dst, $b;",
766                      [(set Float32Regs:$dst,
767                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
768                      Requires<[reqPTX20]>;
769//
770// F32 Accurate division
771//
// div.rn[.ftz].f32 in register/register (rr) and register/immediate (ri)
// forms. All four records require reqPTX20; the _ftz ones also require
// doF32FTZ. The frem expansions in this file reference these records.
772def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
773                      (ins Float32Regs:$a, Float32Regs:$b),
774                      "div.rn.ftz.f32 \t$dst, $a, $b;",
775                      [(set Float32Regs:$dst,
776                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
777                      Requires<[doF32FTZ, reqPTX20]>;
778def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
779                      (ins Float32Regs:$a, f32imm:$b),
780                      "div.rn.ftz.f32 \t$dst, $a, $b;",
781                      [(set Float32Regs:$dst,
782                        (fdiv Float32Regs:$a, fpimm:$b))]>,
783                      Requires<[doF32FTZ, reqPTX20]>;
// Non-FTZ variants.
784def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
785                      (ins Float32Regs:$a, Float32Regs:$b),
786                      "div.rn.f32 \t$dst, $a, $b;",
787                      [(set Float32Regs:$dst,
788                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
789                      Requires<[reqPTX20]>;
790def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
791                      (ins Float32Regs:$a, f32imm:$b),
792                      "div.rn.f32 \t$dst, $a, $b;",
793                      [(set Float32Regs:$dst,
794                        (fdiv Float32Regs:$a, fpimm:$b))]>,
795                      Requires<[reqPTX20]>;
796
797//
798// F32 rsqrt
799//
800
// rsqrt.approx.f32 instruction record. It carries no selection pattern of
// its own; the anonymous Pat below is what selects it.
801def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b),
802                       "rsqrt.approx.f32 \t$dst, $b;", []>;
803
// 1.0f / int_nvvm_sqrt_f($b) => rsqrt.approx.f32 $b, but only when
// do_DIVF32_FULL, do_SQRTF32_APPROX and doNoF32FTZ all hold.
804def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
805         (RSQRTF32approx1r Float32Regs:$b)>,
806         Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>;
807
// FPCONTRACT32: f32 fused multiply-add records that select
// (fadd (fmul $a, $b), $c).
//   OpcStr - mnemonic placed in front of the operand list.
//   Pred   - predicate that guards every record.
// The suffix letters give the kinds of $a, $b, $c (r = register,
// i = immediate).
808multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
809   def rrr : NVPTXInst<(outs Float32Regs:$dst),
810                      (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
811                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
812                      [(set Float32Regs:$dst, (fadd
813                        (fmul Float32Regs:$a, Float32Regs:$b),
814                        Float32Regs:$c))]>, Requires<[Pred]>;
815   // This works around a TableGen bug: the permuted rule rrr2 is not
816   // generated automatically from the rrr rule above, so it is written
817   // out explicitly here. This happens to FMA32 only.
818   // See the comments at FMAD32 and FMA32 for more information.
   // rrr2 matches the commuted form (fadd $c, (fmul $a, $b)).
819   def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
820                        (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
821                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
822                      [(set Float32Regs:$dst, (fadd Float32Regs:$c,
823                        (fmul Float32Regs:$a, Float32Regs:$b)))]>,
824                      Requires<[Pred]>;
   // Addend is an immediate.
825   def rri : NVPTXInst<(outs Float32Regs:$dst),
826                      (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
827                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
828                      [(set Float32Regs:$dst, (fadd
829                        (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
830                      Requires<[Pred]>;
   // Multiplier is an immediate.
831   def rir : NVPTXInst<(outs Float32Regs:$dst),
832                      (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
833                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
834                      [(set Float32Regs:$dst, (fadd
835                        (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
836                      Requires<[Pred]>;
   // Multiplier and addend are both immediates.
837   def rii : NVPTXInst<(outs Float32Regs:$dst),
838                      (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
839                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
840                      [(set Float32Regs:$dst, (fadd
841                        (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
842                      Requires<[Pred]>;
843}
844
// FPCONTRACT64: f64 fused multiply-add records that select
// (fadd (fmul $a, $b), $c).
//   OpcStr - mnemonic placed in front of the operand list.
//   Pred   - predicate that guards every record.
// Unlike FPCONTRACT32 there is no explicit commuted (rrr2) record here.
845multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
846   def rrr : NVPTXInst<(outs Float64Regs:$dst),
847                      (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
848                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
849                      [(set Float64Regs:$dst, (fadd
850                        (fmul Float64Regs:$a, Float64Regs:$b),
851                        Float64Regs:$c))]>, Requires<[Pred]>;
   // Addend is an immediate.
852   def rri : NVPTXInst<(outs Float64Regs:$dst),
853                      (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
854                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
855                      [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
856                        Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
   // Multiplier is an immediate.
857   def rir : NVPTXInst<(outs Float64Regs:$dst),
858                      (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
859                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
860                      [(set Float64Regs:$dst, (fadd
861                        (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
862                      Requires<[Pred]>;
   // Multiplier and addend are both immediates.
863   def rii : NVPTXInst<(outs Float64Regs:$dst),
864                      (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
865                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
866                      [(set Float64Regs:$dst, (fadd
867                        (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
868                      Requires<[Pred]>;
869}
870
871// For an unknown reason (most likely a bug in tablegen), tablegen does not
872// automatically generate the rrr2 rule from
873// the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
874// If we reverse the order of the following two lines, then rrr2 rule will be
875// generated for FMA32, but not for rrr.
876// Therefore, we manually write the rrr2 rule in FPCONTRACT32.
// Instantiations: fma.rn[.ftz].f32 and fma.rn.f64, each guarded by its own
// doFMA* predicate.
877defm FMA32_ftz  : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
878defm FMA32  : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
879defm FMA64  : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
880
881// b*c-a => fmad(b, c, -a)
// Single-pattern helper: rewrites (fsub (fmul $b, $c), $a) as
// Inst($b, $c, FNEGf32($a)), guarded by Pred.
//   Inst - three-operand multiply-add instruction to emit.
// No instantiation of this multiclass appears in the surrounding section.
882multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
883  def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
884          (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
885          Requires<[Pred]>;
886}
887
888// a-b*c => fmad(-b,c, a)
889// - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
890// b*c-a => fmad(b, c, -a)
891// - legal because b*c-a <=> b*c+(-a)
// f32 subtraction patterns folded into a multiply-add instruction.
//   Inst - three-operand multiply-add instruction to emit.
//   Pred - predicate guarding both patterns.
892multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  // a - b*c: negate the first multiplicand and keep the addend.
893  def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
894          (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
895          Requires<[Pred]>;
  // b*c - a: keep the product and negate the addend.
896  def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
897          (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
898          Requires<[Pred]>;
899}
900
901// a-b*c => fmad(-b,c, a)
902// b*c-a => fmad(b, c, -a)
// f64 counterpart of FPCONTRACT32_SUB_PAT; negation uses FNEGf64.
//   Inst - three-operand multiply-add instruction to emit.
//   Pred - predicate guarding both patterns.
903multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  // a - b*c: negate the first multiplicand and keep the addend.
904  def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
905          (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
906          Requires<[Pred]>;
907
  // b*c - a: keep the product and negate the addend.
908  def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
909          (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
910          Requires<[Pred]>;
911}
912
// Attach the subtraction patterns to the all-register (rrr) FMA records.
// Each instantiation is guarded by the corresponding doFMA*AGG* predicate.
913defm FMAF32ext_ftz  : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
914defm FMAF32ext  : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
915defm FMAF64ext  : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
916
// f32 sine and cosine: select fsin / fcos on Float32Regs and print the
// sin.approx.f32 / cos.approx.f32 forms. No predicate guards these records.
917def SINF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
918                      "sin.approx.f32 \t$dst, $src;",
919                      [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
920def COSF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
921                      "cos.approx.f32 \t$dst, $src;",
922                      [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
923
// Lower (frem x, y) into (sub x, (mul (trunc (div x, y)) y)),
// i.e. "poor man's fmod()".
//
// The quotient is rounded toward zero, not toward minus infinity: the result
// of frem takes the sign of the dividend, so frem(-5.5, 2.0) must be -1.5.
// A floor-based expansion would produce 0.5 whenever x and y have opposite
// signs and the division is inexact.
// NOTE(review): CvtRZI / CvtRZI_FTZ are expected to be declared alongside
// CvtRMI / CvtRMI_FTZ in this file -- confirm before landing.

// frem - f32 FTZ
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
          (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32
            (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRZI_FTZ),
             Float32Regs:$y))>,
          Requires<[doF32FTZ]>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
          (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32
            (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRZI_FTZ),
             fpimm:$y))>,
          Requires<[doF32FTZ]>;

// frem - f32
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
          (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32
            (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRZI),
             Float32Regs:$y))>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
          (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32
            (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRZI),
             fpimm:$y))>;

// frem - f64
def : Pat<(frem Float64Regs:$x, Float64Regs:$y),
          (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64
            (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRZI),
             Float64Regs:$y))>;
def : Pat<(frem Float64Regs:$x, fpimm:$y),
          (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64
            (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRZI),
             fpimm:$y))>;
958
959//-----------------------------------
960// Logical Arithmetic
961//-----------------------------------
962
// LOG_FORMAT: two-input bitwise/logical operation in every integer width.
//   OpcStr - mnemonic stem; a type suffix (.pred/.b16/.b32/.b64) is appended.
//   OpNode - SelectionDAG node matched by each record.
// Each width gets a register/register (rr) and register/immediate (ri) form.
963multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
  // 1-bit predicate registers.
964  def b1rr:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
965                      !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
966                      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
967  def b1ri:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
968                      !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
969                      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
  // 16-bit.
970  def b16rr:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
971                      !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
972                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
973                        Int16Regs:$b))]>;
974  def b16ri:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
975                      !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
976                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
  // 32-bit.
977  def b32rr:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
978                      !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
979                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
980                        Int32Regs:$b))]>;
981  def b32ri:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
982                      !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
983                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
  // 64-bit.
984  def b64rr:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
985                      !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
986                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
987                        Int64Regs:$b))]>;
988  def b64ri:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
989                      !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
990                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
991}
992
// or / and / xor in all LOG_FORMAT widths. The generated record names are
// the defm prefix plus the suffix, e.g. ORb1rr and ANDb1rr, which the i1
// select pattern in this file refers to.
992defm OR  : LOG_FORMAT<"or", or>;
993defm AND : LOG_FORMAT<"and", and>;
994defm XOR : LOG_FORMAT<"xor", xor>;
996
// Bitwise NOT, one record per register width. Each selects the `not`
// fragment and prints not.pred / not.b16 / not.b32 / not.b64.
996def NOT1:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
997                      "not.pred \t$dst, $src;",
998                      [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
999def NOT16:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
1000                      "not.b16 \t$dst, $src;",
1001                      [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
1002def NOT32:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
1003                      "not.b32 \t$dst, $src;",
1004                      [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
1005def NOT64:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
1006                      "not.b64 \t$dst, $src;",
1007                      [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
1009
1010// For shifts, the second src operand must be 32-bit value
// LSHIFT_FORMAT: shift records for 64/32/16-bit values.
//   OpcStr - mnemonic stem; the operand width ("64"/"32"/"16") is appended
//            directly to it.
//   OpNode - SelectionDAG shift node to match.
// The shift amount ($b) is always 32-bit, either an Int32Regs register or an
// i32 immediate, regardless of the width of the shifted value.
1011multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
1012   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1013                      Int32Regs:$b),
1014                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1015                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1016                        Int32Regs:$b))]>;
1017   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1018                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1019                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1020                        (i32 imm:$b)))]>;
1021   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1022                      Int32Regs:$b),
1023                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1024                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1025                        Int32Regs:$b))]>;
1026   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1027                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1028                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1029                        (i32 imm:$b)))]>;
   // Both the shifted value and the amount are immediates (32-bit only).
1030   def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1031                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1032                      [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1033                        (i32 imm:$b)))]>;
1034   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1035                      Int32Regs:$b),
1036                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1037                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1038                        Int32Regs:$b))]>;
1039   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1040                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1041                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1042                        (i32 imm:$b)))]>;
1043}
1044
// Left shift: mnemonics shl.b64 / shl.b32 / shl.b16, matching `shl`.
1045defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
1046
1047// For shifts, the second src operand must be 32-bit value
1048// Need to add cvt for the 8-bits.
// RSHIFT_FORMAT: right-shift records for 64/32/16-bit values.
//   OpcStr - mnemonic stem; the operand width ("64"/"32"/"16") is appended
//            directly to it.
//   OpNode - SelectionDAG shift node to match.
// The set of records mirrors LSHIFT_FORMAT; the shift amount ($b) is always
// 32-bit.
1049multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
1050   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1051                      Int32Regs:$b),
1052                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1053                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1054                        Int32Regs:$b))]>;
1055   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1056                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1057                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1058                        (i32 imm:$b)))]>;
1059   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1060                      Int32Regs:$b),
1061                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1062                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1063                        Int32Regs:$b))]>;
1064   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1065                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1066                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1067                        (i32 imm:$b)))]>;
   // Both the shifted value and the amount are immediates (32-bit only).
1068   def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1069                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1070                      [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1071                        (i32 imm:$b)))]>;
1072   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1073                      Int32Regs:$b),
1074                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1075                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1076                        Int32Regs:$b))]>;
1077   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1078                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1079                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1080                        (i32 imm:$b)))]>;
1081}
1082
// Right shifts: `sra` prints the signed form (shr.s*), `srl` the unsigned
// form (shr.u*).
1083defm SRA : RSHIFT_FORMAT<"shr.s", sra>;
1084defm SRL : RSHIFT_FORMAT<"shr.u", srl>;
1085
1086// 32bit
// Software 32-bit rotate by constant amounts. The expansion shifts $src left
// by $amt1 and right by $amt2 into two scratch registers and adds the two
// results. No selection pattern is attached here; the rotl/rotr Pat records
// in this file supply both shift amounts.
def ROT32imm_sw
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
              !strconcat("{{\n\t",
                         ".reg .b32 %lhs;\n\t",
                         ".reg .b32 %rhs;\n\t",
                         "shl.b32 \t%lhs, $src, $amt1;\n\t",
                         "shr.b32 \t%rhs, $src, $amt2;\n\t",
                         "add.u32 \t$dst, %lhs, %rhs;\n\t",
                         "}}"),
              []>;
1097
// Node transform: maps an immediate shift amount N to the i32 target constant
// (32 - N), i.e. the complementary amount for a 32-bit rotate.
def SUB_FRM_32 : SDNodeXForm<imm, [{
  uint64_t Amount = N->getZExtValue();
  return CurDAG->getTargetConstant(32 - Amount, MVT::i32);
}]>;
1101
// 32-bit rotate by an immediate, expanded through ROT32imm_sw.
// rotl: left-shift amount is $amt, right-shift amount is 32 - $amt.
1102def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
1103          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
// rotr: the two amounts are swapped relative to rotl.
1104def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
1105          (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
1106
// Software 32-bit rotate-left by a register amount. The expansion computes
// (src << amt) + (src >> (32 - amt)) using three scratch registers.
// NOTE(review): $amt is used unmasked -- confirm that callers guarantee
// 0 <= $amt < 32.
def ROTL32reg_sw
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$src, Int32Regs:$amt),
              !strconcat("{{\n\t",
                         ".reg .b32 %lhs;\n\t",
                         ".reg .b32 %rhs;\n\t",
                         ".reg .b32 %amt2;\n\t",
                         "shl.b32 \t%lhs, $src, $amt;\n\t",
                         "sub.s32 \t%amt2, 32, $amt;\n\t",
                         "shr.b32 \t%rhs, $src, %amt2;\n\t",
                         "add.u32 \t$dst, %lhs, %rhs;\n\t",
                         "}}"),
              [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
1119
// Software 32-bit rotate-right by a register amount. The expansion computes
// (src >> amt) + (src << (32 - amt)) using three scratch registers.
// NOTE(review): $amt is used unmasked -- confirm that callers guarantee
// 0 <= $amt < 32.
def ROTR32reg_sw
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$src, Int32Regs:$amt),
              !strconcat("{{\n\t",
                         ".reg .b32 %lhs;\n\t",
                         ".reg .b32 %rhs;\n\t",
                         ".reg .b32 %amt2;\n\t",
                         "shr.b32 \t%lhs, $src, $amt;\n\t",
                         "sub.s32 \t%amt2, 32, $amt;\n\t",
                         "shl.b32 \t%rhs, $src, %amt2;\n\t",
                         "add.u32 \t$dst, %lhs, %rhs;\n\t",
                         "}}"),
              [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
1132
1133// 64bit
// Software 64-bit rotate by constant amounts. The expansion shifts $src left
// by $amt1 and right by $amt2 into two scratch registers and adds the two
// results. No selection pattern is attached here; the rotl/rotr Pat records
// in this file supply both shift amounts.
def ROT64imm_sw
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2),
              !strconcat("{{\n\t",
                         ".reg .b64 %lhs;\n\t",
                         ".reg .b64 %rhs;\n\t",
                         "shl.b64 \t%lhs, $src, $amt1;\n\t",
                         "shr.b64 \t%rhs, $src, $amt2;\n\t",
                         "add.u64 \t$dst, %lhs, %rhs;\n\t",
                         "}}"),
              []>;
1144
// Node transform: maps an immediate shift amount N to the i32 target constant
// (64 - N), i.e. the complementary amount for a 64-bit rotate.
def SUB_FRM_64 : SDNodeXForm<imm, [{
  uint64_t Amount = N->getZExtValue();
  return CurDAG->getTargetConstant(64 - Amount, MVT::i32);
}]>;
1148
// 64-bit rotate by an immediate, expanded through ROT64imm_sw.
// rotl: left-shift amount is $amt, right-shift amount is 64 - $amt.
1149def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
1150          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
// rotr: the two amounts are swapped relative to rotl.
1151def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
1152          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
1153
// Software 64-bit rotate-left by a 32-bit register amount. The expansion
// computes (src << amt) + (src >> (64 - amt)) using three scratch registers.
// NOTE(review): $amt is used unmasked -- confirm that callers guarantee
// 0 <= $amt < 64.
def ROTL64reg_sw
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$src, Int32Regs:$amt),
              !strconcat("{{\n\t",
                         ".reg .b64 %lhs;\n\t",
                         ".reg .b64 %rhs;\n\t",
                         ".reg .u32 %amt2;\n\t",
                         "shl.b64 \t%lhs, $src, $amt;\n\t",
                         "sub.u32 \t%amt2, 64, $amt;\n\t",
                         "shr.b64 \t%rhs, $src, %amt2;\n\t",
                         "add.u64 \t$dst, %lhs, %rhs;\n\t",
                         "}}"),
              [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
1166
// Software 64-bit rotate-right by a 32-bit register amount. The expansion
// computes (src >> amt) + (src << (64 - amt)) using three scratch registers.
// NOTE(review): $amt is used unmasked -- confirm that callers guarantee
// 0 <= $amt < 64.
def ROTR64reg_sw
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$src, Int32Regs:$amt),
              !strconcat("{{\n\t",
                         ".reg .b64 %lhs;\n\t",
                         ".reg .b64 %rhs;\n\t",
                         ".reg .u32 %amt2;\n\t",
                         "shr.b64 \t%lhs, $src, $amt;\n\t",
                         "sub.u32 \t%amt2, 64, $amt;\n\t",
                         "shl.b64 \t%rhs, $src, %amt2;\n\t",
                         "add.u64 \t$dst, %lhs, %rhs;\n\t",
                         "}}"),
              [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
1179
1180
1181//-----------------------------------
1182// General Comparison
1183//-----------------------------------
1184
1185// General setp instructions
// SETP: compare-to-predicate records; the result is written to Int1Regs.
//   TypeStr - type suffix printed after "setp<cmp>." (e.g. "s32").
//   RC      - register class of the register operands.
//   ImmCls  - operand class used for the immediate operand.
// The comparison is carried by the CmpMode operand $cmp and printed through
// its "base" and "ftz" modifiers. None of the records has a selection
// pattern.
1186multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
  // register, register
1187  def rr : NVPTXInst<(outs Int1Regs:$dst),
1188                     (ins RC:$a, RC:$b, CmpMode:$cmp),
1189            !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
1190                     []>;
  // register, immediate
1191  def ri : NVPTXInst<(outs Int1Regs:$dst),
1192                     (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
1193            !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
1194                     []>;
  // immediate, register
1195  def ir : NVPTXInst<(outs Int1Regs:$dst),
1196                     (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
1197            !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
1198                     []>;
1199}
1200
// SETP instantiations: bit-typed (b), signed (s) and unsigned (u) variants
// for 16/32/64-bit integers, plus f32 and f64.
1201defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>;
1202defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>;
1203defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>;
1204defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>;
1205defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>;
1206defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>;
1207defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>;
1208defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>;
1209defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>;
1210defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
1211defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;
1212
1213// General set instructions
// SET: compare records whose result is written to a 32-bit register
// (Int32Regs) rather than a predicate.
//   TypeStr - type suffix printed after "set<cmp>.".
//   RC      - register class of the register operands.
//   ImmCls  - operand class used for the immediate operand.
// Unlike SETP, $cmp is printed here without a modifier. None of the records
// has a selection pattern.
1214multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
  // register, register
1215  def rr : NVPTXInst<(outs Int32Regs:$dst),
1216                     (ins RC:$a, RC:$b, CmpMode:$cmp),
1217                     !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
  // register, immediate
1218  def ri : NVPTXInst<(outs Int32Regs:$dst),
1219                     (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
1220                     !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
  // immediate, register
1221  def ir : NVPTXInst<(outs Int32Regs:$dst),
1222                     (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
1223                     !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
1224}
1225
// SET instantiations: the same type/register-class combinations as the SETP
// list.
1226defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
1227defm SET_s16 : SET<"s16", Int16Regs, i16imm>;
1228defm SET_u16 : SET<"u16", Int16Regs, i16imm>;
1229defm SET_b32 : SET<"b32", Int32Regs, i32imm>;
1230defm SET_s32 : SET<"s32", Int32Regs, i32imm>;
1231defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
1232defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
1233defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
1234defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
1235defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
1236defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
1237
1238//-----------------------------------
1239// General Selection
1240//-----------------------------------
1241
1242// General selp instructions
// SELP: predicate-controlled select records WITHOUT selection patterns.
//   TypeStr - type suffix printed after "selp.".
//   RC      - register class of the result and register operands.
//   ImmCls  - operand class used for immediate operands.
// Operands are printed as "$dst, $a, $b, $p", where $p is an Int1Regs
// predicate. The suffix gives the kinds of $a and $b (r = register,
// i = immediate).
1243multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
1244  def rr : NVPTXInst<(outs RC:$dst),
1245                     (ins RC:$a, RC:$b, Int1Regs:$p),
1246                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1247  def ri : NVPTXInst<(outs RC:$dst),
1248                     (ins RC:$a, ImmCls:$b, Int1Regs:$p),
1249                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1250  def ir : NVPTXInst<(outs RC:$dst),
1251                     (ins ImmCls:$a, RC:$b, Int1Regs:$p),
1252                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1253  def ii : NVPTXInst<(outs RC:$dst),
1254                     (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
1255                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1256}
1257
// SELP_PATTERN: the same four records as SELP, but each carries a pattern
// matching (select $p, $a, $b).
//   TypeStr - type suffix printed after "selp.".
//   RC      - register class of the result and register operands.
//   ImmCls  - operand class used for immediate operands.
//   ImmNode - DAG node used to match an immediate operand (imm or fpimm in
//             the instantiations in this file).
1258multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls,
1259                        SDNode ImmNode> {
1260  def rr : NVPTXInst<(outs RC:$dst),
1261                     (ins RC:$a, RC:$b, Int1Regs:$p),
1262                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1263                     [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>;
1264  def ri : NVPTXInst<(outs RC:$dst),
1265                     (ins RC:$a, ImmCls:$b, Int1Regs:$p),
1266                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1267                     [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
1268  def ir : NVPTXInst<(outs RC:$dst),
1269                     (ins ImmCls:$a, RC:$b, Int1Regs:$p),
1270                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1271                     [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
1272  def ii : NVPTXInst<(outs RC:$dst),
1273                     (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
1274                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1275                 [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
1276}
1277
// selp instantiations. The untyped-bits (bNN) and floating-point variants
// carry `select` patterns; the signed/unsigned variants are pattern-less.
// 16-bit
defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>;
defm SELP_s16 : SELP        <"s16", Int16Regs, i16imm>;
defm SELP_u16 : SELP        <"u16", Int16Regs, i16imm>;
// 32-bit
defm SELP_b32 : SELP_PATTERN<"b32", Int32Regs, i32imm, imm>;
defm SELP_s32 : SELP        <"s32", Int32Regs, i32imm>;
defm SELP_u32 : SELP        <"u32", Int32Regs, i32imm>;
// 64-bit
defm SELP_b64 : SELP_PATTERN<"b64", Int64Regs, i64imm, imm>;
defm SELP_s64 : SELP        <"s64", Int64Regs, i64imm>;
defm SELP_u64 : SELP        <"u64", Int64Regs, i64imm>;
// floating point
defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;
1289
// Select between two predicates. Expanded into predicate logic:
//   select(p, a, b)  ==>  (p & a) | (!p & b)
def : Pat<
  (i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
  (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
          (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
1294
1295//-----------------------------------
1296// Data Movement (Load / Store, Move)
1297//-----------------------------------
1298
// Two-operand complex patterns rooted at frameindex nodes. Matching is
// delegated to the named C++ selector functions; SDNPWantRoot asks for the
// root node to be passed to the selector as well.
def ADDRri   : ComplexPattern<i32, 2, "SelectADDRri",   [frameindex],
                              [SDNPWantRoot]>;
def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
                              [SDNPWantRoot]>;
1303
// Memory operands made of a register and an immediate sub-operand, in
// 32-bit and 64-bit flavours. Both are printed by printMemOperand.
let PrintMethod = "printMemOperand" in {
  def MEMri : Operand<i32> {
    let MIOperandInfo = (ops Int32Regs, i32imm);
  }
  def MEMri64 : Operand<i64> {
    let MIOperandInfo = (ops Int64Regs, i64imm);
  }
}
1312
// Pointer-typed operands printed with the generic printOperand method.
let PrintMethod = "printOperand" in {
  def imem    : Operand<iPTR>;
  def imemAny : Operand<iPTRAny>;
}

// i32 operand printed by printLdStCode.
let PrintMethod = "printLdStCode" in
def LdStCode : Operand<i32>;
1324
// NVPTXISD::Wrapper: one result, one operand; the result has the operand's
// type and must be of pointer type.
def SDTWrapper : SDTypeProfile<1, 1,
                               [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
1327
// Move the address of a wrapped global into a 32-bit / 64-bit register.
def MOV_ADDR :
  NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
            "mov.u32 \t$dst, $a;",
            [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;

def MOV_ADDR64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
            "mov.u64 \t$dst, $a;",
            [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1335
// Get pointer to local stack: moves the symbol __local_depot<num> into a
// 32-bit or 64-bit register. No selection pattern is attached.
def MOV_DEPOT_ADDR :
  NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
            "mov.u32 \t$d, __local_depot$num;",
            []>;
def MOV_DEPOT_ADDR_64 :
  NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
            "mov.u64 \t$d, __local_depot$num;",
            []>;
1343
1344
// Register-to-register moves, all flagged IsSimpleMove. They carry no
// selection patterns; copyPhysreg is hard-coded in NVPTXInstrInfo.cpp.
let IsSimpleMove = 1 in {
  // Predicate and integer registers.
  def IMOV1rr :
    NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
              "mov.pred \t$dst, $sss;", []>;
  def IMOV16rr :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
              "mov.u16 \t$dst, $sss;", []>;
  def IMOV32rr :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
              "mov.u32 \t$dst, $sss;", []>;
  def IMOV64rr :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
              "mov.u64 \t$dst, $sss;", []>;

  // Floating-point registers.
  def FMOV32rr :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
              "mov.f32 \t$dst, $src;", []>;
  def FMOV64rr :
    NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
              "mov.f64 \t$dst, $src;", []>;
}
// Immediate-to-register moves. Each one is selected for materialising a
// constant (imm / fpimm) of the matching register class.
def IMOV1ri :
  NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
            "mov.pred \t$dst, $src;",
            [(set Int1Regs:$dst, imm:$src)]>;
def IMOV16ri :
  NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
            "mov.u16 \t$dst, $src;",
            [(set Int16Regs:$dst, imm:$src)]>;
def IMOV32ri :
  NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
            "mov.u32 \t$dst, $src;",
            [(set Int32Regs:$dst, imm:$src)]>;
def IMOV64i :
  NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
            "mov.u64 \t$dst, $src;",
            [(set Int64Regs:$dst, imm:$src)]>;

def FMOV32ri :
  NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
            "mov.f32 \t$dst, $src;",
            [(set Float32Regs:$dst, fpimm:$src)]>;
def FMOV64ri :
  NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
            "mov.f64 \t$dst, $src;",
            [(set Float64Regs:$dst, fpimm:$src)]>;
1380
// A wrapped external symbol of type i32 is materialised with IMOV32ri.
def : Pat<(i32 (Wrapper texternalsym:$dst)),
          (IMOV32ri texternalsym:$dst)>;
1382
//---- Copy Frame Index ----
// Selected for the ADDRri / ADDRri64 complex patterns; the memory operand is
// printed with the "add" modifier as the sources of an add instruction.
def LEA_ADDRi :
  NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
            "add.u32 \t$dst, ${addr:add};",
            [(set Int32Regs:$dst, ADDRri:$addr)]>;
def LEA_ADDRi64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
            "add.u64 \t$dst, ${addr:add};",
            [(set Int64Regs:$dst, ADDRri64:$addr)]>;
1390
1391//-----------------------------------
1392// Comparison and Selection
1393//-----------------------------------
1394
// ISET_FORMAT: selection patterns for one integer comparison.
//   OpNode   - the setcc fragment being matched (e.g. setgt).
//   Mode     - comparison-mode operand appended to the chosen instruction.
//   setp_*   - instructions used when the result type is i1 (predicate).
//   set_*    - instructions used when the result type is i32.
// For every operand width (16/32/64) there is an rr, ri and ir form.
multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
                       Instruction setp_16rr,
                       Instruction setp_16ri,
                       Instruction setp_16ir,
                       Instruction setp_32rr,
                       Instruction setp_32ri,
                       Instruction setp_32ir,
                       Instruction setp_64rr,
                       Instruction setp_64ri,
                       Instruction setp_64ir,
                       Instruction set_16rr,
                       Instruction set_16ri,
                       Instruction set_16ir,
                       Instruction set_32rr,
                       Instruction set_32ri,
                       Instruction set_32ir,
                       Instruction set_64rr,
                       Instruction set_64ri,
                       Instruction set_64ir> {
  //===--- Result is a predicate (i1) ---===//

  // i16 operands
  def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)),
            (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)),
            (setp_16ri Int16Regs:$a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)),
            (setp_16ir imm:$a, Int16Regs:$b, Mode)>;

  // i32 operands
  def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)),
            (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)),
            (setp_32ri Int32Regs:$a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, Int32Regs:$b)),
            (setp_32ir imm:$a, Int32Regs:$b, Mode)>;

  // i64 operands
  def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)),
            (setp_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Int64Regs:$a, imm:$b)),
            (setp_64ri Int64Regs:$a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, Int64Regs:$b)),
            (setp_64ir imm:$a, Int64Regs:$b, Mode)>;

  //===--- Result is an i32 ---===//

  // i16 operands
  def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)),
            (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)),
            (set_16ri Int16Regs:$a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)),
            (set_16ir imm:$a, Int16Regs:$b, Mode)>;

  // i32 operands
  def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)),
            (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)),
            (set_32ri Int32Regs:$a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, Int32Regs:$b)),
            (set_32ir imm:$a, Int32Regs:$b, Mode)>;

  // i64 operands
  def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)),
            (set_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Int64Regs:$a, imm:$b)),
            (set_64ri Int64Regs:$a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, Int64Regs:$b)),
            (set_64ir imm:$a, Int64Regs:$b, Mode)>;
}
1458
// ISET_FORMAT bound to the signed (sNN) SETP_* / SET_* instructions.
multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
    : ISET_FORMAT<OpNode, Mode,
                  // i1 result: 16-, 32-, 64-bit operands
                  SETP_s16rr, SETP_s16ri, SETP_s16ir,
                  SETP_s32rr, SETP_s32ri, SETP_s32ir,
                  SETP_s64rr, SETP_s64ri, SETP_s64ir,
                  // i32 result: 16-, 32-, 64-bit operands
                  SET_s16rr, SET_s16ri, SET_s16ir,
                  SET_s32rr, SET_s32ri, SET_s32ir,
                  SET_s64rr, SET_s64ri, SET_s64ir> {
  // Placeholder record: TableGen doesn't like empty multiclasses.
  def : PatLeaf<(i32 0)>;
}
1470
// ISET_FORMAT bound to the unsigned (uNN) SETP_* / SET_* instructions.
multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
    : ISET_FORMAT<OpNode, Mode,
                  // i1 result: 16-, 32-, 64-bit operands
                  SETP_u16rr, SETP_u16ri, SETP_u16ir,
                  SETP_u32rr, SETP_u32ri, SETP_u32ir,
                  SETP_u64rr, SETP_u64ri, SETP_u64ir,
                  // i32 result: 16-, 32-, 64-bit operands
                  SET_u16rr, SET_u16ri, SET_u16ir,
                  SET_u32rr, SET_u32ri, SET_u32ir,
                  SET_u64rr, SET_u64ri, SET_u64ir> {
  // Placeholder record: TableGen doesn't like empty multiclasses.
  def : PatLeaf<(i32 0)>;
}
1482
// Integer comparison patterns. Each condition is instantiated twice: the
// plain setcc fragment goes to the signed instructions and the setu* fragment
// to the unsigned ones, both with the same comparison mode.
defm : ISET_FORMAT_SIGNED  <setgt,  CmpGT>;
defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;
defm : ISET_FORMAT_SIGNED  <setlt,  CmpLT>;
defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;
defm : ISET_FORMAT_SIGNED  <setge,  CmpGE>;
defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;
defm : ISET_FORMAT_SIGNED  <setle,  CmpLE>;
defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;
defm : ISET_FORMAT_SIGNED  <seteq,  CmpEQ>;
defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;
defm : ISET_FORMAT_SIGNED  <setne,  CmpNE>;
defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;
1495
// i1 compares, built from predicate logic:
//   a != b  ==>  a xor b
//   a == b  ==>  not (a xor b)
// The setu* fragments map to the same expansions.

// Inequality.
def : Pat<(setne  Int1Regs:$a, Int1Regs:$b),
          (XORb1rr Int1Regs:$a, Int1Regs:$b)>;
def : Pat<(setune Int1Regs:$a, Int1Regs:$b),
          (XORb1rr Int1Regs:$a, Int1Regs:$b)>;

// Equality.
def : Pat<(seteq  Int1Regs:$a, Int1Regs:$b),
          (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
def : Pat<(setueq Int1Regs:$a, Int1Regs:$b),
          (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
1506
// i1 compare -> i32
// The i32 result is -1 when the comparison holds and 0 otherwise. Both
// expansions select on (a xor b), which is true exactly when a != b:
//   setne: xor true -> -1, xor false ->  0
//   seteq: xor true ->  0, xor false -> -1
// The second pattern previously matched setne as well, which made it a
// shadowed duplicate with the inverted result; it is the seteq expansion.
def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
          (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
def : Pat<(i32 (seteq Int1Regs:$a, Int1Regs:$b)),
          (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
1512
1513
1514
// FSET_FORMAT: selection patterns for one floating-point comparison.
//   OpNode  - the setcc fragment being matched.
//   Mode    - comparison-mode operand used by default.
//   ModeFTZ - comparison-mode operand used for f32 when doF32FTZ holds.
// Each f32 pattern comes as a pair: the doF32FTZ-guarded variant first,
// followed by the unguarded one. f64 patterns always use Mode.
multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
  //===--- f32 operands, predicate (i1) result ---===//
  def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;

  def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
            (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
            (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>;

  def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
            (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
            (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>;

  //===--- f64 operands, predicate (i1) result ---===//
  def : Pat<(i1 (OpNode Float64Regs:$a, Float64Regs:$b)),
            (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Float64Regs:$a, fpimm:$b)),
            (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)),
            (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>;

  //===--- f32 operands, i32 result ---===//
  def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;

  def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
            (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
            (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>;

  def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
            (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
            (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>;

  //===--- f64 operands, i32 result ---===//
  def : Pat<(i32 (OpNode Float64Regs:$a, Float64Regs:$b)),
            (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Float64Regs:$a, fpimm:$b)),
            (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i32 (OpNode fpimm:$a, Float64Regs:$b)),
            (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
}
1566
// Ordered floating-point comparisons.
defm FSetGT  : FSET_FORMAT<setogt, CmpGT,  CmpGT_FTZ>;
defm FSetLT  : FSET_FORMAT<setolt, CmpLT,  CmpLT_FTZ>;
defm FSetGE  : FSET_FORMAT<setoge, CmpGE,  CmpGE_FTZ>;
defm FSetLE  : FSET_FORMAT<setole, CmpLE,  CmpLE_FTZ>;
defm FSetEQ  : FSET_FORMAT<setoeq, CmpEQ,  CmpEQ_FTZ>;
defm FSetNE  : FSET_FORMAT<setone, CmpNE,  CmpNE_FTZ>;

// Unordered floating-point comparisons.
defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
defm FSetUGE : FSET_FORMAT<setuge, CmpGEU, CmpGEU_FTZ>;
defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;

// seto / setuo map to the NUM / NAN comparison modes.
defm FSetNUM : FSET_FORMAT<seto,  CmpNUM, CmpNUM_FTZ>;
defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
1583
1584//def ld_param         : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
1585//                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
1586
// Type profiles for the NVPTXISD call / parameter nodes.
// SDTypeProfile<NumResults, NumOperands, Constraints>; constraint indices
// count results first, then operands.

// Declarations.
def SDTDeclareParamProfile :
  SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTDeclareScalarParamProfile :
  SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;

// Parameter loads (scalar, 2-element, 4-element).
def SDTLoadParamProfile :
  SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamV2Profile :
  SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
def SDTLoadParamV4Profile :
  SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;

// Call printing.
def SDTPrintCallProfile :
  SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTPrintCallUniProfile :
  SDTypeProfile<0, 1, [SDTCisInt<0>]>;

// Parameter stores (scalar, 2-element, 4-element, 32-bit extended).
def SDTStoreParamProfile :
  SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV2Profile :
  SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV4Profile :
  SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParam32Profile :
  SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;

// Call arguments and call sequence.
def SDTCallArgProfile :
  SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTCallArgMarkProfile :
  SDTypeProfile<0, 0, []>;
def SDTCallVoidProfile :
  SDTypeProfile<0, 1, []>;
def SDTCallValProfile :
  SDTypeProfile<1, 0, []>;
def SDTMoveParamProfile :
  SDTypeProfile<1, 1, []>;

// Return-value stores (scalar, 2-element, 4-element).
def SDTStoreRetvalProfile :
  SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTStoreRetvalV2Profile :
  SDTypeProfile<0, 3, [SDTCisInt<0>]>;
def SDTStoreRetvalV4Profile :
  SDTypeProfile<0, 5, [SDTCisInt<0>]>;

def SDTPseudoUseParamProfile :
  SDTypeProfile<0, 1, []>;
1609
// SelectionDAG nodes for the NVPTXISD call / parameter opcodes. Each def
// binds an opcode name to its type profile and node properties.

// Parameter / return-value declarations.
def DeclareParam :
  SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareScalarParam :
  SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRetParam :
  SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRet :
  SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Parameter loads.
def LoadParam :
  SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
         [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV2 :
  SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
         [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV4 :
  SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
         [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;

// Call printing.
def PrintCall :
  SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintCallUni :
  SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Parameter stores.
def StoreParam :
  SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV2 :
  SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV4 :
  SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamU32 :
  SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamS32 :
  SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Call argument list and call target.
def CallArgBegin :
  SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArg :
  SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LastCallArg :
  SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgEnd :
  SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVoid :
  SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def Prototype :
  SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVal :
  SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// MoveParam has no node properties.
def MoveParam :
  SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;

// Return-value stores: chained and side-effecting, but not glued.
def StoreRetval :
  SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
         [SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV2 :
  SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile,
         [SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV4 :
  SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
         [SDNPHasChain, SDNPSideEffect]>;

def PseudoUseParam :
  SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def RETURNNode :
  SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
         [SDNPHasChain, SDNPSideEffect]>;
1667
// Instruction classes for reading call results. `opstr` is the type suffix
// appended to the opcode (e.g. ".b32"); `$b` is an immediate printed after
// the retval symbol.

// Scalar load from [retval0+$b]. No selection pattern.
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
                !strconcat("ld.param", opstr, "\t$dst, [retval0+$b];"),
                []>;

// Register move from retval$b; selected for (LoadParam 0, imm).
class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
                !strconcat("mov", opstr, "\t$dst, retval$b;"),
                [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;

// Two-element vector load from [retval0+$b]. No selection pattern.
class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
                !strconcat("ld.param.v2", opstr,
                           "\t{{$dst, $dst2}}, [retval0+$b];"),
                []>;

// Four-element vector load from [retval0+$b]. No selection pattern.
class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs regclass:$dst, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins i32imm:$b),
                !strconcat("ld.param.v4", opstr,
                           "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
                []>;
1691
// Instruction classes for writing outgoing call parameters. The destination
// is printed as [param$a+$b], both taken from immediates; `opstr` is the
// type suffix appended to the opcode. No selection patterns are attached.

// Scalar store.
class StoreParamInst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs),
                (ins regclass:$val, i32imm:$a, i32imm:$b),
                !strconcat("st.param", opstr, "\t[param$a+$b], $val;"),
                []>;

// Two-element vector store.
class StoreParamV2Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs),
                (ins regclass:$val, regclass:$val2, i32imm:$a, i32imm:$b),
                !strconcat("st.param.v2", opstr,
                           "\t[param$a+$b], {{$val, $val2}};"),
                []>;
1704
// Four-element vector store to an outgoing call parameter, printed as
//   st.param.v4<opstr> [param$a+$b], {$val, $val2, $val3, $val4};
// The operand names in the `ins` list must match the placeholders used in
// the assembly string. The list used to declare $val, $val1, $val2, $val3
// while the string referenced $val4 (and never $val1), leaving $val4
// undefined; the operands now follow the $val, $val2, $val3, $val4 naming
// used by StoreRetvalV4Inst.
class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3,
                             regclass:$val4, i32imm:$a, i32imm:$b),
                !strconcat("st.param.v4", opstr,
                           "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
                []>;
1711
// Instruction classes for writing the function's return value. The
// destination is printed as [func_retval0+$a]; `opstr` is the type suffix
// appended to the opcode. No selection patterns are attached.

// Scalar store.
class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs),
                (ins regclass:$val, i32imm:$a),
                !strconcat("st.param", opstr,
                           "\t[func_retval0+$a], $val;"),
                []>;

// Two-element vector store.
class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs),
                (ins regclass:$val, regclass:$val2, i32imm:$a),
                !strconcat("st.param.v2", opstr,
                           "\t[func_retval0+$a], {{$val, $val2}};"),
                []>;

// Four-element vector store.
class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs),
                (ins regclass:$val, regclass:$val2,
                     regclass:$val3, regclass:$val4, i32imm:$a),
                !strconcat("st.param.v4", opstr,
                           "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
                []>;
1731
// Opening text of a call. (PrintCall N) selects the variant that lists
// retval0 .. retval<N-1> in parentheses; N = 0 prints a bare "call ".
def PrintCallRetInst1 :
  NVPTXInst<(outs), (ins),
            "call (retval0), ",
            [(PrintCall (i32 1))]>;
def PrintCallRetInst2 :
  NVPTXInst<(outs), (ins),
            "call (retval0, retval1), ",
            [(PrintCall (i32 2))]>;
def PrintCallRetInst3 :
  NVPTXInst<(outs), (ins),
            "call (retval0, retval1, retval2), ",
            [(PrintCall (i32 3))]>;
def PrintCallRetInst4 :
  NVPTXInst<(outs), (ins),
            "call (retval0, retval1, retval2, retval3), ",
            [(PrintCall (i32 4))]>;
def PrintCallRetInst5 :
  NVPTXInst<(outs), (ins),
            "call (retval0, retval1, retval2, retval3, retval4), ",
            [(PrintCall (i32 5))]>;
def PrintCallRetInst6 :
  NVPTXInst<(outs), (ins),
            "call (retval0, retval1, retval2, retval3, retval4, retval5), ",
            [(PrintCall (i32 6))]>;
def PrintCallRetInst7 :
  NVPTXInst<(outs), (ins),
            "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
            [(PrintCall (i32 7))]>;
def PrintCallRetInst8 :
  NVPTXInst<(outs), (ins),
            "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6, retval7), ",
            [(PrintCall (i32 8))]>;

def PrintCallNoRetInst :
  NVPTXInst<(outs), (ins),
            "call ",
            [(PrintCall (i32 0))]>;
1760
// Opening text of a call.uni. (PrintCallUni N) selects the variant that
// lists retval0 .. retval<N-1> in parentheses; N = 0 prints a bare
// "call.uni ".
def PrintCallUniRetInst1 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0), ",
            [(PrintCallUni (i32 1))]>;
def PrintCallUniRetInst2 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1), ",
            [(PrintCallUni (i32 2))]>;
def PrintCallUniRetInst3 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2), ",
            [(PrintCallUni (i32 3))]>;
def PrintCallUniRetInst4 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2, retval3), ",
            [(PrintCallUni (i32 4))]>;
def PrintCallUniRetInst5 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2, retval3, retval4), ",
            [(PrintCallUni (i32 5))]>;
def PrintCallUniRetInst6 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
            [(PrintCallUni (i32 6))]>;
def PrintCallUniRetInst7 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
            [(PrintCallUni (i32 7))]>;
def PrintCallUniRetInst8 :
  NVPTXInst<(outs), (ins),
            "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6, retval7), ",
            [(PrintCallUni (i32 8))]>;

def PrintCallUniNoRetInst :
  NVPTXInst<(outs), (ins),
            "call.uni ",
            [(PrintCallUni (i32 0))]>;
1789
// ld.param instantiations. The .b8 variants use the Int16Regs register
// class, as do the .b16 ones.

// Integer: scalar
def LoadParamMemI64   : LoadParamMemInst  <Int64Regs,   ".b64">;
def LoadParamMemI32   : LoadParamMemInst  <Int32Regs,   ".b32">;
def LoadParamMemI16   : LoadParamMemInst  <Int16Regs,   ".b16">;
def LoadParamMemI8    : LoadParamMemInst  <Int16Regs,   ".b8">;
// Integer: two elements
def LoadParamMemV2I64 : LoadParamV2MemInst<Int64Regs,   ".b64">;
def LoadParamMemV2I32 : LoadParamV2MemInst<Int32Regs,   ".b32">;
def LoadParamMemV2I16 : LoadParamV2MemInst<Int16Regs,   ".b16">;
def LoadParamMemV2I8  : LoadParamV2MemInst<Int16Regs,   ".b8">;
// Integer: four elements
def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs,   ".b32">;
def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs,   ".b16">;
def LoadParamMemV4I8  : LoadParamV4MemInst<Int16Regs,   ".b8">;
// Floating point
def LoadParamMemF32   : LoadParamMemInst  <Float32Regs, ".f32">;
def LoadParamMemF64   : LoadParamMemInst  <Float64Regs, ".f64">;
def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">;
def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">;
def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">;
1806
// Integer st.param instantiations. The .b8 variants use the Int16Regs
// register class, as do the .b16 ones.

// Scalar
def StoreParamI64   : StoreParamInst  <Int64Regs, ".b64">;
def StoreParamI32   : StoreParamInst  <Int32Regs, ".b32">;
def StoreParamI16   : StoreParamInst  <Int16Regs, ".b16">;
def StoreParamI8    : StoreParamInst  <Int16Regs, ".b8">;
// Two elements
def StoreParamV2I64 : StoreParamV2Inst<Int64Regs, ".b64">;
def StoreParamV2I32 : StoreParamV2Inst<Int32Regs, ".b32">;
def StoreParamV2I16 : StoreParamV2Inst<Int16Regs, ".b16">;
def StoreParamV2I8  : StoreParamV2Inst<Int16Regs, ".b8">;
1816
// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4I32    : StoreParamV4Inst<Int32Regs, ".b32">;
// Written out by hand in the meantime. Four registers are stored from a
// brace-enclosed vector operand, so the opcode must carry the .v4 vector
// qualifier, like the other hand-written V4 stores (st.param.v4.b16,
// st.param.v4.b8, st.param.v4.f32). It was previously emitted as plain
// "st.param.b32".
def StoreParamV4I32    : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2,
                                                Int32Regs:$val3, Int32Regs:$val4,
                                                i32imm:$a, i32imm:$b),
                "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
                         []>;
1824
// Hand-written four-element parameter stores for 16-bit and 8-bit values.
// Both take their values from Int16Regs registers.
def StoreParamV4I16 :
  NVPTXInst<(outs),
            (ins Int16Regs:$val, Int16Regs:$val2,
                 Int16Regs:$val3, Int16Regs:$val4,
                 i32imm:$a, i32imm:$b),
            "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
            []>;

def StoreParamV4I8 :
  NVPTXInst<(outs),
            (ins Int16Regs:$val, Int16Regs:$val2,
                 Int16Regs:$val3, Int16Regs:$val4,
                 i32imm:$a, i32imm:$b),
            "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
            []>;
1836
// Floating-point st.param instantiations.
def StoreParamF32   : StoreParamInst  <Float32Regs, ".f32">;
def StoreParamF64   : StoreParamInst  <Float64Regs, ".f64">;
def StoreParamV2F32 : StoreParamV2Inst<Float32Regs, ".f32">;
def StoreParamV2F64 : StoreParamV2Inst<Float64Regs, ".f64">;

// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4F32    : StoreParamV4Inst<Float32Regs, ".f32">;
// Written out by hand in the meantime.
def StoreParamV4F32 :
  NVPTXInst<(outs),
            (ins Float32Regs:$val, Float32Regs:$val2,
                 Float32Regs:$val3, Float32Regs:$val4,
                 i32imm:$a, i32imm:$b),
            "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
            []>;
1849
1850
// Return-value store instantiations. The .b8 variants use the Int16Regs
// register class, as do the .b16 ones.

// Integer: scalar
def StoreRetvalI64   : StoreRetvalInst  <Int64Regs,   ".b64">;
def StoreRetvalI32   : StoreRetvalInst  <Int32Regs,   ".b32">;
def StoreRetvalI16   : StoreRetvalInst  <Int16Regs,   ".b16">;
def StoreRetvalI8    : StoreRetvalInst  <Int16Regs,   ".b8">;
// Integer: two elements
def StoreRetvalV2I64 : StoreRetvalV2Inst<Int64Regs,   ".b64">;
def StoreRetvalV2I32 : StoreRetvalV2Inst<Int32Regs,   ".b32">;
def StoreRetvalV2I16 : StoreRetvalV2Inst<Int16Regs,   ".b16">;
def StoreRetvalV2I8  : StoreRetvalV2Inst<Int16Regs,   ".b8">;
// Integer: four elements
def StoreRetvalV4I32 : StoreRetvalV4Inst<Int32Regs,   ".b32">;
def StoreRetvalV4I16 : StoreRetvalV4Inst<Int16Regs,   ".b16">;
def StoreRetvalV4I8  : StoreRetvalV4Inst<Int16Regs,   ".b8">;

// Floating point
def StoreRetvalF64   : StoreRetvalInst  <Float64Regs, ".f64">;
def StoreRetvalF32   : StoreRetvalInst  <Float32Regs, ".f32">;
def StoreRetvalV2F64 : StoreRetvalV2Inst<Float64Regs, ".f64">;
def StoreRetvalV2F32 : StoreRetvalV2Inst<Float32Regs, ".f32">;
def StoreRetvalV4F32 : StoreRetvalV4Inst<Float32Regs, ".f32">;
1868
// Argument-list delimiters and the return instruction. CallArgEnd's operand
// picks the closing text: 1 prints ");" and 0 prints ")".
def CallArgBeginInst :
  NVPTXInst<(outs), (ins), "(",    [(CallArgBegin)]>;
def CallArgEndInst1 :
  NVPTXInst<(outs), (ins), ");",   [(CallArgEnd (i32 1))]>;
def CallArgEndInst0 :
  NVPTXInst<(outs), (ins), ")",    [(CallArgEnd (i32 0))]>;
def RETURNInst :
  NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
1873
// A register call argument followed by ", "; matches (CallArg 0, reg).
class CallArgInst<NVPTXRegClass regclass> :
      NVPTXInst<(outs), (ins regclass:$a),
                "$a, ",
                [(CallArg (i32 0), regclass:$a)]>;

// The final register call argument, printed without a trailing separator;
// matches (LastCallArg 0, reg).
class LastCallArgInst<NVPTXRegClass regclass> :
      NVPTXInst<(outs), (ins regclass:$a),
                "$a",
                [(LastCallArg (i32 0), regclass:$a)]>;
1881
// Non-final register arguments, integer then floating-point.
def CallArgI64 : CallArgInst<Int64Regs>;
def CallArgI32 : CallArgInst<Int32Regs>;
def CallArgI16 : CallArgInst<Int16Regs>;

def CallArgF64 : CallArgInst<Float64Regs>;
def CallArgF32 : CallArgInst<Float32Regs>;

// Final register arguments, integer then floating-point.
def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInst<Int32Regs>;
def LastCallArgI16 : LastCallArgInst<Int16Regs>;

def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;
1895
// 32-bit immediate call arguments (first CallArg/LastCallArg operand is 0).
def CallArgI32imm :
  NVPTXInst<(outs), (ins i32imm:$a),
            "$a, ",
            [(CallArg (i32 0), (i32 imm:$a))]>;
def LastCallArgI32imm :
  NVPTXInst<(outs), (ins i32imm:$a),
            "$a",
            [(LastCallArg (i32 0), (i32 imm:$a))]>;

// Call arguments naming a numbered parameter, printed as "param<N>"
// (first CallArg/LastCallArg operand is 1).
def CallArgParam :
  NVPTXInst<(outs), (ins i32imm:$a),
            "param$a, ",
            [(CallArg (i32 1), (i32 imm:$a))]>;
def LastCallArgParam :
  NVPTXInst<(outs), (ins i32imm:$a),
            "param$a",
            [(LastCallArg (i32 1), (i32 imm:$a))]>;
1905
// Callee operand of a call, printed as "<callee>, ". The callee is either a
// wrapped global address or a 32-bit / 64-bit register.
def CallVoidInst :
  NVPTXInst<(outs), (ins imem:$addr),
            "$addr, ",
            [(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg :
  NVPTXInst<(outs), (ins Int32Regs:$addr),
            "$addr, ",
            [(CallVoid Int32Regs:$addr)]>;
def CallVoidInstReg64 :
  NVPTXInst<(outs), (ins Int64Regs:$addr),
            "$addr, ",
            [(CallVoid Int64Regs:$addr)]>;

// Trailing ", prototype_<N>;" reference that closes the call text.
def PrototypeInst :
  NVPTXInst<(outs), (ins i32imm:$val),
            ", prototype_$val;",
            [(Prototype (i32 imm:$val))]>;
1918
// Declarations of the "retval<N>" return slot.
//   DeclareRetMemInst    - aligned .b8 array in .param space.
//   DeclareRetScalarInst - scalar in .param space (DeclareRet selector 1).
//   DeclareRetRegInst    - scalar in .reg space   (DeclareRet selector 2).
def DeclareRetMemInst :
  NVPTXInst<(outs),
            (ins i32imm:$align, i32imm:$size, i32imm:$num),
            ".param .align $align .b8 retval$num[$size];",
            [(DeclareRetParam (i32 imm:$align), (i32 imm:$size),
                              (i32 imm:$num))]>;
def DeclareRetScalarInst :
  NVPTXInst<(outs),
            (ins i32imm:$size, i32imm:$num),
            ".param .b$size retval$num;",
            [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
def DeclareRetRegInst :
  NVPTXInst<(outs),
            (ins i32imm:$size, i32imm:$num),
            ".reg .b$size retval$num;",
            [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
1929
// Declarations of the outgoing "param<N>" slots.
//   DeclareParamInst       - aligned .b8 array in .param space.
//   DeclareScalarParamInst - scalar in .param space (last operand 0).
//   DeclareScalarRegInst   - scalar in .reg space   (last operand 1).
def DeclareParamInst :
  NVPTXInst<(outs),
            (ins i32imm:$align, i32imm:$a, i32imm:$size),
            ".param .align $align .b8 param$a[$size];",
            [(DeclareParam (i32 imm:$align), (i32 imm:$a),
                           (i32 imm:$size))]>;
def DeclareScalarParamInst :
  NVPTXInst<(outs),
            (ins i32imm:$a, i32imm:$size),
            ".param .b$size param$a;",
            [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
def DeclareScalarRegInst :
  NVPTXInst<(outs),
            (ins i32imm:$a, i32imm:$size),
            ".reg .b$size param$a;",
            [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
1940
// Register-to-register move selected for the MoveParam node. `asmstr` is the
// type suffix appended to "mov" (for example ".b32").
class MoveParamInst<NVPTXRegClass regclass, string asmstr>
  : NVPTXInst<(outs regclass:$dst),
              (ins regclass:$src),
              !strconcat("mov", asmstr, "\t$dst, $src;"),
              [(set regclass:$dst, (MoveParam regclass:$src))]>;
1945
// MoveParam instantiations. The 16-bit form is spelled out by hand because it
// prints "cvt.u16.u32" instead of a "mov".
def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
def MoveParamI16 :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$src),
            "cvt.u16.u32\t$dst, $src;",
            [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
1953
// Pseudo instruction selected for PseudoUseParam. It takes one register
// operand and prints only an assembly comment naming it.
class PseudoUseParamInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs),
              (ins regclass:$src),
              "// Pseudo use of $src",
              [(PseudoUseParam regclass:$src)]>;
1958
// One pseudo-use record per scalar register class.
def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
1964
1965
1966//
1967// Load / Store Handling
1968//
// Scalar load forms for one destination register class. None of the records
// carries a selection pattern. The volatile, address-space, vector and sign
// qualifiers are LdStCode operands printed through operand modifiers, and
// $fromWidth supplies the numeric width suffix.
//
// Addressing forms:
//   _avar    - imem address
//   _areg    - 32-bit register address
//   _areg_64 - 64-bit register address
//   _ari     - 32-bit register + immediate offset
//   _ari_64  - 64-bit register + immediate offset
//   _asi     - imem address + immediate offset
multiclass LD<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t$dst, [$addr];"),
    []>;

  def _areg : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t$dst, [$addr];"),
    []>;

  def _areg_64 : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t$dst, [$addr];"),
    []>;

  def _ari : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t$dst, [$addr+$offset];"),
    []>;

  def _ari_64 : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t$dst, [$addr+$offset];"),
    []>;

  def _asi : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t$dst, [$addr+$offset];"),
    []>;
}
2001
// Scalar loads for each value type. i8 and i16 both use Int16Regs.
let mayLoad = 1, neverHasSideEffects = 1 in {
  defm LD_i8  : LD<Int16Regs>;
  defm LD_i16 : LD<Int16Regs>;
  defm LD_i32 : LD<Int32Regs>;
  defm LD_i64 : LD<Int64Regs>;
  defm LD_f32 : LD<Float32Regs>;
  defm LD_f64 : LD<Float64Regs>;
}
2010
// Scalar store forms for one source register class. None of the records
// carries a selection pattern. The volatile, address-space, vector and sign
// qualifiers are LdStCode operands printed through operand modifiers, and
// $toWidth supplies the numeric width suffix.
//
// Addressing forms:
//   _avar    - imem address
//   _areg    - 32-bit register address
//   _areg_64 - 64-bit register address
//   _ari     - 32-bit register + immediate offset
//   _ari_64  - 64-bit register + immediate offset
//   _asi     - imem address + immediate offset
multiclass ST<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
      " \t[$addr], $src;"),
    []>;

  def _areg : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
      " \t[$addr], $src;"),
    []>;

  def _areg_64 : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
      " \t[$addr], $src;"),
    []>;

  def _ari : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
      " \t[$addr+$offset], $src;"),
    []>;

  def _ari_64 : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
      " \t[$addr+$offset], $src;"),
    []>;

  def _asi : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
      " \t[$addr+$offset], $src;"),
    []>;
}
2043
// Scalar stores for each value type. i8 and i16 both use Int16Regs.
let mayStore = 1, neverHasSideEffects = 1 in {
  defm ST_i8  : ST<Int16Regs>;
  defm ST_i16 : ST<Int16Regs>;
  defm ST_i32 : ST<Int32Regs>;
  defm ST_i64 : ST<Int64Regs>;
  defm ST_f32 : ST<Float32Regs>;
  defm ST_f64 : ST<Float64Regs>;
}
2052
// The following instructions are used only in and after vector
// elementization. Vector elementization happens at the machine instruction
// level, so these instructions never appear in the DAG.
//
// Each record loads two (_v2_*) or four (_v4_*) registers of `regclass` and
// prints them as a brace-enclosed list. The address-form suffixes are:
//   _avar    - imem address
//   _areg    - 32-bit register address
//   _areg_64 - 64-bit register address
//   _ari     - 32-bit register + immediate offset
//   _ari_64  - 64-bit register + immediate offset
//   _asi     - imem address + immediate offset
multiclass LD_VEC<NVPTXRegClass regclass> {
  // ---- two-element loads ----
  def _v2_avar : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t{{$dst1, $dst2}}, [$addr];"),
    []>;

  def _v2_areg : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t{{$dst1, $dst2}}, [$addr];"),
    []>;

  def _v2_areg_64 : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t{{$dst1, $dst2}}, [$addr];"),
    []>;

  def _v2_ari : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t{{$dst1, $dst2}}, [$addr+$offset];"),
    []>;

  def _v2_ari_64 : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t{{$dst1, $dst2}}, [$addr+$offset];"),
    []>;

  def _v2_asi : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t{{$dst1, $dst2}}, [$addr+$offset];"),
    []>;

  // ---- four-element loads ----
  def _v4_avar : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"),
    []>;

  def _v4_areg : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"),
    []>;

  def _v4_areg_64 : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"),
    []>;

  def _v4_ari : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
    []>;

  def _v4_ari_64 : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
    []>;

  def _v4_asi : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat(
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
    []>;
}
// Vector loads for each element type. i8 and i16 both use Int16Regs.
let mayLoad = 1, neverHasSideEffects = 1 in {
  defm LDV_i8  : LD_VEC<Int16Regs>;
  defm LDV_i16 : LD_VEC<Int16Regs>;
  defm LDV_i32 : LD_VEC<Int32Regs>;
  defm LDV_i64 : LD_VEC<Int64Regs>;
  defm LDV_f32 : LD_VEC<Float32Regs>;
  defm LDV_f64 : LD_VEC<Float64Regs>;
}
2136
// Vector store forms for one source register class. None of the records
// carries a selection pattern. Each record stores two (_v2_*) or four (_v4_*)
// registers, printed as a brace-enclosed list. The width operand is named
// $fromWidth here, unlike the scalar ST multiclass, which names it $toWidth.
//
// Addressing forms:
//   _avar    - imem address
//   _areg    - 32-bit register address
//   _areg_64 - 64-bit register address
//   _ari     - 32-bit register + immediate offset
//   _ari_64  - 64-bit register + immediate offset
//   _asi     - imem address + immediate offset
multiclass ST_VEC<NVPTXRegClass regclass> {
  // ---- two-element stores ----
  def _v2_avar : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t[$addr], {{$src1, $src2}};"),
    []>;

  def _v2_areg : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t[$addr], {{$src1, $src2}};"),
    []>;

  def _v2_areg_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t[$addr], {{$src1, $src2}};"),
    []>;

  def _v2_ari : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t[$addr+$offset], {{$src1, $src2}};"),
    []>;

  def _v2_ari_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t[$addr+$offset], {{$src1, $src2}};"),
    []>;

  def _v2_asi : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t[$addr+$offset], {{$src1, $src2}};"),
    []>;

  // ---- four-element stores ----
  def _v4_avar : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t[$addr], {{$src1, $src2, $src3, $src4}};"),
    []>;

  def _v4_areg : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t[$addr], {{$src1, $src2, $src3, $src4}};"),
    []>;

  def _v4_areg_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t[$addr], {{$src1, $src2, $src3, $src4}};"),
    []>;

  def _v4_ari : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;

  def _v4_ari_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;

  def _v4_asi : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat(
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
      " \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;
}
// Vector stores for each element type. i8 and i16 both use Int16Regs.
let mayStore = 1, neverHasSideEffects = 1 in {
  defm STV_i8  : ST_VEC<Int16Regs>;
  defm STV_i16 : ST_VEC<Int16Regs>;
  defm STV_i32 : ST_VEC<Int32Regs>;
  defm STV_i64 : ST_VEC<Int64Regs>;
  defm STV_f32 : ST_VEC<Float32Regs>;
  defm STV_f64 : ST_VEC<Float64Regs>;
}
2219
2220
2221//---- Conversion ----
2222
// Same-width bit reinterpretation between register classes, printed as
// "mov.b<SzStr>" and selected for the generic bitconvert node.
class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
                   NVPTXRegClass regclassOut>
  : NVPTXInst<(outs regclassOut:$d),
              (ins regclassIn:$a),
              !strconcat("mov.b", SzStr, " \t $d, $a;"),
              [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
2228
// Integer <-> floating-point bitcasts at 32 and 64 bits.
def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs,   Float32Regs>;
def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs,   Float64Regs>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
2233
2234// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
2235// we cannot specify floating-point literals in isel patterns.  Therefore, we
2236// use an integer selp to select either 1 or 0 and then cvt to floating-point.
2237
// sint -> f32
// A signed i1 holds 0 or -1, so a true predicate must convert to -1.0. The
// predicate is therefore materialized as -1/0 (matching the i1 sext patterns)
// before the signed integer-to-float conversion.
def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
          (CVT_f32_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f32 (sint_to_fp Int16Regs:$a)),
          (CVT_f32_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f32 (sint_to_fp Int32Regs:$a)),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(f32 (sint_to_fp Int64Regs:$a)),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
2247
// uint -> f32
// The i1 source is first materialized as the 32-bit integer 1 or 0 with a
// selp, then converted; wider sources are converted directly. All forms
// pass the CvtRN mode operand.
def : Pat<(f32 (uint_to_fp Int1Regs:$a)),
          (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;

def : Pat<(f32 (uint_to_fp Int16Regs:$a)),
          (CVT_f32_u16 Int16Regs:$a, CvtRN)>;

def : Pat<(f32 (uint_to_fp Int32Regs:$a)),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;

def : Pat<(f32 (uint_to_fp Int64Regs:$a)),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
2257
// sint -> f64
// A signed i1 holds 0 or -1, so a true predicate must convert to -1.0. The
// predicate is therefore materialized as -1/0 (matching the i1 sext patterns)
// before the signed integer-to-float conversion.
def : Pat<(f64 (sint_to_fp Int1Regs:$a)),
          (CVT_f64_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f64 (sint_to_fp Int16Regs:$a)),
          (CVT_f64_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f64 (sint_to_fp Int32Regs:$a)),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(f64 (sint_to_fp Int64Regs:$a)),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
2267
// uint -> f64
// The i1 source is first materialized as the 32-bit integer 1 or 0 with a
// selp, then converted; wider sources are converted directly. All forms
// pass the CvtRN mode operand.
def : Pat<(f64 (uint_to_fp Int1Regs:$a)),
          (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;

def : Pat<(f64 (uint_to_fp Int16Regs:$a)),
          (CVT_f64_u16 Int16Regs:$a, CvtRN)>;

def : Pat<(f64 (uint_to_fp Int32Regs:$a)),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;

def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
2277
2278
// f32 -> sint
// i1 result: convert to a 32-bit integer with the CvtRZI mode first, then
// test the low bit, exactly as the integer trunc -> i1 patterns do.
// Comparing the raw float bits against 0 (the previous form) produced true
// for +0.0 and false for -1.0, i.e. the inverse of the required result.
def : Pat<(i1 (fp_to_sint Float32Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_s32_f32 Float32Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
// For the wider results the FTZ form is listed first and is guarded by
// doF32FTZ; the unguarded form follows it.
def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
          (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
          (CVT_s16_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
2294
// f32 -> uint
// i1 result: convert to a 32-bit integer with the CvtRZI mode first, then
// test the low bit, exactly as the integer trunc -> i1 patterns do.
// Comparing the raw float bits against 0 (the previous form) produced true
// for +0.0 and false for 1.0, i.e. the inverse of the required result.
def : Pat<(i1 (fp_to_uint Float32Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_u32_f32 Float32Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
// For the wider results the FTZ form is listed first and is guarded by
// doF32FTZ; the unguarded form follows it.
def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
          (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
          (CVT_u16_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
2310
// f64 -> sint
// i1 result: convert to a 32-bit integer with the CvtRZI mode first, then
// test the low bit, exactly as the integer trunc -> i1 patterns do.
// Comparing the raw double bits against 0 (the previous form) produced true
// for +0.0 and false for -1.0, i.e. the inverse of the required result.
def : Pat<(i1 (fp_to_sint Float64Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_s32_f64 Float64Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
def : Pat<(i16 (fp_to_sint Float64Regs:$a)),
          (CVT_s16_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float64Regs:$a)),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float64Regs:$a)),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
2320
// f64 -> uint
// i1 result: convert to a 32-bit integer with the CvtRZI mode first, then
// test the low bit, exactly as the integer trunc -> i1 patterns do.
// Comparing the raw double bits against 0 (the previous form) produced true
// for +0.0 and false for 1.0, i.e. the inverse of the required result.
def : Pat<(i1 (fp_to_uint Float64Regs:$a)),
          (SETP_b32ri (ANDb32ri (CVT_u32_f64 Float64Regs:$a, CvtRZI), 1),
                      1, CmpEQ)>;
def : Pat<(i16 (fp_to_uint Float64Regs:$a)),
          (CVT_u16_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float64Regs:$a)),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float64Regs:$a)),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
2330
// Extensions from i1 are implemented with a select on the predicate.

// sext i1: true -> -1, false -> 0
def : Pat<(i16 (sext Int1Regs:$a)), (SELP_s16ii -1, 0, Int1Regs:$a)>;
def : Pat<(i32 (sext Int1Regs:$a)), (SELP_s32ii -1, 0, Int1Regs:$a)>;
def : Pat<(i64 (sext Int1Regs:$a)), (SELP_s64ii -1, 0, Int1Regs:$a)>;

// zext i1: true -> 1, false -> 0
def : Pat<(i16 (zext Int1Regs:$a)), (SELP_u16ii 1, 0, Int1Regs:$a)>;
def : Pat<(i32 (zext Int1Regs:$a)), (SELP_u32ii 1, 0, Int1Regs:$a)>;
def : Pat<(i64 (zext Int1Regs:$a)), (SELP_u64ii 1, 0, Int1Regs:$a)>;

// anyext i1: true -> -1, false -> 0
def : Pat<(i16 (anyext Int1Regs:$a)), (SELP_u16ii -1, 0, Int1Regs:$a)>;
def : Pat<(i32 (anyext Int1Regs:$a)), (SELP_u32ii -1, 0, Int1Regs:$a)>;
def : Pat<(i64 (anyext Int1Regs:$a)), (SELP_u64ii -1, 0, Int1Regs:$a)>;
2354
// Extensions from i16 and i32 map onto cvt instructions with the CvtNONE
// mode operand. anyext reuses the same unsigned cvt as zext.

// sext i16
def : Pat<(i32 (sext Int16Regs:$a)), (CVT_s32_s16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (sext Int16Regs:$a)), (CVT_s64_s16 Int16Regs:$a, CvtNONE)>;

// zext i16
def : Pat<(i32 (zext Int16Regs:$a)), (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (zext Int16Regs:$a)), (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;

// anyext i16
def : Pat<(i32 (anyext Int16Regs:$a)), (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (anyext Int16Regs:$a)), (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;

// sext i32
def : Pat<(i64 (sext Int32Regs:$a)), (CVT_s64_s32 Int32Regs:$a, CvtNONE)>;

// zext i32
def : Pat<(i64 (zext Int32Regs:$a)), (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;

// anyext i32
def : Pat<(i64 (anyext Int32Regs:$a)), (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;
2384
2385
// Integer truncation. Results of 16 or 32 bits use an unsigned cvt with the
// CvtNONE mode operand. An i1 result masks the source with 1 and compares
// the masked value with 1 for equality.

// truncate i64
def : Pat<(i32 (trunc Int64Regs:$a)), (CVT_u32_u64 Int64Regs:$a, CvtNONE)>;
def : Pat<(i16 (trunc Int64Regs:$a)), (CVT_u16_u64 Int64Regs:$a, CvtNONE)>;
def : Pat<(i1 (trunc Int64Regs:$a)),
          (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>;

// truncate i32
def : Pat<(i16 (trunc Int32Regs:$a)), (CVT_u16_u32 Int32Regs:$a, CvtNONE)>;
def : Pat<(i1 (trunc Int32Regs:$a)),
          (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>;

// truncate i16
def : Pat<(i1 (trunc Int16Regs:$a)),
          (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>;
2403
// sext_inreg: each (register width, source width) pair has a dedicated
// CVT_INREG_* instruction.
def : Pat<(sext_inreg Int16Regs:$a, i8),
          (CVT_INREG_s16_s8 Int16Regs:$a)>;
def : Pat<(sext_inreg Int32Regs:$a, i8),
          (CVT_INREG_s32_s8 Int32Regs:$a)>;
def : Pat<(sext_inreg Int32Regs:$a, i16),
          (CVT_INREG_s32_s16 Int32Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i8),
          (CVT_INREG_s64_s8 Int64Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i16),
          (CVT_INREG_s64_s16 Int64Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i32),
          (CVT_INREG_s64_s32 Int64Regs:$a)>;
2411
2412
// Select instructions with 32-bit predicates.
// The 32-bit condition is reduced to a predicate by masking it with 1 and
// comparing the result with 1; that predicate then drives the selp.
def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
          (SELP_b16rr
            Int16Regs:$a, Int16Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
          (SELP_b32rr
            Int32Regs:$a, Int32Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
          (SELP_b64rr
            Int64Regs:$a, Int64Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
          (SELP_f32rr
            Float32Regs:$a, Float32Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
          (SELP_f64rr
            Float64Regs:$a, Float64Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
2429
2430
// Pack a set of smaller registers into one larger register with a vector
// "mov". These records have no selection pattern.
def V4I16toI64 :
  NVPTXInst<(outs Int64Regs:$d),
            (ins Int16Regs:$s1, Int16Regs:$s2, Int16Regs:$s3, Int16Regs:$s4),
            "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
            []>;
def V2I16toI32 :
  NVPTXInst<(outs Int32Regs:$d),
            (ins Int16Regs:$s1, Int16Regs:$s2),
            "mov.b32\t$d, {{$s1, $s2}};",
            []>;
def V2I32toI64 :
  NVPTXInst<(outs Int64Regs:$d),
            (ins Int32Regs:$s1, Int32Regs:$s2),
            "mov.b64\t$d, {{$s1, $s2}};",
            []>;
def V2F32toF64 :
  NVPTXInst<(outs Float64Regs:$d),
            (ins Float32Regs:$s1, Float32Regs:$s2),
            "mov.b64\t$d, {{$s1, $s2}};",
            []>;
2449
// Unpack one larger register into a set of smaller registers with a vector
// "mov". These records have no selection pattern.
def I64toV4I16 :
  NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, Int16Regs:$d3, Int16Regs:$d4),
            (ins Int64Regs:$s),
            "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
            []>;
def I32toV2I16 :
  NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
            (ins Int32Regs:$s),
            "mov.b32\t{{$d1, $d2}}, $s;",
            []>;
def I64toV2I32 :
  NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
            (ins Int64Regs:$s),
            "mov.b64\t{{$d1, $d2}}, $s;",
            []>;
def F64toV2F32 :
  NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
            (ins Float64Regs:$s),
            "mov.b64\t{{$d1, $d2}}, $s;",
            []>;
2468
// Count leading zeros. Both instruction forms write a 32-bit result, also
// for a 64-bit source.
def CLZr32 :
  NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
            "clz.b32\t$d, $a;",
            []>;
def CLZr64 :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "clz.b64\t$d, $a;",
            []>;

// 32-bit: ctlz and ctlz_zero_undef map straight onto the instruction.
def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>;
def : Pat<(ctlz_zero_undef Int32Regs:$a), (CLZr32 Int32Regs:$a)>;

// 64-bit: the PTX result is only 32 bits wide, so it is zero-extended to
// 64 bits to match the LLVM semantics.
def : Pat<(ctlz Int64Regs:$a),
          (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
def : Pat<(ctlz_zero_undef Int64Regs:$a),
          (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;

// 16-bit: zero-extend the source to 32 bits, count, and truncate the count
// back to 16 bits (the ctlz of a 16-bit value always fits in 16 bits). The
// 16 high-order zeros introduced by the extension were counted too, so 16
// is subtracted from the result.
def : Pat<(ctlz Int16Regs:$a),
          (SUBi16ri
            (CVT_u16_u32 (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
                         CvtNONE),
            16)>;
def : Pat<(ctlz_zero_undef Int16Regs:$a),
          (SUBi16ri
            (CVT_u16_u32 (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
                         CvtNONE),
            16)>;
2502
// Population count.  Both the 32-bit and the 64-bit form write their result
// to a 32-bit register; selection is handled by the patterns below.
def POPCr32
  : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
              "popc.b32\t$d, $a;", []>;
def POPCr64
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "popc.b64\t$d, $a;", []>;
2510
// i32: ctpop maps directly onto popc.b32.
def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>;

// i64: popc.b64 produces a 32-bit result, so it is zero-extended to 64 bits
// to match the LLVM semantics.
def : Pat<(ctpop Int64Regs:$a),
          (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;

// i16: zero-extend the operand to 32 bits, count with popc.b32, and truncate
// the count back to 16 bits (the ctpop of a 16-bit value always fits in fewer
// than 16 bits).
def : Pat<(ctpop Int16Regs:$a),
          (CVT_u16_u32
            (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
            CvtNONE)>;
2526
// fround f64 -> f32.  The doF32FTZ-guarded form is listed ahead of the
// unguarded fallback; keep that order.
def : Pat<(f32 (fround Float64Regs:$a)),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(f32 (fround Float64Regs:$a)),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;

// fextend f32 -> f64.  Same arrangement: guarded FTZ form first, then the
// unguarded fallback.
def : Pat<(f64 (fextend Float32Regs:$a)),
          (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(f64 (fextend Float32Regs:$a)),
          (CVT_f64_f32 Float32Regs:$a, CvtNONE)>;
2538
// DAG node for NVPTXISD::RET_FLAG: no typed operands (SDTNone), carries a
// chain and optionally takes incoming glue.
def retflag
  : SDNode<"NVPTXISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue]>;
2541
//-----------------------------------
// Control-flow
//-----------------------------------

// Every instruction in this group is a block terminator.
let isTerminator = 1 in {
  // Function return.
  let isReturn = 1, isBarrier = 1 in
    def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;

  // Predicated branches.  CBranch is taken when $a is set and is selected
  // for brcond; CBranchOther prints the negated predicate ("@!$a") and has
  // no pattern of its own here.
  let isBranch = 1 in {
    def CBranch
      : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
                  "@$a bra \t$target;",
                  [(brcond Int1Regs:$a, bb:$target)]>;
    def CBranchOther
      : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
                  "@!$a bra \t$target;",
                  []>;
  }

  // Unconditional branch.
  let isBranch = 1, isBarrier = 1 in
    def GOTO
      : NVPTXInst<(outs), (ins brtarget:$target),
                  "bra.uni \t$target;",
                  [(br bb:$target)]>;
}
2564
// brcond on an i32 condition: build the predicate by comparing the value
// against 0 with CmpNE, then branch on it with CBranch.
def : Pat<
  (brcond Int32Regs:$a, bb:$target),
  (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>;
2567
// SelectionDAGBuilder::visitSwitchCase() inverts the condition of a
// conditional branch when the target block is the next block, so that the
// code can fall through to it.  The inversion is done by 'xor condition, 1',
// which is translated to (setne condition, -1).  PTX supports
// '@!pred bra target', so select that form directly.
def : Pat<
  (brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
  (CBranchOther Int1Regs:$a, bb:$target)>;
2577
// Call
// Type profiles for the call-sequence markers: every operand is i32.
def SDT_NVPTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
def SDT_NVPTXCallSeqEnd
  : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;

// DAG nodes for ISD::CALLSEQ_START and ISD::CALLSEQ_END.
def callseq_start
  : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
           [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def callseq_end
  : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPSideEffect]>;
2588
// NVPTXISD::CALL node: no results and a single i32 operand.
def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def call
  : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

// Operand type used for the callee of CALL.
def calltarget : Operand<i32>;

// Call instruction; selection goes through the standalone patterns that
// follow rather than an inline pattern.
let isCall = 1 in
  def CALL
    : NVPTXInst<(outs), (ins calltarget:$dst), "call \t$dst, (1);", []>;
2597
// Select CALL for both target-global-address and target-external-symbol
// callees.
def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>;
def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>;
2602
// Pseudo instructions.  This class forwards its arguments to NVPTXInst
// unchanged and adds nothing of its own.
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
  : NVPTXInst<outs, ins, asmstr, pattern>;
2606
// Call-sequence markers.  Callseq_Start prints a comment, opens a brace
// scope and declares the .b32 register temp_param_reg; Callseq_End closes
// that scope and prints a closing comment.
// @TODO: The doubled braces in the asm strings are a trick to emit literal
// curly braces.  Can this be cleaned up without TableGen modifications?
def Callseq_Start
  : NVPTXInst<
      (outs), (ins i32imm:$amt),
  "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
      [(callseq_start timm:$amt)]>;
def Callseq_End
  : NVPTXInst<
      (outs), (ins i32imm:$amt1, i32imm:$amt2),
  "\n\t//{{\n\t}}// Callseq End $amt1",
      [(callseq_end timm:$amt1, timm:$amt2)]>;
2615
// Trap instruction: selected for the trap node and printed as "trap;".
def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>;
2621
// Call prototype wrapper.
// NVPTXISD::CallPrototype has no results and one integer operand.
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def CallPrototype
  : SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Operand holding the prototype identifier; printed by printProtoIdent.
def ProtoIdent : Operand<i32> {
  let PrintMethod = "printProtoIdent";
}

// The instruction's entire asm text is the printed identifier operand.
def CALL_PROTOTYPE
  : NVPTXInst<(outs), (ins ProtoIdent:$ident),
              "$ident",
              [(CallPrototype (i32 texternalsym:$ident))]>;
2633
2634
2635
2636include "NVPTXIntrinsics.td"
2637
2638
2639//-----------------------------------
2640// Notes
2641//-----------------------------------
// BSWAP is currently expanded. The following would be more efficient:
// - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit
//   registers
// - for sm_20, use prmt (use vector scalar mov to get the pack and
//   unpack). sm_20 supports native 32-bit registers, but not native 16-bit
//   registers.
2647