NVPTXInstrInfo.td revision cd81d94322a39503e4a3e87b6ee03d4fcb3465fb
1//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the PTX instructions in TableGen format.
11//
12//===----------------------------------------------------------------------===//
13
14include "NVPTXInstrFormats.td"
15
// A NOP instruction.
// It has no outputs, no inputs, an empty assembly string and no selection
// pattern.
def NOP : NVPTXInst<(outs), (ins), "", []>;
18
// List of vector specific properties.
// Each record instantiates VecInstTypeEnum with a distinct numeric tag
// (1 through 7, plus 15 for isVecOther).
// NOTE(review): presumably these tags are consumed by the instruction format
// classes pulled in from NVPTXInstrFormats.td -- confirm there.
def isVecLD      : VecInstTypeEnum<1>;
def isVecST      : VecInstTypeEnum<2>;
def isVecBuild   : VecInstTypeEnum<3>;
def isVecShuffle : VecInstTypeEnum<4>;
def isVecExtract : VecInstTypeEnum<5>;
def isVecInsert  : VecInstTypeEnum<6>;
def isVecDest    : VecInstTypeEnum<7>;
def isVecOther   : VecInstTypeEnum<15>;
28
29//===----------------------------------------------------------------------===//
30// NVPTX Operand Definitions.
31//===----------------------------------------------------------------------===//
32
// Operand of type OtherVT.
// NOTE(review): judging by the name this is the target operand of branch
// instructions -- confirm at the use sites.
def brtarget    : Operand<OtherVT>;
34
// Conversion-mode immediates for the cvt instructions.
// The numeric values must stay in sync with the enum in NVPTX.h.

// Base modes (0x0 - 0x8).
def CvtNONE     : PatLeaf<(i32 0x0)>;
def CvtRNI      : PatLeaf<(i32 0x1)>;
def CvtRZI      : PatLeaf<(i32 0x2)>;
def CvtRMI      : PatLeaf<(i32 0x3)>;
def CvtRPI      : PatLeaf<(i32 0x4)>;
def CvtRN       : PatLeaf<(i32 0x5)>;
def CvtRZ       : PatLeaf<(i32 0x6)>;
def CvtRM       : PatLeaf<(i32 0x7)>;
def CvtRP       : PatLeaf<(i32 0x8)>;

// _FTZ variants: the corresponding base value plus 0x10.
def CvtNONE_FTZ : PatLeaf<(i32 0x10)>;
def CvtRNI_FTZ  : PatLeaf<(i32 0x11)>;
def CvtRZI_FTZ  : PatLeaf<(i32 0x12)>;
def CvtRMI_FTZ  : PatLeaf<(i32 0x13)>;
def CvtRPI_FTZ  : PatLeaf<(i32 0x14)>;
def CvtRN_FTZ   : PatLeaf<(i32 0x15)>;
def CvtRZ_FTZ   : PatLeaf<(i32 0x16)>;
def CvtRM_FTZ   : PatLeaf<(i32 0x17)>;
def CvtRP_FTZ   : PatLeaf<(i32 0x18)>;

// Saturating modes.
def CvtSAT      : PatLeaf<(i32 0x20)>;
def CvtSAT_FTZ  : PatLeaf<(i32 0x30)>;

// i32 operand that carries one of the values above; it is printed through
// the "printCvtMode" print method.
def CvtMode : Operand<i32> {
  let PrintMethod = "printCvtMode";
}
63
// Comparison-mode immediates.
// The numeric values must stay in sync with the enum in NVPTX.h.

// Base modes (0 - 17).
def CmpEQ      : PatLeaf<(i32 0)>;
def CmpNE      : PatLeaf<(i32 1)>;
def CmpLT      : PatLeaf<(i32 2)>;
def CmpLE      : PatLeaf<(i32 3)>;
def CmpGT      : PatLeaf<(i32 4)>;
def CmpGE      : PatLeaf<(i32 5)>;
def CmpLO      : PatLeaf<(i32 6)>;
def CmpLS      : PatLeaf<(i32 7)>;
def CmpHI      : PatLeaf<(i32 8)>;
def CmpHS      : PatLeaf<(i32 9)>;
def CmpEQU     : PatLeaf<(i32 10)>;
def CmpNEU     : PatLeaf<(i32 11)>;
def CmpLTU     : PatLeaf<(i32 12)>;
def CmpLEU     : PatLeaf<(i32 13)>;
def CmpGTU     : PatLeaf<(i32 14)>;
def CmpGEU     : PatLeaf<(i32 15)>;
def CmpNUM     : PatLeaf<(i32 16)>;
def CmpNAN     : PatLeaf<(i32 17)>;

// _FTZ variants: the corresponding base value plus 0x100.
def CmpEQ_FTZ  : PatLeaf<(i32 0x100)>;
def CmpNE_FTZ  : PatLeaf<(i32 0x101)>;
def CmpLT_FTZ  : PatLeaf<(i32 0x102)>;
def CmpLE_FTZ  : PatLeaf<(i32 0x103)>;
def CmpGT_FTZ  : PatLeaf<(i32 0x104)>;
def CmpGE_FTZ  : PatLeaf<(i32 0x105)>;
def CmpLO_FTZ  : PatLeaf<(i32 0x106)>;
def CmpLS_FTZ  : PatLeaf<(i32 0x107)>;
def CmpHI_FTZ  : PatLeaf<(i32 0x108)>;
def CmpHS_FTZ  : PatLeaf<(i32 0x109)>;
def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>;
def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>;
def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>;
def CmpLEU_FTZ : PatLeaf<(i32 0x10D)>;
def CmpGTU_FTZ : PatLeaf<(i32 0x10E)>;
def CmpGEU_FTZ : PatLeaf<(i32 0x10F)>;
def CmpNUM_FTZ : PatLeaf<(i32 0x110)>;
def CmpNAN_FTZ : PatLeaf<(i32 0x111)>;

// i32 operand that carries one of the values above; it is printed through
// the "printCmpMode" print method.
def CmpMode : Operand<i32> {
  let PrintMethod = "printCmpMode";
}
107
// f32 constant helpers.
// Each record is simultaneously an f32 Operand, a PatLeaf over an f32 fpimm
// and an SDNodeXForm whose transform returns a fixed f32 target constant
// (0.0 for F32ConstZero, 1.0 for F32ConstOne).
// NOTE(review): the PatLeaf part carries no predicate, so matching by itself
// does not appear to check the immediate's value -- confirm at the use sites
// that this is intended.
def F32ConstZero : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
    return CurDAG->getTargetConstantFP(0.0, MVT::f32);
  }]>;
def F32ConstOne : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
    return CurDAG->getTargetConstantFP(1.0, MVT::f32);
  }]>;
114
115//===----------------------------------------------------------------------===//
116// NVPTX Instruction Predicate Definitions
117//===----------------------------------------------------------------------===//
118
119
// Subtarget queries: 32-bit atomics / reductions.
def hasAtomRedG32         : Predicate<"Subtarget.hasAtomRedG32()">;
def hasAtomRedS32         : Predicate<"Subtarget.hasAtomRedS32()">;
def hasAtomRedGen32       : Predicate<"Subtarget.hasAtomRedGen32()">;
// True when the generic form is unavailable but the G32 form is.
def useAtomRedG32forGen32
  : Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;

def hasBrkPt              : Predicate<"Subtarget.hasBrkPt()">;

// Subtarget queries: 64-bit atomics / reductions.
def hasAtomRedG64         : Predicate<"Subtarget.hasAtomRedG64()">;
def hasAtomRedS64         : Predicate<"Subtarget.hasAtomRedS64()">;
def hasAtomRedGen64       : Predicate<"Subtarget.hasAtomRedGen64()">;
// True when the generic form is unavailable but the G64 form is.
def useAtomRedG64forGen64
  : Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;

// Other subtarget queries.
def hasAtomAddF32         : Predicate<"Subtarget.hasAtomAddF32()">;
def hasVote               : Predicate<"Subtarget.hasVote()">;
def hasDouble             : Predicate<"Subtarget.hasDouble()">;
def reqPTX20              : Predicate<"Subtarget.reqPTX20()">;
def hasLDG                : Predicate<"Subtarget.hasLDG()">;
def hasLDU                : Predicate<"Subtarget.hasLDU()">;
def hasGenericLdSt        : Predicate<"Subtarget.hasGenericLdSt()">;

// f32 flush-to-zero on / off.
def doF32FTZ              : Predicate<"useF32FTZ()">;
def doNoF32FTZ            : Predicate<"!useF32FTZ()">;

// FMA-related switches; the _ftz forms additionally require useF32FTZ().
def doFMAF32              : Predicate<"doFMAF32">;
def doFMAF32_ftz          : Predicate<"(doFMAF32 && useF32FTZ())">;
def doFMAF32AGG           : Predicate<"doFMAF32AGG">;
def doFMAF32AGG_ftz       : Predicate<"(doFMAF32AGG && useF32FTZ())">;
def doFMAF64              : Predicate<"doFMAF64">;
def doFMAF64AGG           : Predicate<"doFMAF64AGG">;

def doMulWide             : Predicate<"doMulWide">;

def allowFMA              : Predicate<"allowFMA">;
def allowFMA_ftz          : Predicate<"(allowFMA && useF32FTZ())">;

// f32 division level: 0 selects the approximate forms, 1 the full forms.
def do_DIVF32_APPROX      : Predicate<"getDivF32Level()==0">;
def do_DIVF32_FULL        : Predicate<"getDivF32Level()==1">;

// f32 square root: approximate unless usePrecSqrtF32() holds.
def do_SQRTF32_APPROX     : Predicate<"!usePrecSqrtF32()">;
def do_SQRTF32_RN         : Predicate<"usePrecSqrtF32()">;

def hasHWROT32            : Predicate<"Subtarget.hasHWROT32()">;
def noHWROT32             : Predicate<"!Subtarget.hasHWROT32()">;

// Predicate that always holds.
def true                  : Predicate<"1">;

def hasPTX31              : Predicate<"Subtarget.getPTXVersion() >= 31">;
166
167
168//===----------------------------------------------------------------------===//
169// Some Common Instruction Class Templates
170//===----------------------------------------------------------------------===//
171
// Three-operand integer instruction template.
// Produces a register-register (rr) and a register-immediate (ri) form for
// each of the 64-, 32- and 16-bit integer register classes.  OpcStr is the
// mnemonic without the bit width (e.g. "add.s"); the width is appended here.
// OpNode is the DAG node every form selects from.
multiclass I3<string OpcStr, SDNode OpNode> {
  // 64-bit forms.
  def i64rr :
    NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$a, Int64Regs:$b),
              !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
  def i64ri :
    NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$a, i64imm:$b),
              !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;

  // 32-bit forms.
  def i32rr :
    NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, Int32Regs:$b),
              !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
              [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri :
    NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, i32imm:$b),
              !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
              [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;

  // 16-bit forms.
  def i16rr :
    NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$a, Int16Regs:$b),
              !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
  def i16ri :
    NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$a, i16imm:$b),
              !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
}
195
// 32-bit only add/sub template.
// Produces a register-register and a register-immediate form; ".s32" is
// appended to OpcStr to build the mnemonic.
multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
  def i32rr :
    NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, Int32Regs:$b),
              !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
              [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri :
    NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, i32imm:$b),
              !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
              [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
}
206
// Three-operand floating point template without an explicit rounding mode.
// Produces rr/ri forms for f64 and f32, plus ".ftz" f32 forms.  Every form
// is predicated: the .ftz forms require allowFMA_ftz, all others allowFMA.
multiclass F3<string OpcStr, SDNode OpNode> {
  // f64 forms.
  def f64rr :
    NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a, Float64Regs:$b),
              !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
              [(set Float64Regs:$dst,
                 (OpNode Float64Regs:$a, Float64Regs:$b))]>,
    Requires<[allowFMA]>;
  def f64ri :
    NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a, f64imm:$b),
              !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
              [(set Float64Regs:$dst,
                 (OpNode Float64Regs:$a, fpimm:$b))]>,
    Requires<[allowFMA]>;

  // f32 flush-to-zero forms.
  def f32rr_ftz :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[allowFMA_ftz]>;
  def f32ri_ftz :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, fpimm:$b))]>,
    Requires<[allowFMA_ftz]>;

  // Plain f32 forms.
  def f32rr :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[allowFMA]>;
  def f32ri :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, fpimm:$b))]>,
    Requires<[allowFMA]>;
}
245
// Three-operand floating point template with the ".rn" rounding modifier.
// Produces rr/ri forms for f64 and f32, plus ".rn.ftz" f32 forms.  Only the
// .ftz forms carry a predicate (doF32FTZ).
multiclass F3_rn<string OpcStr, SDNode OpNode> {
  // f64 forms.
  def f64rr :
    NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a, Float64Regs:$b),
              !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
              [(set Float64Regs:$dst,
                 (OpNode Float64Regs:$a, Float64Regs:$b))]>;
  def f64ri :
    NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a, f64imm:$b),
              !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
              [(set Float64Regs:$dst,
                 (OpNode Float64Regs:$a, fpimm:$b))]>;

  // f32 flush-to-zero forms.
  def f32rr_ftz :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[doF32FTZ]>;
  def f32ri_ftz :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, fpimm:$b))]>,
    Requires<[doF32FTZ]>;

  // Plain f32 forms.
  def f32rr :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, Float32Regs:$b))]>;
  def f32ri :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
              [(set Float32Regs:$dst,
                 (OpNode Float32Regs:$a, fpimm:$b))]>;
}
280
// Two-operand (unary) floating point template.
// Produces an f64 form, an f32 ".ftz" form guarded by doF32FTZ, and a plain
// f32 form.
multiclass F2<string OpcStr, SDNode OpNode> {
  def f64 :
    NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a),
              !strconcat(OpcStr, ".f64 \t$dst, $a;"),
              [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
  def f32_ftz :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a),
              !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
    Requires<[doF32FTZ]>;
  def f32 :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a),
              !strconcat(OpcStr, ".f32 \t$dst, $a;"),
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
}
293
294//===----------------------------------------------------------------------===//
295// NVPTX Instructions.
296//===----------------------------------------------------------------------===//
297
298//-----------------------------------
299// General Type Conversion
300//-----------------------------------
301
302let neverHasSideEffects = 1 in {
// Generate a cvt to the given type from all possible types.
// RC is the register class of the result and FromName is the type name that
// is printed first after the mode modifiers; the source type is fixed by each
// def's suffix.  Every instance takes a CvtMode immediate selecting the
// conversion mode; pass CvtNONE to omit a conversion mode.  None of the
// instances has a selection pattern.
multiclass CVT_FROM_ALL<string FromName, RegisterClass RC> {
  // 16-bit sources (s16, u16 and f16 all live in Int16Regs).
  def _s16 :
    NVPTXInst<(outs RC:$dst), (ins Int16Regs:$src, CvtMode:$mode),
              !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                         FromName, ".s16\t$dst, $src;"),
              []>;
  def _u16 :
    NVPTXInst<(outs RC:$dst), (ins Int16Regs:$src, CvtMode:$mode),
              !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                         FromName, ".u16\t$dst, $src;"),
              []>;
  def _f16 :
    NVPTXInst<(outs RC:$dst), (ins Int16Regs:$src, CvtMode:$mode),
              !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                         FromName, ".f16\t$dst, $src;"),
              []>;

  // 32-bit integer sources.
  def _s32 :
    NVPTXInst<(outs RC:$dst), (ins Int32Regs:$src, CvtMode:$mode),
              !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                         FromName, ".s32\t$dst, $src;"),
              []>;
  def _u32 :
    NVPTXInst<(outs RC:$dst), (ins Int32Regs:$src, CvtMode:$mode),
              !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                         FromName, ".u32\t$dst, $src;"),
              []>;

  // 64-bit integer sources.
  def _s64 :
    NVPTXInst<(outs RC:$dst), (ins Int64Regs:$src, CvtMode:$mode),
              !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                         FromName, ".s64\t$dst, $src;"),
              []>;
  def _u64 :
    NVPTXInst<(outs RC:$dst), (ins Int64Regs:$src, CvtMode:$mode),
              !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                         FromName, ".u64\t$dst, $src;"),
              []>;

  // Floating point sources.
  def _f32 :
    NVPTXInst<(outs RC:$dst), (ins Float32Regs:$src, CvtMode:$mode),
              !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                         FromName, ".f32\t$dst, $src;"),
              []>;
  def _f64 :
    NVPTXInst<(outs RC:$dst), (ins Float64Regs:$src, CvtMode:$mode),
              !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
                         FromName, ".f64\t$dst, $src;"),
              []>;
}
353
// Generate a cvt to all possible types.
// Each line instantiates CVT_FROM_ALL once per result type, passing the type
// name printed in the mnemonic and the register class that holds the result.
// s16, u16 and f16 results all use Int16Regs.
defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
364
// In-register conversions.
// Unlike the CVT_* instances above, source and destination use the same
// register class here; only the printed type suffixes differ.  None of these
// has a selection pattern.
def CVT_INREG_s16_s8 :
  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
            "cvt.s16.s8 \t$dst, $src;", []>;

def CVT_INREG_s32_s8 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
            "cvt.s32.s8 \t$dst, $src;", []>;
def CVT_INREG_s32_s16 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
            "cvt.s32.s16 \t$dst, $src;", []>;

def CVT_INREG_s64_s8 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
            "cvt.s64.s8 \t$dst, $src;", []>;
def CVT_INREG_s64_s16 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
            "cvt.s64.s16 \t$dst, $src;", []>;
def CVT_INREG_s64_s32 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
            "cvt.s64.s32 \t$dst, $src;", []>;
380}
381
382//-----------------------------------
383// Integer Arithmetic
384//-----------------------------------
385
// Add/sub on 1-bit predicate registers.
// Both the register-register and the register-immediate form are emitted as
// "xor.pred", whichever OpNode the multiclass is instantiated with.
multiclass ADD_SUB_i1<SDNode OpNode> {
  def _rr :
    NVPTXInst<(outs Int1Regs:$dst),
              (ins Int1Regs:$a, Int1Regs:$b),
              "xor.pred \t$dst, $a, $b;",
              [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
  def _ri :
    NVPTXInst<(outs Int1Regs:$dst),
              (ins Int1Regs:$a, i1imm:$b),
              "xor.pred \t$dst, $a, $b;",
              [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
}

// i1 add and i1 sub share the template above.
defm ADD_i1 : ADD_SUB_i1<add>;
defm SUB_i1 : ADD_SUB_i1<sub>;
397
398
// Plain integer add/sub for 16/32/64 bits (rr and ri forms via I3).
defm ADD : I3<"add.s", add>;
defm SUB : I3<"sub.s", sub>;

// 32-bit add/sub selected from the addc/subc nodes.
defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;

// 32-bit add/sub selected from the adde/sube nodes.
defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
407
// mul.wide PTX instruction support.

// Immediate that fits in a signed 32-bit integer.
def SInt32Const : PatLeaf<(imm), [{
  return N->getAPIntValue().isSignedIntN(32);
}]>;
415
// Immediate that fits in an unsigned 32-bit integer.
def UInt32Const : PatLeaf<(imm), [{
  return N->getAPIntValue().isIntN(32);
}]>;
422
// Immediate that fits in a signed 16-bit integer.
def SInt16Const : PatLeaf<(imm), [{
  return N->getAPIntValue().isSignedIntN(16);
}]>;
429
// Immediate that fits in an unsigned 16-bit integer.
def UInt16Const : PatLeaf<(imm), [{
  return N->getAPIntValue().isIntN(16);
}]>;
436
// Immediate v with 0 <= v < 32.
// Only then does the result of (x << v) stay within i32.
def Int5Const : PatLeaf<(imm), [{
  const APInt &Amt = N->getAPIntValue();
  return Amt.sge(0) && Amt.slt(32);
}]>;
445
// Immediate v with 0 <= v < 16.
// Only then does the result of (x << v) stay within i16.
def Int4Const : PatLeaf<(imm), [{
  const APInt &Amt = N->getAPIntValue();
  return Amt.sge(0) && Amt.slt(16);
}]>;
454
// Turns a shift amount v into the 32-bit multiplier (1 << v), returned as an
// i32 target constant.
def SHL2MUL32 : SDNodeXForm<imm, [{
  const APInt &ShAmt = N->getAPIntValue();
  APInt One(32, 1);
  return CurDAG->getTargetConstant(One.shl(ShAmt), MVT::i32);
}]>;
460
// Turns a shift amount v into the 16-bit multiplier (1 << v), returned as an
// i16 target constant.
def SHL2MUL16 : SDNodeXForm<imm, [{
  const APInt &ShAmt = N->getAPIntValue();
  APInt One(16, 1);
  return CurDAG->getTargetConstant(One.shl(ShAmt), MVT::i16);
}]>;
466
// mul.wide instruction records.  None of them carries a selection pattern of
// its own; they are only reachable through explicit Pat<> records.
// Naming: S/U = signed/unsigned, 64/32 = result width, Imm = immediate second
// operand of the source width, Imm64/Imm32 = immediate second operand of the
// result width.

// 32 x 32 -> 64, signed.
def MULWIDES64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
            "mul.wide.s32 \t$dst, $a, $b;", []>;
def MULWIDES64Imm :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
            "mul.wide.s32 \t$dst, $a, $b;", []>;
def MULWIDES64Imm64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
            "mul.wide.s32 \t$dst, $a, $b;", []>;

// 32 x 32 -> 64, unsigned.
def MULWIDEU64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
            "mul.wide.u32 \t$dst, $a, $b;", []>;
def MULWIDEU64Imm :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
            "mul.wide.u32 \t$dst, $a, $b;", []>;
def MULWIDEU64Imm64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
            "mul.wide.u32 \t$dst, $a, $b;", []>;

// 16 x 16 -> 32, signed.
def MULWIDES32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
            "mul.wide.s16 \t$dst, $a, $b;", []>;
def MULWIDES32Imm :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
            "mul.wide.s16 \t$dst, $a, $b;", []>;
def MULWIDES32Imm32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
            "mul.wide.s16 \t$dst, $a, $b;", []>;

// 16 x 16 -> 32, unsigned.
def MULWIDEU32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
            "mul.wide.u16 \t$dst, $a, $b;", []>;
def MULWIDEU32Imm :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
            "mul.wide.u16 \t$dst, $a, $b;", []>;
def MULWIDEU32Imm32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
            "mul.wide.u16 \t$dst, $a, $b;", []>;
506
// Rewrite "extend, then shift left by a constant" as a mul.wide by the
// immediate (1 << v) that SHL2MUL32 / SHL2MUL16 compute at the width of the
// narrow operand.
//
// For the signed forms the shift amount must leave the top bit of that
// immediate clear: 1 << 31 (resp. 1 << 15) has the sign bit set at 32
// (resp. 16) bits, so a signed mul.wide would multiply by a negative value
// and produce a result with the wrong sign.  The signed patterns therefore
// only accept 0 <= v <= 30 (resp. 0 <= v <= 14); larger shift amounts no
// longer match here and are left to the ordinary shift selection.  The
// unsigned forms keep the full 0..31 / 0..15 range.

// Immediate v with 0 <= v < 31.
def IntConst_0_30 : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.sge(0) && v.slt(31);
}]>;

// Immediate v with 0 <= v < 15.
def IntConst_0_14 : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.sge(0) && v.slt(15);
}]>;

// i32 -> i64.
def : Pat<(shl (sext Int32Regs:$a), (i32 IntConst_0_30:$b)),
          (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
          Requires<[doMulWide]>;
def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
          (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
          Requires<[doMulWide]>;

// i16 -> i32.
def : Pat<(shl (sext Int16Regs:$a), (i16 IntConst_0_14:$b)),
          (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
          Requires<[doMulWide]>;
def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
          (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
          Requires<[doMulWide]>;
520
// Select a multiply of two extended narrow values as a single mul.wide when
// doMulWide holds.  The immediate forms require the constant to fit in the
// narrow type (signed or unsigned, matching the extension).

// i32 x i32 -> i64, signed.
def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
          (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>,
      Requires<[doMulWide]>;

// i32 x i32 -> i64, unsigned.
def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
          (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>,
      Requires<[doMulWide]>;

// i16 x i16 -> i32, signed.
def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
          (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
          (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>,
      Requires<[doMulWide]>;

// i16 x i16 -> i32, unsigned.
def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
          (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>,
      Requires<[doMulWide]>;
548
549
// Type profile for the target-specific widening multiply nodes: one result
// and two operands, where the two operands must have the same type.  The
// profile places no constraint on the result type.
def SDTMulWide
  : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
// DAG node bound to NVPTXISD::MUL_WIDE_SIGNED.
def mul_wide_signed
  : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
// DAG node bound to NVPTXISD::MUL_WIDE_UNSIGNED.
def mul_wide_unsigned
  : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;
556
// Select the mul_wide_signed / mul_wide_unsigned nodes directly onto the
// mul.wide instructions.  The result type picks the 16 -> 32 or 32 -> 64
// variant; every pattern is guarded by doMulWide.

// 16 x 16 -> 32.
def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),
          (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
          (MULWIDES32Imm Int16Regs:$a, imm:$b)>,
      Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),
          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
          (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
      Requires<[doMulWide]>;

// 32 x 32 -> 64.
def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)),
          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
          (MULWIDES64Imm Int32Regs:$a, imm:$b)>,
      Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)),
          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
      Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
          (MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
      Requires<[doMulWide]>;
583
// Low half of the product (rr and ri forms for 16/32/64 bits via I3).
defm MULT : I3<"mul.lo.s", mul>;

// High half of the product, signed and unsigned.
defm MULTHS : I3<"mul.hi.s", mulhs>;
defm MULTHU : I3<"mul.hi.u", mulhu>;

// Division, signed and unsigned.
defm SDIV : I3<"div.s", sdiv>;
defm UDIV : I3<"div.u", udiv>;

// Remainder, signed and unsigned.
// SREM: the ri version will not be selected as DAGCombiner::visitSREM will
// lower it.
defm SREM : I3<"rem.s", srem>;
// UREM: the ri version will not be selected as DAGCombiner::visitUREM will
// lower it.
defm UREM : I3<"rem.u", urem>;
596
// Type profile for the integer multiply-add node: one result and three
// operands, all integers of one and the same type (the result is tied to
// operands 1, 2 and 3 by the SDTCisSameAs constraints).
def SDTIMAD
  : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>,
                         SDTCisInt<2>, SDTCisSameAs<0, 2>,
                         SDTCisSameAs<0, 3>]>;
// DAG node bound to NVPTXISD::IMAD.
def imad
  : SDNode<"NVPTXISD::IMAD", SDTIMAD>;
603
// Integer multiply-add (mad.lo) selected from the imad node.
// For one register class RC and its matching immediate operand Imm this
// template produces four forms, named after the kinds of $a, $b and $c:
//   rrr - all registers
//   rri - $c is an immediate
//   rir - $b is an immediate
//   rii - $b and $c are immediates
// OpcStr is the complete mnemonic, e.g. "mad.lo.s32".
multiclass MAD_LO_INT<string OpcStr, RegisterClass RC, Operand Imm> {
  def rrr :
    NVPTXInst<(outs RC:$dst),
              (ins RC:$a, RC:$b, RC:$c),
              !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
              [(set RC:$dst, (imad RC:$a, RC:$b, RC:$c))]>;
  def rri :
    NVPTXInst<(outs RC:$dst),
              (ins RC:$a, RC:$b, Imm:$c),
              !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
              [(set RC:$dst, (imad RC:$a, RC:$b, imm:$c))]>;
  def rir :
    NVPTXInst<(outs RC:$dst),
              (ins RC:$a, Imm:$b, RC:$c),
              !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
              [(set RC:$dst, (imad RC:$a, imm:$b, RC:$c))]>;
  def rii :
    NVPTXInst<(outs RC:$dst),
              (ins RC:$a, Imm:$b, Imm:$c),
              !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
              [(set RC:$dst, (imad RC:$a, imm:$b, imm:$c))]>;
}

// Expands to MAD16rrr/rri/rir/rii, MAD32rrr/... and MAD64rrr/...
defm MAD16 : MAD_LO_INT<"mad.lo.s16", Int16Regs, i16imm>;
defm MAD32 : MAD_LO_INT<"mad.lo.s32", Int32Regs, i32imm>;
defm MAD64 : MAD_LO_INT<"mad.lo.s64", Int64Regs, i64imm>;
666
// Integer negation for 16-, 32- and 64-bit registers, selected from ineg.
def INEG16 :
  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
            "neg.s16 \t$dst, $src;",
            [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
def INEG32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
            "neg.s32 \t$dst, $src;",
            [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
def INEG64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
            "neg.s64 \t$dst, $src;",
            [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
676
677//-----------------------------------
678// Floating Point Arithmetic
679//-----------------------------------
680
// Constant 1.0f: matches an fpimm whose semantics are IEEE single precision
// and whose value compares equal to 1.0f.
def FloatConst1 : PatLeaf<(fpimm), [{
    const llvm::APFloat &Val = N->getValueAPF();
    if (&Val.getSemantics() != &llvm::APFloat::IEEEsingle)
      return false;
    return Val.convertToFloat() == 1.0f;
}]>;
// Constant (double)1.0: matches an fpimm whose semantics are IEEE double
// precision and whose value compares equal to 1.0.
def DoubleConst1 : PatLeaf<(fpimm), [{
    const llvm::APFloat &Val = N->getValueAPF();
    if (&Val.getSemantics() != &llvm::APFloat::IEEEdouble)
      return false;
    return Val.convertToDouble() == 1.0;
}]>;
695
// add/sub/mul without a rounding modifier.  F3 guards every form with
// allowFMA (allowFMA_ftz for the .ftz forms).
defm FADD : F3<"add", fadd>;
defm FSUB : F3<"sub", fsub>;
defm FMUL : F3<"mul", fmul>;

// add/sub/mul with the ".rn" modifier.  F3_rn only guards the .ftz forms
// (with doF32FTZ), so these remain available when allowFMA does not hold.
defm FADD_rn : F3_rn<"add", fadd>;
defm FSUB_rn : F3_rn<"sub", fsub>;
defm FMUL_rn : F3_rn<"mul", fmul>;

// Unary operations; the square root is always emitted as "sqrt.rn".
defm FABS : F2<"abs", fabs>;
defm FNEG : F2<"neg", fneg>;
defm FSQRT : F2<"sqrt.rn", fsqrt>;
707
//
// F64 division
//

// 1.0 / b is emitted as a reciprocal; the constant operand $a is matched but
// not printed.
def FDIV641r :
  NVPTXInst<(outs Float64Regs:$dst),
            (ins f64imm:$a, Float64Regs:$b),
            "rcp.rn.f64 \t$dst, $b;",
            [(set Float64Regs:$dst,
               (fdiv DoubleConst1:$a, Float64Regs:$b))]>;

// General register / register and register / immediate division.
def FDIV64rr :
  NVPTXInst<(outs Float64Regs:$dst),
            (ins Float64Regs:$a, Float64Regs:$b),
            "div.rn.f64 \t$dst, $a, $b;",
            [(set Float64Regs:$dst,
               (fdiv Float64Regs:$a, Float64Regs:$b))]>;
def FDIV64ri :
  NVPTXInst<(outs Float64Regs:$dst),
            (ins Float64Regs:$a, f64imm:$b),
            "div.rn.f64 \t$dst, $a, $b;",
            [(set Float64Regs:$dst,
               (fdiv Float64Regs:$a, fpimm:$b))]>;
726
727//
728// F32 Approximate reciprocal
729//
// Both records match `fdiv` whose numerator is FloatConst1 (1.0f) and print
// an `rcp.approx` instruction that uses only $b; $a is declared but unused in
// the asm string. Both require do_DIVF32_APPROX; the _ftz variant additionally
// requires doF32FTZ and prints the `.ftz` modifier.
730def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
731                      (ins f32imm:$a, Float32Regs:$b),
732                      "rcp.approx.ftz.f32 \t$dst, $b;",
733                      [(set Float32Regs:$dst,
734                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
735                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
736def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
737                        (ins f32imm:$a, Float32Regs:$b),
738                       "rcp.approx.f32 \t$dst, $b;",
739                      [(set Float32Regs:$dst,
740                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
741                      Requires<[do_DIVF32_APPROX]>;
742//
743// F32 Approximate division
744//
// General f32 `fdiv` printed as `div.approx[.ftz].f32`. All four records
// require do_DIVF32_APPROX; the _ftz pair additionally requires doF32FTZ.
// "rr" takes both operands in registers, "ri" takes the divisor as an
// f32 immediate (matched with fpimm).
745def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
746                      (ins Float32Regs:$a, Float32Regs:$b),
747                      "div.approx.ftz.f32 \t$dst, $a, $b;",
748                      [(set Float32Regs:$dst,
749                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
750                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
751def FDIV32approxri_ftz : NVPTXInst<(outs Float32Regs:$dst),
752                      (ins Float32Regs:$a, f32imm:$b),
753                      "div.approx.ftz.f32 \t$dst, $a, $b;",
754                      [(set Float32Regs:$dst,
755                        (fdiv Float32Regs:$a, fpimm:$b))]>,
756                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
757def FDIV32approxrr     : NVPTXInst<(outs Float32Regs:$dst),
758                      (ins Float32Regs:$a, Float32Regs:$b),
759                      "div.approx.f32 \t$dst, $a, $b;",
760                      [(set Float32Regs:$dst,
761                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
762                      Requires<[do_DIVF32_APPROX]>;
763def FDIV32approxri : NVPTXInst<(outs Float32Regs:$dst),
764                      (ins Float32Regs:$a, f32imm:$b),
765                      "div.approx.f32 \t$dst, $a, $b;",
766                      [(set Float32Regs:$dst,
767                        (fdiv Float32Regs:$a, fpimm:$b))]>,
768                      Requires<[do_DIVF32_APPROX]>;
769//
770// F32 Semi-accurate reciprocal
771//
772// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
773//
// Same match as the approximate-reciprocal records (numerator FloatConst1,
// only $b printed) but gated on do_DIVF32_FULL instead of do_DIVF32_APPROX.
// The _ftz variant additionally requires doF32FTZ.
774def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
775                      (ins f32imm:$a, Float32Regs:$b),
776                      "rcp.approx.ftz.f32 \t$dst, $b;",
777                      [(set Float32Regs:$dst,
778                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
779                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
780def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
781                      (ins f32imm:$a, Float32Regs:$b),
782                      "rcp.approx.f32 \t$dst, $b;",
783                      [(set Float32Regs:$dst,
784                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
785                      Requires<[do_DIVF32_FULL]>;
786//
787// F32 Semi-accurate division
788//
// General f32 `fdiv` printed as `div.full[.ftz].f32`. All four records
// require do_DIVF32_FULL; the _ftz pair additionally requires doF32FTZ.
// "rr" = register divisor, "ri" = f32 immediate divisor (matched with fpimm).
789def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
790                      (ins Float32Regs:$a, Float32Regs:$b),
791                      "div.full.ftz.f32 \t$dst, $a, $b;",
792                      [(set Float32Regs:$dst,
793                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
794                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
795def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
796                      (ins Float32Regs:$a, f32imm:$b),
797                      "div.full.ftz.f32 \t$dst, $a, $b;",
798                      [(set Float32Regs:$dst,
799                        (fdiv Float32Regs:$a, fpimm:$b))]>,
800                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
801def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
802                      (ins Float32Regs:$a, Float32Regs:$b),
803                      "div.full.f32 \t$dst, $a, $b;",
804                      [(set Float32Regs:$dst,
805                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
806                      Requires<[do_DIVF32_FULL]>;
807def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
808                      (ins Float32Regs:$a, f32imm:$b),
809                      "div.full.f32 \t$dst, $a, $b;",
810                      [(set Float32Regs:$dst,
811                        (fdiv Float32Regs:$a, fpimm:$b))]>,
812                      Requires<[do_DIVF32_FULL]>;
813//
814// F32 Accurate reciprocal
815//
// Numerator FloatConst1 (1.0f), printed as `rcp.rn[.ftz].f32` using only $b.
// Both records require reqPTX20; the _ftz variant additionally requires
// doF32FTZ. Neither names a do_DIVF32_* predicate.
816def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
817                        (ins f32imm:$a, Float32Regs:$b),
818                       "rcp.rn.ftz.f32 \t$dst, $b;",
819                      [(set Float32Regs:$dst,
820                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
821                      Requires<[reqPTX20, doF32FTZ]>;
822def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
823                      (ins f32imm:$a, Float32Regs:$b),
824                       "rcp.rn.f32 \t$dst, $b;",
825                      [(set Float32Regs:$dst,
826                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
827                      Requires<[reqPTX20]>;
828//
829// F32 Accurate division
830//
// General f32 `fdiv` printed as `div.rn[.ftz].f32`. All four records require
// reqPTX20; the _ftz pair additionally requires doF32FTZ. These records are
// also referenced by name from the frem lowering patterns later in this file.
831def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
832                      (ins Float32Regs:$a, Float32Regs:$b),
833                      "div.rn.ftz.f32 \t$dst, $a, $b;",
834                      [(set Float32Regs:$dst,
835                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
836                      Requires<[doF32FTZ, reqPTX20]>;
837def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
838                      (ins Float32Regs:$a, f32imm:$b),
839                      "div.rn.ftz.f32 \t$dst, $a, $b;",
840                      [(set Float32Regs:$dst,
841                        (fdiv Float32Regs:$a, fpimm:$b))]>,
842                      Requires<[doF32FTZ, reqPTX20]>;
843def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
844                      (ins Float32Regs:$a, Float32Regs:$b),
845                      "div.rn.f32 \t$dst, $a, $b;",
846                      [(set Float32Regs:$dst,
847                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
848                      Requires<[reqPTX20]>;
849def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
850                      (ins Float32Regs:$a, f32imm:$b),
851                      "div.rn.f32 \t$dst, $a, $b;",
852                      [(set Float32Regs:$dst,
853                        (fdiv Float32Regs:$a, fpimm:$b))]>,
854                      Requires<[reqPTX20]>;
855
856//
857// F32 rsqrt
858//
859
// The instruction record itself has an empty pattern list; it is only
// selected through the Pat that follows.
860def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b),
861                       "rsqrt.approx.f32 \t$dst, $b;", []>;
862
// Fold 1.0f / int_nvvm_sqrt_f(b) into a single rsqrt.approx.f32. Applies only
// when do_DIVF32_FULL, do_SQRTF32_APPROX and doNoF32FTZ all hold.
863def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
864         (RSQRTF32approx1r Float32Regs:$b)>,
865         Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>;
866
// f32 fused multiply-add forms.
//   OpcStr - full PTX opcode text printed before the operands.
//   Pred   - predicate placed in every record's Requires<> list.
// All four records match `fma a, b, c` with $a always in a register; the
// suffix letters give the kind of $a/$b/$c (r = Float32Regs, i = f32 immediate
// matched with fpimm): rrr, rri, rir, rii.
867multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
868   def rrr : NVPTXInst<(outs Float32Regs:$dst),
869                      (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
870                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
871                      [(set Float32Regs:$dst,
872                        (fma Float32Regs:$a, Float32Regs:$b, Float32Regs:$c))]>,
873                      Requires<[Pred]>;
874   def rri : NVPTXInst<(outs Float32Regs:$dst),
875                      (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
876                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
877                      [(set Float32Regs:$dst,
878                        (fma Float32Regs:$a, Float32Regs:$b, fpimm:$c))]>,
879                      Requires<[Pred]>;
880   def rir : NVPTXInst<(outs Float32Regs:$dst),
881                      (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
882                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
883                      [(set Float32Regs:$dst,
884                        (fma Float32Regs:$a, fpimm:$b, Float32Regs:$c))]>,
885                      Requires<[Pred]>;
886   def rii : NVPTXInst<(outs Float32Regs:$dst),
887                      (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
888                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
889                      [(set Float32Regs:$dst,
890                        (fma Float32Regs:$a, fpimm:$b, fpimm:$c))]>,
891                      Requires<[Pred]>;
892}
893
// f64 fused multiply-add forms; the f64 counterpart of FPCONTRACT32.
//   OpcStr - full PTX opcode text printed before the operands.
//   Pred   - predicate placed in every record's Requires<> list.
// All four records match `fma a, b, c` with $a always in a register; the
// suffix letters give the kind of $a/$b/$c (r = Float64Regs, i = f64 immediate
// matched with fpimm): rrr, rri, rir, rii.
894multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
895   def rrr : NVPTXInst<(outs Float64Regs:$dst),
896                      (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
897                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
898                      [(set Float64Regs:$dst,
899                        (fma Float64Regs:$a, Float64Regs:$b, Float64Regs:$c))]>,
900                      Requires<[Pred]>;
901   def rri : NVPTXInst<(outs Float64Regs:$dst),
902                      (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
903                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
904                      [(set Float64Regs:$dst,
905                        (fma Float64Regs:$a, Float64Regs:$b, fpimm:$c))]>,
906                      Requires<[Pred]>;
907   def rir : NVPTXInst<(outs Float64Regs:$dst),
908                      (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
909                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
910                      [(set Float64Regs:$dst,
911                        (fma Float64Regs:$a, fpimm:$b, Float64Regs:$c))]>,
912                      Requires<[Pred]>;
913   def rii : NVPTXInst<(outs Float64Regs:$dst),
914                      (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
915                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
916                      [(set Float64Regs:$dst,
917                        (fma Float64Regs:$a, fpimm:$b, fpimm:$c))]>,
918                      Requires<[Pred]>;
919}
920
// FMA instantiations.
//   FMA32_ftz - f32 fma printed with `.ftz`, selected when doF32FTZ holds.
//   FMA32     - f32 fma without `.ftz`, selected when doNoF32FTZ holds, so the
//               two f32 variants are mutually exclusive.
//   FMA64     - f64 fma. The opcode has no `.ftz` form and the f32 FTZ mode
//               does not apply to f64, so it must be selectable in both
//               modes. Gating it on doNoF32FTZ left f64 `fma` nodes without
//               any matching pattern whenever f32 FTZ was enabled.
// NOTE(review): `true` is the always-satisfied Predicate expected to be
// declared with the other predicates earlier in this file - confirm.
921defm FMA32_ftz  : FPCONTRACT32<"fma.rn.ftz.f32", doF32FTZ>;
922defm FMA32  : FPCONTRACT32<"fma.rn.f32", doNoF32FTZ>;
923defm FMA64  : FPCONTRACT64<"fma.rn.f64", true>;
924
// f32 sine / cosine: `fsin` and `fcos` on Float32Regs are selected to the
// PTX `sin.approx.f32` / `cos.approx.f32` instructions. Neither record has a
// Requires<> clause, and no `.ftz` variant is defined here.
925def SINF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
926                      "sin.approx.f32 \t$dst, $src;",
927                      [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
928def COSF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
929                      "cos.approx.f32 \t$dst, $src;",
930                      [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
931
932// Lower (frem x, y) into (sub x, (mul (trunc (div x, y)) y))
933// i.e. "poor man's fmod()"
//
// The quotient must be rounded toward zero (CvtRZI / CvtRZI_FTZ), not toward
// negative infinity (CvtRMI, i.e. floor): frem yields a result with the sign
// of the dividend. With floor, a negative quotient is rounded the wrong way,
// e.g. frem(-5, 3) would give -5 - (-2 * 3) = 1 instead of
// -5 - (-1 * 3) = -2.
//
// Each pattern expands to: divide with the accurate div.rn form, round the
// quotient to an integral value with a same-type cvt, multiply back by y and
// subtract from x. The "fpimm" variants handle an immediate divisor.
934
935// frem - f32 FTZ
936def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
937          (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32
938            (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRZI_FTZ),
939             Float32Regs:$y))>,
940          Requires<[doF32FTZ]>;
941def : Pat<(frem Float32Regs:$x, fpimm:$y),
942          (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32
943            (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRZI_FTZ),
944             fpimm:$y))>,
945          Requires<[doF32FTZ]>;
946
947// frem - f32
948def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
949          (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32
950            (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRZI),
951             Float32Regs:$y))>;
952def : Pat<(frem Float32Regs:$x, fpimm:$y),
953          (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32
954            (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRZI),
955             fpimm:$y))>;
956
957// frem - f64
958def : Pat<(frem Float64Regs:$x, Float64Regs:$y),
959          (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64
960            (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRZI),
961             Float64Regs:$y))>;
962def : Pat<(frem Float64Regs:$x, fpimm:$y),
963          (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64
964            (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRZI),
965             fpimm:$y))>;
966
967//-----------------------------------
968// Logical Arithmetic
969//-----------------------------------
970
// Bitwise logical instruction family.
//   OpcStr - PTX opcode stem; the type suffix (.pred/.b16/.b32/.b64) is
//            appended per record.
//   OpNode - SDNode matched by every record.
// For each of the four widths (1-bit predicate, 16, 32, 64) there is an "rr"
// record (both operands in registers) and an "ri" record (second operand an
// integer immediate of the same width).
971multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
972  def b1rr:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
973                      !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
974                      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
975  def b1ri:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
976                      !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
977                      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
978  def b16rr:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
979                      !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
980                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
981                        Int16Regs:$b))]>;
982  def b16ri:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
983                      !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
984                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
985  def b32rr:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
986                      !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
987                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
988                        Int32Regs:$b))]>;
989  def b32ri:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
990                      !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
991                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
992  def b64rr:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
993                      !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
994                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
995                        Int64Regs:$b))]>;
996  def b64ri:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
997                      !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
998                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
999}
1000
// Instantiations; record names are the defm prefix plus the suffix above
// (e.g. ORb1rr and ANDb1rr, which the predicate-select pattern later in this
// file refers to).
1001defm OR  : LOG_FORMAT<"or", or>;
1002defm AND : LOG_FORMAT<"and", and>;
1003defm XOR : LOG_FORMAT<"xor", xor>;
1004
// Bitwise complement: `not` on each integer register class is selected to
// the PTX `not` instruction with the matching type suffix (.pred for the
// 1-bit predicate class, .b16/.b32/.b64 otherwise).
1005def NOT1:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
1006                      "not.pred \t$dst, $src;",
1007                      [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
1008def NOT16:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
1009                      "not.b16 \t$dst, $src;",
1010                      [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
1011def NOT32:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
1012                      "not.b32 \t$dst, $src;",
1013                      [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
1014def NOT64:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
1015                      "not.b64 \t$dst, $src;",
1016                      [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
1017
1018// For shifts, the second src operand must be 32-bit value
// Left-shift instruction family.
//   OpcStr - PTX opcode stem ending in the type letter; the bit width
//            ("64"/"32"/"16") is appended per record.
//   OpNode - SDNode matched by every record.
// The shifted value is 64, 32 or 16 bits wide; the shift amount is always an
// Int32Regs register ("rr") or an i32 immediate ("ri"). i32ii additionally
// covers the case where both the value and the amount are i32 immediates.
1019multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
1020   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1021                      Int32Regs:$b),
1022                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1023                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1024                        Int32Regs:$b))]>;
1025   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1026                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1027                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1028                        (i32 imm:$b)))]>;
1029   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1030                      Int32Regs:$b),
1031                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1032                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1033                        Int32Regs:$b))]>;
1034   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1035                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1036                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1037                        (i32 imm:$b)))]>;
1038   def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1039                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1040                      [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1041                        (i32 imm:$b)))]>;
1042   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1043                      Int32Regs:$b),
1044                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1045                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1046                        Int32Regs:$b))]>;
1047   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1048                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1049                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1050                        (i32 imm:$b)))]>;
1051}
1052
// `shl` is printed as shl.b64 / shl.b32 / shl.b16.
1053defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
1054
1055// For shifts, the second src operand must be 32-bit value
1056// Need to add cvt for the 8-bits.
// Right-shift instruction family; structurally identical to LSHIFT_FORMAT.
//   OpcStr - PTX opcode stem ending in the type letter; the bit width
//            ("64"/"32"/"16") is appended per record.
//   OpNode - SDNode matched by every record.
// The shift amount is always an Int32Regs register ("rr") or an i32 immediate
// ("ri"); i32ii takes both the value and the amount as i32 immediates.
1057multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
1058   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1059                      Int32Regs:$b),
1060                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1061                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1062                        Int32Regs:$b))]>;
1063   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1064                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1065                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1066                        (i32 imm:$b)))]>;
1067   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1068                      Int32Regs:$b),
1069                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1070                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1071                        Int32Regs:$b))]>;
1072   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1073                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1074                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1075                        (i32 imm:$b)))]>;
1076   def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1077                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1078                      [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1079                        (i32 imm:$b)))]>;
1080   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1081                      Int32Regs:$b),
1082                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1083                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1084                        Int32Regs:$b))]>;
1085   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1086                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1087                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1088                        (i32 imm:$b)))]>;
1089}
1090
// `sra` is printed with the signed type (shr.sNN), `srl` with the unsigned
// type (shr.uNN).
1091defm SRA : RSHIFT_FORMAT<"shr.s", sra>;
1092defm SRL : RSHIFT_FORMAT<"shr.u", srl>;
1093
1094//
1095// Rotate: use ptx shf instruction if available.
1096//
// All four records below require hasHWROT32. Each prints a `shf.{l,r}.wrap.b32`
// with $src passed as both data inputs; the "imm" records take the rotate
// amount as an i32 immediate, the "reg" records take it in an Int32Regs
// register.
1097
1098// 32 bit r2 = rotl r1, n
1099//    =>
1100//        r2 = shf.l r1, r1, n
1101def ROTL32imm_hw : NVPTXInst<(outs Int32Regs:$dst),
1102                             (ins Int32Regs:$src, i32imm:$amt),
1103              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
1104    [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>,
1105    Requires<[hasHWROT32]> ;
1106
1107def ROTL32reg_hw : NVPTXInst<(outs Int32Regs:$dst),
1108                             (ins Int32Regs:$src, Int32Regs:$amt),
1109              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
1110    [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
1111    Requires<[hasHWROT32]>;
1112
1113// 32 bit r2 = rotr r1, n
1114//    =>
1115//        r2 = shf.r r1, r1, n
1116def ROTR32imm_hw : NVPTXInst<(outs Int32Regs:$dst),
1117                             (ins Int32Regs:$src, i32imm:$amt),
1118              "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
1119    [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>,
1120    Requires<[hasHWROT32]>;
1121
1122def ROTR32reg_hw : NVPTXInst<(outs Int32Regs:$dst),
1123                             (ins Int32Regs:$src, Int32Regs:$amt),
1124              "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
1125    [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
1126    Requires<[hasHWROT32]>;
1127
1128//
1129// Rotate: if ptx shf instruction is not available, then use shift+add
1130//
1131// 32bit
// Multi-instruction asm sequence for a 32-bit rotate by a constant:
//   %lhs = $src << $amt1;  %rhs = $src >> $amt2;  $dst = %lhs + %rhs
// %lhs and %rhs are .b32 temporaries declared inside the sequence, which is
// bracketed by "{{" ... "}}" (presumably printed as a PTX { } scope - confirm).
// The record has no pattern of its own; the two Pat records below select it
// and supply the complementary shift amounts.
1132def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
1133  (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
1134    !strconcat("{{\n\t",
1135    !strconcat(".reg .b32 %lhs;\n\t",
1136    !strconcat(".reg .b32 %rhs;\n\t",
1137    !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
1138    !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
1139    !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1140    !strconcat("}}", ""))))))),
1141    []>;
1142
// Transform an immediate N into the i32 target constant (32 - N), using the
// zero-extended value of N.
1143def SUB_FRM_32 : SDNodeXForm<imm, [{
1144    return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
1145}]>;
1146
// rotl by amt: shift left by amt, shift right by (32 - amt).
// rotr by amt: shift left by (32 - amt), shift right by amt.
// Both apply only when noHWROT32 holds.
1147def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
1148          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
1149      Requires<[noHWROT32]>;
1150def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
1151          (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>,
1152      Requires<[noHWROT32]>;
1153
// 32-bit rotate-left by a register amount, selected when noHWROT32 holds.
// Emitted sequence (temporaries %lhs, %rhs, %amt2 are declared inside it):
//   %lhs  = $src << $amt
//   %amt2 = 32 - $amt
//   %rhs  = $src >> %amt2
//   $dst  = %lhs + %rhs
1154def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1155    Int32Regs:$amt),
1156    !strconcat("{{\n\t",
1157    !strconcat(".reg .b32 %lhs;\n\t",
1158    !strconcat(".reg .b32 %rhs;\n\t",
1159    !strconcat(".reg .b32 %amt2;\n\t",
1160    !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
1161    !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1162    !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
1163    !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1164    !strconcat("}}", ""))))))))),
1165    [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
1166    Requires<[noHWROT32]>;
1167
// 32-bit rotate-right by a register amount; same sequence as above with the
// shift directions swapped (%lhs = $src >> $amt, %rhs = $src << (32 - $amt)).
1168def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1169    Int32Regs:$amt),
1170    !strconcat("{{\n\t",
1171    !strconcat(".reg .b32 %lhs;\n\t",
1172    !strconcat(".reg .b32 %rhs;\n\t",
1173    !strconcat(".reg .b32 %amt2;\n\t",
1174    !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
1175    !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1176    !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
1177    !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1178    !strconcat("}}", ""))))))))),
1179    [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
1180    Requires<[noHWROT32]>;
1181
1182// 64bit
// Multi-instruction asm sequence for a 64-bit rotate by a constant:
//   %lhs = $src << $amt1;  %rhs = $src >> $amt2;  $dst = %lhs + %rhs
// %lhs and %rhs are .b64 temporaries declared inside the sequence; both shift
// amounts are i32 immediates. The record has no pattern of its own; the two
// Pat records below select it.
1183def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1184    i32imm:$amt1, i32imm:$amt2),
1185    !strconcat("{{\n\t",
1186    !strconcat(".reg .b64 %lhs;\n\t",
1187    !strconcat(".reg .b64 %rhs;\n\t",
1188    !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
1189    !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
1190    !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1191    !strconcat("}}", ""))))))),
1192    []>;
1193
// Transform an immediate N into the i32 target constant (64 - N), using the
// zero-extended value of N.
1194def SUB_FRM_64 : SDNodeXForm<imm, [{
1195    return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
1196}]>;
1197
// rotl by amt: shift left by amt, shift right by (64 - amt).
// rotr by amt: shift left by (64 - amt), shift right by amt.
// Unlike the 32-bit patterns, these carry no Requires<> clause.
1198def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
1199          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
1200def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
1201          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
1202
// 64-bit rotate-left by a 32-bit register amount; no Requires<> clause.
// Emitted sequence (.b64 temporaries %lhs/%rhs and .u32 temporary %amt2 are
// declared inside it):
//   %lhs  = $src << $amt
//   %amt2 = 64 - $amt
//   %rhs  = $src >> %amt2
//   $dst  = %lhs + %rhs
1203def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1204    Int32Regs:$amt),
1205    !strconcat("{{\n\t",
1206    !strconcat(".reg .b64 %lhs;\n\t",
1207    !strconcat(".reg .b64 %rhs;\n\t",
1208    !strconcat(".reg .u32 %amt2;\n\t",
1209    !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
1210    !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1211    !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
1212    !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1213    !strconcat("}}", ""))))))))),
1214    [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
1215
// 64-bit rotate-right by a 32-bit register amount; same sequence as above
// with the shift directions swapped.
1216def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1217    Int32Regs:$amt),
1218    !strconcat("{{\n\t",
1219    !strconcat(".reg .b64 %lhs;\n\t",
1220    !strconcat(".reg .b64 %rhs;\n\t",
1221    !strconcat(".reg .u32 %amt2;\n\t",
1222    !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
1223    !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1224    !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
1225    !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1226    !strconcat("}}", ""))))))))),
1227    [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
1228
1229// BFE - bit-field extract
1230
// Bit-field extract instruction family.
//   TyStr - PTX type suffix appended after "bfe." (e.g. "s32", "u64").
//   RC    - register class of the source value $a and of the result $d.
// Suffix letters give the kind of $a/$b/$c (r = register, i = i32 immediate).
// All three records have an empty pattern list, so they are never selected by
// a pattern attached to the record itself.
1231multiclass BFE<string TyStr, RegisterClass RC> {
1232  // BFE supports both 32-bit and 64-bit values, but the start and length
1233  // operands are always 32-bit
1234  def rrr
1235    : NVPTXInst<(outs RC:$d),
1236                (ins RC:$a, Int32Regs:$b, Int32Regs:$c),
1237                !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
1238  def rri
1239    : NVPTXInst<(outs RC:$d),
1240                (ins RC:$a, Int32Regs:$b, i32imm:$c),
1241                !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
1242  def rii
1243    : NVPTXInst<(outs RC:$d),
1244                (ins RC:$a, i32imm:$b, i32imm:$c),
1245                !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
1246}
1247
// Signed and unsigned variants for 32-bit and 64-bit source values.
1248defm BFE_S32 : BFE<"s32", Int32Regs>;
1249defm BFE_U32 : BFE<"u32", Int32Regs>;
1250defm BFE_S64 : BFE<"s64", Int64Regs>;
1251defm BFE_U64 : BFE<"u64", Int64Regs>;
1252
1253//-----------------------------------
1254// General Comparison
1255//-----------------------------------
1256
1257// General setp instructions
// Compare-to-predicate instruction family; the result is an Int1Regs value.
//   TypeStr - PTX type suffix appended after the comparison modifiers.
//   RC      - register class of the compared operands.
//   ImmCls  - immediate operand class used by the "ri" and "ir" forms.
// The comparison is carried as a CmpMode operand and printed through the
// ${cmp:base} and ${cmp:ftz} operand modifiers. rr/ri/ir differ only in which
// of $a/$b is an immediate. All records have an empty pattern list.
1258multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
1259  def rr : NVPTXInst<(outs Int1Regs:$dst),
1260                     (ins RC:$a, RC:$b, CmpMode:$cmp),
1261            !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
1262                     []>;
1263  def ri : NVPTXInst<(outs Int1Regs:$dst),
1264                     (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
1265            !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
1266                     []>;
1267  def ir : NVPTXInst<(outs Int1Regs:$dst),
1268                     (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
1269            !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"),
1270                     []>;
1271}
1272
// One instantiation per PTX type: untyped bits, signed and unsigned integers
// at 16/32/64 bits, plus f32 and f64.
1273defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>;
1274defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>;
1275defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>;
1276defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>;
1277defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>;
1278defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>;
1279defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>;
1280defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>;
1281defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>;
1282defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
1283defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;
1284
1285// General set instructions
// Compare instruction family whose result is an Int32Regs value rather than
// a predicate.
//   TypeStr - PTX type suffix appended after the comparison operand.
//   RC      - register class of the compared operands.
//   ImmCls  - immediate operand class used by the "ri" and "ir" forms.
// The CmpMode operand is printed directly as $cmp (no operand modifiers,
// unlike SETP). All records have an empty pattern list.
1286multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
1287  def rr : NVPTXInst<(outs Int32Regs:$dst),
1288                     (ins RC:$a, RC:$b, CmpMode:$cmp),
1289                     !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
1290  def ri : NVPTXInst<(outs Int32Regs:$dst),
1291                     (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
1292                     !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
1293  def ir : NVPTXInst<(outs Int32Regs:$dst),
1294                     (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
1295                     !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
1296}
1297
// One instantiation per PTX type, mirroring the SETP_* list.
1298defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
1299defm SET_s16 : SET<"s16", Int16Regs, i16imm>;
1300defm SET_u16 : SET<"u16", Int16Regs, i16imm>;
1301defm SET_b32 : SET<"b32", Int32Regs, i32imm>;
1302defm SET_s32 : SET<"s32", Int32Regs, i32imm>;
1303defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
1304defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
1305defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
1306defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
1307defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
1308defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
1309
1310//-----------------------------------
1311// General Selection
1312//-----------------------------------
1313
1314// General selp instructions
// Pattern-less select-on-predicate instruction family.
//   TypeStr - PTX type suffix appended after "selp.".
//   RC      - register class of the result and of register value operands.
//   ImmCls  - immediate operand class for immediate value operands.
// Operands are printed as $dst, $a, $b, $p with $p an Int1Regs predicate.
// rr/ri/ir/ii cover every register/immediate combination of $a and $b. All
// records have an empty pattern list.
1315multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
1316  def rr : NVPTXInst<(outs RC:$dst),
1317                     (ins RC:$a, RC:$b, Int1Regs:$p),
1318                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1319  def ri : NVPTXInst<(outs RC:$dst),
1320                     (ins RC:$a, ImmCls:$b, Int1Regs:$p),
1321                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1322  def ir : NVPTXInst<(outs RC:$dst),
1323                     (ins ImmCls:$a, RC:$b, Int1Regs:$p),
1324                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1325  def ii : NVPTXInst<(outs RC:$dst),
1326                     (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
1327                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
1328}
1329
// Same operand layout and asm string as SELP, but every record also carries a
// selection pattern: (select $p, $a, $b) -> selp $dst, $a, $b, $p.
//   TypeStr - PTX type suffix appended after "selp.".
//   RC      - register class of the result and of register value operands.
//   ImmCls  - immediate operand class for immediate value operands.
//   ImmNode - SDNode used in the pattern to match an immediate operand
//             (the instantiations below pass imm or fpimm).
1330multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls,
1331                        SDNode ImmNode> {
1332  def rr : NVPTXInst<(outs RC:$dst),
1333                     (ins RC:$a, RC:$b, Int1Regs:$p),
1334                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1335                     [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>;
1336  def ri : NVPTXInst<(outs RC:$dst),
1337                     (ins RC:$a, ImmCls:$b, Int1Regs:$p),
1338                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1339                     [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
1340  def ir : NVPTXInst<(outs RC:$dst),
1341                     (ins ImmCls:$a, RC:$b, Int1Regs:$p),
1342                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1343                     [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
1344  def ii : NVPTXInst<(outs RC:$dst),
1345                     (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
1346                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
1347                 [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
1348}
1349
// Only the untyped-bits (.bNN) and floating-point variants carry `select`
// patterns; the signed/unsigned integer variants use the pattern-less SELP
// multiclass, so each register class has a single pattern-bearing selp.
1350defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>;
1351defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>;
1352defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>;
1353defm SELP_b32 : SELP_PATTERN<"b32", Int32Regs, i32imm, imm>;
1354defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>;
1355defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
1356defm SELP_b64 : SELP_PATTERN<"b64", Int64Regs, i64imm, imm>;
1357defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
1358defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
1359defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
1360defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;
1361
// Select between two predicate operands. It is expanded into predicate logic:
//   (p & a) | (!p & b)
def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
          (ORb1rr
            (ANDb1rr Int1Regs:$p, Int1Regs:$a),
            (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
1366
1367//
// Funnel shift in clamp mode
1369//
1370// - SDNodes are created so they can be used in the DAG code,
1371//   e.g. NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
1372//
// Type profile shared by the two funnel-shift nodes: one result and three
// operands. Operands 1 and 2 have the same type as the result; the result and
// operand 3 are integers.
def SDTIntShiftDOp
  : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                         SDTCisInt<0>, SDTCisInt<3>]>;

// Target-specific DAG nodes for the left/right clamp-mode funnel shifts.
def FUN_SHFL_CLAMP
  : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
def FUN_SHFR_CLAMP
  : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;
1378
// shf.l.clamp.b32: selected for FUN_SHFL_CLAMP on 32-bit registers.
def FUNSHFLCLAMP
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
              "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
              [(set Int32Regs:$dst,
                 (FUN_SHFL_CLAMP Int32Regs:$lo, Int32Regs:$hi,
                                 Int32Regs:$amt))]>;

// shf.r.clamp.b32: selected for FUN_SHFR_CLAMP on 32-bit registers.
def FUNSHFRCLAMP
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
              "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
              [(set Int32Regs:$dst,
                 (FUN_SHFR_CLAMP Int32Regs:$lo, Int32Regs:$hi,
                                 Int32Regs:$amt))]>;
1392
1393//-----------------------------------
1394// Data Movement (Load / Store, Move)
1395//-----------------------------------
1396
// Two-operand address complex patterns rooted at frameindex nodes. Matching
// is delegated to the named selector functions (SelectADDRri for i32,
// SelectADDRri64 for i64).
def ADDRri
  : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], [SDNPWantRoot]>;
def ADDRri64
  : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex], [SDNPWantRoot]>;
1401
// Memory operands made of a base register plus an immediate, printed through
// printMemOperand. MEMri is the 32-bit form, MEMri64 the 64-bit form.
def MEMri : Operand<i32> {
  let MIOperandInfo = (ops Int32Regs, i32imm);
  let PrintMethod = "printMemOperand";
}
def MEMri64 : Operand<i64> {
  let MIOperandInfo = (ops Int64Regs, i64imm);
  let PrintMethod = "printMemOperand";
}

// Pointer-typed operands printed through printOperand.
def imem : Operand<iPTR> {
  let PrintMethod = "printOperand";
}

def imemAny : Operand<iPTRAny> {
  let PrintMethod = "printOperand";
}

// i32 operand printed through printLdStCode.
def LdStCode : Operand<i32> {
  let PrintMethod = "printLdStCode";
}
1422
// NVPTXISD::Wrapper: one result, one operand; both have the same type, which
// must be the pointer type.
def SDTWrapper
  : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
1425
// Move a wrapped global address into a 32-bit register.
def MOV_ADDR
  : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
              "mov.u32 \t$dst, $a;",
              [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;

// Move a wrapped global address into a 64-bit register.
def MOV_ADDR64
  : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
              "mov.u64 \t$dst, $a;",
              [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1433
// Get pointer to local stack: moves the symbol __local_depot<num> into a
// 32-bit or 64-bit register. Neither def has a selection pattern.
def MOV_DEPOT_ADDR
  : NVPTXInst<(outs Int32Regs:$d),
              (ins i32imm:$num),
              "mov.u32 \t$d, __local_depot$num;",
              []>;
def MOV_DEPOT_ADDR_64
  : NVPTXInst<(outs Int64Regs:$d),
              (ins i32imm:$num),
              "mov.u64 \t$d, __local_depot$num;",
              []>;
1441
1442
// Register-to-register moves, all flagged IsSimpleMove. They carry no
// selection patterns; copyPhysreg is hard-coded in NVPTXInstrInfo.cpp.
let IsSimpleMove = 1 in {
  // Predicate and integer registers
  def IMOV1rr
    : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
                "mov.pred \t$dst, $sss;", []>;
  def IMOV16rr
    : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
                "mov.u16 \t$dst, $sss;", []>;
  def IMOV32rr
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
                "mov.u32 \t$dst, $sss;", []>;
  def IMOV64rr
    : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
                "mov.u64 \t$dst, $sss;", []>;

  // Floating-point registers
  def FMOV32rr
    : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
                "mov.f32 \t$dst, $src;", []>;
  def FMOV64rr
    : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
                "mov.f64 \t$dst, $src;", []>;
}
// Immediate-to-register moves. Integer forms match `imm`, floating-point
// forms match `fpimm`.
def IMOV1ri
  : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
              "mov.pred \t$dst, $src;",
              [(set Int1Regs:$dst, imm:$src)]>;
def IMOV16ri
  : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
              "mov.u16 \t$dst, $src;",
              [(set Int16Regs:$dst, imm:$src)]>;
def IMOV32ri
  : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
              "mov.u32 \t$dst, $src;",
              [(set Int32Regs:$dst, imm:$src)]>;
def IMOV64i
  : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
              "mov.u64 \t$dst, $src;",
              [(set Int64Regs:$dst, imm:$src)]>;

def FMOV32ri
  : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
              "mov.f32 \t$dst, $src;",
              [(set Float32Regs:$dst, fpimm:$src)]>;
def FMOV64ri
  : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
              "mov.f64 \t$dst, $src;",
              [(set Float64Regs:$dst, fpimm:$src)]>;
1478
// A wrapped external symbol of type i32 is materialized with IMOV32ri.
def : Pat<(i32 (Wrapper texternalsym:$dst)),
          (IMOV32ri texternalsym:$dst)>;
1480
//---- Copy Frame Index ----
// Materialize an ADDRri / ADDRri64 address into a register with an add; the
// memory operand is printed using the "add" modifier.
def LEA_ADDRi
  : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
              "add.u32 \t$dst, ${addr:add};",
              [(set Int32Regs:$dst, ADDRri:$addr)]>;
def LEA_ADDRi64
  : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
              "add.u64 \t$dst, ${addr:add};",
              [(set Int64Regs:$dst, ADDRri64:$addr)]>;
1488
1489//-----------------------------------
1490// Comparison and Selection
1491//-----------------------------------
1492
// Integer comparison patterns for one comparison node.
//   OpNode      - comparison fragment being matched
//   Mode        - comparison-mode leaf passed as the last instruction operand
//   setp_NNxx   - instructions used when the compare produces a predicate (i1)
//   set_NNxx    - instructions used when the compare produces an i32
// For every operand width (16/32/64) three operand shapes are covered:
// register/register (rr), register/immediate (ri), immediate/register (ir).
multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
                       Instruction setp_16rr,
                       Instruction setp_16ri,
                       Instruction setp_16ir,
                       Instruction setp_32rr,
                       Instruction setp_32ri,
                       Instruction setp_32ir,
                       Instruction setp_64rr,
                       Instruction setp_64ri,
                       Instruction setp_64ir,
                       Instruction set_16rr,
                       Instruction set_16ri,
                       Instruction set_16ir,
                       Instruction set_32rr,
                       Instruction set_32ri,
                       Instruction set_32ir,
                       Instruction set_64rr,
                       Instruction set_64ri,
                       Instruction set_64ir> {
  //---- Predicate (i1) results ----

  // i16 -> pred
  def : Pat<(i1 (OpNode Int16Regs:$lhs, Int16Regs:$rhs)),
            (setp_16rr Int16Regs:$lhs, Int16Regs:$rhs, Mode)>;
  def : Pat<(i1 (OpNode Int16Regs:$lhs, imm:$rhs)),
            (setp_16ri Int16Regs:$lhs, imm:$rhs, Mode)>;
  def : Pat<(i1 (OpNode imm:$lhs, Int16Regs:$rhs)),
            (setp_16ir imm:$lhs, Int16Regs:$rhs, Mode)>;

  // i32 -> pred
  def : Pat<(i1 (OpNode Int32Regs:$lhs, Int32Regs:$rhs)),
            (setp_32rr Int32Regs:$lhs, Int32Regs:$rhs, Mode)>;
  def : Pat<(i1 (OpNode Int32Regs:$lhs, imm:$rhs)),
            (setp_32ri Int32Regs:$lhs, imm:$rhs, Mode)>;
  def : Pat<(i1 (OpNode imm:$lhs, Int32Regs:$rhs)),
            (setp_32ir imm:$lhs, Int32Regs:$rhs, Mode)>;

  // i64 -> pred
  def : Pat<(i1 (OpNode Int64Regs:$lhs, Int64Regs:$rhs)),
            (setp_64rr Int64Regs:$lhs, Int64Regs:$rhs, Mode)>;
  def : Pat<(i1 (OpNode Int64Regs:$lhs, imm:$rhs)),
            (setp_64ri Int64Regs:$lhs, imm:$rhs, Mode)>;
  def : Pat<(i1 (OpNode imm:$lhs, Int64Regs:$rhs)),
            (setp_64ir imm:$lhs, Int64Regs:$rhs, Mode)>;

  //---- i32 results ----

  // i16 -> i32
  def : Pat<(i32 (OpNode Int16Regs:$lhs, Int16Regs:$rhs)),
            (set_16rr Int16Regs:$lhs, Int16Regs:$rhs, Mode)>;
  def : Pat<(i32 (OpNode Int16Regs:$lhs, imm:$rhs)),
            (set_16ri Int16Regs:$lhs, imm:$rhs, Mode)>;
  def : Pat<(i32 (OpNode imm:$lhs, Int16Regs:$rhs)),
            (set_16ir imm:$lhs, Int16Regs:$rhs, Mode)>;

  // i32 -> i32
  def : Pat<(i32 (OpNode Int32Regs:$lhs, Int32Regs:$rhs)),
            (set_32rr Int32Regs:$lhs, Int32Regs:$rhs, Mode)>;
  def : Pat<(i32 (OpNode Int32Regs:$lhs, imm:$rhs)),
            (set_32ri Int32Regs:$lhs, imm:$rhs, Mode)>;
  def : Pat<(i32 (OpNode imm:$lhs, Int32Regs:$rhs)),
            (set_32ir imm:$lhs, Int32Regs:$rhs, Mode)>;

  // i64 -> i32
  def : Pat<(i32 (OpNode Int64Regs:$lhs, Int64Regs:$rhs)),
            (set_64rr Int64Regs:$lhs, Int64Regs:$rhs, Mode)>;
  def : Pat<(i32 (OpNode Int64Regs:$lhs, imm:$rhs)),
            (set_64ri Int64Regs:$lhs, imm:$rhs, Mode)>;
  def : Pat<(i32 (OpNode imm:$lhs, Int64Regs:$rhs)),
            (set_64ir imm:$lhs, Int64Regs:$rhs, Mode)>;
}
1556
// ISET_FORMAT bound to the signed (sNN) SETP/SET instructions.
multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
  : ISET_FORMAT<OpNode, Mode,
                // predicate-producing forms
                SETP_s16rr, SETP_s16ri, SETP_s16ir,
                SETP_s32rr, SETP_s32ri, SETP_s32ir,
                SETP_s64rr, SETP_s64ri, SETP_s64ir,
                // i32-producing forms
                SET_s16rr, SET_s16ri, SET_s16ir,
                SET_s32rr, SET_s32ri, SET_s32ir,
                SET_s64rr, SET_s64ri, SET_s64ir> {
  // Placeholder record: TableGen doesn't like empty multiclasses.
  def : PatLeaf<(i32 0)>;
}

// ISET_FORMAT bound to the unsigned (uNN) SETP/SET instructions.
multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
  : ISET_FORMAT<OpNode, Mode,
                // predicate-producing forms
                SETP_u16rr, SETP_u16ri, SETP_u16ir,
                SETP_u32rr, SETP_u32ri, SETP_u32ir,
                SETP_u64rr, SETP_u64ri, SETP_u64ir,
                // i32-producing forms
                SET_u16rr, SET_u16ri, SET_u16ir,
                SET_u32rr, SET_u32ri, SET_u32ir,
                SET_u64rr, SET_u64ri, SET_u64ir> {
  // Placeholder record: TableGen doesn't like empty multiclasses.
  def : PatLeaf<(i32 0)>;
}
1580
// Integer comparison patterns. Each set* node is bound to the signed
// instruction family and its setu* counterpart to the unsigned family; both
// members of a pair use the same comparison mode.

// greater-than
defm : ISET_FORMAT_SIGNED<setgt, CmpGT>;
defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;

// less-than
defm : ISET_FORMAT_SIGNED<setlt, CmpLT>;
defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;

// greater-or-equal
defm : ISET_FORMAT_SIGNED<setge, CmpGE>;
defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;

// less-or-equal
defm : ISET_FORMAT_SIGNED<setle, CmpLE>;
defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;

// equal
defm : ISET_FORMAT_SIGNED<seteq, CmpEQ>;
defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;

// not-equal
defm : ISET_FORMAT_SIGNED<setne, CmpNE>;
defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;
1593
// i1 compares, expressed with predicate logic:
//   a != b  ->  a xor b
//   a == b  ->  not (a xor b)
// The setu* forms map to the same instructions as their set* counterparts.
def : Pat<(setne Int1Regs:$lhs, Int1Regs:$rhs),
          (XORb1rr Int1Regs:$lhs, Int1Regs:$rhs)>;
def : Pat<(setune Int1Regs:$lhs, Int1Regs:$rhs),
          (XORb1rr Int1Regs:$lhs, Int1Regs:$rhs)>;

def : Pat<(seteq Int1Regs:$lhs, Int1Regs:$rhs),
          (NOT1 (XORb1rr Int1Regs:$lhs, Int1Regs:$rhs))>;
def : Pat<(setueq Int1Regs:$lhs, Int1Regs:$rhs),
          (NOT1 (XORb1rr Int1Regs:$lhs, Int1Regs:$rhs))>;
1604
// i1 compare -> i32
// The i32 result is selected from the constants -1 and 0 using (a xor b) as
// the predicate:
//   setne: xor true  -> -1, xor false ->  0
//   seteq: xor true  ->  0, xor false -> -1
// The second pattern previously also matched setne, which left seteq without
// an i32 pattern and gave setne two contradictory expansions.
def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
          (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
def : Pat<(i32 (seteq Int1Regs:$a, Int1Regs:$b)),
          (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
1610
1611
1612
// Floating-point comparison patterns for one comparison node.
//   OpNode  - comparison fragment being matched
//   Mode    - comparison-mode leaf used by default
//   ModeFTZ - comparison-mode leaf used for f32 when doF32FTZ holds
// Every f32 shape is emitted twice: first guarded by Requires<[doF32FTZ]>
// with ModeFTZ, then unguarded with Mode. f64 shapes only use Mode.
multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
  //---- Predicate (i1) results ----

  // f32 -> pred, register/register
  def : Pat<(i1 (OpNode Float32Regs:$lhs, Float32Regs:$rhs)),
            (SETP_f32rr Float32Regs:$lhs, Float32Regs:$rhs, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode Float32Regs:$lhs, Float32Regs:$rhs)),
            (SETP_f32rr Float32Regs:$lhs, Float32Regs:$rhs, Mode)>;
  // f32 -> pred, register/immediate
  def : Pat<(i1 (OpNode Float32Regs:$lhs, fpimm:$rhs)),
            (SETP_f32ri Float32Regs:$lhs, fpimm:$rhs, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode Float32Regs:$lhs, fpimm:$rhs)),
            (SETP_f32ri Float32Regs:$lhs, fpimm:$rhs, Mode)>;
  // f32 -> pred, immediate/register
  def : Pat<(i1 (OpNode fpimm:$lhs, Float32Regs:$rhs)),
            (SETP_f32ir fpimm:$lhs, Float32Regs:$rhs, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode fpimm:$lhs, Float32Regs:$rhs)),
            (SETP_f32ir fpimm:$lhs, Float32Regs:$rhs, Mode)>;

  // f64 -> pred
  def : Pat<(i1 (OpNode Float64Regs:$lhs, Float64Regs:$rhs)),
            (SETP_f64rr Float64Regs:$lhs, Float64Regs:$rhs, Mode)>;
  def : Pat<(i1 (OpNode Float64Regs:$lhs, fpimm:$rhs)),
            (SETP_f64ri Float64Regs:$lhs, fpimm:$rhs, Mode)>;
  def : Pat<(i1 (OpNode fpimm:$lhs, Float64Regs:$rhs)),
            (SETP_f64ir fpimm:$lhs, Float64Regs:$rhs, Mode)>;

  //---- i32 results ----

  // f32 -> i32, register/register
  def : Pat<(i32 (OpNode Float32Regs:$lhs, Float32Regs:$rhs)),
            (SET_f32rr Float32Regs:$lhs, Float32Regs:$rhs, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode Float32Regs:$lhs, Float32Regs:$rhs)),
            (SET_f32rr Float32Regs:$lhs, Float32Regs:$rhs, Mode)>;
  // f32 -> i32, register/immediate
  def : Pat<(i32 (OpNode Float32Regs:$lhs, fpimm:$rhs)),
            (SET_f32ri Float32Regs:$lhs, fpimm:$rhs, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode Float32Regs:$lhs, fpimm:$rhs)),
            (SET_f32ri Float32Regs:$lhs, fpimm:$rhs, Mode)>;
  // f32 -> i32, immediate/register
  def : Pat<(i32 (OpNode fpimm:$lhs, Float32Regs:$rhs)),
            (SET_f32ir fpimm:$lhs, Float32Regs:$rhs, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode fpimm:$lhs, Float32Regs:$rhs)),
            (SET_f32ir fpimm:$lhs, Float32Regs:$rhs, Mode)>;

  // f64 -> i32
  def : Pat<(i32 (OpNode Float64Regs:$lhs, Float64Regs:$rhs)),
            (SET_f64rr Float64Regs:$lhs, Float64Regs:$rhs, Mode)>;
  def : Pat<(i32 (OpNode Float64Regs:$lhs, fpimm:$rhs)),
            (SET_f64ri Float64Regs:$lhs, fpimm:$rhs, Mode)>;
  def : Pat<(i32 (OpNode fpimm:$lhs, Float64Regs:$rhs)),
            (SET_f64ir fpimm:$lhs, Float64Regs:$rhs, Mode)>;
}
1664
// Floating-point comparison instantiations: each line binds one comparison
// node to its default and FTZ comparison modes.

// seto* nodes
defm FSetGT  : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
defm FSetLT  : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
defm FSetGE  : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
defm FSetLE  : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
defm FSetEQ  : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
defm FSetNE  : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;

// setu* nodes
defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
defm FSetUGE : FSET_FORMAT<setuge, CmpGEU, CmpGEU_FTZ>;
defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;

// seto / setuo
defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
1681
1682//def ld_param         : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
1683//                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
1684
// Type profiles for the parameter-passing / call-sequence nodes.
// SDTypeProfile<NumResults, NumOperands, Constraints>.

// Declarations
def SDTDeclareParamProfile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTDeclareScalarParamProfile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;

// Parameter loads (scalar, 2-element, 4-element)
def SDTLoadParamProfile
  : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamV2Profile
  : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
def SDTLoadParamV4Profile
  : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;

// Call printing
def SDTPrintCallProfile
  : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTPrintCallUniProfile
  : SDTypeProfile<0, 1, [SDTCisInt<0>]>;

// Parameter stores (scalar, 2-element, 4-element, 32-bit)
def SDTStoreParamProfile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV2Profile
  : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV4Profile
  : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParam32Profile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;

// Call arguments and call targets
def SDTCallArgProfile
  : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTCallArgMarkProfile
  : SDTypeProfile<0, 0, []>;
def SDTCallVoidProfile
  : SDTypeProfile<0, 1, []>;
def SDTCallValProfile
  : SDTypeProfile<1, 0, []>;
def SDTMoveParamProfile
  : SDTypeProfile<1, 1, []>;

// Return-value stores (scalar, 2-element, 4-element)
def SDTStoreRetvalProfile
  : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTStoreRetvalV2Profile
  : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
def SDTStoreRetvalV4Profile
  : SDTypeProfile<0, 5, [SDTCisInt<0>]>;

def SDTPseudoUseParamProfile
  : SDTypeProfile<0, 1, []>;
1707
// Target-specific DAG nodes for parameter passing and the call sequence.
// Each node names its NVPTXISD opcode, its type profile and its properties.

// Declarations
def DeclareParam
  : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareScalarParam
  : SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRetParam
  : SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRet
  : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Parameter loads
def LoadParam
  : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
           [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV2
  : SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
           [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV4
  : SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
           [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;

// Call printing
def PrintCall
  : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintCallUni
  : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Parameter stores
def StoreParam
  : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV2
  : SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV4
  : SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamU32
  : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamS32
  : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Call argument list and call target
def CallArgBegin
  : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArg
  : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LastCallArg
  : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgEnd
  : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVoid
  : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def Prototype
  : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVal
  : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// MoveParam has no node properties.
def MoveParam
  : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;

// Return-value stores
def StoreRetval
  : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
           [SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV2
  : SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile,
           [SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV4
  : SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
           [SDNPHasChain, SDNPSideEffect]>;

def PseudoUseParam
  : SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def RETURNNode
  : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
           [SDNPHasChain, SDNPSideEffect]>;
1765
// Load one value from [retval0+$b] with "ld.param<opstr>". No pattern.
class LoadParamMemInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
              !strconcat("ld.param", opstr, "\t$dst, [retval0+$b];"),
              []>;

// Move retval<$b> into a register with "mov<opstr>"; selected for
// (LoadParam 0, $b).
class LoadParamRegInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
              !strconcat("mov", opstr, "\t$dst, retval$b;"),
              [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;

// Load two values from [retval0+$b] with "ld.param.v2<opstr>". No pattern.
class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
              !strconcat("ld.param.v2", opstr,
                         "\t{{$dst, $dst2}}, [retval0+$b];"),
              []>;

// Load four values from [retval0+$b] with "ld.param.v4<opstr>". No pattern.
class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst, regclass:$dst2,
                    regclass:$dst3, regclass:$dst4),
              (ins i32imm:$b),
              !strconcat("ld.param.v4", opstr,
                         "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
              []>;
1789
// Store one register to [param<$a>+$b] with "st.param<opstr>". No pattern.
class StoreParamInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$val, i32imm:$a, i32imm:$b),
              !strconcat("st.param", opstr, "\t[param$a+$b], $val;"),
              []>;

// Store two registers to [param<$a>+$b] with "st.param.v2<opstr>". No
// pattern.
class StoreParamV2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$val, regclass:$val2, i32imm:$a, i32imm:$b),
              !strconcat("st.param.v2", opstr,
                         "\t[param$a+$b], {{$val, $val2}};"),
              []>;
1802
// Store four registers to [param<$a>+$b] with "st.param.v4<opstr>". No
// pattern.
//   regclass - register class of the four stored values
//   opstr    - type suffix appended to "st.param.v4"
// The input operands are named $val, $val2, $val3, $val4 so that every
// placeholder in the asm string refers to a declared operand. The operand
// list previously declared $val1..$val3, leaving $val4 undefined and $val1
// unused.
class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
      NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3,
                             regclass:$val4, i32imm:$a, i32imm:$b),
                !strconcat(!strconcat("st.param.v4", opstr),
                "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
                []>;
1809
// Store one register to [func_retval0+$a] with "st.param<opstr>". No pattern.
class StoreRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$val, i32imm:$a),
              !strconcat("st.param", opstr, "\t[func_retval0+$a], $val;"),
              []>;

// Store two registers to [func_retval0+$a] with "st.param.v2<opstr>".
class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$val, regclass:$val2, i32imm:$a),
              !strconcat("st.param.v2", opstr,
                         "\t[func_retval0+$a], {{$val, $val2}};"),
              []>;

// Store four registers to [func_retval0+$a] with "st.param.v4<opstr>".
class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$val, regclass:$val2,
                   regclass:$val3, regclass:$val4, i32imm:$a),
              !strconcat("st.param.v4", opstr,
                         "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
              []>;
1829
// Opening text of a "call" statement. (PrintCall N) selects the variant that
// lists retval0 .. retval<N-1>; N == 0 prints a bare "call ".
def PrintCallRetInst1
  : NVPTXInst<(outs), (ins),
              "call (retval0), ",
              [(PrintCall (i32 1))]>;
def PrintCallRetInst2
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1), ",
              [(PrintCall (i32 2))]>;
def PrintCallRetInst3
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2), ",
              [(PrintCall (i32 3))]>;
def PrintCallRetInst4
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2, retval3), ",
              [(PrintCall (i32 4))]>;
def PrintCallRetInst5
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2, retval3, retval4), ",
              [(PrintCall (i32 5))]>;
def PrintCallRetInst6
  : NVPTXInst<(outs), (ins),
"call (retval0, retval1, retval2, retval3, retval4, retval5), ",
              [(PrintCall (i32 6))]>;
def PrintCallRetInst7
  : NVPTXInst<(outs), (ins),
"call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
              [(PrintCall (i32 7))]>;
def PrintCallRetInst8
  : NVPTXInst<(outs), (ins),
              !strconcat("call (retval0, retval1, retval2, retval3, retval4",
                         ", retval5, retval6, retval7), "),
              [(PrintCall (i32 8))]>;

def PrintCallNoRetInst
  : NVPTXInst<(outs), (ins),
              "call ",
              [(PrintCall (i32 0))]>;
1858
// Opening text of a "call.uni" statement. (PrintCallUni N) selects the
// variant that lists retval0 .. retval<N-1>; N == 0 prints a bare
// "call.uni ".
def PrintCallUniRetInst1
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0), ",
              [(PrintCallUni (i32 1))]>;
def PrintCallUniRetInst2
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1), ",
              [(PrintCallUni (i32 2))]>;
def PrintCallUniRetInst3
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2), ",
              [(PrintCallUni (i32 3))]>;
def PrintCallUniRetInst4
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2, retval3), ",
              [(PrintCallUni (i32 4))]>;
def PrintCallUniRetInst5
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2, retval3, retval4), ",
              [(PrintCallUni (i32 5))]>;
def PrintCallUniRetInst6
  : NVPTXInst<(outs), (ins),
"call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
              [(PrintCallUni (i32 6))]>;
def PrintCallUniRetInst7
  : NVPTXInst<(outs), (ins),
"call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
              [(PrintCallUni (i32 7))]>;
def PrintCallUniRetInst8
  : NVPTXInst<(outs), (ins),
              !strconcat("call.uni (retval0, retval1, retval2, retval3, retval4",
                         ", retval5, retval6, retval7), "),
              [(PrintCallUni (i32 8))]>;

def PrintCallUniNoRetInst
  : NVPTXInst<(outs), (ins),
              "call.uni ",
              [(PrintCallUni (i32 0))]>;
1887
// ld.param instantiations. The 8-bit forms use Int16Regs as their register
// class.

// Integer, scalar
def LoadParamMemI64   : LoadParamMemInst<Int64Regs, ".b64">;
def LoadParamMemI32   : LoadParamMemInst<Int32Regs, ".b32">;
def LoadParamMemI16   : LoadParamMemInst<Int16Regs, ".b16">;
def LoadParamMemI8    : LoadParamMemInst<Int16Regs, ".b8">;
// Integer, 2-element
def LoadParamMemV2I64 : LoadParamV2MemInst<Int64Regs, ".b64">;
def LoadParamMemV2I32 : LoadParamV2MemInst<Int32Regs, ".b32">;
def LoadParamMemV2I16 : LoadParamV2MemInst<Int16Regs, ".b16">;
def LoadParamMemV2I8  : LoadParamV2MemInst<Int16Regs, ".b8">;
// Integer, 4-element
def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">;
def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">;
def LoadParamMemV4I8  : LoadParamV4MemInst<Int16Regs, ".b8">;
// Floating point
def LoadParamMemF32   : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64   : LoadParamMemInst<Float64Regs, ".f64">;
def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">;
def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">;
def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">;
1904
// st.param instantiations for integers (scalar and 2-element). The 8-bit
// forms use Int16Regs as their register class.

// Scalar
def StoreParamI64   : StoreParamInst<Int64Regs, ".b64">;
def StoreParamI32   : StoreParamInst<Int32Regs, ".b32">;
def StoreParamI16   : StoreParamInst<Int16Regs, ".b16">;
def StoreParamI8    : StoreParamInst<Int16Regs, ".b8">;

// 2-element
def StoreParamV2I64 : StoreParamV2Inst<Int64Regs, ".b64">;
def StoreParamV2I32 : StoreParamV2Inst<Int32Regs, ".b32">;
def StoreParamV2I16 : StoreParamV2Inst<Int16Regs, ".b16">;
def StoreParamV2I8  : StoreParamV2Inst<Int16Regs, ".b8">;
1914
// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4I32    : StoreParamV4Inst<Int32Regs, ".b32">;
// Hand-written form: stores four 32-bit registers as one vector to
// [param<$a>+$b]. The source operand is a four-element vector, so the
// mnemonic carries the ".v4" qualifier, as the b16/b8/f32 variants do.
def StoreParamV4I32    : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2,
                                               Int32Regs:$val3, Int32Regs:$val4,
                                                i32imm:$a, i32imm:$b),
                "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
                         []>;
1922
// Four-element st.param for 16-bit and 8-bit values; both take their values
// from Int16Regs and store to [param<$a>+$b]. No patterns.
def StoreParamV4I16
  : NVPTXInst<(outs),
              (ins Int16Regs:$val, Int16Regs:$val2,
                   Int16Regs:$val3, Int16Regs:$val4,
                   i32imm:$a, i32imm:$b),
              "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
              []>;

def StoreParamV4I8
  : NVPTXInst<(outs),
              (ins Int16Regs:$val, Int16Regs:$val2,
                   Int16Regs:$val3, Int16Regs:$val4,
                   i32imm:$a, i32imm:$b),
              "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
              []>;
1934
// st.param instantiations for floating point (scalar and 2-element).
def StoreParamF32   : StoreParamInst<Float32Regs, ".f32">;
def StoreParamF64   : StoreParamInst<Float64Regs, ".f64">;
def StoreParamV2F32 : StoreParamV2Inst<Float32Regs, ".f32">;
def StoreParamV2F64 : StoreParamV2Inst<Float64Regs, ".f64">;

// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4F32    : StoreParamV4Inst<Float32Regs, ".f32">;
// Hand-written four-element f32 store to [param<$a>+$b]. No pattern.
def StoreParamV4F32
  : NVPTXInst<(outs),
              (ins Float32Regs:$val, Float32Regs:$val2,
                   Float32Regs:$val3, Float32Regs:$val4,
                   i32imm:$a, i32imm:$b),
              "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
              []>;
1947
1948
// Return-value store instantiations. The 8-bit forms use Int16Regs as their
// register class.

// Integer, scalar
def StoreRetvalI64   : StoreRetvalInst<Int64Regs, ".b64">;
def StoreRetvalI32   : StoreRetvalInst<Int32Regs, ".b32">;
def StoreRetvalI16   : StoreRetvalInst<Int16Regs, ".b16">;
def StoreRetvalI8    : StoreRetvalInst<Int16Regs, ".b8">;
// Integer, 2-element
def StoreRetvalV2I64 : StoreRetvalV2Inst<Int64Regs, ".b64">;
def StoreRetvalV2I32 : StoreRetvalV2Inst<Int32Regs, ".b32">;
def StoreRetvalV2I16 : StoreRetvalV2Inst<Int16Regs, ".b16">;
def StoreRetvalV2I8  : StoreRetvalV2Inst<Int16Regs, ".b8">;
// Integer, 4-element
def StoreRetvalV4I32 : StoreRetvalV4Inst<Int32Regs, ".b32">;
def StoreRetvalV4I16 : StoreRetvalV4Inst<Int16Regs, ".b16">;
def StoreRetvalV4I8  : StoreRetvalV4Inst<Int16Regs, ".b8">;

// Floating point
def StoreRetvalF64   : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32   : StoreRetvalInst<Float32Regs, ".f32">;
def StoreRetvalV2F64 : StoreRetvalV2Inst<Float64Regs, ".f64">;
def StoreRetvalV2F32 : StoreRetvalV2Inst<Float32Regs, ".f32">;
def StoreRetvalV4F32 : StoreRetvalV4Inst<Float32Regs, ".f32">;
1966
// Punctuation pieces of a printed call, plus the return instruction.
// CallArgBegin prints "("; (CallArgEnd 1) prints ");" and (CallArgEnd 0)
// prints ")".
def CallArgBeginInst
  : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1
  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
def CallArgEndInst0
  : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
def RETURNInst
  : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
1971
// A call argument held in a register of class `regclass`.
// Prints the register followed by ", " (so another argument can follow) and
// matches a CallArg node whose first operand is the constant 0.
1972class CallArgInst<NVPTXRegClass regclass> :
1973      NVPTXInst<(outs), (ins regclass:$a), "$a, ",
1974                [(CallArg (i32 0), regclass:$a)]>;
1975
// The final call argument held in a register of class `regclass`.
// Same shape as CallArgInst, but prints the register without a trailing
// separator and matches LastCallArg (first operand constant 0).
1976class LastCallArgInst<NVPTXRegClass regclass> :
1977      NVPTXInst<(outs), (ins regclass:$a), "$a",
1978                [(LastCallArg (i32 0), regclass:$a)]>;
1979
// Register call-argument instructions. Each register class listed here gets
// a CallArgInst (non-final argument) and a LastCallArgInst (final argument).
// Integer register classes, non-final argument:
1980def CallArgI64     : CallArgInst<Int64Regs>;
1981def CallArgI32     : CallArgInst<Int32Regs>;
1982def CallArgI16     : CallArgInst<Int16Regs>;
1983
// Floating-point register classes, non-final argument:
1984def CallArgF64     : CallArgInst<Float64Regs>;
1985def CallArgF32     : CallArgInst<Float32Regs>;
1986
// Integer register classes, final argument:
1987def LastCallArgI64 : LastCallArgInst<Int64Regs>;
1988def LastCallArgI32 : LastCallArgInst<Int32Regs>;
1989def LastCallArgI16 : LastCallArgInst<Int16Regs>;
1990
// Floating-point register classes, final argument:
1991def LastCallArgF64 : LastCallArgInst<Float64Regs>;
1992def LastCallArgF32 : LastCallArgInst<Float32Regs>;
1993
// Call arguments given as a 32-bit immediate rather than a register.
// With CallArg/LastCallArg's first operand equal to 0 the immediate itself
// is printed ("$a"); the non-final form appends ", ".
1994def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ",
1995                              [(CallArg (i32 0), (i32 imm:$a))]>;
1996def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a",
1997                              [(LastCallArg (i32 0), (i32 imm:$a))]>;
1998
// With the first operand equal to 1 the immediate is printed as the suffix
// of a parameter name ("param$a"); the non-final form appends ", ".
1999def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
2000                             [(CallArg (i32 1), (i32 imm:$a))]>;
2001def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
2002                             [(LastCallArg (i32 1), (i32 imm:$a))]>;
2003
// Call-target fragments matched from the CallVoid node. Each prints the
// target followed by ", ":
//   CallVoidInst      - target is a global address (tglobaladdr in a Wrapper).
//   CallVoidInstReg   - target is held in an Int32Regs register.
//   CallVoidInstReg64 - target is held in an Int64Regs register.
2004def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr),
2005                             "$addr, ",
2006                             [(CallVoid (Wrapper tglobaladdr:$addr))]>;
2007def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr),
2008                             "$addr, ",
2009                             [(CallVoid Int32Regs:$addr)]>;
2010def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
2011                             "$addr, ",
2012                             [(CallVoid Int64Regs:$addr)]>;
// Prints ", prototype_<val>;" for the Prototype node, where <val> is the
// node's immediate operand.
2013def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val),
2014                             ", prototype_$val;",
2015                             [(Prototype (i32 imm:$val))]>;
2016
// Declarations of the return-value slot "retval<num>".
// DeclareRetMemInst prints an aligned .b8 array of $size elements and is
// matched from DeclareRetParam (operands: align, size, num).
2017def DeclareRetMemInst : NVPTXInst<(outs),
2018  (ins i32imm:$align, i32imm:$size, i32imm:$num),
2019         ".param .align $align .b8 retval$num[$size];",
2020         [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
// The two scalar forms are both matched from DeclareRet and differ only in
// its constant first operand: 1 prints a ".param" declaration, 2 prints a
// ".reg" declaration. $size is printed as the bit width after ".b".
2021def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
2022         ".param .b$size retval$num;",
2023         [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
2024def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
2025         ".reg .b$size retval$num;",
2026         [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
2027
// Declarations of the outgoing call parameter "param<a>".
// DeclareParamInst prints an aligned .b8 array of $size elements and is
// matched from DeclareParam (operands: align, a, size).
2028def DeclareParamInst : NVPTXInst<(outs),
2029  (ins i32imm:$align, i32imm:$a, i32imm:$size),
2030         ".param .align $align .b8 param$a[$size];",
2031         [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
// The two scalar forms are both matched from DeclareScalarParam and differ
// only in its constant last operand: 0 prints a ".param" declaration, 1
// prints a ".reg" declaration. $size is printed as the bit width after ".b".
2032def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
2033         ".param .b$size param$a;",
2034         [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
2035def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
2036         ".reg .b$size param$a;",
2037         [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
2038
// Register-to-register move selected for the MoveParam node.
//   regclass - register class of both $dst and $src.
//   asmstr   - type suffix appended to "mov" (for example ".b32").
// The printed form is "mov<asmstr>\t$dst, $src;".
2039class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
2040      NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
2041                !strconcat(!strconcat("mov", asmstr), "\t$dst, $src;"),
2042                [(set regclass:$dst, (MoveParam regclass:$src))]>;
2043
// MoveParam instructions for each register class. All but the 16-bit integer
// case instantiate MoveParamInst.
2044def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
2045def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
// The Int16Regs variant is written out by hand and prints a cvt instead of
// a mov.
// NOTE(review): the string says "cvt.u16.u32" although $src is declared as
// Int16Regs -- confirm that the .u32 source type is intentional.
2046def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
2047                   "cvt.u16.u32\t$dst, $src;",
2048                   [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
2049def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
2050def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
2051
// Instruction selected for the PseudoUseParam node. It takes one register of
// class `regclass`, defines nothing, and its asm string begins with "//", so
// the printed text is only the remark "// Pseudo use of <reg>".
2052class PseudoUseParamInst<NVPTXRegClass regclass> :
2053      NVPTXInst<(outs), (ins regclass:$src),
2054      "// Pseudo use of $src",
2055      [(PseudoUseParam regclass:$src)]>;
2056
// One PseudoUseParamInst per scalar register class.
2057def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
2058def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
2059def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
2060def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
2061def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
2062
2063
2064//
2065// Load / Store Handling
2066//
// Scalar load instructions producing one register of class `regclass`.
// Every variant takes the same leading operands -- four LdStCode values
// ($isVol, $addsp, $Vec, $Sign), printed through the "volatile", "addsp",
// "vec" and "sign" modifiers, and the immediate $fromWidth -- and differs
// only in how the address is supplied:
//   _avar    : imem address
//   _areg    : Int32Regs address
//   _areg_64 : Int64Regs address
//   _ari     : Int32Regs address + immediate offset
//   _ari_64  : Int64Regs address + immediate offset
//   _asi     : imem address      + immediate offset
// All variants have an empty pattern list.
// The *_64 variants split their asm string at a different point than the
// others, but the concatenated text has the same layout.
2067multiclass LD<NVPTXRegClass regclass> {
2068  def _avar : NVPTXInst<(outs regclass:$dst),
2069    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2070      i32imm:$fromWidth, imem:$addr),
2071!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2072           "$fromWidth \t$dst, [$addr];"), []>;
2073  def _areg : NVPTXInst<(outs regclass:$dst),
2074    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2075      i32imm:$fromWidth, Int32Regs:$addr),
2076!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2077           "$fromWidth \t$dst, [$addr];"), []>;
2078  def _areg_64 : NVPTXInst<(outs regclass:$dst),
2079    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2080     i32imm:$fromWidth, Int64Regs:$addr),
2081     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
2082                " \t$dst, [$addr];"), []>;
2083  def _ari : NVPTXInst<(outs regclass:$dst),
2084    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2085      i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2086!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2087           "$fromWidth \t$dst, [$addr+$offset];"), []>;
2088  def _ari_64 : NVPTXInst<(outs regclass:$dst),
2089    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2090     i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2091    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
2092               " \t$dst, [$addr+$offset];"), []>;
2093  def _asi : NVPTXInst<(outs regclass:$dst),
2094    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2095      i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2096!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2097           "$fromWidth \t$dst, [$addr+$offset];"), []>;
2098}
2099
// Instantiate the scalar loads for every value type. All of them are marked
// mayLoad and neverHasSideEffects. LD_i8 and LD_i16 both use Int16Regs as
// the destination register class.
2100let mayLoad=1, neverHasSideEffects=1 in {
2101defm LD_i8  : LD<Int16Regs>;
2102defm LD_i16 : LD<Int16Regs>;
2103defm LD_i32 : LD<Int32Regs>;
2104defm LD_i64 : LD<Int64Regs>;
2105defm LD_f32 : LD<Float32Regs>;
2106defm LD_f64 : LD<Float64Regs>;
2107}
2108
// Scalar store instructions writing one register of class `regclass`.
// Operand order in every variant: the value $src, four LdStCode values
// ($isVol, $addsp, $Vec, $Sign), the immediate $toWidth, then the address.
// The variants differ only in how the address is supplied:
//   _avar    : imem address
//   _areg    : Int32Regs address
//   _areg_64 : Int64Regs address
//   _ari     : Int32Regs address + immediate offset
//   _ari_64  : Int64Regs address + immediate offset
//   _asi     : imem address      + immediate offset
// All variants have no outputs and an empty pattern list.
// The *_64 variants place the space before "\t" in the first string fragment
// instead of the second; the concatenated text is the same.
2109multiclass ST<NVPTXRegClass regclass> {
2110  def _avar : NVPTXInst<(outs),
2111    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2112      LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
2113!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2114           " \t[$addr], $src;"), []>;
2115  def _areg : NVPTXInst<(outs),
2116    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2117      LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
2118!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2119           " \t[$addr], $src;"), []>;
2120  def _areg_64 : NVPTXInst<(outs),
2121    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2122     LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
2123  !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
2124               "\t[$addr], $src;"), []>;
2125  def _ari : NVPTXInst<(outs),
2126    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2127      LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
2128!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2129           " \t[$addr+$offset], $src;"), []>;
2130  def _ari_64 : NVPTXInst<(outs),
2131    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2132     LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
2133  !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
2134               "\t[$addr+$offset], $src;"), []>;
2135  def _asi : NVPTXInst<(outs),
2136    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2137      LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
2138!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2139           " \t[$addr+$offset], $src;"), []>;
2140}
2141
// Instantiate the scalar stores for every value type. All of them are marked
// mayStore and neverHasSideEffects. ST_i8 and ST_i16 both use Int16Regs as
// the source register class.
2142let mayStore=1, neverHasSideEffects=1 in {
2143defm ST_i8  : ST<Int16Regs>;
2144defm ST_i16 : ST<Int16Regs>;
2145defm ST_i32 : ST<Int32Regs>;
2146defm ST_i64 : ST<Int64Regs>;
2147defm ST_f32 : ST<Float32Regs>;
2148defm ST_f64 : ST<Float64Regs>;
2149}
2150
2151// The following is used only in and after vector elementizations.
2152// Vector elementization happens at the machine instruction level, so the
2153// following instruction
2154// never appears in the DAG.
//
// Vector load instructions producing two (_v2_*) or four (_v4_*) registers
// of class `regclass`. The input operands are the same as for the scalar LD
// multiclass: four LdStCode values ($isVol, $addsp, $Vec, $Sign), the
// immediate $fromWidth, then the address. For each element count there are
// six addressing variants:
//   _avar    : imem address
//   _areg    : Int32Regs address
//   _areg_64 : Int64Regs address
//   _ari     : Int32Regs address + immediate offset
//   _ari_64  : Int64Regs address + immediate offset
//   _asi     : imem address      + immediate offset
// The destination registers are listed inside "{{ ... }}" in the asm string.
// All variants have an empty pattern list.
2155multiclass LD_VEC<NVPTXRegClass regclass> {
2156  def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2157    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2158      i32imm:$fromWidth, imem:$addr),
2159    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2160               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2161  def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2162    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2163      i32imm:$fromWidth, Int32Regs:$addr),
2164    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2165               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2166  def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2167    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2168     i32imm:$fromWidth, Int64Regs:$addr),
2169    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2170               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2171  def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2172    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2173      i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2174    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2175               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2176  def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2177    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2178     i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2179    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2180               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2181  def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2182    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2183      i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2184    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2185               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2186  def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2187      regclass:$dst3, regclass:$dst4),
2188    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2189      i32imm:$fromWidth, imem:$addr),
2190    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2191               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2192  def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2193      regclass:$dst4),
2194    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2195      i32imm:$fromWidth, Int32Regs:$addr),
2196    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2197               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2198  def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2199                               regclass:$dst3, regclass:$dst4),
2200    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2201     i32imm:$fromWidth, Int64Regs:$addr),
2202    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2203               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2204  def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2205      regclass:$dst4),
2206    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2207      i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2208    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2209               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2210                []>;
2211  def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2212                              regclass:$dst3, regclass:$dst4),
2213    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2214     i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2215    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2216               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2217    []>;
2218  def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2219      regclass:$dst4),
2220    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2221      i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2222    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2223               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2224                []>;
2225}
// Instantiate the vector loads for every element type. All of them are
// marked mayLoad and neverHasSideEffects. LDV_i8 and LDV_i16 both use
// Int16Regs as the element register class.
2226let mayLoad=1, neverHasSideEffects=1 in {
2227defm LDV_i8  : LD_VEC<Int16Regs>;
2228defm LDV_i16 : LD_VEC<Int16Regs>;
2229defm LDV_i32 : LD_VEC<Int32Regs>;
2230defm LDV_i64 : LD_VEC<Int64Regs>;
2231defm LDV_f32 : LD_VEC<Float32Regs>;
2232defm LDV_f64 : LD_VEC<Float64Regs>;
2233}
2234
// Vector store instructions writing two (_v2_*) or four (_v4_*) registers of
// class `regclass`. Operand order in every variant: the source registers
// ($src1..), four LdStCode values ($isVol, $addsp, $Vec, $Sign), the width
// immediate, then the address. For each element count there are six
// addressing variants:
//   _avar    : imem address
//   _areg    : Int32Regs address
//   _areg_64 : Int64Regs address
//   _ari     : Int32Regs address + immediate offset
//   _ari_64  : Int64Regs address + immediate offset
//   _asi     : imem address      + immediate offset
// Note that the width operand is named $fromWidth here, whereas the scalar
// ST multiclass calls it $toWidth.
// All variants have no outputs and an empty pattern list.
2235multiclass ST_VEC<NVPTXRegClass regclass> {
2236  def _v2_avar : NVPTXInst<(outs),
2237    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2238      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
2239    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2240               "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2241  def _v2_areg : NVPTXInst<(outs),
2242    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2243      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
2244    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2245               "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2246  def _v2_areg_64 : NVPTXInst<(outs),
2247    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2248     LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
2249    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2250               "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2251  def _v2_ari : NVPTXInst<(outs),
2252    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2253      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
2254      i32imm:$offset),
2255    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2256               "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2257  def _v2_ari_64 : NVPTXInst<(outs),
2258    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2259     LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
2260     i32imm:$offset),
2261    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2262               "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2263  def _v2_asi : NVPTXInst<(outs),
2264    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2265      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
2266      i32imm:$offset),
2267    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2268               "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2269  def _v4_avar : NVPTXInst<(outs),
2270    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2271      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2272      i32imm:$fromWidth, imem:$addr),
2273    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2274               "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2275  def _v4_areg : NVPTXInst<(outs),
2276    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2277      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2278      i32imm:$fromWidth, Int32Regs:$addr),
2279    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2280               "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2281  def _v4_areg_64 : NVPTXInst<(outs),
2282    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2283     LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2284     i32imm:$fromWidth, Int64Regs:$addr),
2285    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2286               "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2287  def _v4_ari : NVPTXInst<(outs),
2288    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2289      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2290      i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2291    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2292               "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2293    []>;
2294  def _v4_ari_64 : NVPTXInst<(outs),
2295    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2296     LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2297     i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2298    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2299               "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2300     []>;
2301  def _v4_asi : NVPTXInst<(outs),
2302    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2303      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2304      i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2305    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2306               "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2307    []>;
2308}
// Instantiate the vector stores for every element type. All of them are
// marked mayStore and neverHasSideEffects. STV_i8 and STV_i16 both use
// Int16Regs as the element register class.
2309let mayStore=1, neverHasSideEffects=1 in {
2310defm STV_i8  : ST_VEC<Int16Regs>;
2311defm STV_i16 : ST_VEC<Int16Regs>;
2312defm STV_i32 : ST_VEC<Int32Regs>;
2313defm STV_i64 : ST_VEC<Int64Regs>;
2314defm STV_f32 : ST_VEC<Float32Regs>;
2315defm STV_f64 : ST_VEC<Float64Regs>;
2316}
2317
2318
2319//---- Conversion ----
2320
// Bit-preserving move between two register classes, selected for the
// `bitconvert` node.
//   SzStr       - bit width printed after "mov.b" (for example "32").
//   regclassIn  - register class of the source operand $a.
//   regclassOut - register class of the result $d.
2321class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
2322  NVPTXRegClass regclassOut> :
2323           NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
2324           !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
2325     [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
2326
// Bitconverts between integer and floating-point registers of equal width,
// in both directions, for 32 and 64 bits.
2327def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
2328def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
2329def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
2330def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
2331
// Integer -> floating-point conversion patterns. Every pattern passes the
// CvtRN mode to the CVT instruction. i1 sources are first turned into an
// i32 1/0 with SELP_u32ii and then converted from 32 bits.
// NOTE(review): the sint_to_fp patterns for i1 also select 1 (not -1) for a
// true predicate -- confirm this is the intended value for a signed i1.
2332// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
2333// we cannot specify floating-point literals in isel patterns.  Therefore, we
2334// use an integer selp to select either 1 or 0 and then cvt to floating-point.
2335
2336// sint -> f32
2337def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
2338          (CVT_f32_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
2339def : Pat<(f32 (sint_to_fp Int16Regs:$a)),
2340          (CVT_f32_s16 Int16Regs:$a, CvtRN)>;
2341def : Pat<(f32 (sint_to_fp Int32Regs:$a)),
2342          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
2343def : Pat<(f32 (sint_to_fp Int64Regs:$a)),
2344          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
2345
2346// uint -> f32
2347def : Pat<(f32 (uint_to_fp Int1Regs:$a)),
2348          (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
2349def : Pat<(f32 (uint_to_fp Int16Regs:$a)),
2350          (CVT_f32_u16 Int16Regs:$a, CvtRN)>;
2351def : Pat<(f32 (uint_to_fp Int32Regs:$a)),
2352          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
2353def : Pat<(f32 (uint_to_fp Int64Regs:$a)),
2354          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
2355
2356// sint -> f64
2357def : Pat<(f64 (sint_to_fp Int1Regs:$a)),
2358          (CVT_f64_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
2359def : Pat<(f64 (sint_to_fp Int16Regs:$a)),
2360          (CVT_f64_s16 Int16Regs:$a, CvtRN)>;
2361def : Pat<(f64 (sint_to_fp Int32Regs:$a)),
2362          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
2363def : Pat<(f64 (sint_to_fp Int64Regs:$a)),
2364          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
2365
2366// uint -> f64
2367def : Pat<(f64 (uint_to_fp Int1Regs:$a)),
2368          (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
2369def : Pat<(f64 (uint_to_fp Int16Regs:$a)),
2370          (CVT_f64_u16 Int16Regs:$a, CvtRN)>;
2371def : Pat<(f64 (uint_to_fp Int32Regs:$a)),
2372          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
2373def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
2374          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
2375
2376
// Floating-point -> integer conversion patterns.
// Results of 16, 32 and 64 bits use the CvtRZI mode. For f32 sources each
// result type has two patterns: a CvtRZI_FTZ form guarded by
// Requires<[doF32FTZ]>, listed first, and an unguarded CvtRZI form. f64
// sources only have the CvtRZI form.
// i1 results are produced by bit-casting the input to an integer of the same
// width and comparing it with 0 using CmpEQ.
// NOTE(review): CmpEQ makes the predicate true when the raw bits are all
// zero -- confirm that this polarity is what fp_to_sint / fp_to_uint to i1
// is meant to produce.
2377// f32 -> sint
2378def : Pat<(i1 (fp_to_sint Float32Regs:$a)),
2379          (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
2380def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
2381          (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
2382def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
2383          (CVT_s16_f32 Float32Regs:$a, CvtRZI)>;
2384def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
2385          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
2386def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
2387          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
2388def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
2389          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
2390def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
2391          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
2392
2393// f32 -> uint
2394def : Pat<(i1 (fp_to_uint Float32Regs:$a)),
2395          (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
2396def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
2397          (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
2398def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
2399          (CVT_u16_f32 Float32Regs:$a, CvtRZI)>;
2400def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
2401          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
2402def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
2403          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
2404def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
2405          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
2406def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
2407          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
2408
2409// f64 -> sint
2410def : Pat<(i1 (fp_to_sint Float64Regs:$a)),
2411          (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>;
2412def : Pat<(i16 (fp_to_sint Float64Regs:$a)),
2413          (CVT_s16_f64 Float64Regs:$a, CvtRZI)>;
2414def : Pat<(i32 (fp_to_sint Float64Regs:$a)),
2415          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
2416def : Pat<(i64 (fp_to_sint Float64Regs:$a)),
2417          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
2418
2419// f64 -> uint
2420def : Pat<(i1 (fp_to_uint Float64Regs:$a)),
2421          (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>;
2422def : Pat<(i16 (fp_to_uint Float64Regs:$a)),
2423          (CVT_u16_f64 Float64Regs:$a, CvtRZI)>;
2424def : Pat<(i32 (fp_to_uint Float64Regs:$a)),
2425          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
2426def : Pat<(i64 (fp_to_uint Float64Regs:$a)),
2427          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
2428
// Extensions of an i1 predicate to i16/i32/i64. Each one is a select between
// two immediates, controlled by the predicate:
//   sext   -> -1 when true, 0 when false (signed SELP)
//   zext   ->  1 when true, 0 when false (unsigned SELP)
//   anyext -> -1 when true, 0 when false (unsigned SELP)
2429// sext i1
2430def : Pat<(i16 (sext Int1Regs:$a)),
2431          (SELP_s16ii -1, 0, Int1Regs:$a)>;
2432def : Pat<(i32 (sext Int1Regs:$a)),
2433          (SELP_s32ii -1, 0, Int1Regs:$a)>;
2434def : Pat<(i64 (sext Int1Regs:$a)),
2435          (SELP_s64ii -1, 0, Int1Regs:$a)>;
2436
2437// zext i1
2438def : Pat<(i16 (zext Int1Regs:$a)),
2439          (SELP_u16ii 1, 0, Int1Regs:$a)>;
2440def : Pat<(i32 (zext Int1Regs:$a)),
2441          (SELP_u32ii 1, 0, Int1Regs:$a)>;
2442def : Pat<(i64 (zext Int1Regs:$a)),
2443          (SELP_u64ii 1, 0, Int1Regs:$a)>;
2444
2445// anyext i1
2446def : Pat<(i16 (anyext Int1Regs:$a)),
2447          (SELP_u16ii -1, 0, Int1Regs:$a)>;
2448def : Pat<(i32 (anyext Int1Regs:$a)),
2449          (SELP_u32ii -1, 0, Int1Regs:$a)>;
2450def : Pat<(i64 (anyext Int1Regs:$a)),
2451          (SELP_u64ii -1, 0, Int1Regs:$a)>;
2452
// Extensions of i16 and i32 values to wider integers, all implemented with
// CVT instructions in CvtNONE mode. sext uses the signed (s*_s*) CVT; zext
// and anyext both use the unsigned (u*_u*) CVT, so anyext is selected to the
// same instruction as zext.
2453// sext i16
2454def : Pat<(i32 (sext Int16Regs:$a)),
2455          (CVT_s32_s16 Int16Regs:$a, CvtNONE)>;
2456def : Pat<(i64 (sext Int16Regs:$a)),
2457          (CVT_s64_s16 Int16Regs:$a, CvtNONE)>;
2458
2459// zext i16
2460def : Pat<(i32 (zext Int16Regs:$a)),
2461          (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
2462def : Pat<(i64 (zext Int16Regs:$a)),
2463          (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;
2464
2465// anyext i16
2466def : Pat<(i32 (anyext Int16Regs:$a)),
2467          (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
2468def : Pat<(i64 (anyext Int16Regs:$a)),
2469          (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;
2470
2471// sext i32
2472def : Pat<(i64 (sext Int32Regs:$a)),
2473          (CVT_s64_s32 Int32Regs:$a, CvtNONE)>;
2474
2475// zext i32
2476def : Pat<(i64 (zext Int32Regs:$a)),
2477          (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;
2478
2479// anyext i32
2480def : Pat<(i64 (anyext Int32Regs:$a)),
2481          (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;
2482
2483
// Integer truncation patterns. Truncation to i16/i32 uses an unsigned CVT in
// CvtNONE mode. Truncation to i1 masks the source with 1 (AND with an
// immediate) and sets the predicate when the masked value equals 1, i.e. the
// predicate reflects bit 0 of the source.
2484// truncate i64
2485def : Pat<(i32 (trunc Int64Regs:$a)),
2486          (CVT_u32_u64 Int64Regs:$a, CvtNONE)>;
2487def : Pat<(i16 (trunc Int64Regs:$a)),
2488          (CVT_u16_u64 Int64Regs:$a, CvtNONE)>;
2489def : Pat<(i1 (trunc Int64Regs:$a)),
2490          (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>;
2491
2492// truncate i32
2493def : Pat<(i16 (trunc Int32Regs:$a)),
2494          (CVT_u16_u32 Int32Regs:$a, CvtNONE)>;
2495def : Pat<(i1 (trunc Int32Regs:$a)),
2496          (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>;
2497
2498// truncate i16
2499def : Pat<(i1 (trunc Int16Regs:$a)),
2500          (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>;
2501
// In-register sign extension: each (register width, source width) pair maps
// to the CVT_INREG instruction named after it. The register class of the
// result is the same as that of the operand.
2502// sext_inreg
2503def : Pat<(sext_inreg Int16Regs:$a, i8), (CVT_INREG_s16_s8 Int16Regs:$a)>;
2504def : Pat<(sext_inreg Int32Regs:$a, i8), (CVT_INREG_s32_s8 Int32Regs:$a)>;
2505def : Pat<(sext_inreg Int32Regs:$a, i16), (CVT_INREG_s32_s16 Int32Regs:$a)>;
2506def : Pat<(sext_inreg Int64Regs:$a, i8), (CVT_INREG_s64_s8 Int64Regs:$a)>;
2507def : Pat<(sext_inreg Int64Regs:$a, i16), (CVT_INREG_s64_s16 Int64Regs:$a)>;
2508def : Pat<(sext_inreg Int64Regs:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>;
2509
2510
2511// Select instructions with 32-bit predicates
// The 32-bit condition is first reduced to a predicate: it is masked with 1
// and compared for equality with 1, so only bit 0 of $pred decides the
// result. That predicate then drives the register-register SELP for the
// value type being selected.
2512def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
2513          (SELP_b16rr Int16Regs:$a, Int16Regs:$b,
2514          (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
2515def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
2516          (SELP_b32rr Int32Regs:$a, Int32Regs:$b,
2517          (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
2518def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
2519          (SELP_b64rr Int64Regs:$a, Int64Regs:$b,
2520          (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
2521def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
2522          (SELP_f32rr Float32Regs:$a, Float32Regs:$b,
2523          (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
2524def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
2525          (SELP_f64rr Float64Regs:$a, Float64Regs:$b,
2526          (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
2527
2528
2529// pack a set of smaller int registers to a larger int register
// Each instruction prints a single mov.b32 / mov.b64 whose source is a
// brace-enclosed list of the smaller registers. Despite the heading, the
// last def (V2F32toF64) packs two f32 registers into an f64 register.
// None of these instructions has a selection pattern.
2530def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
2531                          (ins Int16Regs:$s1, Int16Regs:$s2,
2532                               Int16Regs:$s3, Int16Regs:$s4),
2533                          "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
2534                          []>;
2535def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
2536                          (ins Int16Regs:$s1, Int16Regs:$s2),
2537                          "mov.b32\t$d, {{$s1, $s2}};",
2538                          []>;
2539def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
2540                          (ins Int32Regs:$s1, Int32Regs:$s2),
2541                          "mov.b64\t$d, {{$s1, $s2}};",
2542                          []>;
2543def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
2544                          (ins Float32Regs:$s1, Float32Regs:$s2),
2545                          "mov.b64\t$d, {{$s1, $s2}};",
2546                          []>;
2547
2548// unpack a larger int register to a set of smaller int registers
// Each instruction prints a single mov.b32 / mov.b64 whose destination is a
// brace-enclosed list of the smaller registers. Despite the heading, the
// last def (F64toV2F32) unpacks an f64 register into two f32 registers.
// None of these instructions has a selection pattern.
2549def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
2550                                 Int16Regs:$d3, Int16Regs:$d4),
2551                           (ins Int64Regs:$s),
2552                           "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
2553                          []>;
2554def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
2555                           (ins Int32Regs:$s),
2556                           "mov.b32\t{{$d1, $d2}}, $s;",
2557                          []>;
2558def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
2559                           (ins Int64Regs:$s),
2560                           "mov.b64\t{{$d1, $d2}}, $s;",
2561                          []>;
2562def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
2563                           (ins Float64Regs:$s),
2564                           "mov.b64\t{{$d1, $d2}}, $s;",
2565                          []>;
2566
2567// Count leading zeros
// CLZr32 / CLZr64 print clz.b32 / clz.b64. Both define an Int32Regs result
// (also for the 64-bit input) and carry no pattern of their own; the Pat
// records below select them. ctlz and ctlz_zero_undef are mapped to the
// same expansion for every width.
2568def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
2569                       "clz.b32\t$d, $a;",
2570                       []>;
2571def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
2572                       "clz.b64\t$d, $a;",
2573                       []>;
2574
2575// 32-bit has a direct PTX instruction
2576def : Pat<(ctlz Int32Regs:$a),
2577          (CLZr32 Int32Regs:$a)>;
2578def : Pat<(ctlz_zero_undef Int32Regs:$a),
2579          (CLZr32 Int32Regs:$a)>;
2580
2581// For 64-bit, the result in PTX is actually 32-bit so we zero-extend
2582// to 64-bit to match the LLVM semantics
2583def : Pat<(ctlz Int64Regs:$a),
2584          (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
2585def : Pat<(ctlz_zero_undef Int64Regs:$a),
2586          (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
2587
2588// For 16-bit, we zero-extend to 32-bit, then trunc the result back
2589// to 16-bits (ctlz of a 16-bit value is guaranteed to require less
2590// than 16 bits to store). We also need to subtract 16 because the
2591// high-order 16 zeros were counted.
2592def : Pat<(ctlz Int16Regs:$a),
2593          (SUBi16ri (CVT_u16_u32 (CLZr32
2594            (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
2595           CvtNONE), 16)>;
2596def : Pat<(ctlz_zero_undef Int16Regs:$a),
2597          (SUBi16ri (CVT_u16_u32 (CLZr32
2598            (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
2599           CvtNONE), 16)>;
2600
// Population count.
// Both variants write their result to a 32-bit register, regardless of the
// width of the source operand.  Selection is done by the standalone patterns
// that follow, so no pattern is attached here.
def POPCr32
  : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
              "popc.b32\t$d, $a;", []>;
def POPCr64
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "popc.b64\t$d, $a;", []>;
2608
// 32-bit ctpop maps directly onto popc.b32.
def : Pat<(ctpop Int32Regs:$src), (POPCr32 Int32Regs:$src)>;
2612
// 64-bit ctpop: popc.b64 produces a 32-bit result, while LLVM expects a
// result of the operand's type, so the count is zero-extended to 64 bits.
def : Pat<(ctpop Int64Regs:$src),
          (CVT_u64_u32 (POPCr64 Int64Regs:$src), CvtNONE)>;
2617
// 16-bit ctpop is computed through the 32-bit instruction: zero-extend the
// operand to 32 bits, run popc.b32, then truncate the count back to 16 bits
// (the population count of a 16-bit value is guaranteed to fit in fewer than
// 16 bits).
def : Pat<(ctpop Int16Regs:$src),
          (CVT_u16_u32
            (POPCr32 (CVT_u32_u16 Int16Regs:$src, CvtNONE)),
            CvtNONE)>;
2624
// fround f64 -> f32.
// The first pattern is guarded by doF32FTZ and selects the CvtRN_FTZ
// conversion mode; the unguarded pattern below it uses plain CvtRN.
def : Pat<(f32 (fround Float64Regs:$src)),
          (CVT_f32_f64 Float64Regs:$src, CvtRN_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(f32 (fround Float64Regs:$src)),
          (CVT_f32_f64 Float64Regs:$src, CvtRN)>;
2630
// fextend f32 -> f64.
// The first pattern is guarded by doF32FTZ and selects the CvtNONE_FTZ
// conversion mode; the unguarded pattern below it uses plain CvtNONE.
def : Pat<(f64 (fextend Float32Regs:$src)),
          (CVT_f64_f32 Float32Regs:$src, CvtNONE_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(f64 (fextend Float32Regs:$src)),
          (CVT_f64_f32 Float32Regs:$src, CvtNONE)>;
2636
// DAG node for NVPTXISD::RET_FLAG: no operands or results in its type
// profile; it carries a chain and optionally consumes incoming glue.
def retflag
  : SDNode<"NVPTXISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue]>;
2639
//-----------------------------------
// Control-flow
//-----------------------------------

// Every instruction in this group is marked as a terminator.
let isTerminator = 1 in {
  // Function return, selected from the retflag node.
  let isReturn = 1, isBarrier = 1 in
    def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;

  // Conditional branches guarded by a predicate register.
  let isBranch = 1 in {
    // Branch when predicate $a is set; selected from brcond on an i1.
    def CBranch
      : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
                  "@$a bra \t$target;",
                  [(brcond Int1Regs:$a, bb:$target)]>;

    // Branch when predicate $a is clear ("@!" guard).  It has no inline
    // pattern; it is only selected through a separate Pat record.
    def CBranchOther
      : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
                  "@!$a bra \t$target;",
                  []>;
  }

  // Unconditional branch, selected from br.
  let isBranch = 1, isBarrier = 1 in
    def GOTO
      : NVPTXInst<(outs), (ins brtarget:$target),
                  "bra.uni \t$target;",
                  [(br bb:$target)]>;
}
2662
// brcond on a 32-bit condition: first materialize a predicate by comparing
// the value against zero (not-equal), then branch on that predicate.
def : Pat<(brcond Int32Regs:$cond, bb:$dest),
          (CBranch (SETP_u32ri Int32Regs:$cond, 0, CmpNE), bb:$dest)>;
2665
// SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
// conditional branch if the target block is the next block, so that the code
// can fall through to the target block.
// The inversion is done by 'xor condition, 1', which will be translated to
// (setne condition, -1).
// Since PTX supports '@!pred bra target', we should use it.
def : Pat<(brcond (i1 (setne Int1Regs:$pred, -1)), bb:$dest),
          (CBranchOther Int1Regs:$pred, bb:$dest)>;
2675
// Call

// Type profiles for the call-sequence markers: the start marker takes one
// i32 operand, the end marker takes two.
def SDT_NVPTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
def SDT_NVPTXCallSeqEnd
  : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;

// DAG nodes for ISD::CALLSEQ_START / ISD::CALLSEQ_END.  Both are chained,
// produce glue and are flagged as having side effects; the end marker may
// additionally consume incoming glue.
def callseq_start
  : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
           [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def callseq_end
  : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPSideEffect]>;
2686
// NVPTXISD::CALL node: no results and a single i32 operand.  It is chained,
// may consume incoming glue and produces outgoing glue.
def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def call
  : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
// Operand type used for the callee of CALL.
def calltarget : Operand<i32>;

// Call instruction.  It carries no inline pattern; selection is handled by
// separate Pat records.
let isCall = 1 in {
  def CALL
    : NVPTXInst<(outs), (ins calltarget:$dst),
                "call \t$dst, (1);", []>;
}
2695
// Select CALL for call nodes whose callee is a target global address or a
// target external symbol.
def : Pat<(call tglobaladdr:$callee), (CALL tglobaladdr:$callee)>;
def : Pat<(call texternalsym:$callee), (CALL texternalsym:$callee)>;
2700
// Pseudo instructions.
// Thin wrapper class that forwards all of its arguments unchanged to
// NVPTXInst.
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
  : NVPTXInst<outs, ins, asmstr, pattern>;
2704
// Call-sequence markers, selected from callseq_start / callseq_end.  Their
// asm strings print the braces that surround the call sequence and the
// declaration of temp_param_reg.
// @TODO: We use some tricks here to emit curly braces.  Can we clean this up
// a bit without TableGen modifications?
def Callseq_Start
  : NVPTXInst<(outs), (ins i32imm:$amt),
              "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
              [(callseq_start timm:$amt)]>;
def Callseq_End
  : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
              "\n\t//{{\n\t}}// Callseq End $amt1",
              [(callseq_end timm:$amt1, timm:$amt2)]>;
2713
// Trap instruction: selected from the generic trap node and printed as
// "trap;".
def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>;
2719
// Call prototype wrapper.

// Type profile: no results, one integer operand.
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;

// DAG node for NVPTXISD::CallPrototype: chained, consumes and produces glue,
// and is flagged as having side effects.
def CallPrototype
  : SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Operand printed through the custom printProtoIdent print method.
def ProtoIdent : Operand<i32> {
  let PrintMethod = "printProtoIdent";
}

// Instruction whose entire asm text is the printed $ident operand.
def CALL_PROTOTYPE
  : NVPTXInst<(outs), (ins ProtoIdent:$ident),
              "$ident",
              [(CallPrototype (i32 texternalsym:$ident))]>;
2731
2732
2733
2734include "NVPTXIntrinsics.td"
2735
2736
2737//-----------------------------------
2738// Notes
2739//-----------------------------------
// BSWAP is currently expanded. The following would be more efficient:
// - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit
//   registers
// - for sm_20, use prmt (use vector scalar mov to get the pack and
//   unpack). sm_20 supports native 32-bit registers, but not native 16-bit
//   registers.
2745