ARMInstrNEON.td revision 70e48b23a3455e4689ee24cec4eb153d67223e86
1//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the ARM NEON instruction set.
11//
12//===----------------------------------------------------------------------===//
13
14//===----------------------------------------------------------------------===//
15// NEON-specific DAG Nodes.
16//===----------------------------------------------------------------------===//
17
// Type profile shared by the NEON vector compare nodes: one result and two
// operands.  The result is constrained to an integer type, and the two
// operands (indices 1 and 2) must have the same type as each other.
18def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
19
// SelectionDAG nodes wrapping the ARMISD compare/test opcodes.  Every node
// below uses the SDTARMVCMP profile.
20def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
21def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
22def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
23def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
24def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
25def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;
26
27// Types for vector shift by immediates.  The "SHX" version is for long and
28// narrow operations where the source and destination vectors have different
29// types.  The "SHINS" version is for shift and insert operations.
// SDTARMVSH: one result, two operands; the result is an integer type, operand
// 1 has the same type as the result, and operand 2 (the shift amount) is i32.
30def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
31                                         SDTCisVT<2, i32>]>;
// SDTARMVSHX: as above, except operand 1 is only required to be an integer
// type; it is not tied to the result type.
32def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
33                                         SDTCisVT<2, i32>]>;
// SDTARMVSHINS: one result, three operands; operands 1 and 2 both match the
// integer result type and operand 3 is i32.
34def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
35                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
36
// Shift-by-immediate nodes.  Nodes whose source and result share a type use
// SDTARMVSH; the VSHLL*/VSHRN nodes use SDTARMVSHX because source and result
// types differ.
37def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
38def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
39def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
40def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
41def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
42def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
43def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
44
// VRSHR* nodes (presumably the rounding shift variants -- confirm against the
// ARMISD opcode definitions).  VRSHRN uses the differing-type profile.
45def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
46def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
47def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
48
// VQSHL*/VQSHRN* nodes (presumably the saturating shift variants -- confirm).
// The *SHRN* forms use the differing-type profile SDTARMVSHX.
49def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
50def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
51def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
52def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
53def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
54def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
55
// VQRSHRN* nodes; all use SDTARMVSHX.
56def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
57def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
58def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
59
// Shift-and-insert nodes; both take the three-operand SDTARMVSHINS profile.
60def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
61def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
62
// Type profile for the VGETLANE nodes: the single result is i32, operand 1 is
// an integer type, and operand 2 is i32.
63def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
64                                         SDTCisVT<2, i32>]>;
// The "u" and "s" suffixes presumably select zero- vs. sign-extension of the
// extracted lane -- confirm against the ARMISD definitions.
65def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
66def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
67
// Type profile for the vector move-immediate nodes: a vector result produced
// from a single i32 operand.
68def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
69def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
70def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
71
// VDUP: one result, one operand.  Only the result is constrained (it must be
// a vector); the operand type is left unconstrained by this profile.
72def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
73
74// VDUPLANE can produce a quad-register result from a double-register source,
75// so the result is not constrained to match the source.
// Both the result and operand 1 must be vectors; operand 2 is i32.
76def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
77                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
78                                                SDTCisVT<2, i32>]>>;
79
// Type profile for VEXT: a vector result, two operands of that same vector
// type, and a third i32 operand.
80def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
81                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
82def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
83
// Type profile for single-input shuffles: one vector result and one operand
// of the same type.  Used by the VREV64/VREV32/VREV16 nodes below.
84def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
85def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
86def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
87def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
88
// Type profile for two-input shuffles that produce two results.  Constraint
// indices count results first, so indices 0-1 are the results and 2-3 are the
// operands; all four must be the same vector type.
89def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
90                                         SDTCisSameAs<0, 2>,
91                                         SDTCisSameAs<0, 3>]>;
92def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
93def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
94def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
95
// Type profile for the FMAX/FMIN nodes: the result is f32 and both operands
// must have the same type as the result (i.e. this profile is scalar f32).
96def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
97                                         SDTCisSameAs<0, 2>]>;
98def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
99def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
100
// Pattern leaf matching a NEONvmovImm node whose i32 target-immediate operand
// decodes (via ARM_AM::decodeNEONModImm) to 32-bit elements with value 0.
// Only that one decoded form is accepted by the predicate.
101def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
102  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
103  unsigned EltBits = 0;
104  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
105  return (EltBits == 32 && EltVal == 0);
106}]>;
107
// Pattern leaf matching a NEONvmovImm node whose i32 target-immediate operand
// decodes (via ARM_AM::decodeNEONModImm) to 8-bit elements with value 0xff.
// Only that one decoded form is accepted by the predicate.
108def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
109  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
110  unsigned EltBits = 0;
111  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
112  return (EltBits == 8 && EltVal == 0xff);
113}]>;
114
115//===----------------------------------------------------------------------===//
116// NEON operand definitions
117//===----------------------------------------------------------------------===//
118
// i32 operand whose assembly text is produced by the printer method
// printNEONModImmOperand.
119def nModImm : Operand<i32> {
120  let PrintMethod = "printNEONModImmOperand";
121}
122
123//===----------------------------------------------------------------------===//
124// NEON load / store instructions
125//===----------------------------------------------------------------------===//
126
// Whole-Q-register loads.  Both defs are marked mayLoad with no other side
// effects, take a single QPR:$dst result printed through the dregpair
// modifier, and have an empty selection pattern list.
127let mayLoad = 1, neverHasSideEffects = 1 in {
128// Use vldmia to load a Q register as a D register pair.
129// This is equivalent to VLDMD except that it has a Q register operand
130// instead of a pair of D registers.
// Addressed with addrmode5 and predicated via pred:$p.
131def VLDMQ
132  : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p),
133          IndexModeNone, IIC_fpLoadm,
134          "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>;
135
136// Use vld1 to load a Q register as a D register pair.
137// This alternative to VLDMQ allows an alignment to be specified.
138// This is equivalent to VLD1q64 except that it has a Q register operand.
// Addressed with addrmode6; the data-size suffix is fixed to "64".
139def VLD1q
140  : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr),
141          IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
142} // mayLoad = 1, neverHasSideEffects = 1
143
// Whole-Q-register stores.  Both defs are marked mayStore with no other side
// effects, have no results, take a QPR:$src printed through the dregpair
// modifier, and have an empty selection pattern list.
144let mayStore = 1, neverHasSideEffects = 1 in {
145// Use vstmia to store a Q register as a D register pair.
146// This is equivalent to VSTMD except that it has a Q register operand
147// instead of a pair of D registers.
// Addressed with addrmode5 and predicated via pred:$p.
148def VSTMQ
149  : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p),
150          IndexModeNone, IIC_fpStorem,
151          "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>;
152
153// Use vst1 to store a Q register as a D register pair.
154// This alternative to VSTMQ allows an alignment to be specified.
155// This is equivalent to VST1q64 except that it has a Q register operand.
// Addressed with addrmode6; the data-size suffix is fixed to "64".  Note the
// address operand precedes the source register in the ins list.
156def VST1q
157  : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src),
158          IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>;
159} // mayStore = 1, neverHasSideEffects = 1
160
161let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
162
163//   VLD1     : Vector Load (multiple single elements)
// VLD1D: vld1 into one D register.  op7_4 is forwarded as the fourth NLdSt
// argument (presumably encoding bits 7-4 -- confirm against the NLdSt format
// class) and Dt is the data-size suffix string.
164class VLD1D<bits<4> op7_4, string Dt>
165  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
166          (ins addrmode6:$addr), IIC_VLD1,
167          "vld1", Dt, "\\{$dst\\}, $addr", "", []>;
// VLD1Q: vld1 into two separately named D registers, printed as a brace list.
168class VLD1Q<bits<4> op7_4, string Dt>
169  : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2),
170          (ins addrmode6:$addr), IIC_VLD1,
171          "vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
172
// One def per element size; op7_4 values 0b0000/0b0100/0b1000/0b1100 pair
// with the suffixes "8"/"16"/"32"/"64".
173def  VLD1d8   : VLD1D<0b0000, "8">;
174def  VLD1d16  : VLD1D<0b0100, "16">;
175def  VLD1d32  : VLD1D<0b1000, "32">;
176def  VLD1d64  : VLD1D<0b1100, "64">;
177
178def  VLD1q8   : VLD1Q<0b0000, "8">;
179def  VLD1q16  : VLD1Q<0b0100, "16">;
180def  VLD1q32  : VLD1Q<0b1000, "32">;
181def  VLD1q64  : VLD1Q<0b1100, "64">;
182
183// ...with address register writeback:
// VLD1DWB adds a GPR:$wb result, tied to $addr.addr by the constraint string,
// and an am6offset:$offset input that is printed directly after $addr.
184class VLD1DWB<bits<4> op7_4, string Dt>
185  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
186          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
187          "vld1", Dt, "\\{$dst\\}, $addr$offset",
188          "$addr.addr = $wb", []>;
// VLD1QWB takes a single QPR:$dst printed through the dregpair modifier,
// whereas the non-writeback VLD1Q class names two separate DPR results.
189class VLD1QWB<bits<4> op7_4, string Dt>
190  : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb),
191          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
192          "vld1", Dt, "${dst:dregpair}, $addr$offset",
193          "$addr.addr = $wb", []>;
194
// Writeback defs use the same op7_4/suffix pairing as the non-writeback ones.
195def VLD1d8_UPD  : VLD1DWB<0b0000, "8">;
196def VLD1d16_UPD : VLD1DWB<0b0100, "16">;
197def VLD1d32_UPD : VLD1DWB<0b1000, "32">;
198def VLD1d64_UPD : VLD1DWB<0b1100, "64">;
199
200def VLD1q8_UPD  : VLD1QWB<0b0000, "8">;
201def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
202def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
203def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
204
205// ...with 3 registers (some of these are only for the disassembler):
// VLD1D3: vld1 into three D registers; the third NLdSt argument is 0b0110.
206class VLD1D3<bits<4> op7_4, string Dt>
207  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
208          (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
209          "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
// VLD1D3WB: writeback form; adds GPR:$wb (tied to $addr.addr) and an
// am6offset:$offset input.
210class VLD1D3WB<bits<4> op7_4, string Dt>
211  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
212          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
213          "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>;
214
// "T" suffix marks the three-register defs, one per element size.
215def VLD1d8T      : VLD1D3<0b0000, "8">;
216def VLD1d16T     : VLD1D3<0b0100, "16">;
217def VLD1d32T     : VLD1D3<0b1000, "32">;
218def VLD1d64T     : VLD1D3<0b1100, "64">;
219
220def VLD1d8T_UPD  : VLD1D3WB<0b0000, "8">;
221def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
222def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
223def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;
224
225// ...with 4 registers (some of these are only for the disassembler):
// VLD1D4: vld1 into four D registers; the third NLdSt argument is 0b0010.
226class VLD1D4<bits<4> op7_4, string Dt>
227  : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
228          (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
229          "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
// VLD1D4WB: writeback form; adds GPR:$wb (tied to $addr.addr) and an
// am6offset:$offset input.
230class VLD1D4WB<bits<4> op7_4, string Dt>
231  : NLdSt<0,0b10,0b0010,op7_4,
232          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
233          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
234          "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb",
235          []>;
236
// "Q" suffix marks the four-register defs, one per element size.
237def VLD1d8Q      : VLD1D4<0b0000, "8">;
238def VLD1d16Q     : VLD1D4<0b0100, "16">;
239def VLD1d32Q     : VLD1D4<0b1000, "32">;
240def VLD1d64Q     : VLD1D4<0b1100, "64">;
241
242def VLD1d8Q_UPD  : VLD1D4WB<0b0000, "8">;
243def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
244def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
245def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;
246
247//   VLD2     : Vector Load (multiple 2-element structures)
// VLD2D: vld2 into two D registers.  Unlike the VLD1 classes, the third NLdSt
// argument (op11_8) is a template parameter so the same class can serve the
// double-spaced variants.
248class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
249  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
250          (ins addrmode6:$addr), IIC_VLD2,
251          "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
// VLD2Q: vld2 into four D registers; op11_8 is fixed to 0b0011.
252class VLD2Q<bits<4> op7_4, string Dt>
253  : NLdSt<0, 0b10, 0b0011, op7_4,
254          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
255          (ins addrmode6:$addr), IIC_VLD2,
256          "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
257
// Only 8/16/32-bit element sizes are defined for vld2; the D-register forms
// pass op11_8 = 0b1000.
258def  VLD2d8   : VLD2D<0b1000, 0b0000, "8">;
259def  VLD2d16  : VLD2D<0b1000, 0b0100, "16">;
260def  VLD2d32  : VLD2D<0b1000, 0b1000, "32">;
261
262def  VLD2q8   : VLD2Q<0b0000, "8">;
263def  VLD2q16  : VLD2Q<0b0100, "16">;
264def  VLD2q32  : VLD2Q<0b1000, "32">;
265
266// ...with address register writeback:
// VLD2DWB / VLD2QWB mirror VLD2D / VLD2Q, adding a GPR:$wb result tied to
// $addr.addr and an am6offset:$offset input printed after $addr.
267class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
268  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
269          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
270          "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset",
271          "$addr.addr = $wb", []>;
272class VLD2QWB<bits<4> op7_4, string Dt>
273  : NLdSt<0, 0b10, 0b0011, op7_4,
274          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
275          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
276          "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
277          "$addr.addr = $wb", []>;
278
// Template arguments match the corresponding non-writeback defs.
279def VLD2d8_UPD  : VLD2DWB<0b1000, 0b0000, "8">;
280def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">;
281def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">;
282
283def VLD2q8_UPD  : VLD2QWB<0b0000, "8">;
284def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
285def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
286
287// ...with double-spaced registers (for disassembly only):
// These reuse VLD2D / VLD2DWB and differ from the VLD2d* defs only in
// op11_8 (0b1001 here versus 0b1000 there).
288def VLD2b8      : VLD2D<0b1001, 0b0000, "8">;
289def VLD2b16     : VLD2D<0b1001, 0b0100, "16">;
290def VLD2b32     : VLD2D<0b1001, 0b1000, "32">;
291def VLD2b8_UPD  : VLD2DWB<0b1001, 0b0000, "8">;
292def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">;
293def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">;
294
295//   VLD3     : Vector Load (multiple 3-element structures)
// VLD3D: vld3 into three D registers; op11_8 and op7_4 are forwarded as the
// third and fourth NLdSt arguments and Dt is the data-size suffix.
296class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
297  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
298          (ins addrmode6:$addr), IIC_VLD3,
299          "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
300
// D-register forms pass op11_8 = 0b0100.
301def  VLD3d8   : VLD3D<0b0100, 0b0000, "8">;
302def  VLD3d16  : VLD3D<0b0100, 0b0100, "16">;
303def  VLD3d32  : VLD3D<0b0100, 0b1000, "32">;
304
305// ...with address register writeback:
// VLD3DWB mirrors VLD3D, adding a GPR:$wb result tied to $addr.addr and an
// am6offset:$offset input printed after $addr.
306class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
307  : NLdSt<0, 0b10, op11_8, op7_4,
308          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
309          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3,
310          "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
311          "$addr.addr = $wb", []>;
312
313def VLD3d8_UPD  : VLD3DWB<0b0100, 0b0000, "8">;
314def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
315def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
316
317// ...with double-spaced registers (non-updating versions for disassembly only):
// These reuse VLD3D / VLD3DWB and differ from the VLD3d* defs only in
// op11_8 (0b0101 here versus 0b0100 there).
318def VLD3q8      : VLD3D<0b0101, 0b0000, "8">;
319def VLD3q16     : VLD3D<0b0101, 0b0100, "16">;
320def VLD3q32     : VLD3D<0b0101, 0b1000, "32">;
321def VLD3q8_UPD  : VLD3DWB<0b0101, 0b0000, "8">;
322def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
323def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;
324
325// ...alternate versions to be allocated odd register numbers:
// Template arguments are identical to the VLD3q*_UPD defs above; only the
// record names differ.
326def VLD3q8odd_UPD  : VLD3DWB<0b0101, 0b0000, "8">;
327def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">;
328def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">;
329
330//   VLD4     : Vector Load (multiple 4-element structures)
// VLD4D: vld4 into four D registers; op11_8 and op7_4 are forwarded as the
// third and fourth NLdSt arguments and Dt is the data-size suffix.
331class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
332  : NLdSt<0, 0b10, op11_8, op7_4,
333          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
334          (ins addrmode6:$addr), IIC_VLD4,
335          "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
336
// D-register forms pass op11_8 = 0b0000.
337def  VLD4d8   : VLD4D<0b0000, 0b0000, "8">;
338def  VLD4d16  : VLD4D<0b0000, 0b0100, "16">;
339def  VLD4d32  : VLD4D<0b0000, 0b1000, "32">;
340
341// ...with address register writeback:
// VLD4DWB mirrors VLD4D, adding a GPR:$wb result tied to $addr.addr and an
// am6offset:$offset input printed after $addr.
342class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
343  : NLdSt<0, 0b10, op11_8, op7_4,
344          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
345          (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4,
346          "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
347          "$addr.addr = $wb", []>;
348
349def VLD4d8_UPD  : VLD4DWB<0b0000, 0b0000, "8">;
350def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
351def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
352
353// ...with double-spaced registers (non-updating versions for disassembly only):
// These reuse VLD4D / VLD4DWB and differ from the VLD4d* defs only in
// op11_8 (0b0001 here versus 0b0000 there).
354def VLD4q8      : VLD4D<0b0001, 0b0000, "8">;
355def VLD4q16     : VLD4D<0b0001, 0b0100, "16">;
356def VLD4q32     : VLD4D<0b0001, 0b1000, "32">;
357def VLD4q8_UPD  : VLD4DWB<0b0001, 0b0000, "8">;
358def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
359def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;
360
361// ...alternate versions to be allocated odd register numbers:
// Template arguments are identical to the VLD4q*_UPD defs above; only the
// record names differ.
362def VLD4q8odd_UPD  : VLD4DWB<0b0001, 0b0000, "8">;
363def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">;
364def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">;
365
366//   VLD1LN   : Vector Load (single element to one lane)
367//   FIXME: Not yet implemented.
368
369//   VLD2LN   : Vector Load (single 2-element structure to one lane)
// VLD2LN: lane load into two D registers.  The first NLdSt argument is 1
// (the multiple-structure classes pass 0).  Each $srcN input is tied to the
// matching $dstN result by the constraint string, and nohash_imm:$lane is
// printed as the lane index on every register in the list.
370class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
371  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
372          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
373          IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
374          "$src1 = $dst1, $src2 = $dst2", []>;
375
// op7_4 is given as a bit list; '?' entries are left unset by these defs
// (presumably filled from the lane/alignment at encoding time -- confirm).
376def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8">;
377def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">;
378def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">;
379
380// ...with double-spaced registers:
// Same op11_8 as the d16/d32 defs; the fixed op7_4 bit is 1 instead of 0.
381def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">;
382def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">;
383
384// ...alternate versions to be allocated odd register numbers:
// Template arguments are identical to VLD2LNq16 / VLD2LNq32.
385def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">;
386def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">;
387
388// ...with address register writeback:
// VLD2LNWB mirrors VLD2LN, adding a GPR:$wb result and an am6offset:$offset
// input.  The constraint string ties each $srcN to $dstN and $addr.addr to
// $wb.
389class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
390  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
391          (ins addrmode6:$addr, am6offset:$offset,
392           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt,
393          "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset",
394          "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>;
395
// Template arguments match the corresponding non-writeback VLD2LN defs.
396def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8">;
397def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">;
398def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">;
399
400def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">;
401def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">;
402
403//   VLD3LN   : Vector Load (single 3-element structure to one lane)
// VLD3LN: lane load into three D registers.  Each $srcN input is tied to the
// matching $dstN result, and nohash_imm:$lane is printed as the lane index on
// every register in the list.
404class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
405  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
406          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
407          nohash_imm:$lane), IIC_VLD3, "vld3", Dt,
408          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
409          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
410
// Unlike the VLD2LN/VLD4LN defs, the last listed op7_4 bit is fixed to 0 for
// every element size here; '?' entries are left unset.
411def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8">;
412def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">;
413def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">;
414
415// ...with double-spaced registers:
416def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">;
417def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">;
418
419// ...alternate versions to be allocated odd register numbers:
// Template arguments are identical to VLD3LNq16 / VLD3LNq32.
420def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">;
421def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">;
422
423// ...with address register writeback:
// VLD3LNWB mirrors VLD3LN, adding a GPR:$wb result and an am6offset:$offset
// input.  The constraint string ties each $srcN to $dstN and $addr.addr to
// $wb.
424class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
425  : NLdSt<1, 0b10, op11_8, op7_4,
426          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
427          (ins addrmode6:$addr, am6offset:$offset,
428           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
429          IIC_VLD3, "vld3", Dt,
430          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset",
431          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb",
432          []>;
433
// Template arguments match the corresponding non-writeback VLD3LN defs.
434def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8">;
435def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">;
436def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">;
437
438def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">;
439def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">;
440
441//   VLD4LN   : Vector Load (single 4-element structure to one lane)
// VLD4LN: lane load into four D registers.  Each $srcN input is tied to the
// matching $dstN result, and nohash_imm:$lane is printed as the lane index on
// every register in the list.
442class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
443  : NLdSt<1, 0b10, op11_8, op7_4,
444          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
445          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
446          nohash_imm:$lane), IIC_VLD4, "vld4", Dt,
447          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
448          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
449
// op7_4 is given as a bit list; '?' entries are left unset by these defs.
450def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8">;
451def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
452def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;
453
454// ...with double-spaced registers:
// Same op11_8 as the d16/d32 defs; the fixed op7_4 bit is 1 instead of 0.
455def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
456def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;
457
458// ...alternate versions to be allocated odd register numbers:
// Template arguments are identical to VLD4LNq16 / VLD4LNq32.
459def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">;
460def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">;
461
462// ...with address register writeback:
// VLD4LNWB mirrors VLD4LN, adding a GPR:$wb result and an am6offset:$offset
// input.  The constraint string ties each $srcN to $dstN and $addr.addr to
// $wb.  The two long string arguments are left unindented in the source,
// apparently to keep them on one line each.
463class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
464  : NLdSt<1, 0b10, op11_8, op7_4,
465          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
466          (ins addrmode6:$addr, am6offset:$offset,
467           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
468          IIC_VLD4, "vld4", Dt,
469"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset",
470"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb",
471          []>;
472
// Template arguments match the corresponding non-writeback VLD4LN defs.
473def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8">;
474def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
475def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;
476
477def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
478def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
479
480//   VLD1DUP  : Vector Load (single element to all lanes)
481//   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
482//   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
483//   VLD4DUP  : Vector Load (single 4-element structure to all lanes)
484//   FIXME: Not yet implemented.
485} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
486
487let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
488
489// Classes for VST* pseudo-instructions with multi-register operands.
490// These are expanded to real instructions after register allocation.
// VSTQQPseudo: no results; stores a QQPR:$src through addrmode6:$addr.
491class VSTQQPseudo
492  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
// VSTQQWBPseudo: writeback form; produces GPR:$wb tied to $addr.addr and
// takes an extra am6offset:$offset input.
493class VSTQQWBPseudo
494  : PseudoNLdSt<(outs GPR:$wb),
495                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST,
496                "$addr.addr = $wb">;
// VSTQQQQWBPseudo: same as VSTQQWBPseudo but with a QQQQPR:$src operand.
497class VSTQQQQWBPseudo
498  : PseudoNLdSt<(outs GPR:$wb),
499                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
500                "$addr.addr = $wb">;
501
502//   VST1     : Vector Store (multiple single elements)
// VST1D: vst1 from one D register.  There are no results; the address operand
// precedes the source register in the ins list.  op7_4 is forwarded as the
// fourth NLdSt argument and Dt is the data-size suffix string.
503class VST1D<bits<4> op7_4, string Dt>
504  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
505          "vst1", Dt, "\\{$src\\}, $addr", "", []>;
// VST1Q: vst1 from two separately named D registers.
506class VST1Q<bits<4> op7_4, string Dt>
507  : NLdSt<0,0b00,0b1010,op7_4, (outs),
508          (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
509          "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
510
// One def per element size; op7_4 values 0b0000/0b0100/0b1000/0b1100 pair
// with the suffixes "8"/"16"/"32"/"64".
511def  VST1d8   : VST1D<0b0000, "8">;
512def  VST1d16  : VST1D<0b0100, "16">;
513def  VST1d32  : VST1D<0b1000, "32">;
514def  VST1d64  : VST1D<0b1100, "64">;
515
516def  VST1q8   : VST1Q<0b0000, "8">;
517def  VST1q16  : VST1Q<0b0100, "16">;
518def  VST1q32  : VST1Q<0b1000, "32">;
519def  VST1q64  : VST1Q<0b1100, "64">;
520
521// ...with address register writeback:
// VST1DWB: the only result is GPR:$wb, tied to $addr.addr; the am6offset
// input is printed directly after $addr.
522class VST1DWB<bits<4> op7_4, string Dt>
523  : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
524          (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST,
525          "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>;
// VST1QWB takes a single QPR:$src printed through the dregpair modifier,
// whereas the non-writeback VST1Q class names two separate DPR sources.
526class VST1QWB<bits<4> op7_4, string Dt>
527  : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
528          (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
529          "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
530
// Writeback defs use the same op7_4/suffix pairing as the non-writeback ones.
531def VST1d8_UPD  : VST1DWB<0b0000, "8">;
532def VST1d16_UPD : VST1DWB<0b0100, "16">;
533def VST1d32_UPD : VST1DWB<0b1000, "32">;
534def VST1d64_UPD : VST1DWB<0b1100, "64">;
535
536def VST1q8_UPD  : VST1QWB<0b0000, "8">;
537def VST1q16_UPD : VST1QWB<0b0100, "16">;
538def VST1q32_UPD : VST1QWB<0b1000, "32">;
539def VST1q64_UPD : VST1QWB<0b1100, "64">;
540
541// ...with 3 registers (some of these are only for the disassembler):
// VST1D3: vst1 from three D registers; the third NLdSt argument is 0b0110.
542class VST1D3<bits<4> op7_4, string Dt>
543  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
544          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
545          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
// VST1D3WB: writeback form; produces GPR:$wb (tied to $addr.addr) and takes
// an am6offset:$offset input.
546class VST1D3WB<bits<4> op7_4, string Dt>
547  : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
548          (ins addrmode6:$addr, am6offset:$offset,
549           DPR:$src1, DPR:$src2, DPR:$src3),
550          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
551          "$addr.addr = $wb", []>;
552
// "T" suffix marks the three-register defs, one per element size.
553def VST1d8T      : VST1D3<0b0000, "8">;
554def VST1d16T     : VST1D3<0b0100, "16">;
555def VST1d32T     : VST1D3<0b1000, "32">;
556def VST1d64T     : VST1D3<0b1100, "64">;
557
558def VST1d8T_UPD  : VST1D3WB<0b0000, "8">;
559def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
560def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
561def VST1d64T_UPD : VST1D3WB<0b1100, "64">;
562
563// ...with 4 registers (some of these are only for the disassembler):
// VST1D4: vst1 from four D registers; the third NLdSt argument is 0b0010.
564class VST1D4<bits<4> op7_4, string Dt>
565  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
566          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
567          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
568          []>;
// VST1D4WB: writeback form; produces GPR:$wb (tied to $addr.addr) and takes
// an am6offset:$offset input.
569class VST1D4WB<bits<4> op7_4, string Dt>
570  : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
571          (ins addrmode6:$addr, am6offset:$offset,
572           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
573          IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
574          "$addr.addr = $wb", []>;
575
// "Q" suffix marks the four-register defs, one per element size.
576def VST1d8Q      : VST1D4<0b0000, "8">;
577def VST1d16Q     : VST1D4<0b0100, "16">;
578def VST1d32Q     : VST1D4<0b1000, "32">;
579def VST1d64Q     : VST1D4<0b1100, "64">;
580
581def VST1d8Q_UPD  : VST1D4WB<0b0000, "8">;
582def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
583def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
584def VST1d64Q_UPD : VST1D4WB<0b1100, "64">;
585
// Pseudo forms taking a QQPR source; presumably expanded to VST1d64Q and
// VST1d64Q_UPD after register allocation -- confirm in the pseudo expander.
586def VST1d64QPseudo     : VSTQQPseudo;
587def VST1d64QPseudo_UPD : VSTQQWBPseudo;
588
589//   VST2     : Vector Store (multiple 2-element structures)
// VST2D: vst2 from two D registers.  op11_8 is a template parameter so the
// same class can serve the double-spaced variants.
590class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
591  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
592          (ins addrmode6:$addr, DPR:$src1, DPR:$src2),
593          IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
// VST2Q: vst2 from four D registers; op11_8 is fixed to 0b0011.
594class VST2Q<bits<4> op7_4, string Dt>
595  : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
596          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
597          IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
598          "", []>;
599
// Only 8/16/32-bit element sizes are defined for vst2; the D-register forms
// pass op11_8 = 0b1000.
600def  VST2d8   : VST2D<0b1000, 0b0000, "8">;
601def  VST2d16  : VST2D<0b1000, 0b0100, "16">;
602def  VST2d32  : VST2D<0b1000, 0b1000, "32">;
603
604def  VST2q8   : VST2Q<0b0000, "8">;
605def  VST2q16  : VST2Q<0b0100, "16">;
606def  VST2q32  : VST2Q<0b1000, "32">;
607
608// ...with address register writeback:
// VST2DWB / VST2QWB mirror VST2D / VST2Q, producing GPR:$wb tied to
// $addr.addr and taking an am6offset:$offset input printed after $addr.
609class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
610  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
611          (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2),
612          IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset",
613          "$addr.addr = $wb", []>;
614class VST2QWB<bits<4> op7_4, string Dt>
615  : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
616          (ins addrmode6:$addr, am6offset:$offset,
617           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
618          IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
619          "$addr.addr = $wb", []>;
620
// Template arguments match the corresponding non-writeback defs.
621def VST2d8_UPD  : VST2DWB<0b1000, 0b0000, "8">;
622def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">;
623def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">;
624
625def VST2q8_UPD  : VST2QWB<0b0000, "8">;
626def VST2q16_UPD : VST2QWB<0b0100, "16">;
627def VST2q32_UPD : VST2QWB<0b1000, "32">;
628
629// ...with double-spaced registers (for disassembly only):
// These reuse VST2D / VST2DWB and differ from the VST2d* defs only in
// op11_8 (0b1001 here versus 0b1000 there).
630def VST2b8      : VST2D<0b1001, 0b0000, "8">;
631def VST2b16     : VST2D<0b1001, 0b0100, "16">;
632def VST2b32     : VST2D<0b1001, 0b1000, "32">;
633def VST2b8_UPD  : VST2DWB<0b1001, 0b0000, "8">;
634def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">;
635def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">;
636
637//   VST3     : Vector Store (multiple 3-element structures)
// VST3D: vst3 from three D registers; op11_8 and op7_4 are forwarded as the
// third and fourth NLdSt arguments and Dt is the data-size suffix.
638class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
639  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
640          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
641          "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
642
// D-register forms pass op11_8 = 0b0100.
643def  VST3d8   : VST3D<0b0100, 0b0000, "8">;
644def  VST3d16  : VST3D<0b0100, 0b0100, "16">;
645def  VST3d32  : VST3D<0b0100, 0b1000, "32">;
646
647// ...with address register writeback:
// VST3DWB mirrors VST3D, producing GPR:$wb tied to $addr.addr and taking an
// am6offset:$offset input printed after $addr.
648class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
649  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
650          (ins addrmode6:$addr, am6offset:$offset,
651           DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
652          "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
653          "$addr.addr = $wb", []>;
654
655def VST3d8_UPD  : VST3DWB<0b0100, 0b0000, "8">;
656def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
657def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;
658
659// ...with double-spaced registers (non-updating versions for disassembly only):
// These reuse VST3D / VST3DWB and differ from the VST3d* defs only in
// op11_8 (0b0101 here versus 0b0100 there).
660def VST3q8      : VST3D<0b0101, 0b0000, "8">;
661def VST3q16     : VST3D<0b0101, 0b0100, "16">;
662def VST3q32     : VST3D<0b0101, 0b1000, "32">;
663def VST3q8_UPD  : VST3DWB<0b0101, 0b0000, "8">;
664def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
665def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;
666
667// ...alternate versions to be allocated odd register numbers:
// Template arguments are identical to the VST3q*_UPD defs above; only the
// record names differ.
668def VST3q8odd_UPD  : VST3DWB<0b0101, 0b0000, "8">;
669def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">;
670def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">;
671
672//   VST4     : Vector Store (multiple 4-element structures)
// VST4D: vst4 from four D registers; op11_8 and op7_4 are forwarded as the
// third and fourth NLdSt arguments and Dt is the data-size suffix.
673class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
674  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
675          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
676          IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
677          "", []>;
678
// D-register forms pass op11_8 = 0b0000.
679def  VST4d8   : VST4D<0b0000, 0b0000, "8">;
680def  VST4d16  : VST4D<0b0000, 0b0100, "16">;
681def  VST4d32  : VST4D<0b0000, 0b1000, "32">;
682
// Pseudo forms taking a single QQPR source; they carry no encoding arguments
// of their own.
683def  VST4d8Pseudo  : VSTQQPseudo;
684def  VST4d16Pseudo : VSTQQPseudo;
685def  VST4d32Pseudo : VSTQQPseudo;
686
// VST4 with address register writeback.  The GPR result $wb is constrained
// to equal $addr.addr, and the asm string appends $offset to the address.
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
               DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST, "vst4", Dt,
          "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
          "$addr.addr = $wb", []>;
694
// Writeback forms of the VST4 definitions, one per element size.
def VST4d8_UPD  : VST4DWB<0b0000, 0b0000, "8">;
def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;

// Matching writeback pseudo definitions (class VSTQQWBPseudo).
def VST4d8Pseudo_UPD  : VSTQQWBPseudo;
def VST4d16Pseudo_UPD : VSTQQWBPseudo;
def VST4d32Pseudo_UPD : VSTQQWBPseudo;

// Double-spaced register forms.  The non-updating versions are provided for
// disassembly only.
def VST4q8      : VST4D<0b0001, 0b0000, "8">;
def VST4q16     : VST4D<0b0001, 0b0100, "16">;
def VST4q32     : VST4D<0b0001, 0b1000, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, 0b0000, "8">;
def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;

// Writeback pseudo definitions for the double-spaced forms
// (class VSTQQQQWBPseudo).
def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo;

// Alternate pseudo versions, to be allocated odd register numbers.
def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
719
720//   VST1LN   : Vector Store (single element from one lane)
721//   FIXME: Not yet implemented.
722
//   VST2LN   : Vector Store (single 2-element structure from one lane)
// Non-writeback form; the asm string prints element $lane of each of the two
// D-register sources.
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4,
          (outs),
          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VST, "vst2", Dt,
          "\\{$src1[$lane], $src2[$lane]\\}, $addr",
          "", []>;
729
// VST2LN definitions.  Bits given as '?' in the op7_4 field are left
// unset by these definitions.
def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8">;
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">;
def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">;

// Double-spaced register forms.
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">;
def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">;

// Alternate versions, to be allocated odd register numbers.
def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">;
def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">;
741
// VST2LN with address register writeback.  The GPR result $wb is constrained
// to equal $addr.addr, and the asm string appends $offset to the address.
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
               DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VST, "vst2", Dt,
          "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
          "$addr.addr = $wb", []>;
749
// Writeback forms of the VST2LN definitions.
def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8">;
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">;
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">;

// Writeback forms using the same op7_4 patterns as VST2LNq16/VST2LNq32.
def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">;
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">;
756
//   VST3LN   : Vector Store (single 3-element structure from one lane)
// Non-writeback form; the asm string prints element $lane of each of the
// three D-register sources.
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4,
          (outs),
          (ins addrmode6:$addr,
               DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VST, "vst3", Dt,
          "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr",
          "", []>;
763
// VST3LN definitions.  Bits given as '?' in the op7_4 field are left
// unset by these definitions.
def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8">;
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">;
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">;

// Double-spaced register forms.
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">;
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">;

// Alternate versions, to be allocated odd register numbers.
def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, "16">;
def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">;
775
// VST3LN with address register writeback.  The GPR result $wb is constrained
// to equal $addr.addr, and the asm string appends $offset to the address.
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
               DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VST, "vst3", Dt,
          "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset",
          "$addr.addr = $wb", []>;
784
// Writeback forms of the VST3LN definitions.
def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8">;
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">;
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">;

// Writeback forms using the same op7_4 patterns as VST3LNq16/VST3LNq32.
def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">;
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">;
791
//   VST4LN   : Vector Store (single 4-element structure from one lane)
// Non-writeback form; the asm string prints element $lane of each of the
// four D-register sources.
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4,
          (outs),
          (ins addrmode6:$addr,
               DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
               nohash_imm:$lane),
          IIC_VST, "vst4", Dt,
          "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
          "", []>;
799
// VST4LN definitions.  Bits given as '?' in the op7_4 field are left
// unset by these definitions.
def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8">;
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">;
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">;

// Double-spaced register forms.
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">;
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">;

// Alternate versions, to be allocated odd register numbers.
def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, "16">;
def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">;
811
// VST4LN with address register writeback.  The GPR result $wb is constrained
// to equal $addr.addr, and the asm string appends $offset to the address.
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<1, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$addr, am6offset:$offset,
               DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
               nohash_imm:$lane),
          IIC_VST, "vst4", Dt,
  "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset",
          "$addr.addr = $wb", []>;
820
// Writeback forms of the VST4LN definitions.
def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8">;
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">;
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;

// Writeback forms using the same op7_4 patterns as VST4LNq16/VST4LNq32.
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
827
828} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
829
830
831//===----------------------------------------------------------------------===//
832// NEON pattern fragments
833//===----------------------------------------------------------------------===//
834
// Extract D sub-registers of Q registers.
// i8 elements: the lane index divided by 8 is added to ARM::dsub_0 to form
// the subregister index constant.
def DSubReg_i8_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t DRegOffset = N->getZExtValue() / 8;
  return CurDAG->getTargetConstant(ARM::dsub_0 + DRegOffset, MVT::i32);
}]>;
// i16 elements: the lane index divided by 4 is added to ARM::dsub_0 to form
// the subregister index constant.
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t DRegOffset = N->getZExtValue() / 4;
  return CurDAG->getTargetConstant(ARM::dsub_0 + DRegOffset, MVT::i32);
}]>;
// i32 elements: the lane index divided by 2 is added to ARM::dsub_0 to form
// the subregister index constant.
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t DRegOffset = N->getZExtValue() / 2;
  return CurDAG->getTargetConstant(ARM::dsub_0 + DRegOffset, MVT::i32);
}]>;
// f64 elements: the lane index is added to ARM::dsub_0 unscaled to form the
// subregister index constant.
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t DRegOffset = N->getZExtValue();
  return CurDAG->getTargetConstant(ARM::dsub_0 + DRegOffset, MVT::i32);
}]>;
852
// Extract S sub-registers of Q/D registers.
// The lane index is added to ARM::ssub_0 unscaled to form the subregister
// index constant.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  uint64_t SRegOffset = N->getZExtValue();
  return CurDAG->getTargetConstant(ARM::ssub_0 + SRegOffset, MVT::i32);
}]>;
858
// Translate lane numbers from Q registers to D subregs.
// i8 elements: keep the low three bits of the lane number.
def SubReg_i8_lane : SDNodeXForm<imm, [{
  uint64_t LaneInDReg = N->getZExtValue() & 7;
  return CurDAG->getTargetConstant(LaneInDReg, MVT::i32);
}]>;
// i16 elements: keep the low two bits of the lane number.
def SubReg_i16_lane : SDNodeXForm<imm, [{
  uint64_t LaneInDReg = N->getZExtValue() & 3;
  return CurDAG->getTargetConstant(LaneInDReg, MVT::i32);
}]>;
// i32 elements: keep the low bit of the lane number.
def SubReg_i32_lane : SDNodeXForm<imm, [{
  uint64_t LaneInDReg = N->getZExtValue() & 1;
  return CurDAG->getTargetConstant(LaneInDReg, MVT::i32);
}]>;
869
870//===----------------------------------------------------------------------===//
871// Instruction Classes
872//===----------------------------------------------------------------------===//
873
// Basic 2-register operations: single-, double- and quad-register.
// N2VS: operands use the DPR_VFP2 register class.  The pattern list is empty,
// so ResTy, OpTy and OpNode are accepted but not referenced here.
class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src),
        IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "",
        []>;
// N2VD: D-register form; selects (OpNode $src) with operand type OpTy and
// result type ResTy.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        IIC_VUNAD, OpcodeStr, Dt,"$dst, $src", "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
// N2VQ: Q-register form; selects (OpNode $src) with operand type OpTy and
// result type ResTy.
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        IIC_VUNAQ, OpcodeStr, Dt,"$dst, $src", "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
893
// Basic 2-register intrinsics, both double- and quad-register.
// N2VDInt: D-register form; the itinerary is supplied by the caller and the
// pattern applies the intrinsic IntOp to $src.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
// N2VQInt: Q-register form; the itinerary is supplied by the caller and the
// pattern applies the intrinsic IntOp to $src.
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
909
// Narrow 2-register intrinsics.
// The source is a Q register (type TyQ) and the result a D register
// (type TyD); op6 is passed through from the caller.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$dst),
        (ins QPR:$src),
        itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
918
// Long 2-register operations (currently only used for VMOVL).
// The source is a D register (type TyD) and the result a Q register
// (type TyQ); op6 is passed through from the caller.
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$dst),
        (ins DPR:$src),
        itin, OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>;
927
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// N2VDShuffle: D-register form with two results, each constrained to the
// corresponding source operand.  No selection pattern is attached.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$dst1, DPR:$dst2),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPERMD, OpcodeStr, Dt, "$dst1, $dst2",
        "$src1 = $dst1, $src2 = $dst2", []>;
// N2VQShuffle: Q-register form with two results, each constrained to the
// corresponding source operand.  The itinerary is supplied by the caller and
// no selection pattern is attached.
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$dst1, QPR:$dst2),
        (ins QPR:$src1, QPR:$src2),
        itin, OpcodeStr, Dt, "$dst1, $dst2",
        "$src1 = $dst1, $src2 = $dst2", []>;
939
// Basic 3-register operations: single-, double- and quad-register.
// N3VS: operands use the DPR_VFP2 register class.  The pattern list is empty,
// so ResTy, OpTy and OpNode are accepted but not referenced here.
class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src1, DPR_VFP2:$src2),
        N3RegFrm, IIC_VBIND,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        []> {
  let isCommutable = Commutable;
}
949
// N3VD: D-register binary operation selecting (OpNode $src1, $src2).
// isCommutable is taken from the Commutable argument.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set DPR:$dst,
              (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
// Same as N3VD but no data type: derives from N3VX and takes no Dt string.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$dst),
         (ins DPR:$src1, DPR:$src2),
         N3RegFrm, itin,
         OpcodeStr, "$dst, $src1, $src2", "",
         [(set DPR:$dst,
               (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
970
// N3VDSL: D-register operation whose second operand is one lane of $src2
// (register class DPR_VFP2), matched as a NEONvduplane of that lane.
// Never commutable.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
                                          imm:$lane)))))]> {
  let isCommutable = 0;
}
// N3VDSL16: like N3VDSL but the lane operand $src2 uses register class DPR_8
// and the itinerary is fixed to IIC_VMULi16D.  Never commutable.
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, IIC_VMULi16D,
        OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (NEONvduplane (Ty DPR_8:$src2),
                                          imm:$lane)))))]> {
  let isCommutable = 0;
}
992
// N3VQ: Q-register binary operation selecting (OpNode $src1, $src2).
// isCommutable is taken from the Commutable argument.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst,
              (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}
// N3VQX: Q-register binary operation without a data type string; derives
// from N3VX.  isCommutable is taken from the Commutable argument.
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$dst),
         (ins QPR:$src1, QPR:$src2),
         N3RegFrm, itin,
         OpcodeStr, "$dst, $src1, $src2", "",
         [(set QPR:$dst,
               (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}
// N3VQSL: Q-register operation whose second operand is one lane of the
// D register $src2 (register class DPR_VFP2), matched as a NEONvduplane
// producing ResTy from an OpTy source.  Never commutable.
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}
// N3VQSL16: like N3VQSL but the lane operand $src2 uses register class DPR_8
// and the itinerary is fixed to IIC_VMULi16Q.  Never commutable.
class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, IIC_VMULi16Q,
        OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}
1034
// Basic 3-register intrinsics, both double- and quad-register.
// N3VDInt: D-register form; the instruction format f and itinerary are
// supplied by the caller, and isCommutable comes from Commutable.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        f, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set DPR:$dst,
              (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
// N3VDIntSL: D-register intrinsic whose second operand is one lane of $src2
// (register class DPR_VFP2), matched as a NEONvduplane.  Never commutable.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}
// N3VDIntSL16: like N3VDIntSL but the lane operand $src2 uses register class
// DPR_8.  Never commutable.
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (Ty DPR:$dst),
              (Ty (IntOp (Ty DPR:$src1),
                         (Ty (NEONvduplane (Ty DPR_8:$src2),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}
1066
// N3VQInt: Q-register 3-register intrinsic; the instruction format f and
// itinerary are supplied by the caller, and isCommutable comes from
// Commutable.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        f, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst,
              (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}
// N3VQIntSL: Q-register intrinsic whose second operand is one lane of the
// D register $src2 (register class DPR_VFP2), matched as a NEONvduplane
// producing ResTy from an OpTy source.  Never commutable.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
// N3VQIntSL16: like N3VQIntSL but the lane operand $src2 uses register class
// DPR_8.  Never commutable.
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (ResTy (NEONvduplane (OpTy DPR_8:$src2),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
1100
// Multiply-Add/Sub operations: single-, double- and quad-register.
// N3VSMulOp: operands use the DPR_VFP2 register class and $src1 is
// constrained to $dst.  The pattern list is empty, so Ty, MulOp and OpNode
// are accepted but not referenced here.
class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR_VFP2:$dst),
        (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        []>;
1109
// N3VDMulOp: D-register form selecting
// (OpNode $src1, (MulOp $src2, $src3)), with $src1 constrained to $dst.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR:$src3),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set DPR:$dst,
              (Ty (OpNode DPR:$src1,
                          (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
// N3VDMulOpSL: D-register multiply-accumulate form where the multiplier is
// one lane of $src3 (register class DPR_VFP2), matched as a NEONvduplane.
// $src1 is constrained to $dst.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
                                                     imm:$lane)))))))]>;
// N3VDMulOpSL16: like N3VDMulOpSL but the lane operand $src3 uses register
// class DPR_8.  $src1 is constrained to $dst.
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3V<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (Ty DPR:$dst),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$src2,
                                   (Ty (NEONvduplane (Ty DPR_8:$src3),
                                                     imm:$lane)))))))]>;
1144
// N3VQMulOp: Q-register form selecting
// (OpNode $src1, (MulOp $src2, $src3)), with $src1 constrained to $dst.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, QPR:$src3),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst,
              (Ty (OpNode QPR:$src1,
                          (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
// N3VQMulOpSL: Q-register multiply-accumulate form where the multiplier is
// one lane of the D register $src3 (register class DPR_VFP2), matched as a
// NEONvduplane producing ResTy from an OpTy source.  $src1 is constrained to
// $dst.
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                   (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                        imm:$lane)))))))]>;
// N3VQMulOpSL16: like N3VQMulOpSL but the lane operand $src3 uses register
// class DPR_8.  $src1 is constrained to $dst.
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3V<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$src2,
                                   (ResTy (NEONvduplane (OpTy DPR_8:$src3),
                                                        imm:$lane)))))))]>;
1180
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
// N3VDInt3: D-register form; all three intrinsic operands have type OpTy.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR:$src3),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set DPR:$dst,
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
// N3VQInt3: Q-register 3-argument intrinsic; $src1 is constrained to $dst
// and all three intrinsic operands have type OpTy.
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, QPR:$src3),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst,
              (ResTy (IntOp (OpTy QPR:$src1),
                            (OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
1199
// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
// $src2 and $src3 are D registers of type TyD.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR:$src3),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
        [(set QPR:$dst,
              (TyQ (IntOp (TyQ QPR:$src1),
                          (TyD DPR:$src2), (TyD DPR:$src3))))]>;
// N3VLInt3SL: long 3-argument intrinsic whose last operand is one lane of
// $src3 (register class DPR_VFP2), matched as a NEONvduplane.  $src1 is
// constrained to $dst.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
                                                imm:$lane)))))]>;
// N3VLInt3SL16: like N3VLInt3SL but the lane operand $src3 uses register
// class DPR_8.  $src1 is constrained to $dst.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src3),
                                                imm:$lane)))))]>;
1236
// Narrowing 3-register intrinsics.
// Both sources are Q registers (type TyQ) and the result is a D register
// (type TyD).  The itinerary is fixed to IIC_VBINi4D.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set DPR:$dst,
              (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
  let isCommutable = Commutable;
}
1247
// Long 3-register intrinsics.
// Both sources are D registers (type TyD) and the result is a Q register
// (type TyQ).  isCommutable comes from Commutable.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst,
              (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}
// N3VLIntSL: long intrinsic whose second operand is one lane of $src2
// (register class DPR_VFP2), matched as a NEONvduplane.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
                                                imm:$lane)))))]>;
// N3VLIntSL16: like N3VLIntSL but the lane operand $src2 uses register class
// DPR_8.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
        [(set (ResTy QPR:$dst),
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy (NEONvduplane (OpTy DPR_8:$src2),
                                                imm:$lane)))))]>;
1278
// Wide 3-register intrinsics.
// $src1 is a Q register (type TyQ), $src2 a D register (type TyD), and the
// result is a Q register.  The itinerary is fixed to IIC_VSUBiD.
class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2),
        N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$dst, $src1, $src2", "",
        [(set QPR:$dst,
              (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}
1289
// Pairwise long 2-register intrinsics, both double- and quad-register.
// N2VDPLInt: D-register form; the itinerary is fixed to IIC_VSHLiD.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
// Quad-register pairwise long intrinsic: IntOp maps a QPR source of type OpTy
// to a QPR result of type ResTy.
// NOTE(review): this quad-register form is given the IIC_VSHLiD itinerary --
// confirm that a "D" itinerary is intended here.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        IIC_VSHLiD,
        OpcodeStr, Dt, "$dst, $src", "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
1305
// Pairwise long 2-register accumulate intrinsics, in double- and
// quad-register forms.  The destination register doubles as the first source
// operand (constraint "$src1 = $dst"), so only $src2 appears in the assembly
// operand list.
// Double-register form.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPALiD,
        OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
        [(set DPR:$dst,
              (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
// Quad-register pairwise long accumulate intrinsic.  $src1 is tied to $dst
// and carries the accumulator (type ResTy); $src2 has type OpTy.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        IIC_VPALiQ,
        OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
        [(set QPR:$dst,
              (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;
1325
// Shift by immediate, in double- and quad-register forms.
// Double-register form: OpNode is applied to DPR:$src and the i32 immediate
// $SIMM; the instruction format and itinerary come from the instantiation.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           f, itin,
           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst,
                 (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
// Quad-register shift by immediate: OpNode is applied to QPR:$src and the
// i32 immediate $SIMM.
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           f, itin,
           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst,
                 (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;
1342
// Long shift by immediate: a DPR source of type OpTy is shifted into a QPR
// result of type ResTy.  Format (N2RegVShLFrm) and itinerary (IIC_VSHLiD) are
// fixed by this class.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           N2RegVShLFrm, IIC_VSHLiD,
           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst,
                 (ResTy (OpNode (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
1352
// Narrow shift by immediate: a QPR source of type OpTy is shifted into a DPR
// result of type ResTy.  The format is fixed to N2RegVShRFrm; the itinerary
// is supplied by the instantiation.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst,
                 (ResTy (OpNode (OpTy QPR:$src), (i32 imm:$SIMM))))]>;
1362
// Shift right by immediate and accumulate, in double- and quad-register
// forms.  The pattern is add($src1, ShOp($src2, $SIMM)) with $src1 tied to
// $dst, so the assembly string lists only $dst, $src2 and $SIMM.
// Double-register form.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set DPR:$dst,
                 (Ty (add DPR:$src1,
                          (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
// Quad-register shift right by immediate and accumulate:
// $dst = add($src1, ShOp($src2, $SIMM)), with $src1 tied to $dst.
// NOTE(review): this quad-register form is given the IIC_VPALiD itinerary --
// confirm that a "D" itinerary is intended here.
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set QPR:$dst,
                 (Ty (add QPR:$src1,
                          (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;
1379
// Shift by immediate and insert, in double- and quad-register forms.  ShOp
// receives the existing destination value ($src1, tied to $dst), the source
// $src2 and the i32 immediate $SIMM.
// Double-register form.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Format f, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           f, IIC_VSHLiD,
           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set DPR:$dst,
                 (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
// Quad-register shift by immediate and insert: ShOp receives $src1 (tied to
// $dst), $src2 and the i32 immediate $SIMM.
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Format f, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
           [(set QPR:$dst,
                 (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;
1394
// Convert with a fractional-bits immediate, in double- and quad-register
// forms.  IntOp takes the source vector and the i32 immediate $SIMM.
// Double-register form.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           NVCVTFrm, IIC_VUNAD,
           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set DPR:$dst,
                 (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
// Quad-register convert with a fractional-bits immediate: IntOp takes
// QPR:$src (type OpTy) and the i32 immediate $SIMM and yields ResTy.
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           NVCVTFrm, IIC_VUNAQ,
           OpcodeStr, Dt, "$dst, $src, $SIMM", "",
           [(set QPR:$dst,
                 (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;
1411
1412//===----------------------------------------------------------------------===//
1413// Multiclasses
1414//===----------------------------------------------------------------------===//
1415
1416// Abbreviations used in multiclass suffixes:
1417//   Q = quarter int (8 bit) elements
1418//   H = half int (16 bit) elements
1419//   S = single int (32 bit) elements
1420//   D = double int (64 bit) elements
1421
1422// Neon 2-register vector operations -- for disassembly only.
1423
// Two-register forms with element sizes of 8, 16 and 32 bits, plus an f32
// variant for each register width.  All defs carry an empty pattern list and
// NoItinerary.  The f32 defs reuse the 32-bit size encoding (0b10) and then
// force Inst{10} to 1.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4,
                       string opc, string Dt, string asm> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "8"), asm, "", []>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "16"), asm, "", []>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "32"), asm, "", []>;
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$dst), (ins DPR:$src),
                  NoItinerary, opc, "f32", asm, "", []> {
    let Inst{10} = 1; // set the F bit
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "8"), asm, "", []>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "16"), asm, "", []>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src),
                  NoItinerary, opc, !strconcat(Dt, "32"), asm, "", []>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$dst), (ins QPR:$src),
                  NoItinerary, opc, "f32", asm, "", []> {
    let Inst{10} = 1; // set the F bit
  }
}
1460
1461// Neon 3-register vector operations.
1462
// Three-register operations with element sizes of 8, 16 and 32 bits.
// The 8- and 16-bit defs share the "16" itineraries (itinD16 / itinQ16); the
// 32-bit defs use itinD32 / itinQ32.  The size field is 0b00, 0b01 or 0b10
// and the data-type suffix is Dt followed by the element size.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4,
                   itinD16, OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4,
                   itinD16, OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4,
                   itinD32, OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4,
                   itinQ16, OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4,
                   itinQ16, OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4,
                   itinQ32, OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}
1491
// Scalar ("SL") three-register operations with 16- and 32-bit elements.
// The 16-bit defs instantiate the *SL16 classes and pass no itinerary; the
// 32-bit defs pass IIC_VMULi32D / IIC_VMULi32Q explicitly.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v4i16 : N3VDSL16<0b01, op11_8,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v2i32, ShOp>;

  // 128-bit vector types.
  def v8i16 : N3VQSL16<0b01, op11_8,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i32, v2i32, ShOp>;
}
1502
// Three-register operations with element sizes of 8, 16, 32 and 64 bits.
// Inherits the 8/16/32-bit defs from N3V_QHS (passing itinD and itinQ for
// both the 16- and 32-bit itinerary slots) and adds the 64-bit defs.
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4,
            itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4,
                   itinD, OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4,
                   itinQ, OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}
1517
1518
// Neon narrowing 2-register vector intrinsics.
//   Source element sizes are 16, 32 and 64 bits.  Each def is named after its
//   (narrower) result type, while the data-type suffix appended to Dt is the
//   source element size.
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  // v8i16 -> v8i8
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  // v4i32 -> v4i16
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  // v2i64 -> v2i32
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}
1535
1536
// Neon lengthening 2-register vector operation (currently specific to VMOVL).
//   Source element sizes are 8, 16 and 32 bits (v8i8, v4i16, v2i32); each def
//   is named after its widened result type.  The second and third arguments
//   to N2VL differ per element size, and every def uses IIC_VQUNAiD.
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  // v8i8 -> v8i16
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode>;
  // v4i16 -> v4i32
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode>;
  // v2i32 -> v2i64
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode>;
}
1548
1549
1550// Neon 3-register vector intrinsics.
1551
// Three-register vector intrinsics with element sizes of 16 and 32 bits.
// A separate itinerary is supplied for each (register width, element size)
// combination; the instruction format f is passed through to every def.
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     Intrinsic IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f,
                      itinD16, OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f,
                      itinD32, OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f,
                      itinQ16, OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f,
                      itinQ32, OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
1574
// Scalar ("SL") three-register vector intrinsics with 16- and 32-bit
// elements.  The 16-bit defs instantiate the *SL16 classes; the quad-register
// defs additionally pass the double-register operand type.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"),
                          v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"),
                          v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v2i32, IntOp>;
}
1588
// Three-register vector intrinsics with element sizes of 8, 16 and 32 bits.
// Inherits the 16/32-bit defs from N3VInt_HS and adds the 8-bit defs, which
// use the 16-bit itineraries (itinD16 / itinQ16).
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f,
              itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f,
                      itinD16, OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f,
                      itinQ16, OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
1604
// Three-register vector intrinsics with element sizes of 8, 16, 32 and 64
// bits.  Inherits the 8/16/32-bit defs from N3VInt_QHS and adds the 64-bit
// defs, which use the 32-bit itineraries (itinD32 / itinQ32).
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f,
               itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f,
                      itinD32, OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f,
                      itinQ32, OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
1620
// Neon narrowing 3-register vector intrinsics.
//   Source element sizes are 16, 32 and 64 bits.  Each def is named after its
//   (narrower) result type, while the data-type suffix appended to Dt is the
//   source element size.
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
  // v8i16 operands -> v8i8 result
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  // v4i32 operands -> v4i16 result
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  // v2i64 operands -> v2i32 result
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}
1636
1637
1638// Neon Long 3-register vector intrinsics.
1639
// Long three-register vector intrinsics with source element sizes of 16 and
// 32 bits.  Each def is named after its widened result type; itin16 and
// itin32 are selected by source element size.
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0> {
  // v4i16 operands -> v4i32 result
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4,
                      itin16, OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  // v2i32 operands -> v2i64 result
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4,
                      itin32, OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}
1652
// Long three-register intrinsics with a scalar (by-lane) operand, for 16- and
// 32-bit source elements.  Unlike N3VLInt_HS, the defs here are named after
// the operand type (v4i16, v2i32); the result types are v4i32 and v2i64.
// Both defs share the single itinerary itin.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        Intrinsic IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"),
                          v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i64, v2i32, IntOp>;
}
1661
// Long three-register vector intrinsics with source element sizes of 8, 16
// and 32 bits.  Inherits the 16/32-bit defs from N3VLInt_HS and adds the
// 8-bit source form (v8i8 -> v8i16), which uses itin16.
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4,
               itin16, itin32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4,
                      itin16, OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}
1673
1674
// Neon wide 3-register vector intrinsics.
//   The narrow (second) operand has element sizes of 8, 16 and 32 bits; each
//   def is named after the wide type shared by the first operand and result.
multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
  // wide v8i16, narrow v8i8
  def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
  // wide v4i32, narrow v4i16
  def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  // wide v2i64, narrow v2i32
  def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}
1690
1691
// Neon multiply-op vector operations with element sizes of 8, 16 and 32 bits.
// Every def passes `mul` together with the caller's OpNode to the underlying
// N3VDMulOp / N3VQMulOp class.  The 8-bit defs use the 16-bit itineraries.
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4,
                        itinD16, OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4,
                        itinD16, OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4,
                        itinD32, OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4,
                        itinQ16, OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4,
                        itinQ16, OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4,
                        itinQ32, OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, mul, OpNode>;
}
1714
// Scalar ("SL") multiply-op vector operations with 16- and 32-bit elements.
// Every def passes `mul` together with the caller's ShOp; the quad-register
// defs additionally pass the double-register operand type.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"),
                            v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"),
                          v2i32, mul, ShOp>;

  // 128-bit vector types.
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"),
                            v8i16, v4i16, mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"),
                          v4i32, v2i32, mul, ShOp>;
}
1730
// Neon 3-argument intrinsics with element sizes of 8, 16 and 32 bits.
// One itinerary per register width (itinD, itinQ) is shared by all element
// sizes; result and operand types are identical within each def.
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4,
                       itinD, OpcodeStr, !strconcat(Dt, "8"),
                       v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4,
                       itinD, OpcodeStr, !strconcat(Dt, "16"),
                       v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4,
                       itinD, OpcodeStr, !strconcat(Dt, "32"),
                       v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4,
                       itinQ, OpcodeStr, !strconcat(Dt, "8"),
                       v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4,
                       itinQ, OpcodeStr, !strconcat(Dt, "16"),
                       v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4,
                       itinQ, OpcodeStr, !strconcat(Dt, "32"),
                       v4i32, v4i32, IntOp>;
}
1752
1753
1754// Neon Long 3-argument intrinsics.
1755
// Long 3-argument intrinsics with source element sizes of 16 and 32 bits.
// Each def is named after its widened result type; itin16 and itin32 are
// selected by source element size.
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // v4i16 operands -> v4i32 result
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4,
                       itin16, OpcodeStr, !strconcat(Dt, "16"),
                       v4i32, v4i16, IntOp>;
  // v2i32 operands -> v2i64 result
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4,
                       itin32, OpcodeStr, !strconcat(Dt, "32"),
                       v2i64, v2i32, IntOp>;
}
1765
// Long 3-argument intrinsics with a scalar (by-lane) operand, for 16- and
// 32-bit source elements.  The defs are named after the operand type (v4i16,
// v2i32); the result types are v4i32 and v2i64.  Itineraries are fixed to
// IIC_VMACi16D and IIC_VMACi32D respectively.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt, "16"),
                           v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp>;
}
1773
// Long 3-argument intrinsics with source element sizes of 8, 16 and 32 bits.
// Inherits the 16/32-bit defs from N3VLInt3_HS and adds the 8-bit source form
// (v8i8 -> v8i16), which uses itin16.
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4,
                itin16, itin32,
                OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4,
                       itin16, OpcodeStr, !strconcat(Dt, "8"),
                       v8i16, v8i8, IntOp>;
}
1782
1783
// Neon 2-register vector intrinsics with element sizes of 8, 16 and 32 bits.
// One itinerary per register width (itinD, itinQ) is shared by all element
// sizes; result and operand types are identical within each def.
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp>;
}
1806
1807
// Neon pairwise long 2-register intrinsics with source element sizes of 8,
// 16 and 32 bits.  Each def is named after its source type; the result type
// has half as many elements, each twice as wide.
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i64, v4i32, IntOp>;
}
1829
1830
// Neon pairwise long 2-register accumulate intrinsics with source element
// sizes of 8, 16 and 32 bits.  Each def is named after its source type; the
// result type has half as many elements, each twice as wide.
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v4i32, IntOp>;
}
1852
1853
// Neon 2-register vector shift by immediate,
//   with f being either N2RegVShLFrm or N2RegVShRFrm,
//   for element sizes of 8, 16, 32 and 64 bits.
// The 8/16/32-bit defs pass op7 = 0 and pin the leading bits of the imm6
// field (Inst{21-16}) through `let Inst{...}`; the 64-bit defs pass op7 = 1
// and leave imm6 unconstrained.
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode, Format f> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "8"),
                     v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "16"),
                     v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  // imm6 = xxxxxx (no fixed bits for the 64-bit element size)
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v1i64, OpNode>;

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "8"),
                     v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "16"),
                     v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  // imm6 = xxxxxx (no fixed bits for the 64-bit element size)
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, f, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i64, OpNode>;
}
1894
// N2VShAdd_QHSD - Neon shift-accumulate vector operations.
//   Expands to one D-register def (N2VDShAdd) and one Q-register def
//   (N2VQShAdd) for each element size of 8, 16, 32 and 64 bits.
//   The element size is pinned through the leading bits of imm6
//   (Inst{21-16}); the 64-bit element forms fix none of those bits and are
//   the only ones that pass 1 for the bit following op11_8.
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt,
                         SDNode ShOp> {
  // D-register (64-bit vector) forms.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001;    // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;     // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;         // imm6 = 1xxxxx
  }
  // 64-bit elements: imm6 = xxxxxx (nothing fixed).
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, OpcodeStr,
                        !strconcat(Dt, "64"), v1i64, ShOp>;

  // Q-register (128-bit vector) forms.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001;    // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;     // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, OpcodeStr,
                        !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;         // imm6 = 1xxxxx
  }
  // 64-bit elements: imm6 = xxxxxx (nothing fixed).
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, OpcodeStr,
                        !strconcat(Dt, "64"), v2i64, ShOp>;
}
1933
1934
// N2VShIns_QHSD - Neon shift-insert vector operations.
//   Expands to one D-register def (N2VDShIns) and one Q-register def
//   (N2VQShIns) for each element size of 8, 16, 32 and 64 bits.
//   f is the instruction format: either N2RegVShLFrm or N2RegVShRFrm.
//   The data-type suffix is the bare element size ("8" ... "64").
//   The element size is pinned through the leading bits of imm6
//   (Inst{21-16}); the 64-bit element forms fix none of those bits and are
//   the only ones that pass 1 for the bit following op11_8.
multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr,
                         SDNode ShOp, Format f> {
  // D-register (64-bit vector) forms.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, f,
                        OpcodeStr, "8", v8i8, ShOp> {
    let Inst{21-19} = 0b001;    // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, f,
                        OpcodeStr, "16", v4i16, ShOp> {
    let Inst{21-20} = 0b01;     // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, f,
                        OpcodeStr, "32", v2i32, ShOp> {
    let Inst{21} = 0b1;         // imm6 = 1xxxxx
  }
  // 64-bit elements: imm6 = xxxxxx (nothing fixed).
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, f,
                        OpcodeStr, "64", v1i64, ShOp>;

  // Q-register (128-bit vector) forms.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, f,
                        OpcodeStr, "8", v16i8, ShOp> {
    let Inst{21-19} = 0b001;    // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, f,
                        OpcodeStr, "16", v8i16, ShOp> {
    let Inst{21-20} = 0b01;     // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, f,
                        OpcodeStr, "32", v4i32, ShOp> {
    let Inst{21} = 0b1;         // imm6 = 1xxxxx
  }
  // 64-bit elements: imm6 = xxxxxx (nothing fixed).
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, f,
                        OpcodeStr, "64", v2i64, ShOp>;
}
1975
// N2VLSh_QHS - Neon shift long operations.
//   Source element sizes of 8, 16 and 32 bits; each def is named after its
//   (twice as wide) result type, while the data-type suffix names the source
//   element size.  The leading bits of imm6 (Inst{21-16}) pin that size.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8,
                      bit op7, bit op6, bit op4,
                      string OpcodeStr, string Dt, SDNode OpNode> {
  // v8i8 -> v8i16
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
    let Inst{21-19} = 0b001;    // imm6 = 001xxx
  }
  // v4i16 -> v4i32
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
    let Inst{21-20} = 0b01;     // imm6 = 01xxxx
  }
  // v2i32 -> v2i64
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
    let Inst{21} = 0b1;         // imm6 = 1xxxxx
  }
}
1993
// N2VNSh_HSD - Neon shift narrow operations.
//   Source element sizes of 16, 32 and 64 bits; each def is named after its
//   (half as wide) result type, while the data-type suffix names the source
//   element size.  The leading bits of imm6 (Inst{21-16}) are pinned per def.
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8,
                      bit op7, bit op6, bit op4,
                      InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode> {
  // v8i16 -> v8i8
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "16"), v8i8, v8i16, OpNode> {
    let Inst{21-19} = 0b001;    // imm6 = 001xxx
  }
  // v4i32 -> v4i16
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v4i16, v4i32, OpNode> {
    let Inst{21-20} = 0b01;     // imm6 = 01xxxx
  }
  // v2i64 -> v2i32
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "64"), v2i32, v2i64, OpNode> {
    let Inst{21} = 0b1;         // imm6 = 1xxxxx
  }
}
2012
2013//===----------------------------------------------------------------------===//
2014// Instruction Definitions.
2015//===----------------------------------------------------------------------===//
2016
// Vector Add Operations.

//   VADD     : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ,
                         "vadd", "i", add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND,
                     "vadd", "f32", v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ,
                     "vadd", "f32", v4f32, v4f32, fadd, 1>;

//   VADDL    : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLInt_QHS<0, 1, 0b0000, 0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", int_arm_neon_vaddls, 1>;
defm VADDLu   : N3VLInt_QHS<1, 1, 0b0000, 0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", int_arm_neon_vaddlu, 1>;

//   VADDW    : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VWInt_QHS<0, 1, 0b0001, 0,
                            "vaddw", "s", int_arm_neon_vaddws, 0>;
defm VADDWu   : N3VWInt_QHS<1, 1, 0b0001, 0,
                            "vaddw", "u", int_arm_neon_vaddwu, 0>;

//   VHADD    : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D,
                           IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D,
                           IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;

//   VRHADD   : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D,
                           IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D,
                           IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;

//   VQADD    : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D,
                            IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D,
                            IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", int_arm_neon_vqaddu, 1>;

//   VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN   : N3VNInt_HSD<0, 1, 0b0100, 0,
                            "vaddhn", "i", int_arm_neon_vaddhn, 1>;

//   VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1, 1, 0b0100, 0,
                            "vraddhn", "i", int_arm_neon_vraddhn, 1>;
2061
// Vector Multiply Operations.

//   VMUL     : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1,
                        IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q,
                        "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D,
                        "vmul", "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q,
                        "vmul", "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND,
                     "vmul", "f32", v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ,
                     "vmul", "f32", v4f32, v4f32, fmul, 1>;

// Multiply-by-scalar (lane) forms.
defm VMULsl   : N3VSL_HS<0b1000, "vmul", "i", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND,
                       "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ,
                       "vmul", "f32", v4f32, v2f32, fmul>;

// Q-register multiply by a lane duplicated from a Q register: select the
// by-scalar instruction, feeding it the D subregister chosen from the lane
// index (DSubReg_*_reg) together with the transformed lane number
// (SubReg_*_lane).
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16
                   (v8i16 QPR:$src1),
                   (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32
                   (v4i32 QPR:$src1),
                   (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq
                   (v4f32 QPR:$src1),
                   (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;
2098
//   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm,
                          IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100,
                            IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s",  int_arm_neon_vqdmulh>;

// Q-register vqdmulh by a lane duplicated from a Q register: use the
// by-scalar form on the D subregister selected from the lane index.
def : Pat<(v8i16 (int_arm_neon_vqdmulh
                   (v8i16 QPR:$src1),
                   (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VQDMULHslv8i16
                   (v8i16 QPR:$src1),
                   (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh
                   (v4i32 QPR:$src1),
                   (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VQDMULHslv4i32
                   (v4i32 QPR:$src1),
                   (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;
2120
//   VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101,
                              IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s",  int_arm_neon_vqrdmulh>;

// Q-register vqrdmulh by a lane duplicated from a Q register: use the
// by-scalar form on the D subregister selected from the lane index.
def : Pat<(v8i16 (int_arm_neon_vqrdmulh
                   (v8i16 QPR:$src1),
                   (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16
                   (v8i16 QPR:$src1),
                   (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh
                   (v4i32 QPR:$src1),
                   (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32
                   (v4i32 QPR:$src1),
                   (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;
2142
//   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs   : N3VLInt_QHS<0, 1, 0b1100, 0, IIC_VMULi16D, IIC_VMULi32D,
                            "vmull", "s", int_arm_neon_vmulls, 1>;
defm VMULLu   : N3VLInt_QHS<1, 1, 0b1100, 0, IIC_VMULi16D, IIC_VMULi32D,
                            "vmull", "u", int_arm_neon_vmullu, 1>;
def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D,
                        "vmull", "p8", v8i16, v8i8, int_arm_neon_vmullp, 1>;
// Multiply-long by scalar (lane) forms.
defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D,
                             "vmull", "s", int_arm_neon_vmulls>;
defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D,
                             "vmull", "u", int_arm_neon_vmullu>;

//   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0, 1, 0b1101, 0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;
2160
// Vector Multiply-Accumulate and Multiply-Subtract Operations.

//   VMLA     : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0,
                             IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q,
                             "vmla", "i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD,
                          "vmla", "f32", v2f32, fmul, fadd>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ,
                          "vmla", "f32", v4f32, fmul, fadd>;
// Multiply-accumulate by scalar (lane) forms.
defm VMLAsl   : N3VMulOpSL_HS<0b0000,
                              IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q,
                              "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD,
                            "vmla", "f32", v2f32, fmul, fadd>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ,
                            "vmla", "f32", v4f32, v2f32, fmul, fadd>;

// Q-register accumulate of a product whose second factor is a lane
// duplicated from a Q register: use the by-scalar form on the D subregister
// selected from the lane index.
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                      (mul (v8i16 QPR:$src2),
                           (v8i16 (NEONvduplane (v8i16 QPR:$src3),
                                                imm:$lane))))),
          (v8i16 (VMLAslv8i16
                   (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                   (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                      (mul (v4i32 QPR:$src2),
                           (v4i32 (NEONvduplane (v4i32 QPR:$src3),
                                                imm:$lane))))),
          (v4i32 (VMLAslv4i32
                   (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                   (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
                       (fmul (v4f32 QPR:$src2),
                             (v4f32 (NEONvduplane (v4f32 QPR:$src3),
                                                  imm:$lane))))),
          (v4f32 (VMLAslfq
                   (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                   (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;
2201
//   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLInt3_QHS<0, 1, 0b1000, 0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlal", "s", int_arm_neon_vmlals>;
defm VMLALu   : N3VLInt3_QHS<1, 1, 0b1000, 0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlal", "u", int_arm_neon_vmlalu>;

// Multiply-accumulate-long by scalar (lane) forms.
defm VMLALsls : N3VLInt3SL_HS<0, 0b0010,
                              "vmlal", "s", int_arm_neon_vmlals>;
defm VMLALslu : N3VLInt3SL_HS<1, 0b0010,
                              "vmlal", "u", int_arm_neon_vmlalu>;

//   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlal", "s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011,
                              "vqdmlal", "s", int_arm_neon_vqdmlal>;
2215
//   VMLS     : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0,
                             IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q,
                             "vmls", "i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD,
                          "vmls", "f32", v2f32, fmul, fsub>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ,
                          "vmls", "f32", v4f32, fmul, fsub>;
// Multiply-subtract by scalar (lane) forms.
defm VMLSsl   : N3VMulOpSL_HS<0b0100,
                              IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q,
                              "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD,
                            "vmls", "f32", v2f32, fmul, fsub>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ,
                            "vmls", "f32", v4f32, v2f32, fmul, fsub>;

// Q-register subtract of a product whose second factor is a lane duplicated
// from a Q register: use the by-scalar form on the D subregister selected
// from the lane index.
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                      (mul (v8i16 QPR:$src2),
                           (v8i16 (NEONvduplane (v8i16 QPR:$src3),
                                                imm:$lane))))),
          (v8i16 (VMLSslv8i16
                   (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                   (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i16_reg imm:$lane))),
                   (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                      (mul (v4i32 QPR:$src2),
                           (v4i32 (NEONvduplane (v4i32 QPR:$src3),
                                                imm:$lane))))),
          (v4i32 (VMLSslv4i32
                   (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                   (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
                       (fmul (v4f32 QPR:$src2),
                             (v4f32 (NEONvduplane (v4f32 QPR:$src3),
                                                  imm:$lane))))),
          (v4f32 (VMLSslfq
                   (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                   (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                          (DSubReg_i32_reg imm:$lane))),
                   (SubReg_i32_lane imm:$lane)))>;
2253
//   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLInt3_QHS<0, 1, 0b1010, 0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlsl", "s", int_arm_neon_vmlsls>;
defm VMLSLu   : N3VLInt3_QHS<1, 1, 0b1010, 0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlsl", "u", int_arm_neon_vmlslu>;

// Multiply-subtract-long by scalar (lane) forms.
defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110,
                              "vmlsl", "s", int_arm_neon_vmlsls>;
defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110,
                              "vmlsl", "u", int_arm_neon_vmlslu>;
2262
//   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// By-scalar (lane) form.  The op11_8 argument is spelled with all four bits
// (0b0111), matching the other N3VLInt3SL_HS users, so the literal's width
// agrees with the 4-bit field it initializes.
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111,
                              "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
2267
// Vector Subtract Operations.

//   VSUB     : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND,
                     "vsub", "f32", v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ,
                     "vsub", "f32", v4f32, v4f32, fsub, 0>;
//   VSUBL    : Vector Subtract Long (Q = D - D)
//   Subtraction is not commutative, so the trailing commutable flag is 0
//   here, exactly as for every other subtract form in this section; a value
//   of 1 would permit the two source operands to be swapped.
defm VSUBLs   : N3VLInt_QHS<0, 1, 0b0010, 0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "s", int_arm_neon_vsubls, 0>;
defm VSUBLu   : N3VLInt_QHS<1, 1, 0b0010, 0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "u", int_arm_neon_vsublu, 0>;
//   VSUBW    : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VWInt_QHS<0, 1, 0b0011, 0,
                            "vsubw", "s", int_arm_neon_vsubws, 0>;
defm VSUBWu   : N3VWInt_QHS<1, 1, 0b0011, 0,
                            "vsubw", "u", int_arm_neon_vsubwu, 0>;

//   VHSUB    : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D,
                           IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D,
                           IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;

//   VQSUB    : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D,
                            IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D,
                            IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "u", int_arm_neon_vqsubu, 0>;

//   VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0, 1, 0b0110, 0,
                            "vsubhn", "i", int_arm_neon_vsubhn, 0>;

//   VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1, 1, 0b0110, 0,
                            "vrsubhn", "i", int_arm_neon_vrsubhn, 0>;
2305
// Vector Comparisons.

//   VCEQ     : Vector Compare Equal
defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vceq", "i", NEONvceq, 1>;
def  VCEQfd   : N3VD<0, 0, 0b00, 0b1110, 0, IIC_VBIND,
                     "vceq", "f32", v2i32, v2f32, NEONvceq, 1>;
def  VCEQfq   : N3VQ<0, 0, 0b00, 0b1110, 0, IIC_VBINQ,
                     "vceq", "f32", v4i32, v4f32, NEONvceq, 1>;
// Compare against #0.  For disassembly only.
defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0,
                            "vceq", "i", "$dst, $src, #0">;

//   VCGE     : Vector Compare Greater Than or Equal
defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vcge", "s", NEONvcge, 0>;
defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd   : N3VD<1, 0, 0b00, 0b1110, 0, IIC_VBIND,
                     "vcge", "f32", v2i32, v2f32, NEONvcge, 0>;
def  VCGEfq   : N3VQ<1, 0, 0b00, 0b1110, 0, IIC_VBINQ,
                     "vcge", "f32", v4i32, v4f32, NEONvcge, 0>;
// Compare against #0.  For disassembly only.
defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0,
                            "vcge", "s", "$dst, $src, #0">;
// Compare against #0.  For disassembly only.
defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0,
                            "vcle", "s", "$dst, $src, #0">;

//   VCGT     : Vector Compare Greater Than
defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vcgt", "s", NEONvcgt, 0>;
defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vcgt", "u", NEONvcgtu, 0>;
def  VCGTfd   : N3VD<1, 0, 0b10, 0b1110, 0, IIC_VBIND,
                     "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>;
def  VCGTfq   : N3VQ<1, 0, 0b10, 0b1110, 0, IIC_VBINQ,
                     "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>;
// Compare against #0.  For disassembly only.
defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0,
                            "vcgt", "s", "$dst, $src, #0">;
// Compare against #0.  For disassembly only.
defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0,
                            "vclt", "s", "$dst, $src, #0">;

//   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND,
                        "vacge", "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                        "vacge", "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;

//   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND,
                        "vacgt", "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ,
                        "vacgt", "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;

//   VTST     : Vector Test Bits
defm VTST     : N3V_QHS<0, 0, 0b1000, 1,
                        IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                        "vtst", "", NEONvtst, 1>;
2364
// Vector Bitwise Operations.

// Bitwise NOT fragments: xor of the operand with an all-ones vector, for
// D-register (vnotd) and Q-register (vnotq) operands respectively.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in,
                         (bitconvert (v8i8 NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in,
                         (bitconvert (v16i8 NEONimmAllOnesV)))>;


//   VAND     : Vector Bitwise AND
def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD,
                      "vand", v2i32, v2i32, and, 1>;
def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
                      "vand", v4i32, v4i32, and, 1>;

//   VEOR     : Vector Bitwise Exclusive OR
def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD,
                      "veor", v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
                      "veor", v4i32, v4i32, xor, 1>;

//   VORR     : Vector Bitwise OR
def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD,
                      "vorr", v2i32, v2i32, or, 1>;
def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ,
                      "vorr", v4i32, v4i32, or, 1>;

//   VBIC     : Vector Bitwise Bit Clear (AND NOT)
//   dst = src1 & ~src2
def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1,
                     (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                     N3RegFrm, IIC_VBINiD,
                     "vbic", "$dst, $src1, $src2", "",
                     [(set DPR:$dst,
                       (v2i32 (and DPR:$src1, (vnotd DPR:$src2))))]>;
def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1,
                     (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                     N3RegFrm, IIC_VBINiQ,
                     "vbic", "$dst, $src1, $src2", "",
                     [(set QPR:$dst,
                       (v4i32 (and QPR:$src1, (vnotq QPR:$src2))))]>;

//   VORN     : Vector Bitwise OR NOT
//   dst = src1 | ~src2
def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                     N3RegFrm, IIC_VBINiD,
                     "vorn", "$dst, $src1, $src2", "",
                     [(set DPR:$dst,
                       (v2i32 (or DPR:$src1, (vnotd DPR:$src2))))]>;
def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                     N3RegFrm, IIC_VBINiQ,
                     "vorn", "$dst, $src1, $src2", "",
                     [(set QPR:$dst,
                       (v4i32 (or QPR:$src1, (vnotq QPR:$src2))))]>;
2414
//   VMVN     : Vector Bitwise NOT (Immediate)
//   All four forms take a single nModImm operand, select on NEONvmvnImm and
//   are marked rematerializable.

let isReMaterializable = 1 in {
// 16-bit element forms.
def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1,
                         (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$dst, $SIMM", "",
                         [(set DPR:$dst,
                           (v4i16 (NEONvmvnImm timm:$SIMM)))]>;
def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1,
                         (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$dst, $SIMM", "",
                         [(set QPR:$dst,
                           (v8i16 (NEONvmvnImm timm:$SIMM)))]>;

// 32-bit element forms.
def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1,
                         (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$dst, $SIMM", "",
                         [(set DPR:$dst,
                           (v2i32 (NEONvmvnImm timm:$SIMM)))]>;
def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1,
                         (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$dst, $SIMM", "",
                         [(set QPR:$dst,
                           (v4i32 (NEONvmvnImm timm:$SIMM)))]>;
}
2436
//   VMVN     : Vector Bitwise NOT
//   NOTE(review): VMVNq is given the D-register itinerary IIC_VSUBiD, the
//   same one VMVNd uses — confirm whether a Q-register itinerary was
//   intended.
def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                     (outs DPR:$dst), (ins DPR:$src),
                     IIC_VSUBiD, "vmvn", "$dst, $src", "",
                     [(set DPR:$dst, (v2i32 (vnotd DPR:$src)))]>;
def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                     (outs QPR:$dst), (ins QPR:$src),
                     IIC_VSUBiD, "vmvn", "$dst, $src", "",
                     [(set QPR:$dst, (v4i32 (vnotq QPR:$src)))]>;
// Standalone selection patterns mapping the NOT fragments onto VMVN.
def : Pat<(v2i32 (vnotd DPR:$src)),
          (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)),
          (VMVNq QPR:$src)>;
2448
2449//   VBSL     : Vector Bitwise Select
// Matched DAG: ($src2 & $src1) | ($src3 & ~$src1), i.e. $src1 is the select
// mask.  The constraint "$src1 = $dst" ties the mask to the destination
// register, so only $src2 and $src3 appear as sources in the asm string.
2450def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
2451                     (ins DPR:$src1, DPR:$src2, DPR:$src3),
2452                     N3RegFrm, IIC_VCNTiD,
2453                     "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
2454                     [(set DPR:$dst,
2455                       (v2i32 (or (and DPR:$src2, DPR:$src1),
2456                                  (and DPR:$src3, (vnotd DPR:$src1)))))]>;
2457def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
2458                     (ins QPR:$src1, QPR:$src2, QPR:$src3),
2459                     N3RegFrm, IIC_VCNTiQ,
2460                     "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
2461                     [(set QPR:$dst,
2462                       (v4i32 (or (and QPR:$src2, QPR:$src1),
2463                                  (and QPR:$src3, (vnotq QPR:$src1)))))]>;
2464
2465//   VBIF     : Vector Bitwise Insert if False
2466//              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// NOTE(review): the defs below actually use the same asm operand string and
// tie as VBSL ("$dst, $src2, $src3", "$src1 = $dst"); the line above
// describes the operand mapping in terms of VBSL's pattern operands.
// The pattern list is empty, so these records are never selected from DAGs.
2467def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
2468                     (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
2469                     N3RegFrm, IIC_VBINiD,
2470                     "vbif", "$dst, $src2, $src3", "$src1 = $dst",
2471                     [/* For disassembly only; pattern left blank */]>;
2472def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
2473                     (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
2474                     N3RegFrm, IIC_VBINiQ,
2475                     "vbif", "$dst, $src2, $src3", "$src1 = $dst",
2476                     [/* For disassembly only; pattern left blank */]>;
2477
2478//   VBIT     : Vector Bitwise Insert if True
2479//              like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// NOTE(review): the defs below actually use the same asm operand string and
// tie as VBSL ("$dst, $src2, $src3", "$src1 = $dst"); the line above
// describes the operand mapping in terms of VBSL's pattern operands.
// The pattern list is empty, so these records are never selected from DAGs.
2480def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
2481                     (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
2482                     N3RegFrm, IIC_VBINiD,
2483                     "vbit", "$dst, $src2, $src3", "$src1 = $dst",
2484                     [/* For disassembly only; pattern left blank */]>;
2485def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
2486                     (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
2487                     N3RegFrm, IIC_VBINiQ,
2488                     "vbit", "$dst, $src2, $src3", "$src1 = $dst",
2489                     [/* For disassembly only; pattern left blank */]>;
2490
2491// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
2492// for equivalent operations with different register constraints; it just
2493// inserts copies.
2494
2495// Vector Absolute Differences.
2496
2497//   VABD     : Vector Absolute Difference
// Signed and unsigned integer forms are multiclass instantiations keyed on
// int_arm_neon_vabds / int_arm_neon_vabdu.  The f32 forms reuse the "s"
// intrinsic (int_arm_neon_vabds) with v2f32/v4f32 types.
// The trailing 0 is presumably the "commutable" flag -- confirm against the
// N3VInt class definitions.
2498defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
2499                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2500                           "vabd", "s", int_arm_neon_vabds, 0>;
2501defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
2502                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2503                           "vabd", "u", int_arm_neon_vabdu, 0>;
2504def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
2505                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
2506def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
2507                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;
2508
2509//   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
// Signed/unsigned variants differ in the first bit argument (0 vs 1) and in
// the intrinsic (int_arm_neon_vabdls vs int_arm_neon_vabdlu).
2510defm VABDLs   : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
2511                            "vabdl", "s", int_arm_neon_vabdls, 0>;
2512defm VABDLu   : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
2513                             "vabdl", "u", int_arm_neon_vabdlu, 0>;
2514
2515//   VABA     : Vector Absolute Difference and Accumulate
// Instantiated through N3VInt3_QHS with separate D (IIC_VABAD) and Q
// (IIC_VABAQ) itineraries; signed/unsigned select int_arm_neon_vabas/vabau.
2516defm VABAs    : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
2517                            "vaba", "s", int_arm_neon_vabas>;
2518defm VABAu    : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
2519                            "vaba", "u", int_arm_neon_vabau>;
2520
2521//   VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
// Both itinerary arguments are IIC_VABAD here.
// NOTE(review): the result is a Q register per the header; confirm that the
// D-register itinerary is intended.
2522defm VABALs   : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD,
2523                             "vabal", "s", int_arm_neon_vabals>;
2524defm VABALu   : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD,
2525                             "vabal", "u", int_arm_neon_vabalu>;
2526
2527// Vector Maximum and Minimum.
2528
2529//   VMAX     : Vector Maximum
// Integer forms use int_arm_neon_vmaxs / int_arm_neon_vmaxu; the f32 forms
// reuse int_arm_neon_vmaxs with v2f32/v4f32 types.  The trailing 1 is
// presumably the "commutable" flag -- confirm against the N3VInt classes.
2530defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
2531                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2532                           "vmax", "s", int_arm_neon_vmaxs, 1>;
2533defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
2534                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2535                           "vmax", "u", int_arm_neon_vmaxu, 1>;
2536def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
2537                        "vmax", "f32",
2538                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
2539def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
2540                        "vmax", "f32",
2541                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;
2542
2543//   VMIN     : Vector Minimum
// Mirrors the VMAX definitions: the integer forms differ from VMAX in the
// fourth bit argument (1 instead of 0); the f32 forms differ in the third
// argument (0b10 instead of 0b00) and reuse int_arm_neon_vmins.
2544defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
2545                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2546                           "vmin", "s", int_arm_neon_vmins, 1>;
2547defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
2548                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2549                           "vmin", "u", int_arm_neon_vminu, 1>;
2550def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
2551                        "vmin", "f32",
2552                        v2f32, v2f32, int_arm_neon_vmins, 1>;
2553def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
2554                        "vmin", "f32",
2555                        v4f32, v4f32, int_arm_neon_vmins, 1>;
2556
2557// Vector Pairwise Operations.
2558
2559//   VPADD    : Vector Pairwise Add
// Only D-register (N3VDInt) forms are defined here.  All four element types
// share the single intrinsic int_arm_neon_vpadd; the third argument
// (0b00/0b01/0b10) varies with the integer element size.
2560def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
2561                        "vpadd", "i8",
2562                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
2563def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
2564                        "vpadd", "i16",
2565                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
2566def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
2567                        "vpadd", "i32",
2568                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
2569def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, 
2570                        IIC_VBIND, "vpadd", "f32",
2571                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
2572
2573//   VPADDL   : Vector Pairwise Add Long
// Signed and unsigned variants differ in the low bit of the fourth argument
// (0b00100 vs 0b00101) and in the intrinsic used.
2574defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
2575                             int_arm_neon_vpaddls>;
2576defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
2577                             int_arm_neon_vpaddlu>;
2578
2579//   VPADAL   : Vector Pairwise Add and Accumulate Long
// Signed and unsigned variants differ in the low bit of the fourth argument
// (0b01100 vs 0b01101) and in the intrinsic used.
2580defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
2581                              int_arm_neon_vpadals>;
2582defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
2583                              int_arm_neon_vpadalu>;
2584
2585//   VPMAX    : Vector Pairwise Maximum
// D-register forms only.  Signed/unsigned integer variants select
// int_arm_neon_vpmaxs / int_arm_neon_vpmaxu; the f32 form reuses
// int_arm_neon_vpmaxs.
// NOTE(review): VPMAXf uses IIC_VSUBi4D like the integer forms, whereas
// VPADDf uses IIC_VBIND -- confirm the intended itinerary.
2586def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
2587                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
2588def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
2589                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
2590def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
2591                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
2592def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
2593                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
2594def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
2595                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
2596def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
2597                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
2598def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
2599                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
2600
2601//   VPMIN    : Vector Pairwise Minimum
// D-register forms only.  The integer forms differ from VPMAX in the fifth
// bit argument (1 instead of 0); the f32 form differs from VPMAXf in the
// third argument (0b10 instead of 0b00) and reuses int_arm_neon_vpmins.
2602def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
2603                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
2604def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
2605                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
2606def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
2607                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
2608def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
2609                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
2610def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
2611                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
2612def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
2613                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
2614def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmin",
2615                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
2616
2617// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
2618
2619//   VRECPE   : Vector Reciprocal Estimate
// One intrinsic (int_arm_neon_vrecpe) serves both the "u32" forms on
// v2i32/v4i32 and the "f32" forms on v2f32/v4f32; the two groups differ in
// the fifth argument (0b01000 vs 0b01010).
2620def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 
2621                        IIC_VUNAD, "vrecpe", "u32",
2622                        v2i32, v2i32, int_arm_neon_vrecpe>;
2623def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 
2624                        IIC_VUNAQ, "vrecpe", "u32",
2625                        v4i32, v4i32, int_arm_neon_vrecpe>;
2626def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
2627                        IIC_VUNAD, "vrecpe", "f32",
2628                        v2f32, v2f32, int_arm_neon_vrecpe>;
2629def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
2630                        IIC_VUNAQ, "vrecpe", "f32",
2631                        v4f32, v4f32, int_arm_neon_vrecpe>;
2632
2633//   VRECPS   : Vector Reciprocal Step
// f32-only, three-register forms keyed on int_arm_neon_vrecps, with separate
// D (IIC_VRECSD) and Q (IIC_VRECSQ) itineraries.
2634def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
2635                        IIC_VRECSD, "vrecps", "f32",
2636                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
2637def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
2638                        IIC_VRECSQ, "vrecps", "f32",
2639                        v4f32, v4f32, int_arm_neon_vrecps, 1>;
2640
2641//   VRSQRTE  : Vector Reciprocal Square Root Estimate
// Same layout as VRECPE: "u32" forms on v2i32/v4i32 and "f32" forms on
// v2f32/v4f32 share int_arm_neon_vrsqrte and differ in the fifth argument
// (0b01001 vs 0b01011).
2642def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
2643                         IIC_VUNAD, "vrsqrte", "u32",
2644                         v2i32, v2i32, int_arm_neon_vrsqrte>;
2645def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
2646                         IIC_VUNAQ, "vrsqrte", "u32",
2647                         v4i32, v4i32, int_arm_neon_vrsqrte>;
2648def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
2649                         IIC_VUNAD, "vrsqrte", "f32",
2650                         v2f32, v2f32, int_arm_neon_vrsqrte>;
2651def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 
2652                         IIC_VUNAQ, "vrsqrte", "f32",
2653                         v4f32, v4f32, int_arm_neon_vrsqrte>;
2654
2655//   VRSQRTS  : Vector Reciprocal Square Root Step
// f32-only forms keyed on int_arm_neon_vrsqrts; they differ from VRECPS in
// the third argument (0b10 instead of 0b00).
2656def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
2657                        IIC_VRECSD, "vrsqrts", "f32",
2658                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
2659def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
2660                        IIC_VRECSQ, "vrsqrts", "f32",
2661                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
2662
2663// Vector Shifts.
2664
2665//   VSHL     : Vector Shift
// Register-operand shifts go through the int_arm_neon_vshifts/vshiftu
// intrinsics (N3RegVShFrm); immediate shifts below use the NEONvshl,
// NEONvshrs and NEONvshru DAG nodes declared at the top of this file.
2666defm VSHLs    : N3VInt_QHSD<0, 0, 0b0100, 0, N3RegVShFrm,
2667                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
2668                            "vshl", "s", int_arm_neon_vshifts, 0>;
2669defm VSHLu    : N3VInt_QHSD<1, 0, 0b0100, 0, N3RegVShFrm,
2670                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
2671                            "vshl", "u", int_arm_neon_vshiftu, 0>;
2672//   VSHL     : Vector Shift Left (Immediate)
2673defm VSHLi    : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl,
2674                           N2RegVShLFrm>;
2675//   VSHR     : Vector Shift Right (Immediate)
2676defm VSHRs    : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs,
2677                           N2RegVShRFrm>;
2678defm VSHRu    : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru,
2679                           N2RegVShRFrm>;
2680
2681//   VSHLL    : Vector Shift Left Long
// Signed/unsigned variants differ in the first bit argument and in the DAG
// node (NEONvshlls vs NEONvshllu).
2682defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
2683defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
2684
2685//   VSHLL    : Vector Shift Left Long (with maximum shift count)
// N2VLShMax wraps N2VLSh and additionally hard-codes instruction bits 21-16
// from its op21_16 argument; all other arguments are forwarded unchanged.
// The three defs below supply a distinct op21_16 value per element size and
// all match the NEONvshlli node.
2686class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
2687                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
2688                ValueType OpTy, SDNode OpNode>
2689  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
2690           ResTy, OpTy, OpNode> {
2691  let Inst{21-16} = op21_16;
2692}
2693def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
2694                          v8i16, v8i8, NEONvshlli>;
2695def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
2696                          v4i32, v4i16, NEONvshlli>;
2697def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
2698                          v2i64, v2i32, NEONvshlli>;
2699
2700//   VSHRN    : Vector Shift Right and Narrow
// Instantiated for the H/S/D source sizes via N2VNSh_HSD on NEONvshrn.
2701defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
2702                           NEONvshrn>;
2703
2704//   VRSHL    : Vector Rounding Shift
// Register-operand forms use int_arm_neon_vrshifts/vrshiftu; the immediate
// right-shift forms below use the NEONvrshrs/NEONvrshru nodes.
2705defm VRSHLs   : N3VInt_QHSD<0, 0, 0b0101, 0, N3RegVShFrm,
2706                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
2707                            "vrshl", "s", int_arm_neon_vrshifts, 0>;
2708defm VRSHLu   : N3VInt_QHSD<1, 0, 0b0101, 0, N3RegVShFrm,
2709                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
2710                            "vrshl", "u", int_arm_neon_vrshiftu, 0>;
2711//   VRSHR    : Vector Rounding Shift Right
2712defm VRSHRs   : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs,
2713                           N2RegVShRFrm>;
2714defm VRSHRu   : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru,
2715                           N2RegVShRFrm>;
2716
2717//   VRSHRN   : Vector Rounding Shift Right and Narrow
// Differs from VSHRN in the fifth bit argument (1 instead of 0), the
// itinerary, and the DAG node (NEONvrshrn).
2718defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
2719                           NEONvrshrn>;
2720
2721//   VQSHL    : Vector Saturating Shift
// Register-operand forms use int_arm_neon_vqshifts/vqshiftu.  The immediate
// forms below use NEONvqshls/NEONvqshlu, and the "vqshlu" form (signed
// input, "s" data-type suffix) uses NEONvqshlsu.
2722defm VQSHLs   : N3VInt_QHSD<0, 0, 0b0100, 1, N3RegVShFrm,
2723                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
2724                            "vqshl", "s", int_arm_neon_vqshifts, 0>;
2725defm VQSHLu   : N3VInt_QHSD<1, 0, 0b0100, 1, N3RegVShFrm,
2726                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
2727                            "vqshl", "u", int_arm_neon_vqshiftu, 0>;
2728//   VQSHL    : Vector Saturating Shift Left (Immediate)
2729defm VQSHLsi  : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls,
2730                           N2RegVShLFrm>;
2731defm VQSHLui  : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu,
2732                           N2RegVShLFrm>;
2733//   VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
2734defm VQSHLsu  : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu,
2735                           N2RegVShLFrm>;
2736
2737//   VQSHRN   : Vector Saturating Shift Right and Narrow
// Signed/unsigned variants differ in the first bit argument and in the DAG
// node (NEONvqshrns vs NEONvqshrnu).
2738defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
2739                           NEONvqshrns>;
2740defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
2741                           NEONvqshrnu>;
2742
2743//   VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
// Uses the "s" data-type suffix with the NEONvqshrnsu node.
2744defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
2745                           NEONvqshrnsu>;
2746
2747//   VQRSHL   : Vector Saturating Rounding Shift
// Register-operand forms keyed on int_arm_neon_vqrshifts/vqrshiftu; they
// differ from VRSHL in the fourth bit argument (1 instead of 0).
2748defm VQRSHLs  : N3VInt_QHSD<0, 0, 0b0101, 1, N3RegVShFrm,
2749                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
2750                            "vqrshl", "s", int_arm_neon_vqrshifts, 0>;
2751defm VQRSHLu  : N3VInt_QHSD<1, 0, 0b0101, 1, N3RegVShFrm,
2752                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
2753                            "vqrshl", "u", int_arm_neon_vqrshiftu, 0>;
2754
2755//   VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
// Differs from VQSHRN in the fifth bit argument (1 instead of 0) and in the
// DAG nodes (NEONvqrshrns / NEONvqrshrnu).
2756defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
2757                           NEONvqrshrns>;
2758defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
2759                           NEONvqrshrnu>;
2760
2761//   VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
// Differs from VQSHRUN in the fifth bit argument (1 instead of 0) and uses
// the NEONvqrshrnsu node.
2762defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
2763                           NEONvqrshrnsu>;
2764
2765//   VSRA     : Vector Shift Right and Accumulate
// Built on the same shift nodes as VSHR (NEONvshrs/NEONvshru); the rounding
// variants below reuse the VRSHR nodes (NEONvrshrs/NEONvrshru).
2766defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
2767defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
2768//   VRSRA    : Vector Rounding Shift Right and Accumulate
2769defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
2770defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
2771
2772//   VSLI     : Vector Shift Left and Insert
// VSLI uses the left-shift format (N2RegVShLFrm) and VSRI the right-shift
// format (N2RegVShRFrm); they differ in the third argument's low bit.
2773defm VSLI     : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>;
2774//   VSRI     : Vector Shift Right and Insert
2775defm VSRI     : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>;
2776
2777// Vector Absolute and Saturating Absolute.
2778
2779//   VABS     : Vector Absolute Value
// Integer forms come from the N2VInt_QHS multiclass; the f32 D and Q forms
// are spelled out separately.  All of them match int_arm_neon_vabs.
2780defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, 
2781                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
2782                           int_arm_neon_vabs>;
2783def  VABSfd   : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
2784                        IIC_VUNAD, "vabs", "f32",
2785                        v2f32, v2f32, int_arm_neon_vabs>;
2786def  VABSfq   : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
2787                        IIC_VUNAQ, "vabs", "f32",
2788                        v4f32, v4f32, int_arm_neon_vabs>;
2789
2790//   VQABS    : Vector Saturating Absolute Value
// Integer-only; instantiated through N2VInt_QHS on int_arm_neon_vqabs.
2791defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, 
2792                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
2793                           int_arm_neon_vqabs>;
2794
2795// Vector Negate.
2796
// Pattern fragments expressing integer negation as (0 - $in): a `sub` whose
// left operand is an all-zeros vector (bitconverted from v2i32 for the D
// form, v4i32 for the Q form).
2797def vnegd  : PatFrag<(ops node:$in),
2798                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
2799def vnegq  : PatFrag<(ops node:$in),
2800                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
2801
// DPR integer negate: matches the vnegd fragment for vector type Ty.
//   size      - forwarded as the third N2V argument (varies per element size)
//   OpcodeStr - mnemonic
//   Dt        - data-type suffix string
//   Ty        - vector value type of the matched result
2802class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
2803  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
2804        IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
2805        [(set DPR:$dst, (Ty (vnegd DPR:$src)))]>;
// QPR integer negate: matches the vnegq fragment for vector type Ty.
//   size      - forwarded as the third N2V argument (varies per element size)
//   OpcodeStr - mnemonic
//   Dt        - data-type suffix string
//   Ty        - vector value type of the matched result
// Scheduled with the Q-register itinerary IIC_VSHLiQ; the D-register class
// VNEGD uses IIC_VSHLiD.
2806class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
2807  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
2808        IIC_VSHLiQ, OpcodeStr, Dt, "$dst, $src", "",
2809        [(set QPR:$dst, (Ty (vnegq QPR:$src)))]>;
2810
2811//   VNEG     : Vector Negate (integer)
// One def per element size and register width; the size argument is
// 0b00/0b01/0b10 for the s8/s16/s32 forms respectively.
2812def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
2813def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
2814def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
2815def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
2816def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
2817def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
2818
2819//   VNEG     : Vector Negate (floating-point)
// Matches the generic `fneg` node on v2f32 (DPR) and v4f32 (QPR) rather than
// the integer vnegd/vnegq fragments.
2820def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
2821                    (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
2822                    "vneg", "f32", "$dst, $src", "",
2823                    [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
2824def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
2825                    (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
2826                    "vneg", "f32", "$dst, $src", "",
2827                    [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
2828
// Explicit selection patterns mapping vnegd/vnegq of each integer vector
// type to the matching VNEG instruction.
// NOTE(review): the VNEGD/VNEGQ classes already attach the same pattern to
// each instruction, so these may be redundant -- confirm before removing.
2829def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
2830def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
2831def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
2832def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
2833def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
2834def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
2835
2836//   VQNEG    : Vector Saturating Negate
// Integer-only; instantiated through N2VInt_QHS on int_arm_neon_vqneg.
2837defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, 
2838                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
2839                           int_arm_neon_vqneg>;
2840
2841// Vector Bit Counting Operations.
2842
2843//   VCLS     : Vector Count Leading Sign Bits
// VCLS and VCLZ are instantiated over several element sizes via N2VInt_QHS;
// VCNT is defined only for 8-bit elements (v8i8 and v16i8, data-type "8").
2844defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, 
2845                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
2846                           int_arm_neon_vcls>;
2847//   VCLZ     : Vector Count Leading Zeros
2848defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, 
2849                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
2850                           int_arm_neon_vclz>;
2851//   VCNT     : Vector Count One Bits
2852def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, 
2853                        IIC_VCNTiD, "vcnt", "8",
2854                        v8i8, v8i8, int_arm_neon_vcnt>;
2855def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
2856                        IIC_VCNTiQ, "vcnt", "8",
2857                        v16i8, v16i8, int_arm_neon_vcnt>;
2858
2859// Vector Swap -- for disassembly only.
// Both records have an empty pattern list and NoItinerary.  They are
// modelled with one output and one input operand and no tie constraint.
2860def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
2861                     (outs DPR:$dst), (ins DPR:$src), NoItinerary,
2862                     "vswp", "$dst, $src", "", []>;
2863def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
2864                     (outs QPR:$dst), (ins QPR:$src), NoItinerary,
2865                     "vswp", "$dst, $src", "", []>;
2866
2867// Vector Move Operations.
2868
2869//   VMOV     : Vector Move (Register)
// All four records carry empty pattern lists and are marked
// neverHasSideEffects.  VMOVQQ/VMOVQQQQ are PseudoInsts whose asm string
// starts with "${:comment}", so they print as a comment until expanded.
// NOTE(review): VMOVQ uses the same itinerary (IIC_VMOVD) as VMOVDneon --
// confirm whether a Q-register itinerary was intended.
2870
2871let neverHasSideEffects = 1 in {
2872def  VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
2873                     N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
2874def  VMOVQ    : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
2875                     N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
2876
2877// Pseudo vector move instructions for QQ and QQQQ registers. This should
2878// be expanded after register allocation is completed.
2879def  VMOVQQ   : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
2880                NoItinerary, "${:comment} vmov\t$dst, $src", []>;
2881
2882def  VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
2883                NoItinerary, "${:comment} vmov\t$dst, $src", []>;
2884} // neverHasSideEffects
2885
2886//   VMOV     : Vector Move (Immediate)
// Each def takes a single nModImm operand and matches NEONvmovImm of that
// immediate for one result type.  The i8 and i64 forms pass the same fixed
// third argument (0b1110) and differ in the sixth bit argument (0 vs 1);
// the i16/i32 forms leave some of those bits unset with `?` (presumably
// supplied by the encoded immediate -- confirm against N1ModImm).
// Compared with the VMVN immediate forms, the sixth bit argument is 0 here
// for the i16/i32 defs instead of 1.
2887
2888let isReMaterializable = 1 in {
2889def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
2890                         (ins nModImm:$SIMM), IIC_VMOVImm,
2891                         "vmov", "i8", "$dst, $SIMM", "",
2892                         [(set DPR:$dst, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
2893def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
2894                         (ins nModImm:$SIMM), IIC_VMOVImm,
2895                         "vmov", "i8", "$dst, $SIMM", "",
2896                         [(set QPR:$dst, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
2897
2898def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
2899                         (ins nModImm:$SIMM), IIC_VMOVImm,
2900                         "vmov", "i16", "$dst, $SIMM", "",
2901                         [(set DPR:$dst, (v4i16 (NEONvmovImm timm:$SIMM)))]>;
2902def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
2903                         (ins nModImm:$SIMM), IIC_VMOVImm,
2904                         "vmov", "i16", "$dst, $SIMM", "",
2905                         [(set QPR:$dst, (v8i16 (NEONvmovImm timm:$SIMM)))]>;
2906
2907def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$dst),
2908                         (ins nModImm:$SIMM), IIC_VMOVImm,
2909                         "vmov", "i32", "$dst, $SIMM", "",
2910                         [(set DPR:$dst, (v2i32 (NEONvmovImm timm:$SIMM)))]>;
2911def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$dst),
2912                         (ins nModImm:$SIMM), IIC_VMOVImm,
2913                         "vmov", "i32", "$dst, $SIMM", "",
2914                         [(set QPR:$dst, (v4i32 (NEONvmovImm timm:$SIMM)))]>;
2915
2916def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
2917                         (ins nModImm:$SIMM), IIC_VMOVImm,
2918                         "vmov", "i64", "$dst, $SIMM", "",
2919                         [(set DPR:$dst, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
2920def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
2921                         (ins nModImm:$SIMM), IIC_VMOVImm,
2922                         "vmov", "i64", "$dst, $SIMM", "",
2923                         [(set QPR:$dst, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
2924} // isReMaterializable
2925
2926//   VMOV     : Vector Get Lane (move scalar to ARM core register)
// These defs read one lane of a DPR vector into a GPR.  The s8/s16 forms
// match NEONvgetlanes, the u8/u16 forms match NEONvgetlaneu, and the 32-bit
// form matches the generic extractelt node.  `?` entries in the bit lists
// are left unset in the record (presumably filled from the lane number --
// confirm against NVGetLane).
2927
2928def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
2929                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
2930                          IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]",
2931                          [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src),
2932                                           imm:$lane))]>;
2933def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
2934                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
2935                          IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]",
2936                          [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src),
2937                                           imm:$lane))]>;
2938def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
2939                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
2940                          IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]",
2941                          [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src),
2942                                           imm:$lane))]>;
2943def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
2944                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
2945                          IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]",
2946                          [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src),
2947                                           imm:$lane))]>;
2948def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
2949                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
2950                          IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]",
2951                          [(set GPR:$dst, (extractelt (v2i32 DPR:$src),
2952                                           imm:$lane))]>;
2953// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Lane reads from QPR vectors: extract a D sub-register of the Q register
// (chosen by the DSubReg_*_reg transform of $lane) and apply the D-register
// VGETLN instruction with the lane rewritten by SubReg_*_lane.  Both
// transforms are defined elsewhere; presumably they map a Q lane index to
// (half, lane-within-half) -- confirm at their definitions.
2954def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
2955          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
2956                           (DSubReg_i8_reg imm:$lane))),
2957                     (SubReg_i8_lane imm:$lane))>;
2958def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
2959          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
2960                             (DSubReg_i16_reg imm:$lane))),
2961                     (SubReg_i16_lane imm:$lane))>;
2962def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
2963          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
2964                           (DSubReg_i8_reg imm:$lane))),
2965                     (SubReg_i8_lane imm:$lane))>;
2966def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
2967          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
2968                             (DSubReg_i16_reg imm:$lane))),
2969                     (SubReg_i16_lane imm:$lane))>;
2970def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
2971          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
2972                             (DSubReg_i32_reg imm:$lane))),
2973                     (SubReg_i32_lane imm:$lane))>;
// f32 lane reads need no instruction: the vector is copied into the
// DPR_VFP2/QPR_VFP2 register class and the selected S sub-register is
// extracted directly.
2974def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
2975          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
2976                          (SSubReg_f32_reg imm:$src2))>;
2977def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
2978          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
2979                          (SSubReg_f32_reg imm:$src2))>;
// The v2i64 variant below is disabled; only the v2f64 D sub-register
// extraction is active.
2980//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
2981//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
2982def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
2983          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
2984
2985
2986//   VMOV     : Vector Set Lane (move ARM core register to scalar)
// These defs write GPR $src2 into lane $lane of a DPR vector.  The incoming
// vector $src1 is tied to $dst by the enclosing Constraints, so the other
// lanes are carried through.  The 8/16-bit forms match vector_insert and
// the 32-bit form matches insertelt.
2987
2988let Constraints = "$src1 = $dst" in {
2989def VSETLNi8  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$dst),
2990                          (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
2991                          IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2",
2992                          [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1),
2993                                           GPR:$src2, imm:$lane))]>;
2994def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$dst),
2995                          (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
2996                          IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2",
2997                          [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1),
2998                                           GPR:$src2, imm:$lane))]>;
2999def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$dst),
3000                          (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
3001                          IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2",
3002                          [(set DPR:$dst, (insertelt (v2i32 DPR:$src1),
3003                                           GPR:$src2, imm:$lane))]>;
3004}
// Integer element insert into a 128-bit (QPR) vector.  Each result extracts a
// D sub-register of $src1, applies the matching VSETLN* instruction to it with
// a transformed lane operand, and writes the updated D value back into $src1
// with INSERT_SUBREG at the same sub-register index.
3005def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
3006          (v16i8 (INSERT_SUBREG QPR:$src1, 
3007                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
3008                                   (DSubReg_i8_reg imm:$lane))),
3009                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
3010                  (DSubReg_i8_reg imm:$lane)))>;
3011def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
3012          (v8i16 (INSERT_SUBREG QPR:$src1, 
3013                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
3014                                     (DSubReg_i16_reg imm:$lane))),
3015                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
3016                  (DSubReg_i16_reg imm:$lane)))>;
3017def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
3018          (v4i32 (INSERT_SUBREG QPR:$src1, 
3019                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
3020                                     (DSubReg_i32_reg imm:$lane))),
3021                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
3022                  (DSubReg_i32_reg imm:$lane)))>;
3023
// Floating-point element insert.  f32: the vector is copied to the *_VFP2
// register class and the SPR value is placed with INSERT_SUBREG at the S
// sub-register chosen by SSubReg_f32_reg; no NEON instruction appears in the
// result.
3024def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
3025          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
3026                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
3027def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
3028          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
3029                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
3030
// 64-bit elements: the v2i64 form is disabled (commented out); v2f64 inserts
// the DPR value directly as a D sub-register of the Q register.
3031//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
3032//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
3033def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
3034          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
3035
// scalar_to_vector for floating-point scalars: the scalar is inserted into
// sub-register 0 (ssub_0 for f32, dsub_0 for f64) of an IMPLICIT_DEF vector,
// so the remaining elements are left undefined.
3036def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
3037          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
3038def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
3039          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
3040def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
3041          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
3042
// scalar_to_vector for integer scalars held in a GPR.
// 64-bit results: VSETLN* writes the GPR into lane 0 of an IMPLICIT_DEF
// D-register vector.
3043def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
3044          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
3045def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
3046          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
3047def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
3048          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
3049
// 128-bit results: the same D-register lane-0 insert is performed, and the
// resulting D value becomes sub-register dsub_0 of an IMPLICIT_DEF Q register.
3050def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
3051          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
3052                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
3053                         dsub_0)>;
3054def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
3055          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
3056                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
3057                         dsub_0)>;
3058def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
3059          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
3060                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
3061                         dsub_0)>;
3062
3063//   VDUP     : Vector Duplicate (from ARM core register to all elements)
3064
// VDUPD / VDUPQ: match NEONvdup of an i32 GPR, producing vector type Ty in a
// D or Q register respectively.  opcod1 and opcod3 are forwarded unchanged to
// NVDup; Dt is the data-type suffix string.
3065class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
3066  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
3067          IIC_VMOVIS, "vdup", Dt, "$dst, $src",
3068          [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
3069class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
3070  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
3071          IIC_VMOVIS, "vdup", Dt, "$dst, $src",
3072          [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
3073
3074def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
3075def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
3076def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>;
3077def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
3078def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
3079def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;
3080
// Floating-point forms: same NVDup arguments and "vdup"/"32" strings as
// VDUP32d / VDUP32q, but the pattern matches a vdup of an f32 that was
// bitconverted from the GPR.
3081def  VDUPfd   : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
3082                      IIC_VMOVIS, "vdup", "32", "$dst, $src",
3083                      [(set DPR:$dst, (v2f32 (NEONvdup
3084                                              (f32 (bitconvert GPR:$src)))))]>;
3085def  VDUPfq   : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
3086                      IIC_VMOVIS, "vdup", "32", "$dst, $src",
3087                      [(set QPR:$dst, (v4f32 (NEONvdup
3088                                              (f32 (bitconvert GPR:$src)))))]>;
3089
3090//   VDUP     : Vector Duplicate Lane (from scalar to all elements)
3091
// VDUPLND: D-register result; source and result share the vector type Ty.
// The second NVDupLane argument is 0 for this D form.
3092class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
3093              ValueType Ty>
3094  : NVDupLane<op19_16, 0, (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane),
3095              IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
3096              [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;
3097
// VDUPLNQ: Q-register result of type ResTy; the source is still a D register
// (type OpTy).  The second NVDupLane argument is 1 for this Q form.
3098class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
3099              ValueType ResTy, ValueType OpTy>
3100  : NVDupLane<op19_16, 1, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane),
3101              IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
3102              [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src),
3103                                      imm:$lane)))]>;
3104
3105// Inst{19-16} is partially specified depending on the element size.
// ('?' marks the bits left unset in each definition.)
3106
3107def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8>;
3108def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16>;
3109def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32>;
3110def VDUPLNfd  : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32>;
3111def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8>;
3112def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16>;
3113def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32>;
3114def VDUPLNfq  : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32>;
3115
// Duplicate-lane with a 128-bit source vector.  The VDUPLN*q instructions
// take a D-register source, so each result first extracts the D sub-register
// selected by a transform of $lane and passes a second transform of $lane as
// the lane operand.  The v4f32 pattern reuses the i32 transforms.
3116def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
3117          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
3118                                  (DSubReg_i8_reg imm:$lane))),
3119                           (SubReg_i8_lane imm:$lane)))>;
3120def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
3121          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
3122                                    (DSubReg_i16_reg imm:$lane))),
3123                            (SubReg_i16_lane imm:$lane)))>;
3124def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
3125          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
3126                                    (DSubReg_i32_reg imm:$lane))),
3127                            (SubReg_i32_lane imm:$lane)))>;
3128def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
3129          (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src,
3130                                   (DSubReg_i32_reg imm:$lane))),
3131                           (SubReg_i32_lane imm:$lane)))>;
3132
// vdup of an f32 that already lives in an SPR, to every element of a D
// (VDUPfdf) or Q (VDUPfqf) register.  The source is printed through the
// "lane" operand modifier (${src:lane}).
// NOTE(review): presumably the modifier prints the S register as a D-register
// lane reference -- confirm in the asm printer.
3133def  VDUPfdf  : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0,
3134                    (outs DPR:$dst), (ins SPR:$src),
3135                    IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
3136                    [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
3137
3138def  VDUPfqf  : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
3139                    (outs QPR:$dst), (ins SPR:$src),
3140                    IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
3141                    [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
3142
// Narrowing and lengthening moves.  Each defm expands a multiclass defined
// outside this section (N2VNInt_HSD / N2VL_QHS) into its per-element-size
// instructions.
3143//   VMOVN    : Vector Narrowing Move
3144defm VMOVN    : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
3145                            "vmovn", "i", int_arm_neon_vmovn>;
3146//   VQMOVN   : Vector Saturating Narrowing Move
// VQMOVNs / VQMOVNu use the "vqmovn" mnemonic with "s" / "u" type prefixes;
// VQMOVNsu uses the "vqmovun" mnemonic with the "s" prefix.
3147defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
3148                            "vqmovn", "s", int_arm_neon_vqmovns>;
3149defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
3150                            "vqmovn", "u", int_arm_neon_vqmovnu>;
3151defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
3152                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
3153//   VMOVL    : Vector Lengthening Move
// Signed form is selected from sext, unsigned form from zext.
3154defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
3155defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
3156
3157// Vector Conversions.
3158
3159//   VCVT     : Vector Convert Between Floating-Point and Integers
// D-register (64-bit) forms: v2f32 <-> v2i32, selected from the generic
// fp_to_sint / fp_to_uint / sint_to_fp / uint_to_fp nodes.
3160def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
3161                     v2i32, v2f32, fp_to_sint>;
3162def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
3163                     v2i32, v2f32, fp_to_uint>;
3164def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
3165                     v2f32, v2i32, sint_to_fp>;
3166def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
3167                     v2f32, v2i32, uint_to_fp>;
3168
// Q-register (128-bit) forms: v4f32 <-> v4i32, with the same opcode arguments
// as the D forms above.
3169def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
3170                     v4i32, v4f32, fp_to_sint>;
3171def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
3172                     v4i32, v4f32, fp_to_uint>;
3173def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
3174                     v4f32, v4i32, sint_to_fp>;
3175def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
3176                     v4f32, v4i32, uint_to_fp>;
3177
3178//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
// These are selected from the int_arm_neon_vcvt* intrinsics rather than from
// generic conversion nodes.  D-register forms (v2f32 <-> v2i32):
3179def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
3180                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
3181def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
3182                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
3183def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
3184                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
3185def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
3186                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
3187
// Q-register forms (v4f32 <-> v4i32), same opcode arguments as the D forms.
3188def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
3189                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
3190def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
3191                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
3192def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
3193                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
3194def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
3195                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
3196
3197// Vector Reverse.
3198
3199//   VREV64   : Vector Reverse elements within 64-bit doublewords
3200
// VREV64D / VREV64Q: D- and Q-register forms, selected from NEONvrev64 with
// identical source and result type Ty.  op19_18 is forwarded to N2V; the two
// classes differ only in register class and in the sixth N2V argument (0 / 1).
3201class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
3202  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst),
3203        (ins DPR:$src), IIC_VMOVD, 
3204        OpcodeStr, Dt, "$dst, $src", "",
3205        [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
3206class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
3207  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
3208        (ins QPR:$src), IIC_VMOVD, 
3209        OpcodeStr, Dt, "$dst, $src", "",
3210        [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;
3211
// The float variants (df / qf) use the same op19_18 and "32" suffix as the
// 32-bit integer variants; only the matched value type differs.
3212def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
3213def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
3214def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
3215def VREV64df  : VREV64D<0b10, "vrev64", "32", v2f32>;
3216
3217def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
3218def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
3219def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
3220def VREV64qf  : VREV64Q<0b10, "vrev64", "32", v4f32>;
3221
3222//   VREV32   : Vector Reverse elements within 32-bit words
3223
// VREV32D / VREV32Q: D- and Q-register forms, selected from NEONvrev32 with
// identical source and result type Ty.  Only 8- and 16-bit element variants
// are defined below.
3224class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
3225  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst),
3226        (ins DPR:$src), IIC_VMOVD, 
3227        OpcodeStr, Dt, "$dst, $src", "",
3228        [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
3229class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
3230  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
3231        (ins QPR:$src), IIC_VMOVD, 
3232        OpcodeStr, Dt, "$dst, $src", "",
3233        [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;
3234
3235def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
3236def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
3237
3238def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
3239def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
3240
3241//   VREV16   : Vector Reverse elements within 16-bit halfwords
3242
// VREV16D / VREV16Q: D- and Q-register forms, selected from NEONvrev16 with
// identical source and result type Ty.  Only the 8-bit element variants are
// defined below.
3243class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
3244  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst),
3245        (ins DPR:$src), IIC_VMOVD, 
3246        OpcodeStr, Dt, "$dst, $src", "",
3247        [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
3248class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
3249  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
3250        (ins QPR:$src), IIC_VMOVD, 
3251        OpcodeStr, Dt, "$dst, $src", "",
3252        [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;
3253
3254def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
3255def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;
3256
3257// Other Vector Shuffles.
3258
3259//   VEXT     : Vector Extract
3260
// VEXTd / VEXTq: D- and Q-register forms, selected from NEONvext on two
// vectors of type Ty plus an immediate $index.  The fourth N3V argument is
// given as four unset bits ({?,?,?,?}).
// NOTE(review): presumably those bits carry the encoded index and are filled
// in by the NVExtFrm encoder -- confirm.
3261class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
3262  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$dst),
3263        (ins DPR:$lhs, DPR:$rhs, i32imm:$index), NVExtFrm,
3264        IIC_VEXTD, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
3265        [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
3266                                      (Ty DPR:$rhs), imm:$index)))]>;
3267
3268class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
3269  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$dst),
3270        (ins QPR:$lhs, QPR:$rhs, i32imm:$index), NVExtFrm,
3271        IIC_VEXTQ, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
3272        [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
3273                                      (Ty QPR:$rhs), imm:$index)))]>;
3274
// One def per element type; the float variants share the "32" suffix with
// the 32-bit integer variants.
3275def VEXTd8  : VEXTd<"vext", "8",  v8i8>;
3276def VEXTd16 : VEXTd<"vext", "16", v4i16>;
3277def VEXTd32 : VEXTd<"vext", "32", v2i32>;
3278def VEXTdf  : VEXTd<"vext", "32", v2f32>;
3279
3280def VEXTq8  : VEXTq<"vext", "8",  v16i8>;
3281def VEXTq16 : VEXTq<"vext", "16", v8i16>;
3282def VEXTq32 : VEXTq<"vext", "32", v4i32>;
3283def VEXTqf  : VEXTq<"vext", "32", v4f32>;
3284
// VTRN / VUZP / VZIP are built from the N2VDShuffle and N2VQShuffle classes
// (defined outside this section).  The first argument follows the element
// size (0b00 / 0b01 / 0b10 for 8 / 16 / 32), the second selects the operation
// (0b00001 VTRN, 0b00010 VUZP, 0b00011 VZIP).  Only the Q forms pass an
// itinerary argument explicitly.
3285//   VTRN     : Vector Transpose
3286
3287def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
3288def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
3289def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
3290
3291def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
3292def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
3293def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
3294
3295//   VUZP     : Vector Unzip (Deinterleave)
3296
3297def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
3298def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
3299def  VUZPd32  : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
3300
3301def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
3302def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
3303def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
3304
3305//   VZIP     : Vector Zip (Interleave)
3306
3307def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
3308def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
3309def  VZIPd32  : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
3310
3311def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
3312def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
3313def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
3314
3315// Vector Table Lookup and Table Extension.
3316
3317//   VTBL     : Vector Table Lookup
// VTBL1 is the only form with a selection pattern here (int_arm_neon_vtbl1).
// The op11_8 argument rises with the table size: 0b1000 .. 0b1011 for 1 .. 4
// table registers.
3318def  VTBL1
3319  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst),
3320        (ins DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTB1,
3321        "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "",
3322        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
// The multi-register forms have empty pattern lists and set
// hasExtraSrcRegAllocReq.
// NOTE(review): presumably they are selected by custom C++ code because the
// table registers must be allocated consecutively -- confirm.
3323let hasExtraSrcRegAllocReq = 1 in {
3324def  VTBL2
3325  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
3326        (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2,
3327        "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", []>;
3328def  VTBL3
3329  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
3330        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3,
3331        "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", []>;
3332def  VTBL4
3333  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
3334        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
3335        NVTBLFrm, IIC_VTB4,
3336        "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>;
3337} // hasExtraSrcRegAllocReq = 1
3338
3339//   VTBX     : Vector Table Extension
// Same shape as the VTBL defs, except that the fifth N3V argument is 1 and
// every form takes an extra $orig input tied to the result by the
// "$orig = $dst" constraint.  Only VTBX1 has a selection pattern here
// (int_arm_neon_vtbx1); VTBX2..4 have empty pattern lists.
3340def  VTBX1
3341  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst),
3342        (ins DPR:$orig, DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTBX1,
3343        "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst",
3344        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1
3345                               DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
3346let hasExtraSrcRegAllocReq = 1 in {
3347def  VTBX2
3348  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
3349        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2,
3350        "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", []>;
3351def  VTBX3
3352  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
3353        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
3354        NVTBLFrm, IIC_VTBX3,
3355        "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src",
3356        "$orig = $dst", []>;
3357def  VTBX4
3358  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
3359        DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4,
3360        "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src",
3361        "$orig = $dst", []>;
3362} // hasExtraSrcRegAllocReq = 1
3363
3364//===----------------------------------------------------------------------===//
3365// NEON instructions for single-precision FP math
3366//===----------------------------------------------------------------------===//
3367
// Pattern helpers that run a scalar f32 operation on a NEON D-register
// instruction.  Each scalar operand is inserted into sub-register ssub_0 of
// an IMPLICIT_DEF vector, the vector instruction Inst is applied, and the
// scalar result is read back from ssub_0.

// N2VSPat: one-operand form.  ResTy is the scalar result type of OpNode; OpTy
// is the vector type used both for the IMPLICIT_DEF input and for the type
// annotation on Inst's result.
3368class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
3369  : NEONFPPat<(ResTy (OpNode SPR:$a)),
3370              (EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
3371                                                       SPR:$a, ssub_0))),
3372                              ssub_0)>;
3373
// N3VSPat: two-operand f32 form, always using v2f32 vectors.
3374class N3VSPat<SDNode OpNode, NeonI Inst>
3375  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
3376              (EXTRACT_SUBREG (v2f32
3377                                 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
3378                                                      SPR:$a, ssub_0),
3379                                       (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
3380                                                      SPR:$b, ssub_0))),
3381                              ssub_0)>;
3382
// N3VSMulOpPat: matches OpNode($acc, MulNode($a, $b)) and passes $acc, $a and
// $b to Inst as three separate v2f32 operands.  Every use of this class
// visible in this section is commented out.
3383class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
3384  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
3385              (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
3386                                                   SPR:$acc, ssub_0),
3387                                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
3388                                                   SPR:$a, ssub_0),
3389                                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
3390                                                   SPR:$b, ssub_0)),
3391                              ssub_0)>;
3392
3393// These need separate instructions because they must use the DPR_VFP2
3394// register class, which has SPR sub-registers.
3395
// Each *_sfp def below is paired with an N3VSPat that selects the scalar f32
// node (fadd / fsub / fmul) onto it.
// NOTE(review): the last N3VS argument is 1 for fadd and fmul and 0 for fsub;
// presumably it is the commutable flag -- confirm against the N3VS class.
3396// Vector Add Operations used for single-precision FP
3397let neverHasSideEffects = 1 in
3398def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>;
3399def : N3VSPat<fadd, VADDfd_sfp>;
3400
3401// Vector Sub Operations used for single-precision FP
3402let neverHasSideEffects = 1 in
3403def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>;
3404def : N3VSPat<fsub, VSUBfd_sfp>;
3405
3406// Vector Multiply Operations used for single-precision FP
3407let neverHasSideEffects = 1 in
3408def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>;
3409def : N3VSPat<fmul, VMULfd_sfp>;
3410
3411// Vector Multiply-Accumulate/Subtract used for single-precision FP
3412// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
3413// we want to avoid them for now. e.g., alternating vmla/vadd instructions.
3414
3415//let neverHasSideEffects = 1 in
3416//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32",
3417//                           v2f32, fmul, fadd>;
3418//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>;
3419
3420//let neverHasSideEffects = 1 in
3421//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32",
3422//                           v2f32, fmul, fsub>;
3423//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>;
3424
// VABSfd_sfp / VNEGfd_sfp operate on DPR_VFP2 registers and carry no
// selection pattern of their own ([]); the N2VSPat that follows each def
// selects scalar fabs / fneg on f32 onto it.
3425// Vector Absolute used for single-precision FP
3426let neverHasSideEffects = 1 in
3427def  VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0,
3428                      (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
3429                      "vabs", "f32", "$dst, $src", "", []>;
3430def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>;
3431
3432// Vector Negate used for single-precision FP
3433let neverHasSideEffects = 1 in
3434def  VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
3435                      (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
3436                      "vneg", "f32", "$dst, $src", "", []>;
3437def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;
3438
3439// Vector Maximum used for single-precision FP
// The def operates on DPR_VFP2 registers and has no selection pattern of its
// own ([]); the N3VSPat below selects scalar NEONfmax on f32 onto it.
3440let neverHasSideEffects = 1 in
3441def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
3442                     (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
3443                     "vmax", "f32", "$dst, $src1, $src2", "", []>;
3444def : N3VSPat<NEONfmax, VMAXfd_sfp>;
3445
3446// Vector Minimum used for single-precision FP
// The def operates on DPR_VFP2 registers and has no selection pattern of its
// own ([]); the N3VSPat below selects scalar NEONfmin on f32 onto it.
// Encoding: op21_20 must be 0b10 (Inst{21} = 1 selects VMIN, Inst{20} = 0 is
// the f32 size bit).  With 0b00 this def is bit-for-bit identical to
// VMAXfd_sfp and "vmin" would be encoded as "vmax".
3447let neverHasSideEffects = 1 in
3448def VMINfd_sfp : N3V<0, 0, 0b10, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
3449                     (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
3450                     "vmin", "f32", "$dst, $src1, $src2", "", []>;
3451def : N3VSPat<NEONfmin, VMINfd_sfp>;
3452
3453// Vector Convert between single-precision FP and integer
// Each *_sfp def is built from N2VS with a vector conversion node, and the
// N2VSPat that follows selects the scalar ARM conversion node (arm_ftosi,
// arm_ftoui, arm_sitof, arm_uitof) onto it.  The vector type handed to
// N2VSPat is the instruction's source vector type (v2f32 for fp->int, v2i32
// for int->fp).
3454let neverHasSideEffects = 1 in
3455def  VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
3456                         v2i32, v2f32, fp_to_sint>;
3457def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
3458
3459let neverHasSideEffects = 1 in
3460def  VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
3461                         v2i32, v2f32, fp_to_uint>;
3462def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
3463
3464let neverHasSideEffects = 1 in
3465def  VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
3466                         v2f32, v2i32, sint_to_fp>;
3467def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
3468
3469let neverHasSideEffects = 1 in
3470def  VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
3471                         v2f32, v2i32, uint_to_fp>;
3472def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
3473
3474//===----------------------------------------------------------------------===//
3475// Non-Instruction Patterns
3476//===----------------------------------------------------------------------===//
3477
3478// bit_convert
// 64-bit (DPR) types: v1i64, v2i32, v4i16, v8i8, f64, v2f32.  Every ordered
// pair of distinct types has a pattern, and each pattern's result is just the
// source register retyped -- no instruction is emitted for a bitconvert.
3479def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
3480def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
3481def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
3482def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
3483def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
3484def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
3485def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
3486def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
3487def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
3488def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
3489def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
3490def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
3491def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
3492def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
3493def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
3494def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
3495def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
3496def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
3497def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
3498def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
3499def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
3500def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
3501def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
3502def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
3503def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
3504def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
3505def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
3506def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
3507def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
3508def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;
3509
// Bitconverts between the 128-bit (QPR) types: v2i64, v4i32, v8i16, v16i8,
// v4f32, v2f64.  Every ordered pair of distinct types has a pattern, and each
// pattern's result is just the source register retyped.
3510def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
3511def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
3512def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
3513def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
3514def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
3515def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
3516def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
3517def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
3518def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
3519def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
3520def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
3521def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
3522def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
3523def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
3524def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
3525def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
3526def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
3527def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
3528def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
3529def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
3530def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
3531def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
3532def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
3533def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
3534def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
3535def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
3536def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
3537def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
3538def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
3539def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
3540