ARMInstrNEON.td revision 460a90540b045c102012da2492999557e6840526
1//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the ARM NEON instruction set.
11//
12//===----------------------------------------------------------------------===//
13
14
15//===----------------------------------------------------------------------===//
16// NEON-specific Operands.
17//===----------------------------------------------------------------------===//
// Assembly-operand classes for the vector-index operands; each record only
// sets the Name used by the corresponding operand's ParserMatchClass.
18def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
19def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
20def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
// i32 immediate operand that matches only when the value, cast to uint64_t,
// is below 8. Parsed through VectorIndex8Operand and printed with
// printVectorIndex.
// NOTE(review): presumably the lane index for 8-bit elements -- confirm.
21def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
22  return ((uint64_t)Imm) < 8;
23}]> {
24  let ParserMatchClass = VectorIndex8Operand;
25  let PrintMethod = "printVectorIndex";
26  let MIOperandInfo = (ops i32imm);
27}
// i32 immediate operand that matches only when the value, cast to uint64_t,
// is below 4. Parsed through VectorIndex16Operand and printed with
// printVectorIndex.
// NOTE(review): presumably the lane index for 16-bit elements -- confirm.
28def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
29  return ((uint64_t)Imm) < 4;
30}]> {
31  let ParserMatchClass = VectorIndex16Operand;
32  let PrintMethod = "printVectorIndex";
33  let MIOperandInfo = (ops i32imm);
34}
// i32 immediate operand that matches only when the value, cast to uint64_t,
// is below 2. Parsed through VectorIndex32Operand and printed with
// printVectorIndex.
// NOTE(review): presumably the lane index for 32-bit elements -- confirm.
35def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
36  return ((uint64_t)Imm) < 2;
37}]> {
38  let ParserMatchClass = VectorIndex32Operand;
39  let PrintMethod = "printVectorIndex";
40  let MIOperandInfo = (ops i32imm);
41}
42
43//===----------------------------------------------------------------------===//
44// NEON-specific DAG Nodes.
45//===----------------------------------------------------------------------===//
46
// Type profiles for the vector compare nodes.
//   SDTARMVCMP : one result, two operands; the result is an integer type and
//                the two operands have the same type as each other.
//   SDTARMVCMPZ: one result, one operand, no type constraints.
47def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
48def SDTARMVCMPZ   : SDTypeProfile<1, 1, []>;
49
// Compare nodes bound to their ARMISD opcodes. The two-operand forms use
// SDTARMVCMP; the one-operand "z" forms use SDTARMVCMPZ.
50def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
51def NEONvceqz     : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
52def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
53def NEONvcgez     : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
54def NEONvclez     : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
55def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
56def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
57def NEONvcgtz     : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
58def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
59def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
60def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;
61
62// Types for vector shift by immediates.  The "SHX" version is for long and
63// narrow operations where the source and destination vectors have different
64// types.  The "SHINS" version is for shift and insert operations.
// In all three profiles the last operand is constrained to i32 (the shift
// amount). SDTARMVSH ties operand 1 to the result type; SDTARMVSHX only
// requires result and operand 1 to be integer types; SDTARMVSHINS ties both
// vector operands (1 and 2) to the result type.
65def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
66                                         SDTCisVT<2, i32>]>;
67def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
68                                         SDTCisVT<2, i32>]>;
69def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
70                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
71
// Plain shifts: same-type nodes use SDTARMVSH, the VSHLL*/VSHRN nodes use
// SDTARMVSHX.
72def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
73def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
74def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
75def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
76def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
77def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
78def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
79
// VRSHR* nodes (NOTE(review): presumably the rounding variants -- confirm).
80def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
81def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
82def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
83
// VQSHL* / VQSHRN* nodes (NOTE(review): presumably saturating -- confirm).
84def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
85def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
86def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
87def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
88def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
89def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
90
// VQRSHRN* nodes, all using the mixed-type SDTARMVSHX profile.
91def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
92def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
93def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
94
// Shift-and-insert nodes: two vector operands plus the i32 shift amount.
95def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
96def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
97
// Lane-extract profile: i32 result, an integer-typed operand 1 and an i32
// operand 2 (NOTE(review): presumably the lane number -- confirm).
98def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
99                                         SDTCisVT<2, i32>]>;
// The "u" and "s" nodes share the profile and differ only in ARMISD opcode.
100def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
101def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
102
// Vector result built from a single i32 operand. The NEONimmAllZerosV /
// NEONimmAllOnesV leaves in this file decode that operand with
// ARM_AM::decodeNEONModImm, i.e. it carries an encoded modified immediate.
103def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
104def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
105def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
106
// Vector result, a vector operand of the same type, and an i32 operand
// (NOTE(review): presumably an encoded modified immediate -- confirm).
107def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
108                                           SDTCisVT<2, i32>]>;
109def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
110def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
111
// VBSL node with an inline profile: one vector result and three operands,
// all constrained to the result type.
112def NEONvbsl      : SDNode<"ARMISD::VBSL",
113                           SDTypeProfile<1, 3, [SDTCisVec<0>,
114                                                SDTCisSameAs<0, 1>,
115                                                SDTCisSameAs<0, 2>,
116                                                SDTCisSameAs<0, 3>]>>;
117
// VDUP node: one vector result, one operand whose type is left unconstrained.
118def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
119
120// VDUPLANE can produce a quad-register result from a double-register source,
121// so the result is not constrained to match the source.
// Both result and operand 1 must be vectors; operand 2 is an i32.
122def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
123                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
124                                                SDTCisVT<2, i32>]>>;
125
// VEXT profile: vector result, two vector operands of the result type, and a
// trailing i32 operand.
126def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
127                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
128def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
129
// Single-input shuffle profile: one vector result, one operand of the same
// type. Shared by the three VREV nodes.
130def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
131def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
132def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
133def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
134
// Two-result shuffle profile: two results and two operands, every value
// constrained to the type of result 0. Shared by VZIP, VUZP and VTRN.
135def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
136                                         SDTCisSameAs<0, 2>,
137                                         SDTCisSameAs<0, 3>]>;
138def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
139def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
140def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
141
// VMULL profile: integer result and two integer operands that share a type;
// the result type is not tied to the operand type.
142def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
143                                         SDTCisSameAs<1, 2>]>;
144def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
145def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
146
// FMAX/FMIN profile: the result is f32 and both operands match the result
// type, so these nodes are typed as scalar f32 here.
147def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
148                                         SDTCisSameAs<0, 2>]>;
149def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
150def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
151
// Matches a NEONvmovImm whose operand, decoded by ARM_AM::decodeNEONModImm,
// reports an element width of 32 bits and an element value of 0.
152def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
153  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
154  unsigned EltBits = 0;
  // decodeNEONModImm returns the element value; EltBits is passed in and
  // compared afterwards, so it is evidently an out-parameter.
155  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
156  return (EltBits == 32 && EltVal == 0);
157}]>;
158
// Matches a NEONvmovImm whose operand, decoded by ARM_AM::decodeNEONModImm,
// reports an element width of 8 bits and an element value of 0xff.
159def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
160  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
161  unsigned EltBits = 0;
  // decodeNEONModImm returns the element value; EltBits is passed in and
  // compared afterwards, so it is evidently an out-parameter.
162  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
163  return (EltBits == 8 && EltVal == 0xff);
164}]>;
165
166//===----------------------------------------------------------------------===//
167// NEON operand definitions
168//===----------------------------------------------------------------------===//
169
// i32 operand printed through printNEONModImmOperand; no parser match class
// or predicate is attached here.
170def nModImm : Operand<i32> {
171  let PrintMethod = "printNEONModImmOperand";
172}
173
174//===----------------------------------------------------------------------===//
175// NEON load / store instructions
176//===----------------------------------------------------------------------===//
177
178// Use VLDM to load a Q register as a D register pair.
179// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
// Selected for a v2f64 load whose address is a plain GPR; the asm string is
// empty.
180def VLDMQIA
181  : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
182                    IIC_fpLoad_m, "",
183                   [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
184
185// Use VSTM to store a Q register as a D register pair.
186// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
// Selected for a v2f64 store whose address is a plain GPR; there are no
// outputs and the asm string is empty.
187def VSTMQIA
188  : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
189                    IIC_fpStore_m, "",
190                   [(store (v2f64 QPR:$src), GPR:$Rn)]>;
191
191// Classes for VLD* pseudo-instructions with multi-register operands.
192// These are expanded to real instructions after register allocation.
// Each class is parameterized only by its itinerary. The final string
// argument is the operand-constraint list (empty when nothing is tied).
// Load into one QPR from an addrmode6 address.
193class VLDQPseudo<InstrItinClass itin>
194  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
// Writeback form: adds a GPR result $wb tied to the address register and an
// am6offset input.
195class VLDQWBPseudo<InstrItinClass itin>
196  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
197                (ins addrmode6:$addr, am6offset:$offset), itin,
198                "$addr.addr = $wb">;
// Same pair of classes for a QQPR destination.
199class VLDQQPseudo<InstrItinClass itin>
200  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
201class VLDQQWBPseudo<InstrItinClass itin>
202  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
203                (ins addrmode6:$addr, am6offset:$offset), itin,
204                "$addr.addr = $wb">;
// QQQQPR forms also take the destination as an input ($src) tied to $dst.
// NOTE(review): presumably because only part of the QQQQ register is written
// by one instruction -- confirm.
205class VLDQQQQPseudo<InstrItinClass itin>
206  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
207                "$src = $dst">;
208class VLDQQQQWBPseudo<InstrItinClass itin>
209  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
210                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
211                "$addr.addr = $wb, $src = $dst">;
213
214let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
215
215//   VLD1     : Vector Load (multiple single elements)
// One-register form: a single DPR result ($Vd) loaded from an addrmode6
// address ($Rn). No selection pattern is attached. Parameters: op7_4 bit
// pattern and the data-type suffix string Dt.
216class VLD1D<bits<4> op7_4, string Dt>
217  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd),
218          (ins addrmode6:$Rn), IIC_VLD1,
219          "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> {
  // Rm is a fixed field here; the writeback variant takes it as an operand.
  // NOTE(review): 0b1111 presumably encodes "no writeback" -- confirm.
220  let Rm = 0b1111;
  // Instruction bit 4 comes from bit 4 of the address operand.
221  let Inst{4} = Rn{4};
222  let DecoderMethod = "DecodeVLDInstruction";
223}
// Two-register VLD1: results are two DPRs ($Vd, $dst2), loaded from an
// addrmode6 address; no selection pattern.
224class VLD1Q<bits<4> op7_4, string Dt>
225  : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2),
226          (ins addrmode6:$Rn), IIC_VLD1x2,
227          "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
  // NOTE(review): 0b1111 presumably encodes "no writeback" -- confirm.
228  let Rm = 0b1111;
  // Instruction bits 5-4 come from bits 5-4 of the address operand.
229  let Inst{5-4} = Rn{5-4};
230  let DecoderMethod = "DecodeVLDInstruction";
231}
233
// One-register VLD1 per element size. The leading two bits of op7_4 step
// through 00/01/10/11 for 8/16/32/64; the '?' bit is left unset here.
233def  VLD1d8   : VLD1D<{0,0,0,?}, "8">;
234def  VLD1d16  : VLD1D<{0,1,0,?}, "16">;
235def  VLD1d32  : VLD1D<{1,0,0,?}, "32">;
236def  VLD1d64  : VLD1D<{1,1,0,?}, "64">;
237
// Two-register VLD1 per element size; the two low op7_4 bits are left unset.
238def  VLD1q8   : VLD1Q<{0,0,?,?}, "8">;
239def  VLD1q16  : VLD1Q<{0,1,?,?}, "16">;
240def  VLD1q32  : VLD1Q<{1,0,?,?}, "32">;
241def  VLD1q64  : VLD1Q<{1,1,?,?}, "64">;
242
// QPR-result pseudos, one per element size, all using itinerary IIC_VLD1x2.
243def  VLD1q8Pseudo  : VLDQPseudo<IIC_VLD1x2>;
244def  VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
245def  VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
246def  VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;
248
248// ...with address register writeback:
// One-register VLD1 with an extra GPR result $wb tied to the address register
// ("$Rn.addr = $wb") and an am6offset operand $Rm in place of a fixed Rm.
249class VLD1DWB<bits<4> op7_4, string Dt>
250  : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb),
251          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u,
252          "vld1", Dt, "\\{$Vd\\}, $Rn$Rm",
253          "$Rn.addr = $wb", []> {
  // Instruction bit 4 comes from bit 4 of the address operand.
254  let Inst{4} = Rn{4};
255  let DecoderMethod = "DecodeVLDInstruction";
256}
// Two-register VLD1 with writeback: two DPR results plus GPR $wb tied to the
// address register; $Rm is an am6offset operand.
257class VLD1QWB<bits<4> op7_4, string Dt>
258  : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
259          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u,
260          "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
261          "$Rn.addr = $wb", []> {
  // Instruction bits 5-4 come from bits 5-4 of the address operand.
262  let Inst{5-4} = Rn{5-4};
263  let DecoderMethod = "DecodeVLDInstruction";
264}
266
// Writeback one-register VLD1 per element size; same op7_4 patterns as the
// non-writeback defs.
266def VLD1d8_UPD  : VLD1DWB<{0,0,0,?}, "8">;
267def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">;
268def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">;
269def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">;
270
// Writeback two-register VLD1 per element size.
271def VLD1q8_UPD  : VLD1QWB<{0,0,?,?}, "8">;
272def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">;
273def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">;
274def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">;
275
// QPR-result writeback pseudos, all using itinerary IIC_VLD1x2u.
276def VLD1q8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD1x2u>;
277def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
278def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
279def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
281
281// ...with 3 registers (some of these are only for the disassembler):
// Three DPR results ($Vd, $dst2, $dst3) from an addrmode6 address; no
// selection pattern.
282class VLD1D3<bits<4> op7_4, string Dt>
283  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
284          (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
285          "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
  // NOTE(review): 0b1111 presumably encodes "no writeback" -- confirm.
286  let Rm = 0b1111;
  // Instruction bit 4 comes from bit 4 of the address operand.
287  let Inst{4} = Rn{4};
288  let DecoderMethod = "DecodeVLDInstruction";
289}
// Three-register VLD1 with writeback: adds GPR $wb tied to the address
// register and takes $Rm as an am6offset operand.
290class VLD1D3WB<bits<4> op7_4, string Dt>
291  : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
292          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt,
293          "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
  // Instruction bit 4 comes from bit 4 of the address operand.
294  let Inst{4} = Rn{4};
295  let DecoderMethod = "DecodeVLDInstruction";
296}
298
// Three-register VLD1 per element size ("T" suffix), without writeback.
298def VLD1d8T      : VLD1D3<{0,0,0,?}, "8">;
299def VLD1d16T     : VLD1D3<{0,1,0,?}, "16">;
300def VLD1d32T     : VLD1D3<{1,0,0,?}, "32">;
301def VLD1d64T     : VLD1D3<{1,1,0,?}, "64">;
302
// ...and with writeback.
303def VLD1d8T_UPD  : VLD1D3WB<{0,0,0,?}, "8">;
304def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">;
305def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">;
306def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">;
307
// Only the 64-bit element size gets pseudos here; they produce a QQPR.
308def VLD1d64TPseudo     : VLDQQPseudo<IIC_VLD1x3>;
309def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>;
311
311// ...with 4 registers (some of these are only for the disassembler):
// Four DPR results ($Vd, $dst2, $dst3, $dst4) from an addrmode6 address; no
// selection pattern.
312class VLD1D4<bits<4> op7_4, string Dt>
313  : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
314          (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
315          "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
  // NOTE(review): 0b1111 presumably encodes "no writeback" -- confirm.
316  let Rm = 0b1111;
  // Instruction bits 5-4 come from bits 5-4 of the address operand.
317  let Inst{5-4} = Rn{5-4};
318  let DecoderMethod = "DecodeVLDInstruction";
319}
// Four-register VLD1 with writeback: adds GPR $wb tied to the address
// register and takes $Rm as an am6offset operand.
320class VLD1D4WB<bits<4> op7_4, string Dt>
321  : NLdSt<0,0b10,0b0010,op7_4,
322          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
323          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt,
324          "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb",
325          []> {
  // Instruction bits 5-4 come from bits 5-4 of the address operand.
326  let Inst{5-4} = Rn{5-4};
327  let DecoderMethod = "DecodeVLDInstruction";
328}
330
// Four-register VLD1 per element size ("Q" suffix), without writeback.
330def VLD1d8Q      : VLD1D4<{0,0,?,?}, "8">;
331def VLD1d16Q     : VLD1D4<{0,1,?,?}, "16">;
332def VLD1d32Q     : VLD1D4<{1,0,?,?}, "32">;
333def VLD1d64Q     : VLD1D4<{1,1,?,?}, "64">;
334
// ...and with writeback.
335def VLD1d8Q_UPD  : VLD1D4WB<{0,0,?,?}, "8">;
336def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">;
337def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">;
338def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">;
339
// Only the 64-bit element size gets pseudos here; they produce a QQPR.
340def VLD1d64QPseudo     : VLDQQPseudo<IIC_VLD1x4>;
341def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>;
343
343//   VLD2     : Vector Load (multiple 2-element structures)
// Two DPR results; unlike the VLD1 classes, op11_8 is a parameter so the
// same class serves more than one encoding (see the defs that use it).
344class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
345  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
346          (ins addrmode6:$Rn), IIC_VLD2,
347          "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
  // NOTE(review): 0b1111 presumably encodes "no writeback" -- confirm.
348  let Rm = 0b1111;
  // Instruction bits 5-4 come from bits 5-4 of the address operand.
349  let Inst{5-4} = Rn{5-4};
350  let DecoderMethod = "DecodeVLDInstruction";
351}
// Four-register VLD2: four DPR results with op11_8 fixed at 0b0011.
352class VLD2Q<bits<4> op7_4, string Dt>
353  : NLdSt<0, 0b10, 0b0011, op7_4,
354          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
355          (ins addrmode6:$Rn), IIC_VLD2x2,
356          "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
  // NOTE(review): 0b1111 presumably encodes "no writeback" -- confirm.
357  let Rm = 0b1111;
  // Instruction bits 5-4 come from bits 5-4 of the address operand.
358  let Inst{5-4} = Rn{5-4};
359  let DecoderMethod = "DecodeVLDInstruction";
360}
362
// Two-register VLD2 for 8/16/32-bit elements with op11_8 = 0b1000. No 64-bit
// variant is defined.
362def  VLD2d8   : VLD2D<0b1000, {0,0,?,?}, "8">;
363def  VLD2d16  : VLD2D<0b1000, {0,1,?,?}, "16">;
364def  VLD2d32  : VLD2D<0b1000, {1,0,?,?}, "32">;
365
// Four-register VLD2 for 8/16/32-bit elements.
366def  VLD2q8   : VLD2Q<{0,0,?,?}, "8">;
367def  VLD2q16  : VLD2Q<{0,1,?,?}, "16">;
368def  VLD2q32  : VLD2Q<{1,0,?,?}, "32">;
369
// Pseudos for the two-register form produce a QPR...
370def  VLD2d8Pseudo  : VLDQPseudo<IIC_VLD2>;
371def  VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
372def  VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;
373
// ...and pseudos for the four-register form produce a QQPR.
374def  VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>;
375def  VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
376def  VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
378
378// ...with address register writeback:
// Two-register VLD2 with GPR result $wb tied to the address register; $Rm is
// an am6offset operand and op11_8 is a parameter.
379class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
380  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
381          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
382          "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
383          "$Rn.addr = $wb", []> {
  // Instruction bits 5-4 come from bits 5-4 of the address operand.
384  let Inst{5-4} = Rn{5-4};
385  let DecoderMethod = "DecodeVLDInstruction";
386}
// Four-register VLD2 with writeback; op11_8 fixed at 0b0011.
387class VLD2QWB<bits<4> op7_4, string Dt>
388  : NLdSt<0, 0b10, 0b0011, op7_4,
389          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
390          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
391          "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
392          "$Rn.addr = $wb", []> {
  // Instruction bits 5-4 come from bits 5-4 of the address operand.
393  let Inst{5-4} = Rn{5-4};
394  let DecoderMethod = "DecodeVLDInstruction";
395}
397
// Writeback two-register VLD2 (op11_8 = 0b1000) per element size.
397def VLD2d8_UPD  : VLD2DWB<0b1000, {0,0,?,?}, "8">;
398def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">;
399def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">;
400
// Writeback four-register VLD2 per element size.
401def VLD2q8_UPD  : VLD2QWB<{0,0,?,?}, "8">;
402def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">;
403def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">;
404
// Writeback pseudos producing a QPR.
405def VLD2d8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD2u>;
406def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
407def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
408
// Writeback pseudos producing a QQPR.
409def VLD2q8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD2x2u>;
410def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
411def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
413
413// ...with double-spaced registers (for disassembly only):
// Same classes as the VLD2d* defs but with op11_8 = 0b1001 instead of 0b1000;
// no pseudos are defined for these.
414def VLD2b8      : VLD2D<0b1001, {0,0,?,?}, "8">;
415def VLD2b16     : VLD2D<0b1001, {0,1,?,?}, "16">;
416def VLD2b32     : VLD2D<0b1001, {1,0,?,?}, "32">;
417def VLD2b8_UPD  : VLD2DWB<0b1001, {0,0,?,?}, "8">;
418def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">;
419def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">;
421
421//   VLD3     : Vector Load (multiple 3-element structures)
// Three DPR results from an addrmode6 address; op11_8 is a parameter and no
// selection pattern is attached.
422class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
423  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
424          (ins addrmode6:$Rn), IIC_VLD3,
425          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
  // NOTE(review): 0b1111 presumably encodes "no writeback" -- confirm.
426  let Rm = 0b1111;
  // Instruction bit 4 comes from bit 4 of the address operand.
427  let Inst{4} = Rn{4};
428  let DecoderMethod = "DecodeVLDInstruction";
429}
431
// VLD3 for 8/16/32-bit elements with op11_8 = 0b0100.
431def  VLD3d8   : VLD3D<0b0100, {0,0,0,?}, "8">;
432def  VLD3d16  : VLD3D<0b0100, {0,1,0,?}, "16">;
433def  VLD3d32  : VLD3D<0b0100, {1,0,0,?}, "32">;
434
// Matching pseudos produce a QQPR.
435def  VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>;
436def  VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
437def  VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
439
439// ...with address register writeback:
// Three-register VLD3 with GPR result $wb tied to the address register; $Rm
// is an am6offset operand.
440class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
441  : NLdSt<0, 0b10, op11_8, op7_4,
442          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
443          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
444          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
445          "$Rn.addr = $wb", []> {
  // Instruction bit 4 comes from bit 4 of the address operand.
446  let Inst{4} = Rn{4};
447  let DecoderMethod = "DecodeVLDInstruction";
448}
450
// Writeback VLD3 (op11_8 = 0b0100) per element size.
450def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
451def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
452def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
453
// Matching writeback pseudos produce a QQPR.
454def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
455def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
456def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
458
458// ...with double-spaced registers:
// Real instructions reuse VLD3D / VLD3DWB with op11_8 = 0b0101.
459def VLD3q8      : VLD3D<0b0101, {0,0,0,?}, "8">;
460def VLD3q16     : VLD3D<0b0101, {0,1,0,?}, "16">;
461def VLD3q32     : VLD3D<0b0101, {1,0,0,?}, "32">;
462def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
463def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
464def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
465
// Pseudos operate on a QQQQPR that is both input and output ($src = $dst).
// Only writeback pseudos exist under the plain "q" names.
466def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
467def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
468def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
469
470// ...alternate versions to be allocated odd register numbers:
// The "odd" pseudos come in both non-writeback and writeback forms.
471def VLD3q8oddPseudo   : VLDQQQQPseudo<IIC_VLD3>;
472def VLD3q16oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
473def VLD3q32oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
474
475def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
476def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
477def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
479
479//   VLD4     : Vector Load (multiple 4-element structures)
// Four DPR results from an addrmode6 address; op11_8 is a parameter and no
// selection pattern is attached.
480class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
481  : NLdSt<0, 0b10, op11_8, op7_4,
482          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
483          (ins addrmode6:$Rn), IIC_VLD4,
484          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
  // NOTE(review): 0b1111 presumably encodes "no writeback" -- confirm.
485  let Rm = 0b1111;
  // Instruction bits 5-4 come from bits 5-4 of the address operand.
486  let Inst{5-4} = Rn{5-4};
487  let DecoderMethod = "DecodeVLDInstruction";
488}
490
// VLD4 for 8/16/32-bit elements with op11_8 = 0b0000.
490def  VLD4d8   : VLD4D<0b0000, {0,0,?,?}, "8">;
491def  VLD4d16  : VLD4D<0b0000, {0,1,?,?}, "16">;
492def  VLD4d32  : VLD4D<0b0000, {1,0,?,?}, "32">;
493
// Matching pseudos produce a QQPR.
494def  VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>;
495def  VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
496def  VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
498
498// ...with address register writeback:
// Four-register VLD4 with GPR result $wb tied to the address register; $Rm
// is an am6offset operand.
499class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
500  : NLdSt<0, 0b10, op11_8, op7_4,
501          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
502          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
503          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
504          "$Rn.addr = $wb", []> {
  // Instruction bits 5-4 come from bits 5-4 of the address operand.
505  let Inst{5-4} = Rn{5-4};
506  let DecoderMethod = "DecodeVLDInstruction";
507}
509
// Writeback VLD4 (op11_8 = 0b0000) per element size.
509def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
510def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
511def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
512
// Matching writeback pseudos produce a QQPR.
513def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>;
514def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
515def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
517
517// ...with double-spaced registers:
// Real instructions reuse VLD4D / VLD4DWB with op11_8 = 0b0001.
518def VLD4q8      : VLD4D<0b0001, {0,0,?,?}, "8">;
519def VLD4q16     : VLD4D<0b0001, {0,1,?,?}, "16">;
520def VLD4q32     : VLD4D<0b0001, {1,0,?,?}, "32">;
521def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
522def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
523def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
524
// Pseudos operate on a QQQQPR that is both input and output ($src = $dst).
// Only writeback pseudos exist under the plain "q" names.
525def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
526def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
527def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
528
529// ...alternate versions to be allocated odd register numbers:
// The "odd" pseudos come in both non-writeback and writeback forms.
530def VLD4q8oddPseudo   : VLDQQQQPseudo<IIC_VLD4>;
531def VLD4q16oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
532def VLD4q32oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
533
534def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
535def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
536def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
538
539} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
540
540// Classes for VLD*LN pseudo-instructions with multi-register operands.
541// These are expanded to real instructions after register allocation.
// Every class takes the destination register as a tied input ($src = $dst)
// plus a nohash_imm lane operand. The WB classes add a GPR result $wb tied to
// the address register and an am6offset input.
// QPR destination.
542class VLDQLNPseudo<InstrItinClass itin>
543  : PseudoNLdSt<(outs QPR:$dst),
544                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
545                itin, "$src = $dst">;
546class VLDQLNWBPseudo<InstrItinClass itin>
547  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
548                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
549                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
// QQPR destination.
550class VLDQQLNPseudo<InstrItinClass itin>
551  : PseudoNLdSt<(outs QQPR:$dst),
552                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
553                itin, "$src = $dst">;
554class VLDQQLNWBPseudo<InstrItinClass itin>
555  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
556                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
557                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
// QQQQPR destination.
558class VLDQQQQLNPseudo<InstrItinClass itin>
559  : PseudoNLdSt<(outs QQQQPR:$dst),
560                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
561                itin, "$src = $dst">;
562class VLDQQQQLNWBPseudo<InstrItinClass itin>
563  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
564                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
565                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
567
567//   VLD1LN   : Vector Load (single element to one lane)
// Selected for a vector_insert into a DPR of vector type Ty, where the
// inserted i32 value is produced by LoadOp on an addrmode6 address and the
// index is the immediate $lane. $src is tied to the result $Vd.
568class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
569             PatFrag LoadOp>
570  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
571          (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
572          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
573          "$src = $Vd",
574          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
575                                         (i32 (LoadOp addrmode6:$Rn)),
576                                         imm:$lane))]> {
  // NOTE(review): 0b1111 presumably encodes "no writeback" -- confirm.
577  let Rm = 0b1111;
578  let DecoderMethod = "DecodeVLD1LN";
579}
// Same as VLD1LN except that the address operand (and the address in the
// selection pattern) is addrmode6oneL32 instead of addrmode6.
580class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
581             PatFrag LoadOp>
582  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
583          (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
584          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
585          "$src = $Vd",
586          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
587                                         (i32 (LoadOp addrmode6oneL32:$Rn)),
588                                         imm:$lane))]> {
  // NOTE(review): 0b1111 presumably encodes "no writeback" -- confirm.
589  let Rm = 0b1111;
590  let DecoderMethod = "DecodeVLD1LN";
591}
// QPR pseudo for the lane load: VLDQLNPseudo with itinerary IIC_VLD1ln plus
// a vector_insert selection pattern analogous to VLD1LN's, on QPR operands.
592class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
593  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
594                                               (i32 (LoadOp addrmode6:$addr)),
595                                               imm:$lane))];
596}
598
// D-register lane loads. The '?' bits of op7_4 are assigned per def: the
// lane index occupies the top bits (3, 2 or 1 of them for 8/16/32-bit
// elements) and, for 16/32-bit, low bits are taken from the address operand.
598def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
599  let Inst{7-5} = lane{2-0};
600}
601def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
602  let Inst{7-6} = lane{1-0};
603  let Inst{4}   = Rn{4};
604}
// The 32-bit form uses VLD1LN32 and drives both Inst{5} and Inst{4} from the
// single Rn{4} bit.
605def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
606  let Inst{7} = lane{0};
607  let Inst{5} = Rn{4};
608  let Inst{4} = Rn{4};
609}
611
// Q-register lane-load pseudos; the vector type doubles the lane count of the
// D-register defs and the same load fragments are used.
611def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
612def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
613def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
615
// Inserting a loaded f32 into a lane of v2f32 / v4f32 is selected to the
// 32-bit integer lane-load instruction (VLD1LNd32) or pseudo
// (VLD1LNq32Pseudo) respectively.
615def : Pat<(vector_insert (v2f32 DPR:$src),
616                         (f32 (load addrmode6:$addr)), imm:$lane),
617          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
618def : Pat<(vector_insert (v4f32 QPR:$src),
619                         (f32 (load addrmode6:$addr)), imm:$lane),
620          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
622
623let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
624
624// ...with address register writeback:
// Lane load with a GPR result $wb tied to the address register and $src tied
// to $Vd. $Rm is an am6offset operand; no selection pattern is attached.
625class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
626  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
627          (ins addrmode6:$Rn, am6offset:$Rm,
628           DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
629          "\\{$Vd[$lane]\\}, $Rn$Rm",
630          "$src = $Vd, $Rn.addr = $wb", []> {
631  let DecoderMethod = "DecodeVLD1LN";
632}
634
// Writeback D-register lane loads; bit assignments mirror the non-writeback
// VLD1LNd8/16/32 defs.
634def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
635  let Inst{7-5} = lane{2-0};
636}
637def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
638  let Inst{7-6} = lane{1-0};
639  let Inst{4}   = Rn{4};
640}
// Both Inst{5} and Inst{4} are driven by the single Rn{4} bit.
641def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
642  let Inst{7} = lane{0};
643  let Inst{5} = Rn{4};
644  let Inst{4} = Rn{4};
645}
646
// Q-register writeback pseudos; unlike the non-writeback pseudos these carry
// no selection pattern.
647def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>;
648def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
649def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
651
651//   VLD2LN   : Vector Load (single 2-element structure to one lane)
// Two DPR results, each tied to a DPR input ($src1 = $Vd, $src2 = $dst2);
// the same $lane index is printed for both registers. No selection pattern.
652class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
653  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
654          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
655          IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
656          "$src1 = $Vd, $src2 = $dst2", []> {
  // NOTE(review): 0b1111 presumably encodes "no writeback" -- confirm.
657  let Rm = 0b1111;
  // Instruction bit 4 comes from bit 4 of the address operand.
658  let Inst{4}   = Rn{4};
659  let DecoderMethod = "DecodeVLD2LN";
660}
662
// Two-register lane loads per element size; the lane index fills the top
// 3/2/1 bits of the op7_4 field for 8/16/32-bit elements.
662def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
663  let Inst{7-5} = lane{2-0};
664}
665def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
666  let Inst{7-6} = lane{1-0};
667}
668def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
669  let Inst{7} = lane{0};
670}
671
// Matching pseudos operate on a QPR.
672def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>;
673def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
674def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
676
676// ...with double-spaced registers:
// Same op11_8 as the "d" defs; a single op7_4 bit is 1 instead of 0 (bit 5
// for 16-bit, bit 6 for 32-bit elements). No 8-bit variant is defined.
677def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
678  let Inst{7-6} = lane{1-0};
679}
680def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
681  let Inst{7} = lane{0};
682}
683
// Matching pseudos operate on a QQPR.
684def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
685def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
687
687// ...with address register writeback:
// Two-register lane load with GPR result $wb tied to the address register,
// in addition to the $src1/$src2 ties; $Rm is an am6offset operand.
688class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
689  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
690          (ins addrmode6:$Rn, am6offset:$Rm,
691           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
692          "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
693          "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  // Instruction bit 4 comes from bit 4 of the address operand.
694  let Inst{4}   = Rn{4};
695  let DecoderMethod = "DecodeVLD2LN";
696}
698
// Writeback two-register lane loads; encodings mirror VLD2LNd8/16/32.
698def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
699  let Inst{7-5} = lane{2-0};
700}
701def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
702  let Inst{7-6} = lane{1-0};
703}
704def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
705  let Inst{7} = lane{0};
706}
707
// Matching writeback pseudos operate on a QPR.
708def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
709def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
710def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
711
// Writeback forms of VLD2LNq16/32 (same op7_4 patterns).
712def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
713  let Inst{7-6} = lane{1-0};
714}
715def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
716  let Inst{7} = lane{0};
717}
718
// Matching writeback pseudos operate on a QQPR.
719def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
720def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
722
722//   VLD3LN   : Vector Load (single 3-element structure to one lane)
// Three DPR results, each tied to a DPR input; the same $lane index is
// printed for all three registers. Unlike VLD2LN, this class assigns no
// instruction bits from Rn. No selection pattern.
723class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
724  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
725          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
726          nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
727          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
728          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
  // NOTE(review): 0b1111 presumably encodes "no writeback" -- confirm.
729  let Rm = 0b1111;
730  let DecoderMethod = "DecodeVLD3LN";
731}
733
// Three-register lane loads per element size. The lowest op7_4 bit is fixed
// at 0 in every def; the lane index fills the top 3/2/1 bits.
733def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
734  let Inst{7-5} = lane{2-0};
735}
736def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
737  let Inst{7-6} = lane{1-0};
738}
739def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
740  let Inst{7}   = lane{0};
741}
742
// Matching pseudos operate on a QQPR.
743def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
744def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
745def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
747
748// ...with double-spaced registers:
749def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
750  let Inst{7-6} = lane{1-0};
751}
752def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
753  let Inst{7}   = lane{0};
754}
755
756def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
757def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
758
759// ...with address register writeback:
// Writeback flavour of VLD3LN: adds the am6offset operand $Rm and the extra
// result $wb, which is tied to the address part of $Rn.
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm, DPR:$src1, DPR:$src2,
                 DPR:$src3, nohash_imm:$lane),
            IIC_VLD3lnu, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
            []> {
  let DecoderMethod = "DecodeVLD3LN";
}
771
// Writeback records; bit patterns mirror the non-writeback VLD3LN records.
def VLD3LNd8_UPD
  : VLD3LNWB<0b0010, {?,?,?,0}, "8">  { let Inst{7-5} = lane{2-0}; }
def VLD3LNd16_UPD
  : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD3LNd32_UPD
  : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { let Inst{7}   = lane{0};   }

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;

// Q-named writeback records (fixed op7_4 bit is 1 rather than 0).
def VLD3LNq16_UPD
  : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD3LNq32_UPD
  : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { let Inst{7}   = lane{0};   }

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
795
796//   VLD4LN   : Vector Load (single 4-element structure to one lane)
// Four-register single-lane load without writeback.  Destinations are tied
// to the $srcN inputs; Rm is fixed to 0b1111.
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
                 nohash_imm:$lane),
            IIC_VLD4ln, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
  let DecoderMethod = "DecodeVLD4LN";
  let Rm = 0b1111;
  // Encoding bit 4 is copied from bit 4 of the $Rn operand value.
  let Inst{4} = Rn{4};
}
808
// The 32-bit records additionally route Rn{5} into Inst{5}.
def VLD4LNd8
  : VLD4LN<0b0011, {?,?,?,?}, "8">  { let Inst{7-5} = lane{2-0}; }
def VLD4LNd16
  : VLD4LN<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD4LNd32
  : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{5} = Rn{5};
  let Inst{7} = lane{0};
}

// Pseudo-instructions for the records above (itinerary IIC_VLD4ln).
def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;

// Double-spaced register variants:
def VLD4LNq16
  : VLD4LN<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD4LNq32
  : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{5} = Rn{5};
  let Inst{7} = lane{0};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
835
836// ...with address register writeback:
// Writeback flavour of VLD4LN: adds the am6offset operand $Rm and the extra
// result $wb, which is tied to the address part of $Rn.
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
      (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
      (ins addrmode6:$Rn, am6offset:$Rm, DPR:$src1, DPR:$src2, DPR:$src3,
           DPR:$src4, nohash_imm:$lane),
      IIC_VLD4lnu, "vld4", Dt,
      "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
  "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
      []> {
  let DecoderMethod = "DecodeVLD4LN";
  // Encoding bit 4 is copied from bit 4 of the $Rn operand value.
  let Inst{4} = Rn{4};
}
849
// Writeback records; bit patterns mirror the non-writeback VLD4LN records.
def VLD4LNd8_UPD
  : VLD4LNWB<0b0011, {?,?,?,?}, "8">  { let Inst{7-5} = lane{2-0}; }
def VLD4LNd16_UPD
  : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD4LNd32_UPD
  : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{5} = Rn{5};
  let Inst{7} = lane{0};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;

// Q-named writeback records (fixed op7_4 bit is 1 rather than 0).
def VLD4LNq16_UPD
  : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VLD4LNq32_UPD
  : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{5} = Rn{5};
  let Inst{7} = lane{0};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
875
876} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
877
878//   VLD1DUP  : Vector Load (single element to all lanes)
// Single-register VLD1 "dup" load.  Selected for a NEONvdup of an i32 value
// produced by LoadOp from an addrmode6dup address, with result type Ty.
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4,
          (outs DPR:$Vd), (ins addrmode6dup:$Rn),
          IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
          [(set DPR:$Vd,
                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let DecoderMethod = "DecodeVLD1DupInstruction";
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}
// Pseudo for the Q-register dup load; carries the selection pattern that
// produces a QPR value of type Ty.
class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp>
  : VLDQPseudo<IIC_VLD1dup> {
  let Pattern =
    [(set QPR:$dst, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
}
891
// D-register dup loads: 8- and 16-bit elements use extending loads, 32-bit
// elements a plain load.
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8",  v8i8,  extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;

// Q-register pseudos with the corresponding 128-bit vector types.
def VLD1DUPq8Pseudo  : VLD1QDUPPseudo<v16i8, extloadi8>;
def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;

// Floating-point dups reuse the 32-bit integer records.
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;
def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32Pseudo addrmode6:$addr)>;
904
905let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
906
// Two-register VLD1 dup load; no selection pattern is attached.
class VLD1QDUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1100, op7_4,
          (outs DPR:$Vd, DPR:$dst2), (ins addrmode6dup:$Rn),
          IIC_VLD1dup, "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVLD1DupInstruction";
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}
915
// Two-register dup loads, one record per element size.
def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8">;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
919
920// ...with address register writeback:
// Single-register VLD1 dup load with writeback: $wb is tied to the address
// part of $Rn and $Rm is an am6offset operand.
class VLD1DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1100, op7_4,
          (outs DPR:$Vd, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm),
          IIC_VLD1dupu, "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD1DupInstruction";
  let Inst{4} = Rn{4};
}
// Two-register VLD1 dup load with writeback ($wb tied to $Rn's address).
class VLD1QDUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1100, op7_4,
          (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm),
          IIC_VLD1dupu, "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD1DupInstruction";
  let Inst{4} = Rn{4};
}
935
// Single-register writeback records.
def VLD1DUPd8_UPD        : VLD1DUPWB<{0,0,0,0}, "8">;
def VLD1DUPd16_UPD       : VLD1DUPWB<{0,1,0,?}, "16">;
def VLD1DUPd32_UPD       : VLD1DUPWB<{1,0,0,?}, "32">;

// Two-register writeback records.
def VLD1DUPq8_UPD        : VLD1QDUPWB<{0,0,1,0}, "8">;
def VLD1DUPq16_UPD       : VLD1QDUPWB<{0,1,1,?}, "16">;
def VLD1DUPq32_UPD       : VLD1QDUPWB<{1,0,1,?}, "32">;

// Q-register writeback pseudos (itinerary IIC_VLD1dupu).
def VLD1DUPq8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD1dupu>;
def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
947
948//   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
// Two-register VLD2 dup load without writeback; Rm is fixed to 0b1111.
class VLD2DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1101, op7_4,
          (outs DPR:$Vd, DPR:$dst2), (ins addrmode6dup:$Rn),
          IIC_VLD2dup, "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVLD2DupInstruction";
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}
957
// One record per element size.
def VLD2DUPd8        : VLD2DUP<{0,0,0,?}, "8">;
def VLD2DUPd16       : VLD2DUP<{0,1,0,?}, "16">;
def VLD2DUPd32       : VLD2DUP<{1,0,0,?}, "32">;

// Matching pseudo-instructions (itinerary IIC_VLD2dup).
def VLD2DUPd8Pseudo  : VLDQPseudo<IIC_VLD2dup>;
def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;

// Double-spaced register variants; code generation does not use these.
def VLD2DUPd8x2      : VLD2DUP<{0,0,1,?}, "8">;
def VLD2DUPd16x2     : VLD2DUP<{0,1,1,?}, "16">;
def VLD2DUPd32x2     : VLD2DUP<{1,0,1,?}, "32">;
970
971// ...with address register writeback:
// VLD2 dup load with writeback: $wb is tied to the address part of $Rn.
class VLD2DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1101, op7_4,
          (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm),
          IIC_VLD2dupu, "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD2DupInstruction";
  let Inst{4} = Rn{4};
}
979
// Writeback records, one per element size.
def VLD2DUPd8_UPD        : VLD2DUPWB<{0,0,0,0}, "8">;
def VLD2DUPd16_UPD       : VLD2DUPWB<{0,1,0,?}, "16">;
def VLD2DUPd32_UPD       : VLD2DUPWB<{1,0,0,?}, "32">;

// "x2" writeback records (op7_4 bit 1 set).
def VLD2DUPd8x2_UPD      : VLD2DUPWB<{0,0,1,0}, "8">;
def VLD2DUPd16x2_UPD     : VLD2DUPWB<{0,1,1,?}, "16">;
def VLD2DUPd32x2_UPD     : VLD2DUPWB<{1,0,1,?}, "32">;

// Writeback pseudos (itinerary IIC_VLD2dupu).
def VLD2DUPd8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD2dupu>;
def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
991
992//   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
// Three-register VLD3 dup load without writeback.  Rm is fixed to 0b1111 and
// encoding bit 4 is forced to 0.
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), (ins addrmode6dup:$Rn),
          IIC_VLD3dup, "vld3", Dt,
          "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVLD3DupInstruction";
  let Rm = 0b1111;
  let Inst{4} = 0;
}
1001
// One record per element size.
def VLD3DUPd8        : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16       : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32       : VLD3DUP<{1,0,0,?}, "32">;

// Matching pseudo-instructions (itinerary IIC_VLD3dup).
def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;

// Double-spaced register variants; code generation does not use these.
def VLD3DUPd8x2      : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPd16x2     : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPd32x2     : VLD3DUP<{1,0,1,?}, "32">;
1014
1015// ...with address register writeback:
// VLD3 dup load with writeback: $wb is tied to the address part of $Rn, and
// encoding bit 4 is forced to 0.
class VLD3DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm),
          IIC_VLD3dupu, "vld3", Dt,
          "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD3DupInstruction";
  let Inst{4} = 0;
}
1024
// Writeback records, one per element size.
def VLD3DUPd8_UPD        : VLD3DUPWB<{0,0,0,0}, "8">;
def VLD3DUPd16_UPD       : VLD3DUPWB<{0,1,0,?}, "16">;
def VLD3DUPd32_UPD       : VLD3DUPWB<{1,0,0,?}, "32">;

// "x2" writeback records (op7_4 bit 1 set).
def VLD3DUPd8x2_UPD      : VLD3DUPWB<{0,0,1,0}, "8">;
def VLD3DUPd16x2_UPD     : VLD3DUPWB<{0,1,1,?}, "16">;
def VLD3DUPd32x2_UPD     : VLD3DUPWB<{1,0,1,?}, "32">;

// Writeback pseudos (itinerary IIC_VLD3dupu).
def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
1036
1037//   VLD4DUP  : Vector Load (single 4-element structure to all lanes)
// Four-register VLD4 dup load without writeback; Rm is fixed to 0b1111.
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn),
          IIC_VLD4dup, "vld4", Dt,
          "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVLD4DupInstruction";
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}
1047
// One record per element size.  The 32-bit record leaves op7_4 bit 2 open
// and fills Inst{6} from Rn{5}.
def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> {
  let Inst{6} = Rn{5};
}

// Matching pseudo-instructions (itinerary IIC_VLD4dup).
def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;

// Double-spaced register variants; code generation does not use these.
def VLD4DUPd8x2  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> {
  let Inst{6} = Rn{5};
}
1060
1061// ...with address register writeback:
// VLD4 dup load with writeback: $wb is tied to the address part of $Rn.
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm),
          IIC_VLD4dupu, "vld4", Dt,
          "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD4DupInstruction";
  let Inst{4} = Rn{4};
}
1071
// Writeback records; the 32-bit ones fill Inst{6} from Rn{5}.
def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> {
  let Inst{6} = Rn{5};
}

// "x2" writeback records (op7_4 bit 1 set).
def VLD4DUPd8x2_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> {
  let Inst{6} = Rn{5};
}

// Writeback pseudos (itinerary IIC_VLD4dupu).
def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
1083
1084} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
1085
1086let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
1087
1088// Classes for VST* pseudo-instructions with multi-register operands.
1089// These are expanded to real instructions after register allocation.
// Store pseudos come in pairs per source register class (QPR, QQPR, QQQQPR):
// a plain form with no results, and a "WB" form that takes an extra
// am6offset operand and defines $wb, tied to the address part of $addr.
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QPR:$src),
                itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src),
                itin, "$addr.addr = $wb">;

class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QQPR:$src),
                itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src),
                itin, "$addr.addr = $wb">;

class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QQQQPR:$src),
                itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src),
                itin, "$addr.addr = $wb">;
1108
1109//   VST1     : Vector Store (multiple single elements)
// One-register VST1 without writeback; Rm is fixed to 0b1111.
class VST1D<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0111, op7_4,
          (outs), (ins addrmode6:$Rn, DPR:$Vd),
          IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}
// Two-register VST1 without writeback; Rm is fixed to 0b1111 and both
// Inst{5-4} bits come from the $Rn operand value.
class VST1Q<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b1010, op7_4,
          (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
          IIC_VST1x2, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
}
1125
// One-register stores.
def VST1d8        : VST1D<{0,0,0,?}, "8">;
def VST1d16       : VST1D<{0,1,0,?}, "16">;
def VST1d32       : VST1D<{1,0,0,?}, "32">;
def VST1d64       : VST1D<{1,1,0,?}, "64">;

// Two-register stores.
def VST1q8        : VST1Q<{0,0,?,?}, "8">;
def VST1q16       : VST1Q<{0,1,?,?}, "16">;
def VST1q32       : VST1Q<{1,0,?,?}, "32">;
def VST1q64       : VST1Q<{1,1,?,?}, "64">;

// Q-register pseudos (itinerary IIC_VST1x2).
def VST1q8Pseudo  : VSTQPseudo<IIC_VST1x2>;
def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
1140
1141// ...with address register writeback:
// One-register VST1 with writeback: defines $wb, tied to $Rn's address.
class VST1DWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0111, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd),
          IIC_VST1u, "vst1", Dt, "\\{$Vd\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Inst{4} = Rn{4};
}
// Two-register VST1 with writeback: defines $wb, tied to $Rn's address.
class VST1QWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b1010, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
          IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Inst{5-4} = Rn{5-4};
}
1157
// One-register writeback stores.
def VST1d8_UPD        : VST1DWB<{0,0,0,?}, "8">;
def VST1d16_UPD       : VST1DWB<{0,1,0,?}, "16">;
def VST1d32_UPD       : VST1DWB<{1,0,0,?}, "32">;
def VST1d64_UPD       : VST1DWB<{1,1,0,?}, "64">;

// Two-register writeback stores.
def VST1q8_UPD        : VST1QWB<{0,0,?,?}, "8">;
def VST1q16_UPD       : VST1QWB<{0,1,?,?}, "16">;
def VST1q32_UPD       : VST1QWB<{1,0,?,?}, "32">;
def VST1q64_UPD       : VST1QWB<{1,1,?,?}, "64">;

// Q-register writeback pseudos (itinerary IIC_VST1x2u).
def VST1q8Pseudo_UPD  : VSTQWBPseudo<IIC_VST1x2u>;
def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
1172
1173// ...with 3 registers (some of these are only for the disassembler):
// Three-register VST1 without writeback; Rm is fixed to 0b1111.
class VST1D3<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4,
          (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
          IIC_VST1x3, "vst1", Dt,
          "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}
// Three-register VST1 with writeback: defines $wb, tied to $Rn's address.
class VST1D3WB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3),
          IIC_VST1x3u, "vst1", Dt,
          "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Inst{4} = Rn{4};
}
1191
// Three-register stores, plain then writeback.
def VST1d8T            : VST1D3<{0,0,0,?}, "8">;
def VST1d16T           : VST1D3<{0,1,0,?}, "16">;
def VST1d32T           : VST1D3<{1,0,0,?}, "32">;
def VST1d64T           : VST1D3<{1,1,0,?}, "64">;

def VST1d8T_UPD        : VST1D3WB<{0,0,0,?}, "8">;
def VST1d16T_UPD       : VST1D3WB<{0,1,0,?}, "16">;
def VST1d32T_UPD       : VST1D3WB<{1,0,0,?}, "32">;
def VST1d64T_UPD       : VST1D3WB<{1,1,0,?}, "64">;

// Only the 64-bit element size has pseudo-instructions.
def VST1d64TPseudo     : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
1204
1205// ...with 4 registers (some of these are only for the disassembler):
// Four-register VST1 without writeback; Rm is fixed to 0b1111.
class VST1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4,
          (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST1x4, "vst1", Dt,
          "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
}
// Four-register VST1 with writeback: defines $wb, tied to $Rn's address.
class VST1D4WB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3,
               DPR:$src4),
          IIC_VST1x4u, "vst1", Dt,
          "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Inst{5-4} = Rn{5-4};
}
1224
// Four-register stores, plain then writeback.
def VST1d8Q            : VST1D4<{0,0,?,?}, "8">;
def VST1d16Q           : VST1D4<{0,1,?,?}, "16">;
def VST1d32Q           : VST1D4<{1,0,?,?}, "32">;
def VST1d64Q           : VST1D4<{1,1,?,?}, "64">;

def VST1d8Q_UPD        : VST1D4WB<{0,0,?,?}, "8">;
def VST1d16Q_UPD       : VST1D4WB<{0,1,?,?}, "16">;
def VST1d32Q_UPD       : VST1D4WB<{1,0,?,?}, "32">;
def VST1d64Q_UPD       : VST1D4WB<{1,1,?,?}, "64">;

// Only the 64-bit element size has pseudo-instructions.
def VST1d64QPseudo     : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
1237
1238//   VST2     : Vector Store (multiple 2-element structures)
// Two-register VST2 without writeback; op11_8 is chosen by each record and
// Rm is fixed to 0b1111.
class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
          IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
}
// Four-register VST2 without writeback (op11_8 fixed to 0b0011).
class VST2Q<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0011, op7_4,
          (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST2x2, "vst2", Dt,
          "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
}
1256
// Two-register VST2 records.
def VST2d8        : VST2D<0b1000, {0,0,?,?}, "8">;
def VST2d16       : VST2D<0b1000, {0,1,?,?}, "16">;
def VST2d32       : VST2D<0b1000, {1,0,?,?}, "32">;

// Four-register VST2 records.
def VST2q8        : VST2Q<{0,0,?,?}, "8">;
def VST2q16       : VST2Q<{0,1,?,?}, "16">;
def VST2q32       : VST2Q<{1,0,?,?}, "32">;

// Pseudos for the two-register records.
def VST2d8Pseudo  : VSTQPseudo<IIC_VST2>;
def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
def VST2d32Pseudo : VSTQPseudo<IIC_VST2>;

// Pseudos for the four-register records.
def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
1272
1273// ...with address register writeback:
// Two-register VST2 with writeback: defines $wb, tied to $Rn's address.
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
          IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Inst{5-4} = Rn{5-4};
}
// Four-register VST2 with writeback: defines $wb, tied to $Rn's address.
class VST2QWB<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0011, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3,
               DPR:$src4),
          IIC_VST2x2u, "vst2", Dt,
          "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Inst{5-4} = Rn{5-4};
}
1291
// Two-register writeback records.
def VST2d8_UPD        : VST2DWB<0b1000, {0,0,?,?}, "8">;
def VST2d16_UPD       : VST2DWB<0b1000, {0,1,?,?}, "16">;
def VST2d32_UPD       : VST2DWB<0b1000, {1,0,?,?}, "32">;

// Four-register writeback records.
def VST2q8_UPD        : VST2QWB<{0,0,?,?}, "8">;
def VST2q16_UPD       : VST2QWB<{0,1,?,?}, "16">;
def VST2q32_UPD       : VST2QWB<{1,0,?,?}, "32">;

// Writeback pseudos for the two-register records.
def VST2d8Pseudo_UPD  : VSTQWBPseudo<IIC_VST2u>;
def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;

// Writeback pseudos for the four-register records.
def VST2q8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;

// Double-spaced register variants, present for disassembly only
// (op11_8 = 0b1001).
def VST2b8            : VST2D<0b1001, {0,0,?,?}, "8">;
def VST2b16           : VST2D<0b1001, {0,1,?,?}, "16">;
def VST2b32           : VST2D<0b1001, {1,0,?,?}, "32">;
def VST2b8_UPD        : VST2DWB<0b1001, {0,0,?,?}, "8">;
def VST2b16_UPD       : VST2DWB<0b1001, {0,1,?,?}, "16">;
def VST2b32_UPD       : VST2DWB<0b1001, {1,0,?,?}, "32">;
1315
1316//   VST3     : Vector Store (multiple 3-element structures)
// Three-register VST3 without writeback; op11_8 is chosen by each record and
// Rm is fixed to 0b1111.
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
          IIC_VST3, "vst3", Dt,
          "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
}
1325
// VST3 records with op11_8 = 0b0100, followed by their pseudos.
def VST3d8        : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16       : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32       : VST3D<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
1333
1334// ...with address register writeback:
// Three-register VST3 with writeback: defines $wb, tied to $Rn's address.
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3),
          IIC_VST3u, "vst3", Dt,
          "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Inst{4} = Rn{4};
}
1344
// Writeback records with op11_8 = 0b0100, followed by their pseudos.
def VST3d8_UPD        : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD       : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD       : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;

// Double-spaced register variants (op11_8 = 0b0101):
def VST3q8            : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16           : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32           : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD        : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD       : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD       : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// Alternate pseudos, to be allocated odd register numbers:
def VST3q8oddPseudo      : VSTQQQQPseudo<IIC_VST3>;
def VST3q16oddPseudo     : VSTQQQQPseudo<IIC_VST3>;
def VST3q32oddPseudo     : VSTQQQQPseudo<IIC_VST3>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
1373
1374//   VST4     : Vector Store (multiple 4-element structures)
// Four-register VST4 without writeback; op11_8 is chosen by each record and
// Rm is fixed to 0b1111.
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt,
          "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
}
1384
// VST4 records with op11_8 = 0b0000, followed by their pseudos.
def VST4d8        : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16       : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32       : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
1392
1393// ...with address register writeback:
// Four-register VST4 with writeback: defines $wb, tied to $Rn's address.
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4,
          (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3,
               DPR:$src4),
          IIC_VST4u, "vst4", Dt,
          "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVSTInstruction";
  let Inst{5-4} = Rn{5-4};
}
1403
// Writeback records with op11_8 = 0b0000, followed by their pseudos.
def VST4d8_UPD        : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD       : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD       : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;

// Double-spaced register variants (op11_8 = 0b0001):
def VST4q8            : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16           : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32           : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD        : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD       : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD       : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

// Alternate pseudos, to be allocated odd register numbers:
def VST4q8oddPseudo      : VSTQQQQPseudo<IIC_VST4>;
def VST4q16oddPseudo     : VSTQQQQPseudo<IIC_VST4>;
def VST4q32oddPseudo     : VSTQQQQPseudo<IIC_VST4>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
1432
1433} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
1434
1435// Classes for VST*LN pseudo-instructions with multi-register operands.
1436// These are expanded to real instructions after register allocation.
// Lane-store pseudos come in pairs per source register class (QPR, QQPR,
// QQQQPR).  Each takes a trailing nohash_imm $lane operand; the "WB" form
// additionally takes an am6offset operand and defines $wb, tied to the
// address part of $addr.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                     nohash_imm:$lane),
                itin, "$addr.addr = $wb">;

class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                     nohash_imm:$lane),
                itin, "$addr.addr = $wb">;

class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                     nohash_imm:$lane),
                itin, "$addr.addr = $wb">;
1458
1459//   VST1LN   : Vector Store (single element from one lane)
// Single-lane VST1 without writeback.  Selected for StoreOp applied to the
// value ExtractOp yields from lane $lane of a DPR vector of type Ty.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                      addrmode6:$Rn)]> {
  let DecoderMethod = "DecodeVST1LN";
  let Rm = 0b1111;
}
// Identical to VST1LN except that the address operand (in both the operand
// list and the pattern) is addrmode6oneL32 rather than addrmode6.
class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                      addrmode6oneL32:$Rn)]> {
  let DecoderMethod = "DecodeVST1LN";
  let Rm = 0b1111;
}
// Q-register lane-store pseudo carrying the selection pattern for a QPR
// source of type Ty.
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln> {
  let Pattern =
    [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), addrmode6:$addr)];
}
1483
// 8- and 16-bit lanes are stored with truncating stores of the value read by
// NEONvgetlaneu; 32-bit lanes use a plain store of an extractelt.
def VST1LNd8
  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, NEONvgetlaneu> {
  let Inst{7-5} = lane{2-0};
}
// NOTE(review): Inst{4} is fed from Rn{5} here, whereas classes such as
// VLD2LNWB and VLD4LN in this file feed Inst{4} from Rn{4} -- confirm that
// Rn{5} is intended.
def VST1LNd16
  : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, NEONvgetlaneu> {
  let Inst{4}   = Rn{5};
  let Inst{7-6} = lane{1-0};
}

def VST1LNd32
  : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
  let Inst{5-4} = Rn{5-4};
  let Inst{7}   = lane{0};
}

// Q-register pseudos with the corresponding 128-bit vector types.
def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8,  NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Floating-point lane stores reuse the 32-bit integer records.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1507
// ...with address register writeback:
// Adds an am6offset:$Rm input and a GPR:$wb output that is tied to the
// address of $Rn; the pattern sets $wb from the post-indexed StoreOp.
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1lnu, "vst1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            [(set GPR:$wb,
                  (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                           addrmode6:$Rn, am6offset:$Rm))]> {
  let DecoderMethod = "DecodeVST1LN";
}
// Writeback pseudo for a Q-register source: the pattern stores lane $lane of
// (Ty QPR:$src) to $addr with offset $offset and sets $wb from the result.
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu> {
  let Pattern =
    [(set GPR:$wb,
          (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                   addrmode6:$addr, am6offset:$offset))];
}
1525
// D-register writeback instances.  Bit assignments mirror the non-writeback
// VST1LNd8/d16/d32 defs; the store operators are the post-indexed variants.
def VST1LNd8_UPD
  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, NEONvgetlaneu> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD
  : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
             NEONvgetlaneu> {
  let Inst{4}   = Rn{5};
  let Inst{7-6} = lane{1-0};
}
def VST1LNd32_UPD
  : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, extractelt> {
  let Inst{5-4} = Rn{5-4};
  let Inst{7}   = lane{0};
}
1540
// Q-register writeback pseudos for the three element sizes.
def VST1LNq8Pseudo_UPD
  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD
  : VST1QLNWBPseudo<v8i16, post_truncsti16, NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD
  : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
1544
1545let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
1546
//   VST2LN   : Vector Store (single 2-element structure from one lane)
// Two D-register sources ($Vd, $src2) share one lane index.  No selection
// pattern is attached here (empty pattern list).
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt,
            "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
            "",
            []> {
  let DecoderMethod = "DecodeVST2LN";
  // This form has no $Rm operand; the Rm field is a fixed constant.
  let Rm = 0b1111;
  // Bit 4 of the instruction is taken from bit 4 of the $Rn operand value.
  let Inst{4} = Rn{4};
}
1557
// D-register VST2LN instances: only the lane bits differ per element size.
def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8">  { let Inst{7-5} = lane{2-0}; }
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> { let Inst{7}   = lane{0}; }

// Matching pseudos, all sharing the VST2LN itinerary.
def VST2LNd8Pseudo
  : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo
  : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo
  : VSTQLNPseudo<IIC_VST2ln>;
1571
// ...with double-spaced registers:
// These differ from the d16/d32 defs by a 1 where those have a 0 in op7_4.
// The Inst{4} assignment repeats the one already made in the VST2LN class.
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{4}   = Rn{4};
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{4}   = Rn{4};
  let Inst{7}   = lane{0};
}

def VST2LNq16Pseudo
  : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo
  : VSTQQLNPseudo<IIC_VST2ln>;
1584
// ...with address register writeback:
// Adds an am6offset:$Rm input and a GPR:$wb output tied to the address of
// $Rn.  The operands are spelled $Rn, $Rm and $Vd so that they carry the same
// names as the encoding fields: this class reads the Rn field below, and the
// VST1LNWB, VST3LNWB and VST4LNWB classes use the same operand names.
// Operand order is unchanged, so positional users are unaffected.
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
          "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  // Bit 4 of the instruction is taken from bit 4 of the $Rn operand value.
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}
1595
// D-register VST2LN writeback instances: lane bits only.
def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8">  {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

// Matching writeback pseudos, all sharing the VST2LN writeback itinerary.
def VST2LNd8Pseudo_UPD
  : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD
  : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD
  : VSTQLNWBPseudo<IIC_VST2lnu>;
1609
// Writeback instances whose op7_4 has a 1 where the d16/d32 forms have a 0
// (the same difference as between VST2LNq16/q32 and VST2LNd16/d32).
def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNq16Pseudo_UPD
  : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD
  : VSTQQLNWBPseudo<IIC_VST2lnu>;
1619
//   VST3LN   : Vector Store (single 3-element structure from one lane)
// Three D-register sources ($Vd, $src2, $src3) share one lane index.  No
// selection pattern is attached here (empty pattern list).
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
                 nohash_imm:$lane),
            IIC_VST3ln, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn",
            "",
            []> {
  let DecoderMethod = "DecodeVST3LN";
  // This form has no $Rm operand; the Rm field is a fixed constant.
  let Rm = 0b1111;
}
1629
// D-register VST3LN instances: only the lane bits differ per element size.
def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8">  { let Inst{7-5} = lane{2-0}; }
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; }
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { let Inst{7}   = lane{0}; }

// Matching pseudos, all sharing the VST3LN itinerary.
def VST3LNd8Pseudo
  : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo
  : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo
  : VSTQQLNPseudo<IIC_VST3ln>;
1643
// ...with double-spaced registers:
// These differ from the d16/d32 defs by a 1 where those have a 0 in op7_4.
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; }
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> { let Inst{7}   = lane{0}; }

def VST3LNq16Pseudo
  : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo
  : VSTQQQQLNPseudo<IIC_VST3ln>;
1654
// ...with address register writeback:
// Adds an am6offset:$Rm input and a GPR:$wb output tied to the address of
// $Rn.  No selection pattern is attached here.
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VST3lnu, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            []> {
  let DecoderMethod = "DecodeVST3LN";
}
1665
// D-register VST3LN writeback instances: lane bits only.
def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8">  {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

// Matching writeback pseudos, all sharing the VST3LN writeback itinerary.
def VST3LNd8Pseudo_UPD
  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD
  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD
  : VSTQQLNWBPseudo<IIC_VST3lnu>;
1679
// Writeback instances whose op7_4 has a 1 where the d16/d32 forms have a 0
// (the same difference as between VST3LNq16/q32 and VST3LNd16/d32).
def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNq16Pseudo_UPD
  : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD
  : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
1689
//   VST4LN   : Vector Store (single 4-element structure from one lane)
// Four D-register sources ($Vd, $src2, $src3, $src4) share one lane index.
// No selection pattern is attached here (empty pattern list).
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
                 nohash_imm:$lane),
            IIC_VST4ln, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
            "",
            []> {
  let DecoderMethod = "DecodeVST4LN";
  // This form has no $Rm operand; the Rm field is a fixed constant.
  let Rm = 0b1111;
  // Bit 4 of the instruction is taken from bit 4 of the $Rn operand value.
  let Inst{4} = Rn{4};
}
1701
// D-register VST4LN instances.  The 32-bit form additionally copies bit 5 of
// the $Rn operand value into Inst{5}.
def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8">  { let Inst{7-5} = lane{2-0}; }
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{5} = Rn{5};
  let Inst{7} = lane{0};
}

// Matching pseudos, all sharing the VST4LN itinerary.
def VST4LNd8Pseudo
  : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo
  : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo
  : VSTQQLNPseudo<IIC_VST4ln>;
1716
// ...with double-spaced registers:
// These differ from the d16/d32 defs by a 1 where those have a 0 in op7_4.
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; }
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{5} = Rn{5};
  let Inst{7} = lane{0};
}

def VST4LNq16Pseudo
  : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo
  : VSTQQQQLNPseudo<IIC_VST4ln>;
1728
// ...with address register writeback:
// Adds an am6offset:$Rm input and a GPR:$wb output tied to the address of
// $Rn.  No selection pattern is attached here.
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4,
            (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
                 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VST4lnu, "vst4", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            []> {
  let DecoderMethod = "DecodeVST4LN";
  // Bit 4 of the instruction is taken from bit 4 of the $Rn operand value.
  let Inst{4} = Rn{4};
}
1740
// D-register VST4LN writeback instances.  The 32-bit form additionally copies
// bit 5 of the $Rn operand value into Inst{5}.
def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8">  {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{5} = Rn{5};
  let Inst{7} = lane{0};
}

// Matching writeback pseudos, all sharing the VST4LN writeback itinerary.
def VST4LNd8Pseudo_UPD
  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD
  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD
  : VSTQQLNWBPseudo<IIC_VST4lnu>;
1755
// Writeback instances whose op7_4 has a 1 where the d16/d32 forms have a 0
// (the same difference as between VST4LNq16/q32 and VST4LNd16/d32).
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{5} = Rn{5};
  let Inst{7} = lane{0};
}

def VST4LNq16Pseudo_UPD
  : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD
  : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
1766
1767} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
1768
1769
1770//===----------------------------------------------------------------------===//
1771// NEON pattern fragments
1772//===----------------------------------------------------------------------===//
1773
// Extract D sub-registers of Q registers.
// Maps the immediate to the sub-register index ARM::dsub_0 + imm/8.
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t ImmVal = N->getZExtValue();
  return CurDAG->getTargetConstant(ARM::dsub_0 + ImmVal / 8, MVT::i32);
}]>;
// Maps the immediate to the sub-register index ARM::dsub_0 + imm/4.
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t ImmVal = N->getZExtValue();
  return CurDAG->getTargetConstant(ARM::dsub_0 + ImmVal / 4, MVT::i32);
}]>;
// Maps the immediate to the sub-register index ARM::dsub_0 + imm/2.
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t ImmVal = N->getZExtValue();
  return CurDAG->getTargetConstant(ARM::dsub_0 + ImmVal / 2, MVT::i32);
}]>;
// Maps the immediate directly to the sub-register index ARM::dsub_0 + imm.
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  uint64_t ImmVal = N->getZExtValue();
  return CurDAG->getTargetConstant(ARM::dsub_0 + ImmVal, MVT::i32);
}]>;
1791
// Extract S sub-registers of Q/D registers.
// Maps the immediate directly to the sub-register index ARM::ssub_0 + imm.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  uint64_t ImmVal = N->getZExtValue();
  return CurDAG->getTargetConstant(ARM::ssub_0 + ImmVal, MVT::i32);
}]>;
1797
// Translate lane numbers from Q registers to D subregs.
// i8: keeps the low three bits of the lane number.
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  uint64_t QLane = N->getZExtValue();
  return CurDAG->getTargetConstant(QLane & 7, MVT::i32);
}]>;
// i16: keeps the low two bits of the lane number.
def SubReg_i16_lane : SDNodeXForm<imm, [{
  uint64_t QLane = N->getZExtValue();
  return CurDAG->getTargetConstant(QLane & 3, MVT::i32);
}]>;
// i32: keeps the low bit of the lane number.
def SubReg_i32_lane : SDNodeXForm<imm, [{
  uint64_t QLane = N->getZExtValue();
  return CurDAG->getTargetConstant(QLane & 1, MVT::i32);
}]>;
1808
1809//===----------------------------------------------------------------------===//
1810// Instruction Classes
1811//===----------------------------------------------------------------------===//
1812
// Basic 2-register operations: double- and quad-register.
// N2VD is the D-register form: $Vd = (ResTy (OpNode (OpTy $Vm))), with the
// N2V argument that follows op11_7 fixed to 0 and itinerary IIC_VUNAD.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
// Q-register form of the basic 2-register operation: QPR operands, the N2V
// argument that follows op11_7 fixed to 1, and itinerary IIC_VUNAQ.
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
1826
// Basic 2-register intrinsics, both double- and quad-register.
// N2VDInt is the D-register form; the operation is an Intrinsic and the
// itinerary is supplied by the caller.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// Q-register form of the basic 2-register intrinsic: QPR operands and the
// N2V argument that follows op11_7 fixed to 1.
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
1842
// Narrow 2-register operations.
// A Q-register source of type TyQ produces a D-register result of type TyD.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
1851
// Narrow 2-register intrinsics.
// Same operand shape as N2VN (QPR in, DPR out) with an Intrinsic operation.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
1860
// Long 2-register operations (currently only used for VMOVL).
// A D-register source of type TyD produces a Q-register result of type TyQ.
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
1869
// Long 2-register intrinsics.
// Same operand shape as N2VL (DPR in, QPR out) with an Intrinsic operation.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vm),
        itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
1878
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// D-register form.  Both registers are read and written: the two inputs are
// tied to the two outputs ($src1 = $Vd, $src2 = $Vm).  No pattern is attached.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0,
        (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2),
        IIC_VPERMD, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm",
        []>;
// Q-register 2-register shuffle.  Both inputs are tied to the two outputs
// ($src1 = $Vd, $src2 = $Vm); the itinerary is supplied by the caller and no
// pattern is attached.
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0,
        (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2),
        itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm",
        []>;
1890
// Basic 3-register operations: double- and quad-register.
// N3VD is the D-register form: $Vd = (ResTy (OpNode (OpTy $Vn), (OpTy $Vm))).
// The Commutable argument is forwarded to isCommutable.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
// Derives from N3VX, which takes no Dt string argument.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd),
         (ins DPR:$Vn, DPR:$Vm),
         N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd,
               (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
1912
// D-register operation whose second operand is one lane of $Vm, duplicated
// with NEONvduplane.  $Vm is restricted to the DPR_VFP2 register class.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm[$lane]", "",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                          imm:$lane)))))]> {
  // The lane operand is not interchangeable with $Vn.
  let isCommutable = 0;
}
// Variant of the D-register by-lane operation built on N3VLane16: $Vm is
// restricted to DPR_8 and the itinerary is fixed to IIC_VMULi16D.
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm[$lane]", "",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                          imm:$lane)))))]> {
  // The lane operand is not interchangeable with $Vn.
  let isCommutable = 0;
}
1934
// Q-register form of the basic 3-register operation:
// $Vd = (ResTy (OpNode (OpTy $Vn), (OpTy $Vm))) on QPR operands.
// The Commutable argument is forwarded to isCommutable.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Q-register 3-register operation without a data-type string: derives from
// N3VX, which takes no Dt argument.
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd),
         (ins QPR:$Vn, QPR:$Vm),
         N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd,
               (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Q-register operation whose second operand is one lane of the D register
// $Vm (type OpTy, class DPR_VFP2), duplicated to ResTy with NEONvduplane.
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm[$lane]", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  // The lane operand is not interchangeable with $Vn.
  let isCommutable = 0;
}
// Variant of the Q-register by-lane operation built on N3VLane16: $Vm is
// restricted to DPR_8 and the itinerary is fixed to IIC_VMULi16Q.
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm[$lane]", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]> {
  // The lane operand is not interchangeable with $Vn.
  let isCommutable = 0;
}
1976
// Basic 3-register intrinsics, both double- and quad-register.
// N3VDInt is the D-register form; the instruction Format and the itinerary
// are supplied by the caller, and Commutable is forwarded to isCommutable.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        f, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// D-register intrinsic whose second operand is one lane of $Vm (class
// DPR_VFP2), duplicated with NEONvduplane.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm[$lane]", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                           imm:$lane)))))]> {
  // The lane operand is not interchangeable with $Vn.
  let isCommutable = 0;
}
// Variant of the D-register by-lane intrinsic built on N3VLane16, with $Vm
// restricted to DPR_8.
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm[$lane]", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                           imm:$lane)))))]> {
  // The lane operand is not interchangeable with $Vn.
  let isCommutable = 0;
}
// D-register intrinsic with the source operands in the opposite order from
// N3VDInt: $Vm comes before $Vn in the operand list, the assembly string and
// the pattern.  Never commutable.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm, DPR:$Vn),
        f, itin, OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd,
              (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let isCommutable = 0;
}
2017
// Q-register form of the basic 3-register intrinsic; the instruction Format
// and the itinerary are supplied by the caller, and Commutable is forwarded
// to isCommutable.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        f, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Q-register intrinsic whose second operand is one lane of the D register
// $Vm (type OpTy, class DPR_VFP2), duplicated to ResTy with NEONvduplane.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm[$lane]", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  // The lane operand is not interchangeable with $Vn.
  let isCommutable = 0;
}
// Variant of the Q-register by-lane intrinsic built on N3VLane16, with $Vm
// restricted to DPR_8.
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm[$lane]", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]> {
  // The lane operand is not interchangeable with $Vn.
  let isCommutable = 0;
}
// Q-register intrinsic with the source operands in the opposite order from
// N3VQInt: $Vm comes before $Vn in the operand list, the assembly string and
// the pattern.  Never commutable.
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm, QPR:$Vn),
        f, itin, OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd,
              (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let isCommutable = 0;
}
2060
// Multiply-Add/Sub operations: double- and quad-register.
// D-register form: $Vd = OpNode($src1, MulOp($Vn, $Vm)), where the
// accumulator input $src1 is tied to the destination $Vd.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (Ty (OpNode DPR:$src1, (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
2070
// D-register multiply-accumulate by lane: the second multiplicand is lane
// $lane of $Vm (class DPR_VFP2), duplicated with NEONvduplane.  $src1 is
// tied to $Vd.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                     imm:$lane)))))))]>;
// Variant of the D-register multiply-accumulate by lane built on N3VLane16,
// with $Vm restricted to DPR_8.  $src1 is tied to $Vd.
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                     imm:$lane)))))))]>;
2097
// Q-register multiply-accumulate: $Vd = OpNode($src1, MulOp($Vn, $Vm)),
// where the accumulator input $src1 is tied to the destination $Vd.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (Ty (OpNode QPR:$src1, (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
// Q-register multiply-accumulate by lane: the second multiplicand is lane
// $lane of the D register $Vm (type OpTy, class DPR_VFP2), duplicated to
// ResTy with NEONvduplane.  $src1 is tied to $Vd.
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                   (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                        imm:$lane)))))))]>;
// Variant of the Q-register multiply-accumulate by lane built on N3VLane16,
// with $Vm restricted to DPR_8.  $src1 is tied to $Vd.
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                   (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                        imm:$lane)))))))]>;
2133
// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// D-register form: $Vd = OpNode($src1, IntOp($Vn, $Vm)), with the
// accumulator input $src1 tied to the destination $Vd.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, Intrinsic IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (Ty (OpNode DPR:$src1,
                          (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
// Q-register Intrinsic-Op form: $Vd = OpNode($src1, IntOp($Vn, $Vm)), with
// the accumulator input $src1 tied to the destination $Vd.
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, Intrinsic IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (Ty (OpNode QPR:$src1,
                          (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
2151
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
// D-register form: $Vd = IntOp($src1, $Vn, $Vm) with $src1 tied to $Vd.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
// Q-register 3-argument intrinsic: $Vd = IntOp($src1, $Vn, $Vm), with the
// first source $src1 tied to the destination $Vd.
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (ResTy (IntOp (OpTy QPR:$src1),
                            (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2170
2171// Long Multiply-Add/Sub operations.
// Register form: $Vd = OpNode($src1, MulOp($Vn, $Vm)).  The accumulator and
// result are Q registers (TyQ); the multiplicands are D registers (TyD).
// $src1 is tied to $Vd.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (MulOp (TyD DPR:$Vn), (TyD DPR:$Vm)))))]>;
// By-lane form built on N3VLane32:
//   $Vd = OpNode($src1, MulOp($Vn, NEONvduplane($Vm, $lane))).
// $Vm is drawn from the DPR_VFP2 register class and $src1 is tied to $Vd.
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (MulOp (TyD DPR:$Vn),
                                  (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                     imm:$lane))))))]>;
// By-lane form built on N3VLane16:
//   $Vd = OpNode($src1, MulOp($Vn, NEONvduplane($Vm, $lane))).
// $Vm is drawn from the DPR_8 register class and $src1 is tied to $Vd.
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (MulOp (TyD DPR:$Vn),
                                  (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                     imm:$lane))))))]>;
2205
2206// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// $Vd = OpNode($src1, ExtOp(IntOp($Vn, $Vm))).  The intrinsic is evaluated on
// D-register operands (TyD), its result is widened to TyQ by ExtOp, and the
// widened value is combined with the tied Q-register accumulator $src1.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                              (TyD DPR:$Vm)))))))]>;
2217
2218// Neon Long 3-argument intrinsic.  The destination register is
2219// a quad-register and is also used as the first source operand register.
// Register form: $Vd = IntOp($src1, $Vn, $Vm), where $src1 and $Vd are Q
// registers (TyQ), $Vn and $Vm are D registers (TyD), and $src1 is tied to $Vd.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyQ QPR:$src1),
                          (TyD DPR:$Vn),
                          (TyD DPR:$Vm))))]>;
// By-lane form built on N3VLane32:
//   $Vd = IntOp($src1, $Vn, NEONvduplane($Vm, $lane)).
// $Vm is drawn from DPR_VFP2; the Q-register accumulator $src1 is tied to $Vd.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
// By-lane form built on N3VLane16:
//   $Vd = IntOp($src1, $Vn, NEONvduplane($Vm, $lane)).
// $Vm is drawn from DPR_8; the Q-register accumulator $src1 is tied to $Vd.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;
2254
2255// Narrowing 3-register intrinsics.
// $Vd = IntOp($Vn, $Vm) with Q-register sources (TyQ) and a D-register result
// (TyD).  The itinerary is fixed to IIC_VBINi4D; isCommutable is taken from
// the Commutable template argument.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
2265
2266// Long 3-register operations.
// Register form: $Vd = OpNode($Vn, $Vm) with D-register sources (TyD) and a
// Q-register result (TyQ).  isCommutable follows the Commutable argument.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// By-lane form built on N3VLane32: $Vd = OpNode($Vn, NEONvduplane($Vm, $lane)).
// $Vm is drawn from DPR_VFP2; the result is a Q register typed TyQ.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set QPR:$Vd,
              (TyQ (OpNode (TyD DPR:$Vn),
                           (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                              imm:$lane)))))]>;
// By-lane form built on N3VLane16: $Vd = OpNode($Vn, NEONvduplane($Vm, $lane)).
// $Vm is drawn from DPR_8; the result is a Q register typed TyQ.
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set QPR:$Vd,
              (TyQ (OpNode (TyD DPR:$Vn),
                           (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                              imm:$lane)))))]>;
2294
2295// Long 3-register operations with explicitly extended operands.
// $Vd = OpNode(ExtOp($Vn), ExtOp($Vm)).  Each D-register source (TyD) is
// widened to TyQ by ExtOp before OpNode is applied.  isCommutable follows the
// Commutable argument.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                      (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}
2307
2308// Long 3-register intrinsics with explicit extend (VABDL).
// $Vd = ExtOp(IntOp($Vn, $Vm)).  The intrinsic runs on D-register operands
// (TyD) and its TyD result is widened to TyQ by ExtOp.  isCommutable follows
// the Commutable argument.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                      (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}
2320
2321// Long 3-register intrinsics.
// Register form: $Vd = IntOp($Vn, $Vm) with D-register sources (TyD) and a
// Q-register result (TyQ).  isCommutable follows the Commutable argument.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// By-lane form built on N3VLane32: $Vd = IntOp($Vn, NEONvduplane($Vm, $lane)).
// $Vm is drawn from DPR_VFP2; sources are typed OpTy and the result ResTy.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
// By-lane form built on N3VLane16: $Vd = IntOp($Vn, NEONvduplane($Vm, $lane)).
// $Vm is drawn from DPR_8; sources are typed OpTy and the result ResTy.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;
2351
2352// Wide 3-register operations.
// $Vd = OpNode($Vn, ExtOp($Vm)).  $Vn is already a Q register (TyQ); only the
// D-register operand $Vm (TyD) is widened by ExtOp.  The itinerary is fixed to
// IIC_VSUBiD and isCommutable follows the Commutable argument.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR:$Vm),
        N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$Vn),
                      (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}
2363
2364// Pairwise long 2-register intrinsics, both double- and quad-register.
// D-register form: $Vd = IntOp($Vm), with the source typed OpTy and the result
// typed ResTy.  The itinerary is fixed to IIC_VSHLiD.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        IIC_VSHLiD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd,
              (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// Q-register form: $Vd = IntOp($Vm), with the source typed OpTy and the result
// typed ResTy.
// NOTE(review): this Q-register class uses the same IIC_VSHLiD itinerary as
// the D-register sibling; confirm that this is intentional.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        IIC_VSHLiD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd,
              (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2379
2380// Pairwise long 2-register accumulate intrinsics,
2381// both double- and quad-register.
2382// The destination register is also used as the first source operand register.
// D-register form: $Vd = IntOp($src1, $Vm).  $src1 is typed ResTy and tied to
// $Vd; $Vm is typed OpTy.  The itinerary is fixed to IIC_VPALiD.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vm),
        IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Q-register form: $Vd = IntOp($src1, $Vm).  $src1 is typed ResTy and tied to
// $Vd; $Vm is typed OpTy.  The itinerary is fixed to IIC_VPALiQ.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vm),
        IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
2399
2400// Shift by immediate,
2401// both double- and quad-register.
// D-register form: $Vd = OpNode($Vm, $SIMM).  The immediate's operand class is
// supplied by ImmTy and the pattern matches it as an i32 immediate; the
// instruction format f and the itinerary are template arguments.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$Vm, ImmTy:$SIMM),
           f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
// Q-register form: $Vd = OpNode($Vm, $SIMM).  The immediate's operand class is
// supplied by ImmTy and the pattern matches it as an i32 immediate; the
// instruction format f and the itinerary are template arguments.
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$Vm, ImmTy:$SIMM),
           f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
2416
2417// Long shift by immediate.
// $Vd = OpNode($Vm, $SIMM) with a D-register source (OpTy) and a Q-register
// result (ResTy).  The immediate is a plain i32imm operand; the format is
// fixed to N2RegVShLFrm and the itinerary to IIC_VSHLiD.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd),
           (ins DPR:$Vm, i32imm:$SIMM),
           N2RegVShLFrm, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (ResTy (OpNode (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
2426
2427// Narrow shift by immediate.
// $Vd = OpNode($Vm, $SIMM) with a Q-register source (OpTy) and a D-register
// result (ResTy).  The immediate's operand class comes from ImmTy; the format
// is fixed to N2RegVShRFrm.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd),
           (ins QPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (ResTy (OpNode (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
2436
2437// Shift right by immediate and accumulate,
2438// both double- and quad-register.
// D-register form: $Vd = add($src1, ShOp($Vm, $SIMM)).  The accumulator $src1
// is tied to $Vd.  Format is fixed to N2RegVShRFrm, itinerary to IIC_VPALiD.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd,
                 (Ty (add DPR:$src1,
                          (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
// Q-register form: $Vd = add($src1, ShOp($Vm, $SIMM)).  The accumulator $src1
// is tied to $Vd.  Format is fixed to N2RegVShRFrm.
// NOTE(review): this Q-register class uses the same IIC_VPALiD itinerary as
// the D-register sibling; confirm that this is intentional.
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (add QPR:$src1,
                          (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
2455
2456// Shift by immediate and insert,
2457// both double- and quad-register.
// D-register form: $Vd = ShOp($src1, $Vm, $SIMM).  The previous destination
// value $src1 is an input to the node and is tied to $Vd.  The instruction
// format f is a template argument; the itinerary is fixed to IIC_VSHLiD.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
           f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd,
                 (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
// Q-register form: $Vd = ShOp($src1, $Vm, $SIMM).  The previous destination
// value $src1 is an input to the node and is tied to $Vd.  The instruction
// format f is a template argument; the itinerary is fixed to IIC_VSHLiQ.
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
           f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
2472
2473// Convert, with fractional bits immediate,
2474// both double- and quad-register.
// D-register form: $Vd = IntOp($Vm, $SIMM).  The immediate uses the
// neon_vcvt_imm32 operand class; format is NVCVTFrm, itinerary IIC_VUNAD.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$Vm, neon_vcvt_imm32:$SIMM),
           NVCVTFrm, IIC_VUNAD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
// Q-register form: $Vd = IntOp($Vm, $SIMM).  The immediate uses the
// neon_vcvt_imm32 operand class; format is NVCVTFrm, itinerary IIC_VUNAQ.
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$Vm, neon_vcvt_imm32:$SIMM),
           NVCVTFrm, IIC_VUNAQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
2489
2490//===----------------------------------------------------------------------===//
2491// Multiclasses
2492//===----------------------------------------------------------------------===//
2493
2494// Abbreviations used in multiclass suffixes:
2495//   Q = quarter int (8 bit) elements
2496//   H = half int (16 bit) elements
2497//   S = single int (32 bit) elements
2498//   D = double int (64 bit) elements
2499
2500// Neon 2-register vector operations and intrinsics.
2501
2502// Neon 2-register comparisons.
2503//   source operand element sizes of 8, 16 and 32 bits:
// Emits eight one-source compare records, $Vd = OpNode($Vm): integer variants
// for 8/16/32-bit elements plus an f32 variant, each in a D-register and a
// Q-register flavour.  Integer variants append the element size to Dt; the f32
// variants use the fixed data-type string "f32", produce an integer vector
// result (v2i32 / v4i32) and set Inst{10}.  No itinerary is attached.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // D-register (64-bit) variants.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd),
                  (ins DPR:$Vm),
                  NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd),
                  (ins DPR:$Vm),
                  NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd),
                  (ins DPR:$Vm),
                  NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd),
                  (ins DPR:$Vm),
                  NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // Q-register (128-bit) variants.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd),
                  (ins QPR:$Vm),
                  NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd),
                  (ins QPR:$Vm),
                  NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd),
                  (ins QPR:$Vm),
                  NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd),
                  (ins QPR:$Vm),
                  NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}
2547
2548
2549// Neon 2-register vector intrinsics,
2550//   element sizes of 8, 16 and 32 bits:
// Emits six records for IntOp, one per 8/16/32-bit integer vector type:
// three N2VDInt (D-register, itinD) and three N2VQInt (Q-register, itinQ).
// The size field passed after op21_20 is 0b00/0b01/0b10 and the matching
// element width is appended to Dt.
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, Intrinsic IntOp> {
  // D-register (64-bit) variants.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinD,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, itinD,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, itinD,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp>;

  // Q-register (128-bit) variants.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, itinQ,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp>;
}
2571
2572
2573// Neon Narrowing 2-register vector operations,
2574//   source operand element sizes of 16, 32 and 64 bits:
// Emits three N2VN records for OpNode.  Each def is named after its result
// type, while the Dt suffix gives the source element size:
//   v8i8 <- v8i16 ("16"), v4i16 <- v4i32 ("32"), v2i32 <- v2i64 ("64").
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, itin,
                   OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, itin,
                   OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, itin,
                   OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode>;
}
2589
2590// Neon Narrowing 2-register vector intrinsics,
2591//   source operand element sizes of 16, 32 and 64 bits:
// Emits three N2VNInt records for IntOp.  Each def is named after its result
// type, while the Dt suffix gives the source element size:
//   v8i8 <- v8i16 ("16"), v4i16 <- v4i32 ("32"), v2i32 <- v2i64 ("64").
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, itin,
                      OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, itin,
                      OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, itin,
                      OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, IntOp>;
}
2606
2607
2608// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
// Emits three N2VL records for OpNode.  Each def is named after its (wider)
// result type and the Dt suffix gives the source element size:
//   v8i16 <- v8i8 ("8"), v4i32 <- v4i16 ("16"), v2i64 <- v2i32 ("32").
// The three 2-bit fields after op24_23 are fixed constants per variant and the
// itinerary is always IIC_VQUNAiD.
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4,
                   IIC_VQUNAiD, OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode>;
}
2619
2620
2621// Neon 3-register vector operations.
2622
2623// First with only element sizes of 8, 16 and 32 bits:
// Emits six records for OpNode over 8/16/32-bit integer vectors: three N3VD
// (D-register) and three N3VQ (Q-register).  The 8-bit and 16-bit variants
// share the *16 itinerary argument; the 32-bit variants use the *32 one.
// Commutable defaults to 0 and is forwarded to every record.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // D-register (64-bit) variants.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4,
                   itinD16, OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4,
                   itinD16, OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4,
                   itinD32, OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // Q-register (128-bit) variants.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4,
                   itinQ16, OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4,
                   itinQ16, OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4,
                   itinQ32, OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}
2651
// Emits the four by-lane ("SL") records for ShOp over 16- and 32-bit
// elements.  The 16-bit variants use the *SL16 classes, which take no
// itinerary argument here; the 32-bit variants pass IIC_VMULi32D / IIC_VMULi32Q
// explicitly.  The Q-register variants also name the D-register lane type.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
  // D-register variants.
  def v4i16 : N3VDSL16<0b01, op11_8,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D,
                     OpcodeStr, !strconcat(Dt,"32"), v2i32, ShOp>;

  // Q-register variants.
  def v8i16 : N3VQSL16<0b01, op11_8,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q,
                     OpcodeStr, !strconcat(Dt,"32"), v4i32, v2i32, ShOp>;
}
2662
2663// ....then also with element size 64 bits:
// Extends N3V_QHS with 64-bit element variants (v1i64 in a D register, v2i64
// in a Q register).  Only one itinerary per register width is accepted, so
// itinD / itinQ are passed to N3V_QHS for both its 16-bit and 32-bit slots.
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4,
            itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4,
                   itinD, OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4,
                   itinQ, OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}
2677
2678
2679// Neon 3-register vector intrinsics.
2680
2681// First with only element sizes of 16 and 32 bits:
// Emits four records for IntOp over 16- and 32-bit integer vectors: N3VDInt
// for the D-register types and N3VQInt for the Q-register types.  Each variant
// gets its own itinerary argument; the format f and Commutable (default 0)
// are forwarded unchanged.
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     Intrinsic IntOp, bit Commutable = 0> {
  // D-register (64-bit) variants.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f,
                      itinD16, OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f,
                      itinD32, OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // Q-register (128-bit) variants.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f,
                      itinQ16, OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f,
                      itinQ32, OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
// Same shape as N3VInt_HS but built on the N3VDIntSh / N3VQIntSh classes, and
// without a Commutable argument.  Emits the 16- and 32-bit variants for both
// D and Q registers.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     Intrinsic IntOp> {
  // D-register (64-bit) variants.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f,
                        itinD16, OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f,
                        itinD32, OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // Q-register (128-bit) variants.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f,
                        itinQ16, OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f,
                        itinQ32, OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}
2724
// Emits the four by-lane ("SL") records for IntOp over 16- and 32-bit
// elements.  16-bit variants use the *SL16 classes and 32-bit variants the
// plain *SL classes; the Q-register variants also name the D-register lane
// type.  Each variant receives its own itinerary argument.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // D-register variants.
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"),
                          v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, IntOp>;

  // Q-register variants.
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"),
                          v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v2i32, IntOp>;
}
2738
2739// ....then also with element size of 8 bits:
// Extends N3VInt_HS with the 8-bit element variants (v8i8 in a D register,
// v16i8 in a Q register).  The 8-bit records reuse the 16-bit itineraries
// itinD16 / itinQ16.
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f,
              itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f,
                      itinD16, OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f,
                      itinQ16, OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4,
                        Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, Intrinsic IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f,
                itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  // Adds the 8-bit element forms on top of the 16/32-bit defs inherited from
  // N3VInt_HSSh.  The 8-bit defs reuse the 16-bit itineraries.
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16, OpcodeStr,
                        !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
}
2768
2769
2770// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, Intrinsic IntOp,
                       bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f,
               itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  // Adds the 64-bit element forms on top of the 8/16/32-bit defs inherited
  // from N3VInt_QHS.  The 64-bit defs reuse the 32-bit itineraries.
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32, OpcodeStr,
                      !strconcat(Dt, "64"), v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32, OpcodeStr,
                      !strconcat(Dt, "64"), v2i64, v2i64, IntOp, Commutable>;
}
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4,
                         Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, Intrinsic IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f,
                 itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  // Adds the 64-bit element forms on top of the 8/16/32-bit defs inherited
  // from N3VInt_QHSSh.  The 64-bit defs reuse the 32-bit itineraries.
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32, OpcodeStr,
                        !strconcat(Dt, "64"), v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32, OpcodeStr,
                        !strconcat(Dt, "64"), v2i64, v2i64, IntOp>;
}
2799
2800// Neon Narrowing 3-register vector intrinsics,
2801//   source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt, Intrinsic IntOp,
                       bit Commutable = 0> {
  // Each def is named after its (narrow) result type, while the data type
  // suffix appended to Dt is the width of the (wide) source elements.
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4, OpcodeStr,
                      !strconcat(Dt, "16"), v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4, OpcodeStr,
                      !strconcat(Dt, "32"), v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4, OpcodeStr,
                      !strconcat(Dt, "64"), v2i32, v2i64, IntOp, Commutable>;
}
2815
2816
2817// Neon Long 3-register vector operations.
2818
multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt, SDNode OpNode,
                    bit Commutable = 0> {
  // Each def is named after its 128-bit result type; the data type suffix is
  // the width of the 64-bit source vector's elements.  The 8- and 16-bit
  // source forms both use itin16.
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                   !strconcat(Dt, "8"), v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                   !strconcat(Dt, "16"), v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                   !strconcat(Dt, "32"), v2i64, v2i32, OpNode, Commutable>;
}
2833
multiclass N3VLSL_HS<bit op24, bits<4> op11_8, InstrItinClass itin,
                     string OpcodeStr, string Dt, SDNode OpNode> {
  // Unlike N3VL_QHS, the defs here are named after the source vector type.
  // Both element sizes share the single itinerary.
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v2i64, v2i32, OpNode>;
}
2842
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDNode OpNode,
                       SDNode ExtOp, bit Commutable = 0> {
  // Same shape as N3VL_QHS, but each def also forwards ExtOp to N3VLExt.
  // The 8- and 16-bit source forms both use itin16.
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                      !strconcat(Dt, "8"), v8i16, v8i8,
                      OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                      !strconcat(Dt, "16"), v4i32, v4i16,
                      OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                      !strconcat(Dt, "32"), v2i64, v2i32,
                      OpNode, ExtOp, Commutable>;
}
2857
2858// Neon Long 3-register vector intrinsics.
2859
2860// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt, Intrinsic IntOp,
                      bit Commutable = 0> {
  // Defs are named after the 128-bit result type; the data type suffix is
  // the source element width.
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                      !strconcat(Dt, "16"), v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                      !strconcat(Dt, "32"), v2i64, v2i32, IntOp, Commutable>;
}
2872
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8, InstrItinClass itin,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
  // Defs are named after the source vector type; both share one itinerary.
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                          !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
2881
2882// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, Intrinsic IntOp,
                       bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32,
               OpcodeStr, Dt, IntOp, Commutable> {
  // Adds the 8-bit source element form (v8i8 -> v8i16) to the defs inherited
  // from N3VLInt_HS; it reuses itin16.
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                      !strconcat(Dt, "8"), v8i16, v8i8, IntOp, Commutable>;
}
2893
2894// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin,
                          string OpcodeStr, string Dt,
                          Intrinsic IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  // One itinerary is shared by all three element sizes.  Each def forwards
  // both IntOp and ExtOp to N3VLIntExt.
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin, OpcodeStr,
                         !strconcat(Dt, "8"), v8i16, v8i8,
                         IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin, OpcodeStr,
                         !strconcat(Dt, "16"), v4i32, v4i16,
                         IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin, OpcodeStr,
                         !strconcat(Dt, "32"), v2i64, v2i32,
                         IntOp, ExtOp, Commutable>;
}
2908
2909
// Neon Wide 3-register vector operations,
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp,
                    bit Commutable = 0> {
  // Defs are named after the 128-bit type; the data type suffix is the
  // element width of the 64-bit type passed alongside it.  No itinerary is
  // passed to N3VW.
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4, OpcodeStr,
                   !strconcat(Dt, "8"), v8i16, v8i8,
                   OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4, OpcodeStr,
                   !strconcat(Dt, "16"), v4i32, v4i16,
                   OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4, OpcodeStr,
                   !strconcat(Dt, "32"), v2i64, v2i32,
                   OpNode, ExtOp, Commutable>;
}
2925
2926
2927// Neon Multiply-Op vector operations,
2928//   element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16,
                        InstrItinClass itinD32,
                        InstrItinClass itinQ16,
                        InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // Every def passes `mul` as the multiply node together with the caller's
  // OpNode.  The 8-bit forms reuse the 16-bit itineraries.

  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16, OpcodeStr,
                        !strconcat(Dt, "8"),
                        v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16, OpcodeStr,
                        !strconcat(Dt, "16"),
                        v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32, OpcodeStr,
                        !strconcat(Dt, "32"),
                        v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16, OpcodeStr,
                        !strconcat(Dt, "8"),
                        v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16, OpcodeStr,
                        !strconcat(Dt, "16"),
                        v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32, OpcodeStr,
                        !strconcat(Dt, "32"),
                        v4i32, mul, OpNode>;
}
2949
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16,
                         InstrItinClass itinD32,
                         InstrItinClass itinQ16,
                         InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types; the 16-bit element form uses the *SL16 class.
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, OpcodeStr,
                            !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, OpcodeStr,
                          !strconcat(Dt, "32"), v2i32, mul, ShOp>;

  // 128-bit vector types; these additionally pass the 64-bit vector type
  // with the same element size.
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, OpcodeStr,
                            !strconcat(Dt, "16"), v8i16, v4i16, mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, OpcodeStr,
                          !strconcat(Dt, "32"), v4i32, v2i32, mul, ShOp>;
}
2965
2966// Neon Intrinsic-Op vector operations,
2967//   element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt,
                        Intrinsic IntOp, SDNode OpNode> {
  // One itinerary per register width: itinD for all 64-bit forms, itinQ for
  // all 128-bit forms.

  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD, OpcodeStr,
                        !strconcat(Dt, "8"),
                        v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD, OpcodeStr,
                        !strconcat(Dt, "16"),
                        v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD, OpcodeStr,
                        !strconcat(Dt, "32"),
                        v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ, OpcodeStr,
                        !strconcat(Dt, "8"),
                        v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ, OpcodeStr,
                        !strconcat(Dt, "16"),
                        v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ, OpcodeStr,
                        !strconcat(Dt, "32"),
                        v4i32, IntOp, OpNode>;
}
2988
2989// Neon 3-argument intrinsics,
2990//   element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  // One itinerary per register width: itinD for all 64-bit forms, itinQ for
  // all 128-bit forms.  The same vector type is passed twice to each class.

  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD, OpcodeStr,
                       !strconcat(Dt, "8"),
                       v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD, OpcodeStr,
                       !strconcat(Dt, "16"),
                       v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD, OpcodeStr,
                       !strconcat(Dt, "32"),
                       v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ, OpcodeStr,
                       !strconcat(Dt, "8"),
                       v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ, OpcodeStr,
                       !strconcat(Dt, "16"),
                       v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ, OpcodeStr,
                       !strconcat(Dt, "32"),
                       v4i32, v4i32, IntOp>;
}
3010
3011
3012// Neon Long Multiply-Op vector operations,
3013//   element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt,
                         SDNode MulOp, SDNode OpNode> {
  // Defs are named after the 128-bit result type; the data type suffix is
  // the source element width.  The 8- and 16-bit source forms share itin16.
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i64, v2i32, MulOp, OpNode>;
}
3025
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8,
                          string OpcodeStr, string Dt,
                          SDNode MulOp, SDNode OpNode> {
  // The itineraries are fixed here (IIC_VMACi16D / IIC_VMACi32D) rather than
  // taken as template arguments.  Defs are named after the source type.
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D,
                            OpcodeStr, !strconcat(Dt,"16"),
                            v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D,
                          OpcodeStr, !strconcat(Dt, "32"),
                          v2i64, v2i32, MulOp, OpNode>;
}
3033
3034
3035// Neon Long 3-argument intrinsics.
3036
3037// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  // Defs are named after the 128-bit result type; the data type suffix is
  // the source element width.
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                       !strconcat(Dt, "16"),
                       v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                       !strconcat(Dt, "32"),
                       v2i64, v2i32, IntOp>;
}
3046
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt,
                         Intrinsic IntOp> {
  // The itineraries are fixed here (IIC_VMACi16D / IIC_VMACi32D) rather than
  // taken as template arguments.  Defs are named after the source type.
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                           !strconcat(Dt,"16"),
                           v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                         !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp>;
}
3054
3055// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt,
                        Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32,
                OpcodeStr, Dt, IntOp> {
  // Adds the 8-bit source element form (v8i8 -> v8i16) to the defs inherited
  // from N3VLInt3_HS; it reuses itin16.
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                       !strconcat(Dt, "8"),
                       v8i16, v8i8, IntOp>;
}
3063
3064// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin,
                            string OpcodeStr, string Dt,
                            Intrinsic IntOp, SDNode ExtOp,
                            SDNode OpNode> {
  // One itinerary is shared by all three element sizes.  Each def forwards
  // IntOp, ExtOp and OpNode, in that order, to N3VLIntExtOp.
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin, OpcodeStr,
                           !strconcat(Dt, "8"),
                           v8i16, v8i8, IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin, OpcodeStr,
                           !strconcat(Dt, "16"),
                           v4i32, v4i16, IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin, OpcodeStr,
                           !strconcat(Dt, "32"),
                           v2i64, v2i32, IntOp, ExtOp, OpNode>;
}
3078
3079
3080// Neon Pairwise long 2-register intrinsics,
3081//   element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20,
                        bits<2> op17_16, bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt,
                        Intrinsic IntOp> {
  // Defs are named after the second (narrower-element) vector type; the
  // first type passed has half as many elements, each twice as wide.

  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i64, v4i32, IntOp>;
}
3101
3102
3103// Neon Pairwise long 2-register accumulate intrinsics,
3104//   element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20,
                         bits<2> op17_16, bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt,
                         Intrinsic IntOp> {
  // Defs are named after the second (narrower-element) vector type; the
  // first type passed has half as many elements, each twice as wide.

  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v4i32, IntOp>;
}
3124
3125
// Neon 2-register vector shift by immediate,
//   N2VShL_QHSD uses N2RegVShLFrm and N2VShR_QHSD uses N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin,
                       string OpcodeStr, string Dt, SDNode OpNode> {
  // All defs use the N2RegVShLFrm format with an i32imm shift operand.
  // For 8/16/32-bit elements the fifth argument is 0 and the leading bits of
  // imm6 (Inst{21-16}) are pinned; the 64-bit forms pass 1 instead and leave
  // imm6 unconstrained.

  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode>
    { let Inst{21-19} = 0b001; }  // imm6 = 001xxx
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode>
    { let Inst{21-20} = 0b01; }   // imm6 = 01xxxx
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode>
    { let Inst{21} = 0b1; }       // imm6 = 1xxxxx
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                                  // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode>
    { let Inst{21-19} = 0b001; }  // imm6 = 001xxx
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode>
    { let Inst{21-20} = 0b01; }   // imm6 = 01xxxx
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode>
    { let Inst{21} = 0b1; }       // imm6 = 1xxxxx
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                                  // imm6 = xxxxxx
}
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin,
                       string OpcodeStr, string Dt, SDNode OpNode> {
  // All defs use the N2RegVShRFrm format; the shift operand is the shr_immN
  // operand matching the element width.  For 8/16/32-bit elements the fifth
  // argument is 0 and the leading bits of imm6 (Inst{21-16}) are pinned; the
  // 64-bit forms pass 1 instead and leave imm6 unconstrained.

  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin,
                     shr_imm8, OpcodeStr, !strconcat(Dt, "8"),
                     v8i8, OpNode>
    { let Inst{21-19} = 0b001; }  // imm6 = 001xxx
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin,
                     shr_imm16, OpcodeStr, !strconcat(Dt, "16"),
                     v4i16, OpNode>
    { let Inst{21-20} = 0b01; }   // imm6 = 01xxxx
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin,
                     shr_imm32, OpcodeStr, !strconcat(Dt, "32"),
                     v2i32, OpNode>
    { let Inst{21} = 0b1; }       // imm6 = 1xxxxx
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin,
                     shr_imm64, OpcodeStr, !strconcat(Dt, "64"),
                     v1i64, OpNode>;
                                  // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin,
                     shr_imm8, OpcodeStr, !strconcat(Dt, "8"),
                     v16i8, OpNode>
    { let Inst{21-19} = 0b001; }  // imm6 = 001xxx
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin,
                     shr_imm16, OpcodeStr, !strconcat(Dt, "16"),
                     v8i16, OpNode>
    { let Inst{21-20} = 0b01; }   // imm6 = 01xxxx
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin,
                     shr_imm32, OpcodeStr, !strconcat(Dt, "32"),
                     v4i32, OpNode>
    { let Inst{21} = 0b1; }       // imm6 = 1xxxxx
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin,
                     shr_imm64, OpcodeStr, !strconcat(Dt, "64"),
                     v2i64, OpNode>;
                                  // imm6 = xxxxxx
}
3203
3204// Neon Shift-Accumulate vector operations,
3205//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt,
                         SDNode ShOp> {
  // The shift operand is the shr_immN operand matching the element width.
  // For 8/16/32-bit elements the fourth argument is 0 and the leading bits
  // of imm6 (Inst{21-16}) are pinned; the 64-bit forms pass 1 instead and
  // leave imm6 unconstrained.

  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8, OpcodeStr,
                        !strconcat(Dt, "8"), v8i8, ShOp>
    { let Inst{21-19} = 0b001; }  // imm6 = 001xxx
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i16, ShOp>
    { let Inst{21-20} = 0b01; }   // imm6 = 01xxxx
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i32, ShOp>
    { let Inst{21} = 0b1; }       // imm6 = 1xxxxx
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64, OpcodeStr,
                        !strconcat(Dt, "64"), v1i64, ShOp>;
                                  // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8, OpcodeStr,
                        !strconcat(Dt, "8"), v16i8, ShOp>
    { let Inst{21-19} = 0b001; }  // imm6 = 001xxx
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16, OpcodeStr,
                        !strconcat(Dt, "16"), v8i16, ShOp>
    { let Inst{21-20} = 0b01; }   // imm6 = 01xxxx
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32, OpcodeStr,
                        !strconcat(Dt, "32"), v4i32, ShOp>
    { let Inst{21} = 0b1; }       // imm6 = 1xxxxx
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64, OpcodeStr,
                        !strconcat(Dt, "64"), v2i64, ShOp>;
                                  // imm6 = xxxxxx
}
3242
// Neon Shift-Insert vector operations,
//   N2VShInsL_QHSD uses N2RegVShLFrm and N2VShInsR_QHSD uses N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8,
                          bit op4, string OpcodeStr> {
  // All defs use the N2RegVShLFrm format, an i32imm shift operand and the
  // NEONvsli node; the data type string is the bare element width.
  // For 8/16/32-bit elements the fourth argument is 0 and the leading bits
  // of imm6 (Inst{21-16}) are pinned; the 64-bit forms pass 1 instead and
  // leave imm6 unconstrained.

  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, N2RegVShLFrm,
                        OpcodeStr, "8", v8i8, NEONvsli>
    { let Inst{21-19} = 0b001; }  // imm6 = 001xxx
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, N2RegVShLFrm,
                        OpcodeStr, "16", v4i16, NEONvsli>
    { let Inst{21-20} = 0b01; }   // imm6 = 01xxxx
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, N2RegVShLFrm,
                        OpcodeStr, "32", v2i32, NEONvsli>
    { let Inst{21} = 0b1; }       // imm6 = 1xxxxx
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm, N2RegVShLFrm,
                        OpcodeStr, "64", v1i64, NEONvsli>;
                                  // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, N2RegVShLFrm,
                        OpcodeStr, "8", v16i8, NEONvsli>
    { let Inst{21-19} = 0b001; }  // imm6 = 001xxx
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, N2RegVShLFrm,
                        OpcodeStr, "16", v8i16, NEONvsli>
    { let Inst{21-20} = 0b01; }   // imm6 = 01xxxx
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, N2RegVShLFrm,
                        OpcodeStr, "32", v4i32, NEONvsli>
    { let Inst{21} = 0b1; }       // imm6 = 1xxxxx
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm, N2RegVShLFrm,
                        OpcodeStr, "64", v2i64, NEONvsli>;
                                  // imm6 = xxxxxx
}
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8,
                          bit op4, string OpcodeStr> {
  // All defs use the N2RegVShRFrm format and the NEONvsri node; the shift
  // operand is the shr_immN operand matching the element width, and the data
  // type string is the bare element width.
  // For 8/16/32-bit elements the fourth argument is 0 and the leading bits
  // of imm6 (Inst{21-16}) are pinned; the 64-bit forms pass 1 instead and
  // leave imm6 unconstrained.

  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8, N2RegVShRFrm,
                        OpcodeStr, "8", v8i8, NEONvsri>
    { let Inst{21-19} = 0b001; }  // imm6 = 001xxx
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16, N2RegVShRFrm,
                        OpcodeStr, "16", v4i16, NEONvsri>
    { let Inst{21-20} = 0b01; }   // imm6 = 01xxxx
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32, N2RegVShRFrm,
                        OpcodeStr, "32", v2i32, NEONvsri>
    { let Inst{21} = 0b1; }       // imm6 = 1xxxxx
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64, N2RegVShRFrm,
                        OpcodeStr, "64", v1i64, NEONvsri>;
                                  // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8, N2RegVShRFrm,
                        OpcodeStr, "8", v16i8, NEONvsri>
    { let Inst{21-19} = 0b001; }  // imm6 = 001xxx
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16, N2RegVShRFrm,
                        OpcodeStr, "16", v8i16, NEONvsri>
    { let Inst{21-20} = 0b01; }   // imm6 = 01xxxx
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32, N2RegVShRFrm,
                        OpcodeStr, "32", v4i32, NEONvsri>
    { let Inst{21} = 0b1; }       // imm6 = 1xxxxx
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64, N2RegVShRFrm,
                        OpcodeStr, "64", v2i64, NEONvsri>;
                                  // imm6 = xxxxxx
}
3318
3319// Neon Shift Long operations,
3320//   element sizes of 8, 16, 32 bits:
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8,
                      bit op7, bit op6, bit op4,
                      string OpcodeStr, string Dt, SDNode OpNode> {
  // Defs are named after the 128-bit result type; the data type suffix and
  // the pinned leading bits of imm6 (Inst{21-16}) follow the source element
  // width.
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "8"), v8i16, v8i8, OpNode>
    { let Inst{21-19} = 0b001; }  // imm6 = 001xxx
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "16"), v4i32, v4i16, OpNode>
    { let Inst{21-20} = 0b01; }   // imm6 = 01xxxx
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>
    { let Inst{21} = 0b1; }       // imm6 = 1xxxxx
}
3336
3337// Neon Shift Narrow operations,
3338//   element sizes of 16, 32, 64 bits:
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8,
                      bit op7, bit op6, bit op4,
                      InstrItinClass itin,
                      string OpcodeStr, string Dt, SDNode OpNode> {
  // Defs are named after the 64-bit result type.  The data type suffix is
  // the source element width, while the shr_immN operand and the pinned
  // leading bits of imm6 (Inst{21-16}) follow the result element width.
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "16"), v8i8, v8i16, shr_imm8, OpNode>
    { let Inst{21-19} = 0b001; }  // imm6 = 001xxx
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v4i16, v4i32, shr_imm16, OpNode>
    { let Inst{21-20} = 0b01; }   // imm6 = 01xxxx
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "64"), v2i32, v2i64, shr_imm32, OpNode>
    { let Inst{21} = 0b1; }       // imm6 = 1xxxxx
}
3358
3359//===----------------------------------------------------------------------===//
3360// Instruction Definitions.
3361//===----------------------------------------------------------------------===//
3362
3363// Vector Add Operations.
//   Each record instantiates an encoding class with its opcode bits,
//   itinerary (where the class takes one), mnemonic, data-type prefix and the
//   DAG operator or intrinsic it selects.  Signed/unsigned pairs differ in
//   the first bit argument and in the "s"/"u" suffix (plus sext/zext for the
//   long and wide forms).
//   NOTE(review): the final 0/1 argument is presumably the class's
//   commutable flag (1 everywhere here except VADDW) -- confirm against the
//   class definitions.
3364
3365//   VADD     : Vector Add (integer and floating-point)
3366defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
3367                         add, 1>;
3368def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
3369                     v2f32, v2f32, fadd, 1>;
3370def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
3371                     v4f32, v4f32, fadd, 1>;
3372//   VADDL    : Vector Add Long (Q = D + D)
3373defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
3374                            "vaddl", "s", add, sext, 1>;
3375defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
3376                            "vaddl", "u", add, zext, 1>;
3377//   VADDW    : Vector Add Wide (Q = Q + D)
3378defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
3379defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
3380//   VHADD    : Vector Halving Add
3381defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
3382                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
3383                           "vhadd", "s", int_arm_neon_vhadds, 1>;
3384defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
3385                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
3386                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
3387//   VRHADD   : Vector Rounding Halving Add
3388defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
3389                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
3390                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
3391defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
3392                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
3393                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
3394//   VQADD    : Vector Saturating Add
3395defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
3396                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
3397                            "vqadd", "s", int_arm_neon_vqadds, 1>;
3398defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
3399                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
3400                            "vqadd", "u", int_arm_neon_vqaddu, 1>;
3401//   VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
3402defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
3403                            int_arm_neon_vaddhn, 1>;
3404//   VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
3405defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
3406                            int_arm_neon_vraddhn, 1>;
3407
3408// Vector Multiply Operations.
3409
3410//   VMUL     : Vector Multiply (integer, polynomial and floating-point)
//   Integer forms select `mul`, the polynomial ("p8") forms select the
//   int_arm_neon_vmulp intrinsic, and the f32 forms select `fmul`.  The
//   *sl* records are the by-lane variants.
3411defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
3412                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
3413def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
3414                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
3415def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
3416                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
3417def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
3418                     v2f32, v2f32, fmul, 1>;
3419def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
3420                     v4f32, v4f32, fmul, 1>;
3421defm VMULsl   : N3VSL_HS<0b1000, "vmul", "i", mul>;
3422def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
3423def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
3424                       v2f32, fmul>;
3425
// The patterns below match a Q-register multiply whose second operand is a
// NEONvduplane of a Q register and select the by-lane instruction instead.
// The instruction takes a D-register lane source, so EXTRACT_SUBREG pulls a
// D subregister chosen by DSubReg_*_reg from $lane, and SubReg_*_lane supplies
// the lane operand (presumably the index re-expressed within that D register).
3426def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
3427                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
3428          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
3429                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
3430                                      (DSubReg_i16_reg imm:$lane))),
3431                              (SubReg_i16_lane imm:$lane)))>;
3432def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
3433                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
3434          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
3435                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
3436                                      (DSubReg_i32_reg imm:$lane))),
3437                              (SubReg_i32_lane imm:$lane)))>;
// The f32 case reuses the i32 subregister/lane transforms (same lane width).
3438def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
3439                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
3440          (v4f32 (VMULslfq (v4f32 QPR:$src1),
3441                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
3442                                   (DSubReg_i32_reg imm:$lane))),
3443                           (SubReg_i32_lane imm:$lane)))>;
3444
3445//   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
//   Defined for 16- and 32-bit elements only (the _HS multiclasses), in both
//   vector-by-vector and by-lane (sl) forms, all selecting
//   int_arm_neon_vqdmulh.
3446defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
3447                          IIC_VMULi16Q, IIC_VMULi32Q,
3448                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
3449defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
3450                            IIC_VMULi16Q, IIC_VMULi32Q,
3451                            "vqdmulh", "s",  int_arm_neon_vqdmulh>;
// Q-register intrinsic call with a NEONvduplane of a Q register as second
// operand: select the by-lane record, passing the D subregister picked by
// DSubReg_*_reg and the lane operand produced by SubReg_*_lane.
3452def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
3453                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
3454                                                            imm:$lane)))),
3455          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
3456                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
3457                                         (DSubReg_i16_reg imm:$lane))),
3458                                 (SubReg_i16_lane imm:$lane)))>;
3459def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
3460                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
3461                                                            imm:$lane)))),
3462          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
3463                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
3464                                         (DSubReg_i32_reg imm:$lane))),
3465                                 (SubReg_i32_lane imm:$lane)))>;
3466
3467//   VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
//   Same shape as the VQDMULH records: 16/32-bit elements, vector and by-lane
//   (sl) forms, here selecting int_arm_neon_vqrdmulh.
3468defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
3469                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
3470                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
3471defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
3472                              IIC_VMULi16Q, IIC_VMULi32Q,
3473                              "vqrdmulh", "s",  int_arm_neon_vqrdmulh>;
// Q-register intrinsic call with a NEONvduplane of a Q register as second
// operand: select the by-lane record, passing the D subregister picked by
// DSubReg_*_reg and the lane operand produced by SubReg_*_lane.
3474def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
3475                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
3476                                                             imm:$lane)))),
3477          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
3478                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
3479                                          (DSubReg_i16_reg imm:$lane))),
3480                                  (SubReg_i16_lane imm:$lane)))>;
3481def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
3482                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
3483                                                             imm:$lane)))),
3484          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
3485                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
3486                                          (DSubReg_i32_reg imm:$lane))),
3487                                  (SubReg_i32_lane imm:$lane)))>;
3488
3489//   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
//   Signed/unsigned integer forms select the NEONvmulls / NEONvmullu nodes;
//   the polynomial form is a single v8i16 <- v8i8 record selecting
//   int_arm_neon_vmullp.  The sls/slu records are the by-lane variants.
3490defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
3491                         "vmull", "s", NEONvmulls, 1>;
3492defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
3493                         "vmull", "u", NEONvmullu, 1>;
3494def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
3495                        v8i16, v8i8, int_arm_neon_vmullp, 1>;
3496defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
3497defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
3498
3499//   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
//   Signed only ("s"), 16/32-bit source elements; vector and by-lane forms
//   both select int_arm_neon_vqdmull.
3500defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
3501                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
3502defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
3503                             "vqdmull", "s", int_arm_neon_vqdmull>;
3504
3505// Vector Multiply-Accumulate and Multiply-Subtract Operations.
3506
3507//   VMLA     : Vector Multiply Accumulate (integer and floating-point)
//   Integer records combine the multiply with `add`.  The f32 records use the
//   fmul_su / fadd_mlx fragments and are additionally gated by
//   Requires<[HasNEON, UseFPVMLx]>.
3508defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
3509                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
3510def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
3511                          v2f32, fmul_su, fadd_mlx>,
3512                Requires<[HasNEON, UseFPVMLx]>;
3513def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
3514                          v4f32, fmul_su, fadd_mlx>,
3515                Requires<[HasNEON, UseFPVMLx]>;
3516defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
3517                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
3518def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
3519                            v2f32, fmul_su, fadd_mlx>,
3520                Requires<[HasNEON, UseFPVMLx]>;
3521def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
3522                            v4f32, v2f32, fmul_su, fadd_mlx>,
3523                Requires<[HasNEON, UseFPVMLx]>;
3524
// Q-register accumulate of a multiply by a NEONvduplane of a Q register:
// select the by-lane record.  $src1 is the accumulator, $src2 the vector
// multiplicand; the lane source is the D subregister of $src3 chosen by
// DSubReg_*_reg, with the lane operand produced by SubReg_*_lane.
3525def : Pat<(v8i16 (add (v8i16 QPR:$src1),
3526                  (mul (v8i16 QPR:$src2),
3527                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
3528          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
3529                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
3530                                      (DSubReg_i16_reg imm:$lane))),
3531                              (SubReg_i16_lane imm:$lane)))>;
3532
3533def : Pat<(v4i32 (add (v4i32 QPR:$src1),
3534                  (mul (v4i32 QPR:$src2),
3535                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
3536          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
3537                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
3538                                      (DSubReg_i32_reg imm:$lane))),
3539                              (SubReg_i32_lane imm:$lane)))>;
3540
// f32 variant: same shape, but matched on fadd_mlx/fmul_su and carrying the
// same Requires<> predicate list as the VMLAslfq record it selects.
3541def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
3542                  (fmul_su (v4f32 QPR:$src2),
3543                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
3544          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
3545                           (v4f32 QPR:$src2),
3546                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
3547                                   (DSubReg_i32_reg imm:$lane))),
3548                           (SubReg_i32_lane imm:$lane)))>,
3549          Requires<[HasNEON, UseFPVMLx]>;
3550
3551//   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
//   Signed/unsigned forms pair the long-multiply node (NEONvmulls/NEONvmullu)
//   with `add`; the sls/slu records are the by-lane variants.
3552defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
3553                              "vmlal", "s", NEONvmulls, add>;
3554defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
3555                              "vmlal", "u", NEONvmullu, add>;
3556
3557defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
3558defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
3559
3560//   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
//   Signed only; vector and by-lane forms both select int_arm_neon_vqdmlal.
3561defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
3562                            "vqdmlal", "s", int_arm_neon_vqdmlal>;
3563defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
3564
3565//   VMLS     : Vector Multiply Subtract (integer and floating-point)
//   Mirrors the VMLA records with `sub` / fsub_mlx in place of `add` /
//   fadd_mlx.  The f32 records are gated by Requires<[HasNEON, UseFPVMLx]>.
3566defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
3567                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
3568def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
3569                          v2f32, fmul_su, fsub_mlx>,
3570                Requires<[HasNEON, UseFPVMLx]>;
3571def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
3572                          v4f32, fmul_su, fsub_mlx>,
3573                Requires<[HasNEON, UseFPVMLx]>;
3574defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
3575                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
3576def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
3577                            v2f32, fmul_su, fsub_mlx>,
3578                Requires<[HasNEON, UseFPVMLx]>;
3579def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
3580                            v4f32, v2f32, fmul_su, fsub_mlx>,
3581                Requires<[HasNEON, UseFPVMLx]>;
3582
// Q-register subtract of a multiply by a NEONvduplane of a Q register:
// select the by-lane record.  $src1 is the minuend/accumulator, $src2 the
// vector multiplicand; the lane source is the D subregister of $src3 chosen
// by DSubReg_*_reg, with the lane operand produced by SubReg_*_lane.
3583def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
3584                  (mul (v8i16 QPR:$src2),
3585                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
3586          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
3587                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
3588                                      (DSubReg_i16_reg imm:$lane))),
3589                              (SubReg_i16_lane imm:$lane)))>;
3590
3591def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
3592                  (mul (v4i32 QPR:$src2),
3593                     (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
3594          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
3595                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
3596                                      (DSubReg_i32_reg imm:$lane))),
3597                              (SubReg_i32_lane imm:$lane)))>;
3598
// f32 variant: matched on fsub_mlx/fmul_su and carrying the same Requires<>
// predicate list as the VMLSslfq record it selects.
3599def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
3600                  (fmul_su (v4f32 QPR:$src2),
3601                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
3602          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
3603                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
3604                                   (DSubReg_i32_reg imm:$lane))),
3605                           (SubReg_i32_lane imm:$lane)))>,
3606          Requires<[HasNEON, UseFPVMLx]>;
3607
3608//   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
//   Signed/unsigned forms pair the long-multiply node (NEONvmulls/NEONvmullu)
//   with `sub`; the sls/slu records are the by-lane variants.
3609defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
3610                              "vmlsl", "s", NEONvmulls, sub>;
3611defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
3612                              "vmlsl", "u", NEONvmullu, sub>;
3613
3614defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
3615defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
3616
3617//   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
//   Signed only; vector and by-lane forms both select int_arm_neon_vqdmlsl.
//   The by-lane opcode argument is spelled with all four digits (0b0111),
//   matching the sibling VQDMLALsl record (0b0011) and every other op11_8
//   argument in this section; the numeric value (7) is unchanged.
3618defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
3619                            "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
3620defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
3621
3622// Vector Subtract Operations.
//   Mirrors the Vector Add section with `sub`/`fsub` and the subtract
//   intrinsics.  Every record here passes 0 as its final argument.
//   NOTE(review): that argument is presumably the commutable flag (off,
//   since subtraction is not commutative) -- confirm against the classes.
3623
3624//   VSUB     : Vector Subtract (integer and floating-point)
3625defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
3626                         "vsub", "i", sub, 0>;
3627def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
3628                     v2f32, v2f32, fsub, 0>;
3629def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
3630                     v4f32, v4f32, fsub, 0>;
3631//   VSUBL    : Vector Subtract Long (Q = D - D)
3632defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
3633                            "vsubl", "s", sub, sext, 0>;
3634defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
3635                            "vsubl", "u", sub, zext, 0>;
3636//   VSUBW    : Vector Subtract Wide (Q = Q - D)
3637defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
3638defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
3639//   VHSUB    : Vector Halving Subtract
3640defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
3641                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
3642                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
3643defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
3644                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
3645                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
3646//   VQSUB    : Vector Saturating Subtract
3647defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
3648                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
3649                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
3650defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
3651                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
3652                            "vqsub", "u", int_arm_neon_vqsubu, 0>;
3653//   VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
3654defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
3655                            int_arm_neon_vsubhn, 0>;
3656//   VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
3657defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
3658                            int_arm_neon_vrsubhn, 0>;
3659
3660// Vector Comparisons.
//   Register-register compares select the NEONvc* nodes; the f32 records
//   list an integer vector type (v2i32/v4i32) alongside the f32 operand type.
//   The *z records are the compare-against-zero forms: they use the assembly
//   operand string "$Vd, $Vm, #0" and select the NEONvc*z nodes.  In this
//   section "less than"/"less or equal" exist only as compare-against-zero
//   records (VCLTz, VCLEz).
3661
3662//   VCEQ     : Vector Compare Equal
3663defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
3664                        IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
3665def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
3666                     NEONvceq, 1>;
3667def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
3668                     NEONvceq, 1>;
3669
3670defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
3671                            "$Vd, $Vm, #0", NEONvceqz>;
3672
3673//   VCGE     : Vector Compare Greater Than or Equal
3674defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
3675                        IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
3676defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
3677                        IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
3678def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
3679                     NEONvcge, 0>;
3680def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
3681                     NEONvcge, 0>;
3682
3683defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
3684                            "$Vd, $Vm, #0", NEONvcgez>;
3685defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
3686                            "$Vd, $Vm, #0", NEONvclez>;
3687
3688//   VCGT     : Vector Compare Greater Than
3689defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
3690                        IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
3691defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
3692                        IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
3693def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
3694                     NEONvcgt, 0>;
3695def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
3696                     NEONvcgt, 0>;
3697
3698defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
3699                            "$Vd, $Vm, #0", NEONvcgtz>;
3700defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
3701                            "$Vd, $Vm, #0", NEONvcltz>;
3702
3703//   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
//   f32 only; the D and Q forms select separate intrinsics (…d / …q).
3704def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
3705                        "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
3706def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
3707                        "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
3708//   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
3709def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
3710                        "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
3711def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
3712                        "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
3713//   VTST     : Vector Test Bits
//   The data-type prefix is the empty string, so only what the multiclass
//   itself appends reaches the type suffix.
3714defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
3715                        IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
3716
3717// Vector Bitwise Operations.
3718
// vnotd / vnotq: pattern fragments for bitwise NOT, expressed as an xor with
// an all-ones vector (NEONimmAllOnesV, bitconverted from v8i8 for the D form
// and from v16i8 for the Q form).  Used by the VBIC, VORN, VMVN and VBSL
// patterns in this section.
3719def vnotd : PatFrag<(ops node:$in),
3720                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
3721def vnotq : PatFrag<(ops node:$in),
3722                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
3723
3724
// The register-register bitwise records below are written against
// v2i32 (D) / v4i32 (Q) and take no data-type suffix string.
3725//   VAND     : Vector Bitwise AND
3726def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
3727                      v2i32, v2i32, and, 1>;
3728def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
3729                      v4i32, v4i32, and, 1>;
3730
3731//   VEOR     : Vector Bitwise Exclusive OR
3732def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
3733                      v2i32, v2i32, xor, 1>;
3734def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
3735                      v4i32, v4i32, xor, 1>;
3736
3737//   VORR     : Vector Bitwise OR
3738def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
3739                      v2i32, v2i32, or, 1>;
3740def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
3741                      v4i32, v4i32, or, 1>;
3742
//   VORR (immediate): OR a modified immediate into the destination register.
//   The register input $src is tied to the output ("$src = $Vd"), so only
//   $Vd and the immediate appear in the assembly string.  The `?` entries in
//   the braced bit list are left unset by the template argument and are
//   filled from the matching SIMM bits by the `let Inst{…}` in each body
//   (one bit, Inst{9}, for i16; two bits, Inst{10-9}, for i32).
3743def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
3744                          (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
3745                          IIC_VMOVImm,
3746                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
3747                          [(set DPR:$Vd,
3748                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
3749  let Inst{9} = SIMM{9};
3750}
3751
3752def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
3753                          (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
3754                          IIC_VMOVImm,
3755                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
3756                          [(set DPR:$Vd,
3757                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
3758  let Inst{10-9} = SIMM{10-9};
3759}
3760
// Q-register counterparts of the two records above.
3761def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
3762                          (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
3763                          IIC_VMOVImm,
3764                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
3765                          [(set QPR:$Vd,
3766                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
3767  let Inst{9} = SIMM{9};
3768}
3769
3770def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
3771                          (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
3772                          IIC_VMOVImm,
3773                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
3774                          [(set QPR:$Vd,
3775                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
3776  let Inst{10-9} = SIMM{10-9};
3777}
3778
3779
3780//   VBIC     : Vector Bitwise Bit Clear (AND NOT)
//   Register form: selects `and Vn, (not Vm)`, with the NOT expressed through
//   the vnotd / vnotq fragments.
3781def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
3782                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
3783                     "vbic", "$Vd, $Vn, $Vm", "",
3784                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
3785                                                 (vnotd DPR:$Vm))))]>;
3786def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
3787                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
3788                     "vbic", "$Vd, $Vn, $Vm", "",
3789                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
3790                                                 (vnotq QPR:$Vm))))]>;
3791
// Immediate forms: select NEONvbicImm.  As with the VORR immediate records,
// $src is tied to $Vd and the `?` bits of the braced list are filled from
// SIMM by the `let Inst{…}` in each body.
3792def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
3793                          (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
3794                          IIC_VMOVImm,
3795                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
3796                          [(set DPR:$Vd,
3797                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
3798  let Inst{9} = SIMM{9};
3799}
3800
3801def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
3802                          (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
3803                          IIC_VMOVImm,
3804                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
3805                          [(set DPR:$Vd,
3806                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
3807  let Inst{10-9} = SIMM{10-9};
3808}
3809
3810def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
3811                          (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
3812                          IIC_VMOVImm,
3813                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
3814                          [(set QPR:$Vd,
3815                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
3816  let Inst{9} = SIMM{9};
3817}
3818
3819def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
3820                          (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
3821                          IIC_VMOVImm,
3822                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
3823                          [(set QPR:$Vd,
3824                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
3825  let Inst{10-9} = SIMM{10-9};
3826}
3827
3828//   VORN     : Vector Bitwise OR NOT
//   Selects `or Vn, (not Vm)`, with the NOT expressed through the
//   vnotd / vnotq fragments.  Register form only in this section.
3829def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
3830                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
3831                     "vorn", "$Vd, $Vn, $Vm", "",
3832                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
3833                                                (vnotd DPR:$Vm))))]>;
3834def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
3835                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
3836                     "vorn", "$Vd, $Vn, $Vm", "",
3837                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
3838                                                (vnotq QPR:$Vm))))]>;
3839
3840//   VMVN     : Vector Bitwise NOT (Immediate)
//   The four records take only the immediate as input (no register source,
//   no tied operand) and select NEONvmvnImm.  All are marked
//   isReMaterializable by the enclosing `let`.  The i16 records fill one open
//   bit (Inst{9}) from SIMM; the i32 records leave all four bits of the
//   braced list open and fill Inst{11-8} from SIMM.
3841
3842let isReMaterializable = 1 in {
3843
3844def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
3845                         (ins nModImm:$SIMM), IIC_VMOVImm,
3846                         "vmvn", "i16", "$Vd, $SIMM", "",
3847                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
3848  let Inst{9} = SIMM{9};
3849}
3850
3851def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
3852                         (ins nModImm:$SIMM), IIC_VMOVImm,
3853                         "vmvn", "i16", "$Vd, $SIMM", "",
3854                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
3855  let Inst{9} = SIMM{9};
3856}
3857
3858def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
3859                         (ins nModImm:$SIMM), IIC_VMOVImm,
3860                         "vmvn", "i32", "$Vd, $SIMM", "",
3861                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
3862  let Inst{11-8} = SIMM{11-8};
3863}
3864
3865def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
3866                         (ins nModImm:$SIMM), IIC_VMOVImm,
3867                         "vmvn", "i32", "$Vd, $SIMM", "",
3868                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
3869  let Inst{11-8} = SIMM{11-8};
3870}
3871}
3872
3873//   VMVN     : Vector Bitwise NOT
//   Register form: selects the vnotd / vnotq fragments directly.
//   NOTE(review): VMVNq uses the same IIC_VSUBiD itinerary as the D form;
//   confirm whether a Q itinerary (e.g. IIC_VSUBiQ) was intended.
3874def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
3875                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
3876                     "vmvn", "$Vd, $Vm", "",
3877                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
3878def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
3879                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
3880                     "vmvn", "$Vd, $Vm", "",
3881                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
// These standalone patterns appear to restate the selection already given in
// the two instruction patterns above (same fragment, same result types).
3882def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
3883def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
3884
3885//   VBSL     : Vector Bitwise Select
//   Three inputs; $src1 is tied to the destination ("$src1 = $Vd") and is the
//   first operand of the NEONvbsl node.  Only $Vd, $Vn, $Vm are printed.
3886def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
3887                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
3888                     N3RegFrm, IIC_VCNTiD,
3889                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
3890                     [(set DPR:$Vd,
3891                           (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
3892
// Also select VBSL for the open-coded form (Vn & Vd) | (Vm & ~Vd), where the
// value named $Vd in the source pattern becomes the tied first operand.
3893def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
3894                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
3895          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
3896
3897def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
3898                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
3899                     N3RegFrm, IIC_VCNTiQ,
3900                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
3901                     [(set QPR:$Vd,
3902                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
3903
// Q-register version of the open-coded select pattern.
3904def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
3905                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
3906          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
3907
3908//   VBIF     : Vector Bitwise Insert if False
3909//              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
3910// FIXME: This instruction's encoding MAY NOT BE correct.
// Same operand layout as VBSL ($src1 tied to $Vd), but with an empty pattern
// list, so instruction selection never produces these records.
3911def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
3912                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
3913                     N3RegFrm, IIC_VBINiD,
3914                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
3915                     [/* For disassembly only; pattern left blank */]>;
3916def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
3917                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
3918                     N3RegFrm, IIC_VBINiQ,
3919                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
3920                     [/* For disassembly only; pattern left blank */]>;
3921
3922//   VBIT     : Vector Bitwise Insert if True
3923//              like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
3924// FIXME: This instruction's encoding MAY NOT BE correct.
// Same operand layout as VBSL ($src1 tied to $Vd), but with an empty pattern
// list, so instruction selection never produces these records.
3925def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
3926                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
3927                     N3RegFrm, IIC_VBINiD,
3928                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
3929                     [/* For disassembly only; pattern left blank */]>;
3930def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
3931                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
3932                     N3RegFrm, IIC_VBINiQ,
3933                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
3934                     [/* For disassembly only; pattern left blank */]>;
3935
3936// VBIT/VBIF have no selection patterns yet; the definitions above exist for
3937// disassembly only.  The TwoAddress pass will not go looking for equivalent
3938// operations with different register constraints; it just inserts copies.
3939
3940// Vector Absolute Differences.
3941
3942//   VABD     : Vector Absolute Difference
// Signed and unsigned integer forms are expanded by the N3VInt_QHS multiclass
// from int_arm_neon_vabds / int_arm_neon_vabdu.  The trailing 1 is presumably
// the commutable flag of the class (defined outside this chunk) -- TODO confirm.
3943defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
3944                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
3945                           "vabd", "s", int_arm_neon_vabds, 1>;
3946defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
3947                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
3948                           "vabd", "u", int_arm_neon_vabdu, 1>;
// The f32 forms (v2f32 on D, v4f32 on Q) reuse the int_arm_neon_vabds
// intrinsic.
3949def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
3950                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
3951def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
3952                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
3953
3954//   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
// Both the signed and the unsigned form pass zext as the extension node applied
// around the vabds/vabdu intrinsic (presumably because an absolute difference
// is never negative -- TODO confirm against N3VLIntExt_QHS).
3955defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
3956                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
3957defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
3958                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
3959
3960//   VABA     : Vector Absolute Difference and Accumulate
// Built from the vabds/vabdu intrinsic combined with the `add` node.
3961defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
3962                             "vaba", "s", int_arm_neon_vabds, add>;
3963defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
3964                             "vaba", "u", int_arm_neon_vabdu, add>;
3965
3966//   VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
// Long variant: the vabds/vabdu result goes through zext (for both signednesses)
// and is then combined with `add`.
3967defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
3968                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
3969defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
3970                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;
3971
3972// Vector Maximum and Minimum.
3973
3974//   VMAX     : Vector Maximum
// Integer forms come from N3VInt_QHS with int_arm_neon_vmaxs / vmaxu; the f32
// forms (v2f32 / v4f32) reuse int_arm_neon_vmaxs.
3975defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
3976                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
3977                           "vmax", "s", int_arm_neon_vmaxs, 1>;
3978defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
3979                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
3980                           "vmax", "u", int_arm_neon_vmaxu, 1>;
3981def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
3982                        "vmax", "f32",
3983                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
3984def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
3985                        "vmax", "f32",
3986                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;
3987
3988//   VMIN     : Vector Minimum
// Integer forms come from N3VInt_QHS with int_arm_neon_vmins / vminu; the f32
// forms (v2f32 / v4f32) reuse int_arm_neon_vmins.
3989defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
3990                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
3991                           "vmin", "s", int_arm_neon_vmins, 1>;
3992defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
3993                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
3994                           "vmin", "u", int_arm_neon_vminu, 1>;
3995def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
3996                        "vmin", "f32",
3997                        v2f32, v2f32, int_arm_neon_vmins, 1>;
3998def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
3999                        "vmin", "f32",
4000                        v4f32, v4f32, int_arm_neon_vmins, 1>;
4001
4002// Vector Pairwise Operations.
4003
4004//   VPADD    : Vector Pairwise Add
// Only D-register (N3VDInt) forms are defined here.  All four element types
// share the int_arm_neon_vpadd intrinsic.  The trailing 0 differs from the 1
// used by the element-wise ops above; presumably it marks the operation as
// non-commutable -- TODO confirm against N3VDInt.
4005def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
4006                        "vpadd", "i8",
4007                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
4008def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
4009                        "vpadd", "i16",
4010                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
4011def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
4012                        "vpadd", "i32",
4013                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
4014def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
4015                        IIC_VPBIND, "vpadd", "f32",
4016                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
4017
4018//   VPADDL   : Vector Pairwise Add Long
// Signed/unsigned variants expanded by N2VPLInt_QHS from the
// int_arm_neon_vpaddls / int_arm_neon_vpaddlu intrinsics.
4019defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
4020                             int_arm_neon_vpaddls>;
4021defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
4022                             int_arm_neon_vpaddlu>;
4023
4024//   VPADAL   : Vector Pairwise Add and Accumulate Long
// Signed/unsigned variants expanded by N2VPLInt2_QHS from the
// int_arm_neon_vpadals / int_arm_neon_vpadalu intrinsics.
4025defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
4026                              int_arm_neon_vpadals>;
4027defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
4028                              int_arm_neon_vpadalu>;
4029
4030//   VPMAX    : Vector Pairwise Maximum
// D-register forms only.  Signed types use int_arm_neon_vpmaxs, unsigned types
// int_arm_neon_vpmaxu, and the f32 form reuses int_arm_neon_vpmaxs.
4031def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4032                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
4033def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4034                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
4035def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4036                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
4037def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4038                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
4039def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4040                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
4041def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4042                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
4043def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
4044                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
4045
4046//   VPMIN    : Vector Pairwise Minimum
// D-register forms only.  Signed types use int_arm_neon_vpmins, unsigned types
// int_arm_neon_vpminu, and the f32 form reuses int_arm_neon_vpmins.
4047def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4048                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
4049def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4050                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
4051def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4052                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
4053def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4054                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
4055def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4056                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
4057def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4058                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
4059def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
4060                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
4061
4062// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
4063
4064//   VRECPE   : Vector Reciprocal Estimate
// The u32 forms (v2i32 / v4i32) and the f32 forms (v2f32 / v4f32) all map to
// the same int_arm_neon_vrecpe intrinsic; they differ in the value type and
// in the five-bit field (0b01000 for u32, 0b01010 for f32).
4065def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
4066                        IIC_VUNAD, "vrecpe", "u32",
4067                        v2i32, v2i32, int_arm_neon_vrecpe>;
4068def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
4069                        IIC_VUNAQ, "vrecpe", "u32",
4070                        v4i32, v4i32, int_arm_neon_vrecpe>;
4071def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
4072                        IIC_VUNAD, "vrecpe", "f32",
4073                        v2f32, v2f32, int_arm_neon_vrecpe>;
4074def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
4075                        IIC_VUNAQ, "vrecpe", "f32",
4076                        v4f32, v4f32, int_arm_neon_vrecpe>;
4077
4078//   VRECPS   : Vector Reciprocal Step
// f32 only, as a three-register operation on D (v2f32) or Q (v4f32), selected
// from int_arm_neon_vrecps.
4079def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
4080                        IIC_VRECSD, "vrecps", "f32",
4081                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
4082def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
4083                        IIC_VRECSQ, "vrecps", "f32",
4084                        v4f32, v4f32, int_arm_neon_vrecps, 1>;
4085
4086//   VRSQRTE  : Vector Reciprocal Square Root Estimate
// The u32 and f32 forms all map to int_arm_neon_vrsqrte; they differ in the
// value type and in the five-bit field (0b01001 for u32, 0b01011 for f32).
4087def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
4088                         IIC_VUNAD, "vrsqrte", "u32",
4089                         v2i32, v2i32, int_arm_neon_vrsqrte>;
4090def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
4091                         IIC_VUNAQ, "vrsqrte", "u32",
4092                         v4i32, v4i32, int_arm_neon_vrsqrte>;
4093def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
4094                         IIC_VUNAD, "vrsqrte", "f32",
4095                         v2f32, v2f32, int_arm_neon_vrsqrte>;
4096def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
4097                         IIC_VUNAQ, "vrsqrte", "f32",
4098                         v4f32, v4f32, int_arm_neon_vrsqrte>;
4099
4100//   VRSQRTS  : Vector Reciprocal Square Root Step
// f32 only, as a three-register operation on D (v2f32) or Q (v4f32), selected
// from int_arm_neon_vrsqrts.
4101def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
4102                        IIC_VRECSD, "vrsqrts", "f32",
4103                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
4104def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
4105                        IIC_VRECSQ, "vrsqrts", "f32",
4106                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
4107
4108// Vector Shifts.
4109
4110//   VSHL     : Vector Shift
// Register-shift forms, using the N3RegVShFrm format and the
// int_arm_neon_vshifts / int_arm_neon_vshiftu intrinsics.
4111defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
4112                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
4113                            "vshl", "s", int_arm_neon_vshifts>;
4114defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
4115                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
4116                            "vshl", "u", int_arm_neon_vshiftu>;
4117
4118//   VSHL     : Vector Shift Left (Immediate)
// Immediate form, selected from the NEONvshl node with the "i" type prefix.
4119defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
4120
4121//   VSHR     : Vector Shift Right (Immediate)
// Signed and unsigned variants select NEONvshrs and NEONvshru respectively.
4122defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>;
4123defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>;
4124
4125//   VSHLL    : Vector Shift Left Long
// Signed and unsigned variants select NEONvshlls and NEONvshllu respectively.
4126defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
4127defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
4128
4129//   VSHLL    : Vector Shift Left Long (with maximum shift count)
// Thin wrapper over N2VLSh: every argument except op21_16 is forwarded
// unchanged.  op21_16 is written directly into Inst{21-16}, and decoding is
// routed to the custom DecodeVSHLMaxInstruction method.
4130class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
4131                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
4132                ValueType OpTy, SDNode OpNode>
4133  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
4134           ResTy, OpTy, OpNode> {
4135  let Inst{21-16} = op21_16;
4136  let DecoderMethod = "DecodeVSHLMaxInstruction";
4137}
// Instances of N2VLShMax for each widening step (v8i8->v8i16, v4i16->v4i32,
// v2i32->v2i64), all selected from NEONvshlli.  They differ only in the fixed
// Inst{21-16} value and the element-type string.
4138def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
4139                          v8i16, v8i8, NEONvshlli>;
4140def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
4141                          v4i32, v4i16, NEONvshlli>;
4142def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
4143                          v2i64, v2i32, NEONvshlli>;
4144
4145//   VSHRN    : Vector Shift Right and Narrow
// Expanded by N2VNSh_HSD and selected from the NEONvshrn node.
4146defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
4147                           NEONvshrn>;
4148
4149//   VRSHL    : Vector Rounding Shift
// Register-shift forms selected from int_arm_neon_vrshifts / vrshiftu.
4150defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
4151                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4152                            "vrshl", "s", int_arm_neon_vrshifts>;
4153defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
4154                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4155                            "vrshl", "u", int_arm_neon_vrshiftu>;
4156//   VRSHR    : Vector Rounding Shift Right
// Immediate forms selected from NEONvrshrs / NEONvrshru.
4157defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>;
4158defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>;
4159
4160//   VRSHRN   : Vector Rounding Shift Right and Narrow
// Same multiclass as VSHRN but with a different bit argument and the
// NEONvrshrn node.
4161defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
4162                           NEONvrshrn>;
4163
4164//   VQSHL    : Vector Saturating Shift
// Register-shift forms selected from int_arm_neon_vqshifts / vqshiftu.
4165defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
4166                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4167                            "vqshl", "s", int_arm_neon_vqshifts>;
4168defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
4169                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4170                            "vqshl", "u", int_arm_neon_vqshiftu>;
4171//   VQSHL    : Vector Saturating Shift Left (Immediate)
// Immediate forms selected from NEONvqshls / NEONvqshlu.
4172defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
4173defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
4174
4175//   VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
// Printed as "vqshlu" with an "s" type prefix; selected from NEONvqshlsu.
4176defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
4177
4178//   VQSHRN   : Vector Saturating Shift Right and Narrow
// Signed and unsigned variants select NEONvqshrns and NEONvqshrnu.
4179defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
4180                           NEONvqshrns>;
4181defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
4182                           NEONvqshrnu>;
4183
4184//   VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
// Printed with an "s" type prefix; selected from NEONvqshrnsu.
4185defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
4186                           NEONvqshrnsu>;
4187
4188//   VQRSHL   : Vector Saturating Rounding Shift
// Register-shift forms selected from int_arm_neon_vqrshifts / vqrshiftu.
4189defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
4190                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4191                            "vqrshl", "s", int_arm_neon_vqrshifts>;
4192defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
4193                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4194                            "vqrshl", "u", int_arm_neon_vqrshiftu>;
4195
4196//   VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
// Signed and unsigned variants select NEONvqrshrns and NEONvqrshrnu.
4197defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
4198                           NEONvqrshrns>;
4199defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
4200                           NEONvqrshrnu>;
4201
4202//   VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
// Printed with an "s" type prefix; selected from NEONvqrshrnsu.
4203defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
4204                           NEONvqrshrnsu>;
4205
4206//   VSRA     : Vector Shift Right and Accumulate
// Built by N2VShAdd_QHSD on top of the plain shift-right nodes
// NEONvshrs / NEONvshru.
4207defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
4208defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
4209//   VRSRA    : Vector Rounding Shift Right and Accumulate
// Same multiclass, on top of the rounding shift-right nodes
// NEONvrshrs / NEONvrshru.
4210defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
4211defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
4212
4213//   VSLI     : Vector Shift Left and Insert
// No selection node or type prefix is passed here; whatever pattern exists
// comes from the N2VShInsL_QHSD multiclass (defined outside this chunk).
4214defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
4215
4216//   VSRI     : Vector Shift Right and Insert
// Right-shift counterpart, expanded by N2VShInsR_QHSD.
4217defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
4218
4219// Vector Absolute and Saturating Absolute.
4220
4221//   VABS     : Vector Absolute Value
// Integer forms are expanded by N2VInt_QHS; the f32 forms (v2f32 / v4f32) are
// written out individually.  All of them select int_arm_neon_vabs.
4222defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
4223                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
4224                           int_arm_neon_vabs>;
4225def  VABSfd   : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
4226                        IIC_VUNAD, "vabs", "f32",
4227                        v2f32, v2f32, int_arm_neon_vabs>;
4228def  VABSfq   : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
4229                        IIC_VUNAQ, "vabs", "f32",
4230                        v4f32, v4f32, int_arm_neon_vabs>;
4231
4232//   VQABS    : Vector Saturating Absolute Value
// Integer-only; expanded by N2VInt_QHS and selected from int_arm_neon_vqabs.
4233defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
4234                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
4235                           int_arm_neon_vqabs>;
4236
4237// Vector Negate.
4238
// Pattern fragments that express integer negation as "all-zeros vector minus
// the operand".  The zero vector is written as NEONimmAllZerosV typed v2i32
// (D) or v4i32 (Q) and wrapped in a bitconvert.
4239def vnegd  : PatFrag<(ops node:$in),
4240                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
4241def vnegq  : PatFrag<(ops node:$in),
4242                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
4243
// Helper classes for integer VNEG on D (VNEGD) and Q (VNEGQ) registers.
//   size      - two-bit value forwarded to N2V (third argument)
//   OpcodeStr - mnemonic
//   Dt        - data-type suffix string
//   Ty        - vector type the vnegd/vnegq fragment is matched at
// The two classes differ in register class, itinerary, fragment, and one bit
// argument (0 for D, 1 for Q).
4244class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
4245  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
4246        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
4247        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
4248class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
4249  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
4250        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
4251        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
4252
4253//   VNEG     : Vector Negate (integer)
// One record per element size (size = 0b00/0b01/0b10 for s8/s16/s32) and
// register width (D via VNEGD, Q via VNEGQ).
4254def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
4255def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
4256def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
4257def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
4258def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
4259def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
4260
4261//   VNEG     : Vector Negate (floating-point)
// Selected directly from the generic fneg node on v2f32 (D) / v4f32 (Q).
// Note the naming asymmetry: the D form is VNEGfd, the Q form is VNEGf32q.
4262def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
4263                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
4264                    "vneg", "f32", "$Vd, $Vm", "",
4265                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
4266def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
4267                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
4268                    "vneg", "f32", "$Vd, $Vm", "",
4269                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
4270
// Stand-alone selection patterns mapping the vnegd/vnegq fragments at each
// integer vector type to the matching VNEGs* instruction.
4271def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
4272def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
4273def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
4274def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
4275def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
4276def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
4277
4278//   VQNEG    : Vector Saturating Negate
// Integer-only; expanded by N2VInt_QHS and selected from int_arm_neon_vqneg.
4279defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
4280                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
4281                           int_arm_neon_vqneg>;
4282
4283// Vector Bit Counting Operations.
4284
4285//   VCLS     : Vector Count Leading Sign Bits
// Expanded by N2VInt_QHS with an "s" type prefix; selects int_arm_neon_vcls.
4286defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
4287                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
4288                           int_arm_neon_vcls>;
4289//   VCLZ     : Vector Count Leading Zeros
// Expanded by N2VInt_QHS with an "i" type prefix; selects int_arm_neon_vclz.
4290defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
4291                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
4292                           int_arm_neon_vclz>;
4293//   VCNT     : Vector Count One Bits
// Defined for byte vectors only (v8i8 on D, v16i8 on Q); the data-type string
// is the bare "8".
4294def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
4295                        IIC_VCNTiD, "vcnt", "8",
4296                        v8i8, v8i8, int_arm_neon_vcnt>;
4297def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
4298                        IIC_VCNTiQ, "vcnt", "8",
4299                        v16i8, v16i8, int_arm_neon_vcnt>;
4300
4301// Vector Swap -- for disassembly only.
// Empty pattern list and NoItinerary.
// NOTE(review): as written, $Vm is input-only and $Vd output-only, with no
// tie constraint -- confirm this is acceptable for a disassembly-only record.
4302def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
4303                     (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
4304                     "vswp", "$Vd, $Vm", "", []>;
4305def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
4306                     (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
4307                     "vswp", "$Vd, $Vm", "", []>;
4308
4309// Vector Move Operations.
4310
4311//   VMOV     : Vector Move (Register)
// There is no separate register-move record here: "vmov Vd, Vm" is an
// assembler alias for VORRd / VORRq with $Vm supplied as both source operands.
4312def : InstAlias<"vmov${p} $Vd, $Vm",
4313                (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
4314def : InstAlias<"vmov${p} $Vd, $Vm",
4315                (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
4316
4317//   VMOV     : Vector Move (Immediate)
4318
// All immediate moves are marked rematerializable.  Each takes a single
// nModImm operand ($SIMM) and is selected from NEONvmovImm at the named
// vector type.
4319let isReMaterializable = 1 in {
// i8 and i64 forms pass a fully fixed four-bit field (0b1110).
4320def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
4321                         (ins nModImm:$SIMM), IIC_VMOVImm,
4322                         "vmov", "i8", "$Vd, $SIMM", "",
4323                         [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
4324def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
4325                         (ins nModImm:$SIMM), IIC_VMOVImm,
4326                         "vmov", "i8", "$Vd, $SIMM", "",
4327                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
4328
// i16 forms leave one bit of the four-bit field open ({1,0,?,0}) and fill
// Inst{9} from bit 9 of the $SIMM operand.
4329def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
4330                         (ins nModImm:$SIMM), IIC_VMOVImm,
4331                         "vmov", "i16", "$Vd, $SIMM", "",
4332                         [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
4333  let Inst{9} = SIMM{9};
4334}
4335
4336def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
4337                         (ins nModImm:$SIMM), IIC_VMOVImm,
4338                         "vmov", "i16", "$Vd, $SIMM", "",
4339                         [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
4340 let Inst{9} = SIMM{9};
4341}
4342
// i32 forms leave all four bits open and fill Inst{11-8} from bits 11-8 of
// the $SIMM operand.
4343def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
4344                         (ins nModImm:$SIMM), IIC_VMOVImm,
4345                         "vmov", "i32", "$Vd, $SIMM", "",
4346                         [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
4347  let Inst{11-8} = SIMM{11-8};
4348}
4349
4350def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
4351                         (ins nModImm:$SIMM), IIC_VMOVImm,
4352                         "vmov", "i32", "$Vd, $SIMM", "",
4353                         [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
4354  let Inst{11-8} = SIMM{11-8};
4355}
4356
4357def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
4358                         (ins nModImm:$SIMM), IIC_VMOVImm,
4359                         "vmov", "i64", "$Vd, $SIMM", "",
4360                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
4361def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
4362                         (ins nModImm:$SIMM), IIC_VMOVImm,
4363                         "vmov", "i64", "$Vd, $SIMM", "",
4364                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
4365} // isReMaterializable
4366
4367//   VMOV     : Vector Get Lane (move scalar to ARM core register)
4368
// Each record reads one lane of a D register into a GPR.  The '?' positions in
// the bit arguments are filled from the $lane operand in the record body:
//   8-bit  : Inst{21} = lane{2}, Inst{6-5} = lane{1-0}
//   16-bit : Inst{21} = lane{1}, Inst{6}   = lane{0}
//   32-bit : Inst{21} = lane{0}
// Signed 8/16-bit forms select NEONvgetlanes, unsigned forms NEONvgetlaneu,
// and the 32-bit form the generic extractelt.
4369def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
4370                          (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
4371                          IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]",
4372                          [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
4373                                           imm:$lane))]> {
4374  let Inst{21}  = lane{2};
4375  let Inst{6-5} = lane{1-0};
4376}
4377def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
4378                          (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
4379                          IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]",
4380                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
4381                                           imm:$lane))]> {
4382  let Inst{21} = lane{1};
4383  let Inst{6}  = lane{0};
4384}
4385def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
4386                          (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
4387                          IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]",
4388                          [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
4389                                           imm:$lane))]> {
4390  let Inst{21}  = lane{2};
4391  let Inst{6-5} = lane{1-0};
4392}
4393def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
4394                          (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
4395                          IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]",
4396                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
4397                                           imm:$lane))]> {
4398  let Inst{21} = lane{1};
4399  let Inst{6}  = lane{0};
4400}
4401def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
4402                          (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
4403                          IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]",
4404                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
4405                                           imm:$lane))]> {
4406  let Inst{21} = lane{0};
4407}
4408// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Q-register lane reads reuse the D-register instructions: extract the D
// subregister named by DSubReg_*_reg applied to the lane, then read it with
// the lane operand produced by SubReg_*_lane.  Both transforms are defined
// outside this chunk; presumably they pick the containing D half and the lane
// index within it -- TODO confirm.
4409def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
4410          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
4411                           (DSubReg_i8_reg imm:$lane))),
4412                     (SubReg_i8_lane imm:$lane))>;
4413def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
4414          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
4415                             (DSubReg_i16_reg imm:$lane))),
4416                     (SubReg_i16_lane imm:$lane))>;
4417def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
4418          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
4419                           (DSubReg_i8_reg imm:$lane))),
4420                     (SubReg_i8_lane imm:$lane))>;
4421def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
4422          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
4423                             (DSubReg_i16_reg imm:$lane))),
4424                     (SubReg_i16_lane imm:$lane))>;
4425def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
4426          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
4427                             (DSubReg_i32_reg imm:$lane))),
4428                     (SubReg_i32_lane imm:$lane))>;
// f32 lane reads need no instruction: the vector is first copied into the
// DPR_VFP2 / QPR_VFP2 register class and the S subregister chosen by
// SSubReg_f32_reg is extracted.
4429def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
4430          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
4431                          (SSubReg_f32_reg imm:$src2))>;
4432def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
4433          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
4434                          (SSubReg_f32_reg imm:$src2))>;
// The v2i64 variant below is disabled; only the v2f64 subregister extract is
// active.
4435//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
4436//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
4437def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
4438          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
4439
4440
4441//   VMOV     : Vector Set Lane (move ARM core register to scalar)
4442
// Each record writes a GPR into one lane of a D register.  The incoming vector
// $src1 is tied to the result $V by the enclosing Constraints.  Lane bits are
// placed exactly as in the get-lane records:
//   8-bit  : Inst{21} = lane{2}, Inst{6-5} = lane{1-0}
//   16-bit : Inst{21} = lane{1}, Inst{6}   = lane{0}
//   32-bit : Inst{21} = lane{0}
// The 8/16-bit forms select vector_insert; the 32-bit form selects insertelt.
4443let Constraints = "$src1 = $V" in {
4444def VSETLNi8  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
4445                          (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
4446                          IIC_VMOVISL, "vmov", "8", "$V[$lane], $R",
4447                          [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
4448                                           GPR:$R, imm:$lane))]> {
4449  let Inst{21}  = lane{2};
4450  let Inst{6-5} = lane{1-0};
4451}
4452def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
4453                          (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
4454                          IIC_VMOVISL, "vmov", "16", "$V[$lane], $R",
4455                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
4456                                           GPR:$R, imm:$lane))]> {
4457  let Inst{21} = lane{1};
4458  let Inst{6}  = lane{0};
4459}
4460def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
4461                          (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
4462                          IIC_VMOVISL, "vmov", "32", "$V[$lane], $R",
4463                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
4464                                           GPR:$R, imm:$lane))]> {
4465  let Inst{21} = lane{0};
4466}
4467}
// Q-register lane writes are done on one D half: extract the D subregister
// named by DSubReg_*_reg, apply the D-register VSETLN with the lane operand
// from SubReg_*_lane, then insert the updated half back into the same
// subregister of $src1.  (The two transforms are defined outside this chunk.)
4468def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
4469          (v16i8 (INSERT_SUBREG QPR:$src1,
4470                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
4471                                   (DSubReg_i8_reg imm:$lane))),
4472                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
4473                  (DSubReg_i8_reg imm:$lane)))>;
4474def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
4475          (v8i16 (INSERT_SUBREG QPR:$src1,
4476                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
4477                                     (DSubReg_i16_reg imm:$lane))),
4478                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
4479                  (DSubReg_i16_reg imm:$lane)))>;
4480def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
4481          (v4i32 (INSERT_SUBREG QPR:$src1,
4482                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
4483                                     (DSubReg_i32_reg imm:$lane))),
4484                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
4485                  (DSubReg_i32_reg imm:$lane)))>;
4486
// f32 lane writes need no instruction: copy the vector into DPR_VFP2 /
// QPR_VFP2 and insert the SPR value into the S subregister chosen by
// SSubReg_f32_reg.
4487def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
4488          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
4489                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
4490def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
4491          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
4492                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
4493
// The v2i64 variant below is disabled; only the v2f64 subregister insert is
// active.
4494//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
4495//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
4496def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
4497          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
4498
// scalar_to_vector patterns.  The remaining lanes of the result are left
// undefined: every pattern starts from IMPLICIT_DEF.

// FP scalars: place the scalar in the first subregister (ssub_0 / dsub_0).
4499def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
4500          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
4501def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
4502          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
4503def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
4504          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
4505
// Integer scalars into D registers: VSETLN into lane 0.
4506def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
4507          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
4508def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
4509          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
4510def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
4511          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
4512
// Integer scalars into Q registers: build the D-register value as above and
// insert it as dsub_0 of an undefined Q register.
4513def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
4514          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
4515                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
4516                         dsub_0)>;
4517def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
4518          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
4519                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
4520                         dsub_0)>;
4521def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
4522          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
4523                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
4524                         dsub_0)>;
4525
4526//   VDUP     : Vector Duplicate (from ARM core register to all elements)
4527
// VDUPD / VDUPQ: "vdup.<Dt> $V, $R" writing a D or Q register respectively.
// Both select NEONvdup of an i32 GPR producing vector type Ty.
4528class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
4529  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
4530          IIC_VMOVIS, "vdup", Dt, "$V, $R",
4531          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
4532class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
4533  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
4534          IIC_VMOVIS, "vdup", Dt, "$V, $R",
4535          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
4536
// One def per element size (8/16/32) and register width (d/q).
4537def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
4538def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
4539def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>;
4540def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
4541def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
4542def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;
4543
// An f32 that is just a bitconverted GPR is duplicated with the 32-bit
// integer VDUP directly from that GPR.
4544def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>;
4545def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
4546
4547//   VDUP     : Vector Duplicate Lane (from scalar to all elements)
4548
// VDUPLND: D-register form.  Selects NEONvduplane where source and result
// share vector type Ty; the assembly operand string is "$Vd, $Vm$lane".
// IdxTy is the operand class used for the lane index in the instruction's
// inputs; the selection pattern itself accepts any immediate (imm:$lane).
4549class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
4550              ValueType Ty, Operand IdxTy>
4551  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
4552              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
4553              [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
4554
// VDUPLNQ: Q-register form of VDUP (scalar).  Selects NEONvduplane taking a
// D-register source of type OpTy and producing a Q-register result of type
// ResTy; the assembly operand string is "$Vd, $Vm$lane".
// The selection pattern constrains the lane with the same IdxTy the
// instruction takes as input, so each instantiation accepts the lane range of
// its own element size (VectorIndex8: < 8, VectorIndex16: < 4,
// VectorIndex32: < 2).  Hard-coding VectorIndex32 here would restrict the
// 8- and 16-bit forms to lanes 0-1.
4555class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
4556              ValueType ResTy, ValueType OpTy, Operand IdxTy>
4557  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
4558              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
4559              [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
4560                                      IdxTy:$lane)))]>;
4561
4562// Inst{19-16} is partially specified depending on the element size.
// The fixed low bits of op19_16 mark the element size (xxx1 = 8-bit,
// xx10 = 16-bit, x100 = 32-bit); each def fills the `?` bits with the lane
// index, whose width shrinks as the element grows.
4563
4564def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
4565  bits<3> lane;
4566  let Inst{19-17} = lane{2-0};
4567}
4568def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
4569  bits<2> lane;
4570  let Inst{19-18} = lane{1-0};
4571}
4572def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
4573  bits<1> lane;
4574  let Inst{19} = lane{0};
4575}
// Q-register results; the source is still a D register, so the lane-index
// widths are the same as for the D forms above.
4576def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
4577  bits<3> lane;
4578  let Inst{19-17} = lane{2-0};
4579}
4580def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
4581  bits<2> lane;
4582  let Inst{19-18} = lane{1-0};
4583}
4584def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
4585  bits<1> lane;
4586  let Inst{19} = lane{0};
4587}
4588
// f32 lane duplication from a D register reuses the 32-bit integer
// instructions.
4589def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
4590          (VDUPLN32d DPR:$Vm, imm:$lane)>;
4591
4592def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
4593          (VDUPLN32q DPR:$Vm, imm:$lane)>;
4594
// Lane duplication where the source is itself a Q register: extract a D
// subregister chosen by DSubReg_*_reg($lane), then use the D->Q instruction
// with the index rewritten by SubReg_*_lane($lane).
// NOTE(review): the two transforms are defined elsewhere; presumably they
// yield the D half containing the lane and the lane's index within it --
// confirm.
4595def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
4596          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
4597                                  (DSubReg_i8_reg imm:$lane))),
4598                           (SubReg_i8_lane imm:$lane)))>;
4599def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
4600          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
4601                                    (DSubReg_i16_reg imm:$lane))),
4602                            (SubReg_i16_lane imm:$lane)))>;
4603def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
4604          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
4605                                    (DSubReg_i32_reg imm:$lane))),
4606                            (SubReg_i32_lane imm:$lane)))>;
// v4f32 uses the i32 subregister/lane transforms.
4607def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
4608          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
4609                                   (DSubReg_i32_reg imm:$lane))),
4610                           (SubReg_i32_lane imm:$lane)))>;
4611
// Pseudo-instructions (empty asm string) selected for NEONvdup of an f32 held
// in an S register, producing v2f32 (D) or v4f32 (Q).
// NOTE(review): both use itinerary IIC_VMOVD, including the Q-register form --
// confirm that is intended.
4612def  VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
4613                    [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
4614def  VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
4615                    [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
4616
4617//   VMOVN    : Vector Narrowing Move
// Selected for the generic `trunc` node.
4618defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
4619                         "vmovn", "i", trunc>;
4620//   VQMOVN   : Vector Saturating Narrowing Move
// Selected only through intrinsics.  The signed and unsigned "vqmovn" forms
// differ in the argument after the 5-bit opcode field; VQMOVNsu prints as
// "vqmovun" with an "s" data-type prefix.
4621defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
4622                            "vqmovn", "s", int_arm_neon_vqmovns>;
4623defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
4624                            "vqmovn", "u", int_arm_neon_vqmovnu>;
4625defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
4626                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
4627//   VMOVL    : Vector Lengthening Move
// Selected for the generic sext / zext nodes.
4628defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
4629defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
4630
4631// Vector Conversions.
4632
4633//   VCVT     : Vector Convert Between Floating-Point and Integers
// Selected for the generic fp_to_sint / fp_to_uint / sint_to_fp / uint_to_fp
// nodes.  D-register forms (v2i32 <-> v2f32):
4634def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
4635                     v2i32, v2f32, fp_to_sint>;
4636def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
4637                     v2i32, v2f32, fp_to_uint>;
4638def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
4639                     v2f32, v2i32, sint_to_fp>;
4640def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
4641                     v2f32, v2i32, uint_to_fp>;
4642
// Q-register forms (v4i32 <-> v4f32), same opcode arguments as the D forms.
4643def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
4644                     v4i32, v4f32, fp_to_sint>;
4645def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
4646                     v4i32, v4f32, fp_to_uint>;
4647def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
4648                     v4f32, v4i32, sint_to_fp>;
4649def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
4650                     v4f32, v4i32, uint_to_fp>;
4651
4652//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
// Selected only through the int_arm_neon_vcvt* intrinsics.  The first class
// argument distinguishes the signed (0) from the unsigned (1) variant;
// 0b1111 is used for fp -> fixed and 0b1110 for fixed -> fp.
// D-register forms:
4653def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
4654                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
4655def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
4656                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
4657def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
4658                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
4659def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
4660                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
4661
// Q-register forms:
4662def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
4663                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
4664def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
4665                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
4666def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
4667                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
4668def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
4669                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
4670
4671//   VCVT     : Vector Convert Between Half-Precision and Single-Precision.
// Half-precision values are carried as v4i16.  VCVTf2h narrows v4f32 to
// v4i16 and VCVTh2f widens v4i16 to v4f32.  Both are selected only through
// intrinsics and additionally require the HasFP16 predicate.
4672def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
4673                        IIC_VUNAQ, "vcvt", "f16.f32",
4674                        v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
4675                Requires<[HasNEON, HasFP16]>;
4676def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
4677                        IIC_VUNAQ, "vcvt", "f32.f16",
4678                        v4f32, v4i16, int_arm_neon_vcvthf2fp>,
4679                Requires<[HasNEON, HasFP16]>;
4680
4681// Vector Reverse.
4682
4683//   VREV64   : Vector Reverse elements within 64-bit doublewords
4684
// VREV64D / VREV64Q: D- and Q-register forms, selected for NEONvrev64 on
// vector type Ty.  op19_18 is the only encoding argument that varies between
// the defs below (0b00 = 8-bit, 0b01 = 16-bit, 0b10 = 32-bit elements).
4685class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
4686  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
4687        (ins DPR:$Vm), IIC_VMOVD,
4688        OpcodeStr, Dt, "$Vd, $Vm", "",
4689        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
4690class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
4691  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
4692        (ins QPR:$Vm), IIC_VMOVQ,
4693        OpcodeStr, Dt, "$Vd, $Vm", "",
4694        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
4695
4696def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
4697def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
4698def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
// v2f32 reuses the 32-bit integer instruction.
4699def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
4700
4701def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
4702def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
4703def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
// v4f32 reuses the 32-bit integer instruction.
4704def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
4705
4706//   VREV32   : Vector Reverse elements within 32-bit words
4707
// VREV32D / VREV32Q: D- and Q-register forms, selected for NEONvrev32 on
// vector type Ty.  Only 8- and 16-bit element variants are defined below.
4708class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
4709  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
4710        (ins DPR:$Vm), IIC_VMOVD,
4711        OpcodeStr, Dt, "$Vd, $Vm", "",
4712        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
4713class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
4714  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
4715        (ins QPR:$Vm), IIC_VMOVQ,
4716        OpcodeStr, Dt, "$Vd, $Vm", "",
4717        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
4718
4719def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
4720def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
4721
4722def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
4723def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
4724
4725//   VREV16   : Vector Reverse elements within 16-bit halfwords
4726
// VREV16D / VREV16Q: D- and Q-register forms, selected for NEONvrev16 on
// vector type Ty.  Only the 8-bit element variants are defined below.
4727class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
4728  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
4729        (ins DPR:$Vm), IIC_VMOVD,
4730        OpcodeStr, Dt, "$Vd, $Vm", "",
4731        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
4732class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
4733  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
4734        (ins QPR:$Vm), IIC_VMOVQ,
4735        OpcodeStr, Dt, "$Vd, $Vm", "",
4736        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
4737
4738def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
4739def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;
4740
4741// Other Vector Shuffles.
4742
4743//  Aligned extractions: really just dropping registers
4744
// AlignedVEXTq: selects vector_extract_subvec of a Q-register vector (SrcTy)
// yielding a D-sized vector (DestTy) as a plain EXTRACT_SUBREG, so no
// instruction is emitted.  LaneCVT turns the start index into the
// subregister index.
4745class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
4746      : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
4747             (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
4748
4749def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
4750
4751def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
4752
4753def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
4754
// The 64-bit element case uses the f64 transform.
4755def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
4756
// v2f32 from v4f32 uses the i32 transform.
4757def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
4758
4759
4760//   VEXT     : Vector Extract
4761
// VEXTd / VEXTq: "vext.<Dt> $Vd, $Vn, $Vm, $index" for D and Q registers,
// selected for NEONvext on vector type Ty.  The class leaves the 4-bit field
// passed as `{?,?,?,?}` unspecified and by default places the raw 4-bit index
// in Inst{11-8}; the per-element-size defs below override that placement.
4762class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
4763  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
4764        (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
4765        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
4766        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
4767                                      (Ty DPR:$Vm), imm:$index)))]> {
4768  bits<4> index;
4769  let Inst{11-8} = index{3-0};
4770}
4771
4772class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
4773  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
4774        (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
4775        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
4776        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
4777                                      (Ty QPR:$Vm), imm:$index)))]> {
4778  bits<4> index;
4779  let Inst{11-8} = index{3-0};
4780}
4781
// 8-bit elements: index goes in unshifted (this repeats the class-level
// assignment).
4782def VEXTd8  : VEXTd<"vext", "8",  v8i8> {
4783  let Inst{11-8} = index{3-0};
4784}
// 16-bit elements: index is placed one bit higher, with Inst{8} fixed to 0.
4785def VEXTd16 : VEXTd<"vext", "16", v4i16> {
4786  let Inst{11-9} = index{2-0};
4787  let Inst{8}    = 0b0;
4788}
// 32-bit elements: index is placed two bits higher, with Inst{9-8} fixed to 0.
4789def VEXTd32 : VEXTd<"vext", "32", v2i32> {
4790  let Inst{11-10} = index{1-0};
4791  let Inst{9-8}    = 0b00;
4792}
// v2f32 reuses the 32-bit integer instruction.
4793def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
4794                           (v2f32 DPR:$Vm),
4795                           (i32 imm:$index))),
4796          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
4797
// Q-register defs use the same index placement as the D-register defs above.
4798def VEXTq8  : VEXTq<"vext", "8",  v16i8> {
4799  let Inst{11-8} = index{3-0};
4800}
4801def VEXTq16 : VEXTq<"vext", "16", v8i16> {
4802  let Inst{11-9} = index{2-0};
4803  let Inst{8}    = 0b0;
4804}
4805def VEXTq32 : VEXTq<"vext", "32", v4i32> {
4806  let Inst{11-10} = index{1-0};
4807  let Inst{9-8}    = 0b00;
4808}
// v4f32 reuses the 32-bit integer instruction.
4809def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
4810                           (v4f32 QPR:$Vm),
4811                           (i32 imm:$index))),
4812          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
4813
// The three shuffle families below share the N2VDShuffle / N2VQShuffle
// classes (defined elsewhere).  Within a family the first argument selects
// the element size (0b00 = 8, 0b01 = 16, 0b10 = 32); the second argument
// selects the operation (0b00001 = vtrn, 0b00010 = vuzp, 0b00011 = vzip).
4814//   VTRN     : Vector Transpose
4815
4816def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
4817def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
4818def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
4819
4820def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
4821def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
4822def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
4823
4824//   VUZP     : Vector Unzip (Deinterleave)
4825
4826def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
4827def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
4828def  VUZPd32  : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
4829
// Q-register vuzp/vzip use itinerary IIC_VPERMQ3 (vtrn uses IIC_VPERMQ).
4830def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
4831def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
4832def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
4833
4834//   VZIP     : Vector Zip (Interleave)
4835
4836def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
4837def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
4838def  VZIPd32  : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
4839
4840def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
4841def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
4842def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
4843
4844// Vector Table Lookup and Table Extension.
4845
4846//   VTBL     : Vector Table Lookup
// Everything down to the closing brace of this `let` (VTBL and VTBX, real
// and pseudo) uses the custom decoder method DecodeTBLInstruction.
4847let DecoderMethod = "DecodeTBLInstruction" in {
// One-register table: selected directly for int_arm_neon_vtbl1.
4848def  VTBL1
4849  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
4850        (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
4851        "vtbl", "8", "$Vd, \\{$Vn\\}, $Vm", "",
4852        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 DPR:$Vn, DPR:$Vm)))]>;
// Multi-register tables list each table register as a separate DPR operand
// ($Vn, $tbl2, ...).  They have no selection pattern and are marked
// hasExtraSrcRegAllocReq.
4853let hasExtraSrcRegAllocReq = 1 in {
4854def  VTBL2
4855  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
4856        (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
4857        "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
4858def  VTBL3
4859  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
4860        (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
4861        "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
4862def  VTBL4
4863  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
4864        (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
4865        NVTBLFrm, IIC_VTB4,
4866        "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
4867} // hasExtraSrcRegAllocReq = 1
4868
// Pseudo forms take the whole table as one wide register operand (QPR for
// two registers, QQPR for both the three- and four-register tables).
// NOTE(review): presumably these are expanded to the real VTBLn instructions
// elsewhere -- confirm where.
4869def  VTBL2Pseudo
4870  : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
4871def  VTBL3Pseudo
4872  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
4873def  VTBL4Pseudo
4874  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
4875
4876//   VTBX     : Vector Table Extension
// Same layout as VTBL, plus an extra $orig input tied to the destination
// through the "$orig = $Vd" constraint.
4877def  VTBX1
4878  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
4879        (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
4880        "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd",
4881        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
4882                               DPR:$orig, DPR:$Vn, DPR:$Vm)))]>;
4883let hasExtraSrcRegAllocReq = 1 in {
4884def  VTBX2
4885  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
4886        (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
4887        "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
4888def  VTBX3
4889  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
4890        (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
4891        NVTBLFrm, IIC_VTBX3,
4892        "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
4893        "$orig = $Vd", []>;
4894def  VTBX4
4895  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
4896        DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
4897        "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
4898        "$orig = $Vd", []>;
4899} // hasExtraSrcRegAllocReq = 1
4900
// Pseudo forms: $orig is tied to $dst; the table is one wide register operand.
4901def  VTBX2Pseudo
4902  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
4903                IIC_VTBX2, "$orig = $dst", []>;
4904def  VTBX3Pseudo
4905  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
4906                IIC_VTBX3, "$orig = $dst", []>;
4907def  VTBX4Pseudo
4908  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
4909                IIC_VTBX4, "$orig = $dst", []>;
4910} // DecoderMethod = "DecodeTBLInstruction"
4911
4912//===----------------------------------------------------------------------===//
4913// NEON instructions for single-precision FP math
4914//===----------------------------------------------------------------------===//
4915
// The classes below select scalar f32 operations onto D-register NEON
// instructions.  Each scalar input is inserted as ssub_0 of an otherwise
// undefined v2f32 (IMPLICIT_DEF copied to DPR_VFP2), the vector instruction
// is applied, and the scalar result is read back from ssub_0.

// N2VSPat: unary scalar op -> D-register instruction Inst.
4916class N2VSPat<SDNode OpNode, NeonI Inst>
4917  : NEONFPPat<(f32 (OpNode SPR:$a)),
4918              (EXTRACT_SUBREG
4919               (v2f32 (COPY_TO_REGCLASS (Inst
4920                (INSERT_SUBREG
4921                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
4922                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
4923
// N3VSPat: binary scalar op -> D-register instruction Inst.
4924class N3VSPat<SDNode OpNode, NeonI Inst>
4925  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
4926              (EXTRACT_SUBREG
4927               (v2f32 (COPY_TO_REGCLASS (Inst
4928                (INSERT_SUBREG
4929                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
4930                 SPR:$a, ssub_0),
4931                (INSERT_SUBREG
4932                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
4933                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
4934
// N3VSMulOpPat: matches OpNode($acc, MulNode($a, $b)) and passes the
// accumulator as the instruction's first operand.
4935class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
4936  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
4937              (EXTRACT_SUBREG
4938               (v2f32 (COPY_TO_REGCLASS (Inst
4939                (INSERT_SUBREG
4940                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
4941                 SPR:$acc, ssub_0),
4942                (INSERT_SUBREG
4943                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
4944                 SPR:$a, ssub_0),
4945                (INSERT_SUBREG
4946                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
4947                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
4948
4949def : N3VSPat<fadd, VADDfd>;
4950def : N3VSPat<fsub, VSUBfd>;
4951def : N3VSPat<fmul, VMULfd>;
// The multiply-accumulate forms carry an explicit predicate list that also
// requires UseFPVMLx.
4952def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
4953      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
4954def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
4955      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
4956def : N2VSPat<fabs, VABSfd>;
4957def : N2VSPat<fneg, VNEGfd>;
4958def : N3VSPat<NEONfmax, VMAXfd>;
4959def : N3VSPat<NEONfmin, VMINfd>;
4960def : N2VSPat<arm_ftosi, VCVTf2sd>;
4961def : N2VSPat<arm_ftoui, VCVTf2ud>;
4962def : N2VSPat<arm_sitof, VCVTs2fd>;
4963def : N2VSPat<arm_uitof, VCVTu2fd>;
4964
4965//===----------------------------------------------------------------------===//
4966// Non-Instruction Patterns
4967//===----------------------------------------------------------------------===//
4968
4969// bit_convert
// A bitconvert between two types held in the same register class emits no
// instruction: the result is the source register with a new type.
// D-register types (v1i64, v2i32, v4i16, v8i8, f64, v2f32): one pattern for
// every ordered pair of distinct types, grouped by destination type.
4970def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
4971def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
4972def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
4973def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
4974def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
4975def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
4976def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
4977def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
4978def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
4979def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
4980def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
4981def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
4982def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
4983def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
4984def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
4985def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
4986def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
4987def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
4988def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
4989def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
4990def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
4991def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
4992def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
4993def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
4994def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
4995def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
4996def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
4997def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
4998def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
4999def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;
5000
// Q-register types (v2i64, v4i32, v8i16, v16i8, v4f32, v2f64): same scheme.
5001def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
5002def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
5003def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
5004def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
5005def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
5006def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
5007def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
5008def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
5009def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
5010def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
5011def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
5012def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
5013def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
5014def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
5015def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
5016def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
5017def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
5018def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
5019def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
5020def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
5021def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
5022def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
5023def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
5024def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
5025def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
5026def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
5027def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
5028def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
5029def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
5030def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
5031