ARMInstrNEON.td revision 57838c6b1eacdb22fd448be34514c24b504aa1ef
1//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the ARM NEON instruction set.
11//
12//===----------------------------------------------------------------------===//
13
14
15//===----------------------------------------------------------------------===//
16// NEON-specific Operands.
17//===----------------------------------------------------------------------===//
// Generic NEON modified-immediate operand: an i32 operand whose only
// customization is that it is printed by printNEONModImmOperand.  No
// assembly-parser match class is attached to it.
18def nModImm : Operand<i32> {
19  let PrintMethod = "printNEONModImmOperand";
20}
21
// Immediate operands with a dedicated assembly-parser class.  Every operand
// below is an i32 operand printed by printNEONModImmOperand; each is paired
// with its own AsmOperandClass (NEONi8splat, NEONi16splat, NEONi32splat,
// NEONi32vmov, NEONi64splat) through ParserMatchClass.
22def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
23def nImmSplatI8 : Operand<i32> {
24  let PrintMethod = "printNEONModImmOperand";
25  let ParserMatchClass = nImmSplatI8AsmOperand;
26}
27def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
28def nImmSplatI16 : Operand<i32> {
29  let PrintMethod = "printNEONModImmOperand";
30  let ParserMatchClass = nImmSplatI16AsmOperand;
31}
32def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
33def nImmSplatI32 : Operand<i32> {
34  let PrintMethod = "printNEONModImmOperand";
35  let ParserMatchClass = nImmSplatI32AsmOperand;
36}
37def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
38def nImmVMOVI32 : Operand<i32> {
39  let PrintMethod = "printNEONModImmOperand";
40  let ParserMatchClass = nImmVMOVI32AsmOperand;
41}
42def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
// Note: the 64-bit splat is still declared as an i32 operand, like the others.
43def nImmSplatI64 : Operand<i32> {
44  let PrintMethod = "printNEONModImmOperand";
45  let ParserMatchClass = nImmSplatI64AsmOperand;
46}
47
// Vector lane-index operands.  Each VectorIndexN is both an i32 operand and an
// ImmLeaf whose predicate accepts the immediate only when, reinterpreted as
// uint64_t, it is below a fixed bound: 8 for VectorIndex8, 4 for VectorIndex16
// and 2 for VectorIndex32.  (Those bounds equal the number of 8/16/32-bit
// elements that fit in 64 bits -- presumably the lane count of a D register.)
// All three print through printVectorIndex and expose a single i32imm
// sub-operand via MIOperandInfo.
48def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
49def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
50def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
51def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
52  return ((uint64_t)Imm) < 8;
53}]> {
54  let ParserMatchClass = VectorIndex8Operand;
55  let PrintMethod = "printVectorIndex";
56  let MIOperandInfo = (ops i32imm);
57}
58def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
59  return ((uint64_t)Imm) < 4;
60}]> {
61  let ParserMatchClass = VectorIndex16Operand;
62  let PrintMethod = "printVectorIndex";
63  let MIOperandInfo = (ops i32imm);
64}
65def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
66  return ((uint64_t)Imm) < 2;
67}]> {
68  let ParserMatchClass = VectorIndex32Operand;
69  let PrintMethod = "printVectorIndex";
70  let MIOperandInfo = (ops i32imm);
71}
72
// Vector register-list operands.  Every list operand below is a
// RegisterOperand over DPR with a list-specific print method, matched in the
// assembler through an AsmOperandClass whose ParserMethod is parseVectorList.
//
// Register list of one D register.
73def VecListOneDAsmOperand : AsmOperandClass {
74  let Name = "VecListOneD";
75  let ParserMethod = "parseVectorList";
76}
77def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
78  let ParserMatchClass = VecListOneDAsmOperand;
79}
80// Register list of two sequential D registers.
81def VecListTwoDAsmOperand : AsmOperandClass {
82  let Name = "VecListTwoD";
83  let ParserMethod = "parseVectorList";
84}
85def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> {
86  let ParserMatchClass = VecListTwoDAsmOperand;
87}
88// Register list of three sequential D registers.
89def VecListThreeDAsmOperand : AsmOperandClass {
90  let Name = "VecListThreeD";
91  let ParserMethod = "parseVectorList";
92}
93def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
94  let ParserMatchClass = VecListThreeDAsmOperand;
95}
96// Register list of four sequential D registers.
97def VecListFourDAsmOperand : AsmOperandClass {
98  let Name = "VecListFourD";
99  let ParserMethod = "parseVectorList";
100}
101def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
102  let ParserMatchClass = VecListFourDAsmOperand;
103}
104// Register list of two D registers spaced by 2 (two sequential Q registers).
105def VecListTwoQAsmOperand : AsmOperandClass {
106  let Name = "VecListTwoQ";
107  let ParserMethod = "parseVectorList";
108}
// NOTE(review): this spaced list reuses printVectorListTwo, the same printer
// as the sequential VecListTwoD -- confirm the printed register pair is right.
109def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwo"> {
110  let ParserMatchClass = VecListTwoQAsmOperand;
111}
112
113//===----------------------------------------------------------------------===//
114// NEON-specific DAG Nodes.
115//===----------------------------------------------------------------------===//
116
// Type profiles for the vector compare nodes.
//   SDTARMVCMP  : one result, two operands; the result is an integer type and
//                 the two operands must have the same type as each other.
//   SDTARMVCMPZ : one result, one operand, no type constraints.
117def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
118def SDTARMVCMPZ   : SDTypeProfile<1, 1, []>;
119
// Two-operand nodes use SDTARMVCMP; the nodes whose ARMISD opcode ends in 'Z'
// use the one-operand SDTARMVCMPZ profile.
120def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
121def NEONvceqz     : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
122def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
123def NEONvcgez     : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
124def NEONvclez     : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
125def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
126def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
127def NEONvcgtz     : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
128def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
129def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
130def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;
131
132// Types for vector shift by immediates.  The "SHX" version is for long and
133// narrow operations where the source and destination vectors have different
134// types.  The "SHINS" version is for shift and insert operations.
// In all three profiles the last operand is constrained to i32.  SDTARMVSH
// ties the result type to operand 1; SDTARMVSHX only requires both to be
// integer types; SDTARMVSHINS ties the result to operands 1 and 2.
135def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
136                                         SDTCisVT<2, i32>]>;
137def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
138                                         SDTCisVT<2, i32>]>;
139def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
140                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
141
// Plain shifts (same-type) and long/narrow shifts (SHX).
142def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
143def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
144def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
145def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
146def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
147def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
148def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
149
// VRSHR* nodes.
150def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
151def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
152def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
153
// VQSHL* / VQSHRN* nodes.
154def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
155def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
156def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
157def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
158def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
159def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
160
// VQRSHRN* nodes.
161def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
162def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
163def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
164
// Shift-and-insert nodes (three-operand SHINS profile).
165def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
166def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
167
// VGETLANE: the result is i32, operand 1 is an integer type and operand 2 is
// i32 (presumably the lane number -- confirm against the lowering code).
168def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
169                                         SDTCisVT<2, i32>]>;
170def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
171def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
172
// VMOVIMM / VMVNIMM: vector result built from a single i32 operand.
173def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
174def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
175def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
176
// VORRIMM / VBICIMM: vector result with the same type as operand 1, plus an
// i32 second operand.
177def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
178                                           SDTCisVT<2, i32>]>;
179def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
180def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
181
// VBSL: one vector result and three operands, all of the result's type.
182def NEONvbsl      : SDNode<"ARMISD::VBSL",
183                           SDTypeProfile<1, 3, [SDTCisVec<0>,
184                                                SDTCisSameAs<0, 1>,
185                                                SDTCisSameAs<0, 2>,
186                                                SDTCisSameAs<0, 3>]>>;
187
// VDUP: vector result from one operand whose type is left unconstrained.
188def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
189
190// VDUPLANE can produce a quad-register result from a double-register source,
191// so the result is not constrained to match the source.
192def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
193                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
194                                                SDTCisVT<2, i32>]>>;
195
// VEXT: two vector operands of the result's type plus an i32 third operand.
196def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
197                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
198def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
199
// One-input shuffles (VREV64/32/16): vector result, one operand of the same
// type.
200def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
201def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
202def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
203def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
204
// Two-input shuffles (VZIP/VUZP/VTRN): two results and two operands, all four
// values sharing one vector type.
205def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
206                                         SDTCisSameAs<0, 2>,
207                                         SDTCisSameAs<0, 3>]>;
208def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
209def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
210def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
211
// VMULL: integer result; the two operands are integer and share a type, but
// the result type is not tied to the operand type.
212def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
213                                         SDTCisSameAs<1, 2>]>;
214def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
215def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
216
// FMAX / FMIN: the result is f32 and both operands share the result's type.
217def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
218                                         SDTCisSameAs<0, 2>]>;
219def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
220def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
221
// Matches a NEONvmovImm node whose i32 operand, once decoded with
// ARM_AM::decodeNEONModImm, describes 32-bit elements with value 0.
222def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
223  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
224  unsigned EltBits = 0;
225  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
226  return (EltBits == 32 && EltVal == 0);
227}]>;
228
// Matches a NEONvmovImm node whose i32 operand, once decoded with
// ARM_AM::decodeNEONModImm, describes 8-bit elements with value 0xff.
229def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
230  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
231  unsigned EltBits = 0;
232  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
233  return (EltBits == 8 && EltVal == 0xff);
234}]>;
235
236//===----------------------------------------------------------------------===//
237// NEON load / store instructions
238//===----------------------------------------------------------------------===//
239
240// Use VLDM to load a Q register as a D register pair.
241// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
// Selection pattern: a v2f64 load from the address in GPR:$Rn into QPR:$dst.
// The assembly string is empty.
242def VLDMQIA
243  : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
244                    IIC_fpLoad_m, "",
245                   [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
246
247// Use VSTM to store a Q register as a D register pair.
248// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
// Selection pattern: a store of v2f64 QPR:$src to the address in GPR:$Rn.
// There are no outputs and the assembly string is empty.
249def VSTMQIA
250  : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
251                    IIC_fpStore_m, "",
252                   [(store (v2f64 QPR:$src), GPR:$Rn)]>;
253
254// Classes for VLD* pseudo-instructions with multi-register operands.
255// These are expanded to real instructions after register allocation.
//
// Shape of the classes below (the only parameter is the itinerary class):
//  * Destination register class is QPR, QQPR or QQQQPR per the class name.
//  * "WB" classes add a GPR:$wb output constrained with "$addr.addr = $wb".
//    The plain WB form takes an am6offset:$offset input, "WBfixed" takes no
//    offset input, and "WBregister" takes an rGPR:$offset input.
//  * The QQQQ classes also take a QQQQPR:$src input tied to the destination
//    ("$src = $dst").
256class VLDQPseudo<InstrItinClass itin>
257  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
258class VLDQWBPseudo<InstrItinClass itin>
259  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
260                (ins addrmode6:$addr, am6offset:$offset), itin,
261                "$addr.addr = $wb">;
262class VLDQWBfixedPseudo<InstrItinClass itin>
263  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
264                (ins addrmode6:$addr), itin,
265                "$addr.addr = $wb">;
266class VLDQWBregisterPseudo<InstrItinClass itin>
267  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
268                (ins addrmode6:$addr, rGPR:$offset), itin,
269                "$addr.addr = $wb">;
270class VLDQQPseudo<InstrItinClass itin>
271  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
272class VLDQQWBPseudo<InstrItinClass itin>
273  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
274                (ins addrmode6:$addr, am6offset:$offset), itin,
275                "$addr.addr = $wb">;
276class VLDQQQQPseudo<InstrItinClass itin>
277  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
278                "$src = $dst">;
279class VLDQQQQWBPseudo<InstrItinClass itin>
280  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
281                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
282                "$addr.addr = $wb, $src = $dst">;
283
284let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
285
286//   VLD1     : Vector Load (multiple single elements)
// VLD1D loads a one-register list (VecListOneD) and VLD1Q a two-register list
// (VecListTwoD); neither has a writeback output or a selection pattern.
// Rm is pinned to 0b1111 in both.  The '?' bits that instantiations leave
// unset in op7_4 are supplied from the address operand: Inst{4} = Rn{4} for
// VLD1D and Inst{5-4} = Rn{5-4} for VLD1Q (presumably the alignment field of
// addrmode6 -- confirm against the operand encoder).
287class VLD1D<bits<4> op7_4, string Dt>
288  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
289          (ins addrmode6:$Rn), IIC_VLD1,
290          "vld1", Dt, "$Vd, $Rn", "", []> {
291  let Rm = 0b1111;
292  let Inst{4} = Rn{4};
293  let DecoderMethod = "DecodeVLDInstruction";
294}
295class VLD1Q<bits<4> op7_4, string Dt>
296  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd),
297          (ins addrmode6:$Rn), IIC_VLD1x2,
298          "vld1", Dt, "$Vd, $Rn", "", []> {
299  let Rm = 0b1111;
300  let Inst{5-4} = Rn{5-4};
301  let DecoderMethod = "DecodeVLDInstruction";
302}
303
// One-register VLD1 for each element size.  The two high bits of op7_4 step
// through 00/01/10/11 for the "8"/"16"/"32"/"64" data-type suffixes; the
// trailing '?' bit is left for the class to fill in.
304def  VLD1d8   : VLD1D<{0,0,0,?}, "8">;
305def  VLD1d16  : VLD1D<{0,1,0,?}, "16">;
306def  VLD1d32  : VLD1D<{1,0,0,?}, "32">;
307def  VLD1d64  : VLD1D<{1,1,0,?}, "64">;
308
// Two-register VLD1: same element-size bits, with two '?' bits left open.
309def  VLD1q8   : VLD1Q<{0,0,?,?}, "8">;
310def  VLD1q16  : VLD1Q<{0,1,?,?}, "16">;
311def  VLD1q32  : VLD1Q<{1,0,?,?}, "32">;
312def  VLD1q64  : VLD1Q<{1,1,?,?}, "64">;
313
// Q-register pseudos for the two-register forms; all share IIC_VLD1x2.
314def  VLD1q8Pseudo  : VLDQPseudo<IIC_VLD1x2>;
315def  VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
316def  VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
317def  VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;
318
319// ...with address register writeback:
// One-register VLD1 with a GPR:$wb output tied to the address
// ("$Rn.addr = $wb").  Two records are produced per instantiation:
//   _fixed    : syntax "$Vd, $Rn!", Rm pinned to 0b1101, asm operands
//               converted by cvtVLDwbFixed.
//   _register : syntax "$Vd, $Rn, $Rm", Rm supplied by the rGPR:$Rm input,
//               asm operands converted by cvtVLDwbRegister.
320multiclass VLD1DWB<bits<4> op7_4, string Dt> {
321  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
322                     (ins addrmode6:$Rn), IIC_VLD1u,
323                     "vld1", Dt, "$Vd, $Rn!",
324                     "$Rn.addr = $wb", []> {
325    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
326    let Inst{4} = Rn{4};
327    let DecoderMethod = "DecodeVLDInstruction";
328    let AsmMatchConverter = "cvtVLDwbFixed";
329  }
330  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
331                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
332                        "vld1", Dt, "$Vd, $Rn, $Rm",
333                        "$Rn.addr = $wb", []> {
334    let Inst{4} = Rn{4};
335    let DecoderMethod = "DecodeVLDInstruction";
336    let AsmMatchConverter = "cvtVLDwbRegister";
337  }
338}
// Two-register VLD1 with address writeback ("$Rn.addr = $wb").  Produces a
// _fixed record ("$Vd, $Rn!", Rm pinned to 0b1101) and a _register record
// ("$Vd, $Rn, $Rm", Rm from the rGPR input).  Both copy Rn{5-4} into
// Inst{5-4} and use the IIC_VLD1x2u itinerary.
339multiclass VLD1QWB<bits<4> op7_4, string Dt> {
340  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb),
341                    (ins addrmode6:$Rn), IIC_VLD1x2u,
342                     "vld1", Dt, "$Vd, $Rn!",
343                     "$Rn.addr = $wb", []> {
344    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
345    let Inst{5-4} = Rn{5-4};
346    let DecoderMethod = "DecodeVLDInstruction";
347    let AsmMatchConverter = "cvtVLDwbFixed";
348  }
349  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb),
350                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
351                        "vld1", Dt, "$Vd, $Rn, $Rm",
352                        "$Rn.addr = $wb", []> {
353    let Inst{5-4} = Rn{5-4};
354    let DecoderMethod = "DecodeVLDInstruction";
355    let AsmMatchConverter = "cvtVLDwbRegister";
356  }
357}
358
// Writeback VLD1 instantiations.  Each defm yields a <name>_fixed and a
// <name>_register record; the op7_4 templates match the non-writeback defs.
359defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8">;
360defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
361defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
362defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
363defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8">;
364defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
365defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
366defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;
367
// Q-register writeback pseudos, one fixed and one register form per element
// size; all share IIC_VLD1x2u.
368def VLD1q8PseudoWB_fixed  : VLDQWBfixedPseudo<IIC_VLD1x2u>;
369def VLD1q16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>;
370def VLD1q32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>;
371def VLD1q64PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>;
372def VLD1q8PseudoWB_register  : VLDQWBregisterPseudo<IIC_VLD1x2u>;
373def VLD1q16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>;
374def VLD1q32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>;
375def VLD1q64PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>;
376
377// ...with 3 registers
// VLD1 into a three-register list (VecListThreeD), no writeback and no
// selection pattern.  Rm is pinned to 0b1111 and Inst{4} is taken from Rn{4}.
378class VLD1D3<bits<4> op7_4, string Dt>
379  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
380          (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
381          "$Vd, $Rn", "", []> {
382  let Rm = 0b1111;
383  let Inst{4} = Rn{4};
384  let DecoderMethod = "DecodeVLDInstruction";
385}
// VLD1 into a three-register list (VecListThreeD) with address writeback
// ("$Rn.addr = $wb").  Two records are produced per instantiation:
//   _fixed    : syntax "$Vd, $Rn!", Rm pinned to 0b1101, asm operands
//               converted by cvtVLDwbFixed.
//   _register : syntax "$Vd, $Rn, $Rm", Rm supplied by the rGPR:$Rm input,
//               asm operands converted by cvtVLDwbRegister.
// Both use the three-register writeback itinerary IIC_VLD1x3u, following the
// pairing used elsewhere in this file (IIC_VLD1/IIC_VLD1u for one register,
// IIC_VLD1x2/IIC_VLD1x2u for two, and IIC_VLD1x3 for the non-writeback
// three-register class VLD1D3).
386multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
387  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
388                    (ins addrmode6:$Rn), IIC_VLD1x3u,
389                     "vld1", Dt, "$Vd, $Rn!",
390                     "$Rn.addr = $wb", []> {
391    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
392    let Inst{4} = Rn{4};
393    let DecoderMethod = "DecodeVLDInstruction";
394    let AsmMatchConverter = "cvtVLDwbFixed";
395  }
396  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
397                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x3u,
398                        "vld1", Dt, "$Vd, $Rn, $Rm",
399                        "$Rn.addr = $wb", []> {
400    let Inst{4} = Rn{4};
401    let DecoderMethod = "DecodeVLDInstruction";
402    let AsmMatchConverter = "cvtVLDwbRegister";
403  }
404}
405
// Three-register VLD1 for each element size (the "T" suffix in the names).
406def VLD1d8T      : VLD1D3<{0,0,0,?}, "8">;
407def VLD1d16T     : VLD1D3<{0,1,0,?}, "16">;
408def VLD1d32T     : VLD1D3<{1,0,0,?}, "32">;
409def VLD1d64T     : VLD1D3<{1,1,0,?}, "64">;
410
// Writeback forms; each defm yields a _fixed and a _register record.
411defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8">;
412defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
413defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
414defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;
415
// Only the 64-bit three-register form gets a pseudo; it uses a QQPR
// destination (VLDQQPseudo) with the IIC_VLD1x3 itinerary.
416def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
417
418// ...with 4 registers
// VLD1 into a four-register list (VecListFourD), no writeback and no
// selection pattern.  Rm is pinned to 0b1111 and Inst{5-4} is taken from
// Rn{5-4}.
419class VLD1D4<bits<4> op7_4, string Dt>
420  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
421          (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
422          "$Vd, $Rn", "", []> {
423  let Rm = 0b1111;
424  let Inst{5-4} = Rn{5-4};
425  let DecoderMethod = "DecodeVLDInstruction";
426}
// VLD1 into a four-register list (VecListFourD) with address writeback
// ("$Rn.addr = $wb").  Two records are produced per instantiation:
//   _fixed    : syntax "$Vd, $Rn!", Rm pinned to 0b1101, asm operands
//               converted by cvtVLDwbFixed.
//   _register : syntax "$Vd, $Rn, $Rm", Rm supplied by the rGPR:$Rm input,
//               asm operands converted by cvtVLDwbRegister.
// Both use the four-register writeback itinerary IIC_VLD1x4u, following the
// pairing used elsewhere in this file (IIC_VLD1/IIC_VLD1u for one register,
// IIC_VLD1x2/IIC_VLD1x2u for two, and IIC_VLD1x4 for the non-writeback
// four-register class VLD1D4).
427multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
428  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
429                    (ins addrmode6:$Rn), IIC_VLD1x4u,
430                     "vld1", Dt, "$Vd, $Rn!",
431                     "$Rn.addr = $wb", []> {
432    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
433    let Inst{5-4} = Rn{5-4};
434    let DecoderMethod = "DecodeVLDInstruction";
435    let AsmMatchConverter = "cvtVLDwbFixed";
436  }
437  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
438                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x4u,
439                        "vld1", Dt, "$Vd, $Rn, $Rm",
440                        "$Rn.addr = $wb", []> {
441    let Inst{5-4} = Rn{5-4};
442    let DecoderMethod = "DecodeVLDInstruction";
443    let AsmMatchConverter = "cvtVLDwbRegister";
444  }
445}
446
// Four-register VLD1 for each element size (the "Q" suffix in the names).
447def VLD1d8Q      : VLD1D4<{0,0,?,?}, "8">;
448def VLD1d16Q     : VLD1D4<{0,1,?,?}, "16">;
449def VLD1d32Q     : VLD1D4<{1,0,?,?}, "32">;
450def VLD1d64Q     : VLD1D4<{1,1,?,?}, "64">;
451
// Writeback forms; each defm yields a _fixed and a _register record.
452defm VLD1d8Qwb   : VLD1D4WB<{0,0,?,?}, "8">;
453defm VLD1d16Qwb  : VLD1D4WB<{0,1,?,?}, "16">;
454defm VLD1d32Qwb  : VLD1D4WB<{1,0,?,?}, "32">;
455defm VLD1d64Qwb  : VLD1D4WB<{1,1,?,?}, "64">;
456
// Only the 64-bit four-register form gets a pseudo; it uses a QQPR
// destination (VLDQQPseudo) with the IIC_VLD1x4 itinerary.
457def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
458
459//   VLD2     : Vector Load (multiple 2-element structures)
// VLD2D takes both op11_8 and the destination list operand type (VdTy) as
// parameters, so one class serves the sequential and the double-spaced
// two-register forms.  VLD2Q fixes op11_8 to 0b0011 and uses the IIC_VLD2x2
// itinerary.  Neither has writeback: Rm is pinned to 0b1111, and Inst{5-4}
// is taken from Rn{5-4}.
460class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
461  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
462          (ins addrmode6:$Rn), IIC_VLD2,
463          "vld2", Dt, "$Vd, $Rn", "", []> {
464  let Rm = 0b1111;
465  let Inst{5-4} = Rn{5-4};
466  let DecoderMethod = "DecodeVLDInstruction";
467}
468class VLD2Q<bits<4> op7_4, string Dt, RegisterOperand VdTy>
469  : NLdSt<0, 0b10, 0b0011, op7_4,
470          (outs VdTy:$Vd),
471          (ins addrmode6:$Rn), IIC_VLD2x2,
472          "vld2", Dt, "$Vd, $Rn", "", []> {
473  let Rm = 0b1111;
474  let Inst{5-4} = Rn{5-4};
475  let DecoderMethod = "DecodeVLDInstruction";
476}
477
// Two-register VLD2 (op11_8 = 0b1000, VecListTwoD) for 8/16/32-bit elements.
478def  VLD2d8   : VLD2D<0b1000, {0,0,?,?}, "8", VecListTwoD>;
479def  VLD2d16  : VLD2D<0b1000, {0,1,?,?}, "16", VecListTwoD>;
480def  VLD2d32  : VLD2D<0b1000, {1,0,?,?}, "32", VecListTwoD>;
481
// Four-register VLD2 (VecListFourD) for 8/16/32-bit elements.
482def  VLD2q8   : VLD2Q<{0,0,?,?}, "8", VecListFourD>;
483def  VLD2q16  : VLD2Q<{0,1,?,?}, "16", VecListFourD>;
484def  VLD2q32  : VLD2Q<{1,0,?,?}, "32", VecListFourD>;
485
// Pseudos for the two-register forms: QPR destination, IIC_VLD2.
486def  VLD2d8Pseudo  : VLDQPseudo<IIC_VLD2>;
487def  VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
488def  VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;
489
// Pseudos for the four-register forms: QQPR destination, IIC_VLD2x2.
490def  VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>;
491def  VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
492def  VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
493
494// ...with address register writeback:
// Unlike the VLD1 writeback multiclasses, these are single classes: the
// offset is one am6offset:$Rm operand printed directly after the address
// ("$Vd, $Rn$Rm"), and Rm is not pinned to a constant.  A GPR:$wb output is
// tied to the address with "$Rn.addr = $wb".
495class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
496  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
497          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
498          "vld2", Dt, "$Vd, $Rn$Rm",
499          "$Rn.addr = $wb", []> {
500  let Inst{5-4} = Rn{5-4};
501  let DecoderMethod = "DecodeVLDInstruction";
502}
503class VLD2QWB<bits<4> op7_4, string Dt, RegisterOperand VdTy>
504  : NLdSt<0, 0b10, 0b0011, op7_4,
505          (outs VdTy:$Vd, GPR:$wb),
506          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
507          "vld2", Dt, "$Vd, $Rn$Rm",
508          "$Rn.addr = $wb", []> {
509  let Inst{5-4} = Rn{5-4};
510  let DecoderMethod = "DecodeVLDInstruction";
511}
512
// Writeback (_UPD) counterparts of the two-register VLD2 defs.
513def VLD2d8_UPD  : VLD2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
514def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
515def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
516
// Writeback counterparts of the four-register VLD2 defs.
517def VLD2q8_UPD  : VLD2QWB<{0,0,?,?}, "8", VecListFourD>;
518def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16", VecListFourD>;
519def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32", VecListFourD>;
520
// Writeback pseudos: QPR destination with IIC_VLD2u.
521def VLD2d8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD2u>;
522def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
523def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
524
// Writeback pseudos: QQPR destination with IIC_VLD2x2u.
525def VLD2q8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD2x2u>;
526def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
527def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
528
529// ...with double-spaced registers
// These differ from the VLD2d* defs only in op11_8 (0b1001 instead of
// 0b1000) and in using the VecListTwoQ list operand.
530def VLD2b8      : VLD2D<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
531def VLD2b16     : VLD2D<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
532def VLD2b32     : VLD2D<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
533def VLD2b8_UPD  : VLD2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
534def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
535def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
536
537//   VLD3     : Vector Load (multiple 3-element structures)
// Unlike VLD1/VLD2 above, the three destinations are separate DPR outputs
// ($Vd, $dst2, $dst3) and the braces of the register list are written
// literally in the assembly string.  Rm is pinned to 0b1111 and Inst{4} is
// taken from Rn{4}.
538class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
539  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
540          (ins addrmode6:$Rn), IIC_VLD3,
541          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
542  let Rm = 0b1111;
543  let Inst{4} = Rn{4};
544  let DecoderMethod = "DecodeVLDInstruction";
545}
546
// VLD3 with op11_8 = 0b0100 for 8/16/32-bit elements.
547def  VLD3d8   : VLD3D<0b0100, {0,0,0,?}, "8">;
548def  VLD3d16  : VLD3D<0b0100, {0,1,0,?}, "16">;
549def  VLD3d32  : VLD3D<0b0100, {1,0,0,?}, "32">;
550
// Matching pseudos with a QQPR destination.
551def  VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>;
552def  VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
553def  VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
554
555// ...with address register writeback:
// Adds a GPR:$wb output tied to the address ("$Rn.addr = $wb") and an
// am6offset:$Rm input printed directly after the address; Rm is not pinned.
556class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
557  : NLdSt<0, 0b10, op11_8, op7_4,
558          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
559          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
560          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
561          "$Rn.addr = $wb", []> {
562  let Inst{4} = Rn{4};
563  let DecoderMethod = "DecodeVLDInstruction";
564}
565
// Writeback counterparts of VLD3d8/16/32.
566def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
567def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
568def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
569
// Writeback pseudos with a QQPR destination.
570def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
571def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
572def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
573
574// ...with double-spaced registers:
// Same classes as the VLD3d* defs; only op11_8 changes (0b0101 vs 0b0100).
575def VLD3q8      : VLD3D<0b0101, {0,0,0,?}, "8">;
576def VLD3q16     : VLD3D<0b0101, {0,1,0,?}, "16">;
577def VLD3q32     : VLD3D<0b0101, {1,0,0,?}, "32">;
578def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
579def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
580def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
581
// Writeback pseudos over a QQQQPR destination (with the tied $src input that
// VLDQQQQWBPseudo declares).
582def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
583def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
584def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
585
586// ...alternate versions to be allocated odd register numbers:
587def VLD3q8oddPseudo   : VLDQQQQPseudo<IIC_VLD3>;
588def VLD3q16oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
589def VLD3q32oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
590
591def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
592def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
593def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
594
595//   VLD4     : Vector Load (multiple 4-element structures)
// Four separate DPR outputs ($Vd, $dst2, $dst3, $dst4) with the list braces
// written literally in the assembly string.  Rm is pinned to 0b1111 and
// Inst{5-4} is taken from Rn{5-4}.
596class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
597  : NLdSt<0, 0b10, op11_8, op7_4,
598          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
599          (ins addrmode6:$Rn), IIC_VLD4,
600          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
601  let Rm = 0b1111;
602  let Inst{5-4} = Rn{5-4};
603  let DecoderMethod = "DecodeVLDInstruction";
604}
605
// VLD4 with op11_8 = 0b0000 for 8/16/32-bit elements.
606def  VLD4d8   : VLD4D<0b0000, {0,0,?,?}, "8">;
607def  VLD4d16  : VLD4D<0b0000, {0,1,?,?}, "16">;
608def  VLD4d32  : VLD4D<0b0000, {1,0,?,?}, "32">;
609
// Matching pseudos with a QQPR destination.
610def  VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>;
611def  VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
612def  VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
613
614// ...with address register writeback:
// Adds a GPR:$wb output tied to the address ("$Rn.addr = $wb") and an
// am6offset:$Rm input printed directly after the address; Rm is not pinned.
615class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
616  : NLdSt<0, 0b10, op11_8, op7_4,
617          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
618          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
619          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
620          "$Rn.addr = $wb", []> {
621  let Inst{5-4} = Rn{5-4};
622  let DecoderMethod = "DecodeVLDInstruction";
623}
624
// Writeback counterparts of VLD4d8/16/32.
625def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
626def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
627def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
628
// Writeback pseudos with a QQPR destination.
629def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>;
630def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
631def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
632
633// ...with double-spaced registers:
// Same classes as the VLD4d* defs; only op11_8 changes (0b0001 vs 0b0000).
634def VLD4q8      : VLD4D<0b0001, {0,0,?,?}, "8">;
635def VLD4q16     : VLD4D<0b0001, {0,1,?,?}, "16">;
636def VLD4q32     : VLD4D<0b0001, {1,0,?,?}, "32">;
637def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
638def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
639def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
640
// Writeback pseudos over a QQQQPR destination (with the tied $src input that
// VLDQQQQWBPseudo declares).
641def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
642def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
643def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
644
645// ...alternate versions to be allocated odd register numbers:
646def VLD4q8oddPseudo   : VLDQQQQPseudo<IIC_VLD4>;
647def VLD4q16oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
648def VLD4q32oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
649
650def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
651def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
652def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
653
654} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
655
656// Classes for VLD*LN pseudo-instructions with multi-register operands.
657// These are expanded to real instructions after register allocation.
//
// Every class takes the address, a $src register of the same class as the
// destination that is tied to it ("$src = $dst"), and a nohash_imm:$lane
// operand.  The "WB" classes additionally take an am6offset:$offset input and
// produce a GPR:$wb output tied to the address ("$addr.addr = $wb").
658class VLDQLNPseudo<InstrItinClass itin>
659  : PseudoNLdSt<(outs QPR:$dst),
660                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
661                itin, "$src = $dst">;
662class VLDQLNWBPseudo<InstrItinClass itin>
663  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
664                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
665                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
666class VLDQQLNPseudo<InstrItinClass itin>
667  : PseudoNLdSt<(outs QQPR:$dst),
668                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
669                itin, "$src = $dst">;
670class VLDQQLNWBPseudo<InstrItinClass itin>
671  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
672                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
673                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
674class VLDQQQQLNPseudo<InstrItinClass itin>
675  : PseudoNLdSt<(outs QQQQPR:$dst),
676                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
677                itin, "$src = $dst">;
678class VLDQQQQLNWBPseudo<InstrItinClass itin>
679  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
680                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
681                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
682
683//   VLD1LN   : Vector Load (single element to one lane)
// Parameters: Ty is the vector type of the D register, LoadOp the load
// fragment used in the selection pattern.  The pattern is a vector_insert of
// the i32 value loaded through LoadOp into lane $lane of $src, with $src tied
// to the result ("$src = $Vd").  Rm is pinned to 0b1111 (no writeback
// output is declared).
684class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
685             PatFrag LoadOp>
686  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
687          (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
688          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
689          "$src = $Vd",
690          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
691                                         (i32 (LoadOp addrmode6:$Rn)),
692                                         imm:$lane))]> {
693  let Rm = 0b1111;
694  let DecoderMethod = "DecodeVLD1LN";
695}
// Identical to VLD1LN except that the address operand is addrmode6oneL32
// instead of addrmode6, both in the operand list and in the pattern.
696class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
697             PatFrag LoadOp>
698  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
699          (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
700          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
701          "$src = $Vd",
702          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
703                                         (i32 (LoadOp addrmode6oneL32:$Rn)),
704                                         imm:$lane))]> {
705  let Rm = 0b1111;
706  let DecoderMethod = "DecodeVLD1LN";
707}
// Q-register pseudo: a VLDQLNPseudo with the IIC_VLD1ln itinerary whose
// Pattern field is set to the equivalent vector_insert on QPR operands.
708class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
709  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
710                                               (i32 (LoadOp addrmode6:$addr)),
711                                               imm:$lane))];
712}
713
// D-register VLD1LN instances for 8/16/32-bit elements.  The '?' bits left
// open in the op7_4 argument are filled in each body: the lane index takes the
// upper bits (3, 2 or 1 bits wide respectively) and, for the 16- and 32-bit
// forms, the remaining open bits are driven from Rn{4}.
714def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
715  let Inst{7-5} = lane{2-0};
716}
717def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
718  let Inst{7-6} = lane{1-0};
719  let Inst{4}   = Rn{4};
720}
// The 32-bit form uses VLD1LN32 and writes Rn{4} to both Inst{5} and Inst{4}.
721def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
722  let Inst{7} = lane{0};
723  let Inst{5} = Rn{4};
724  let Inst{4} = Rn{4};
725}
726
// Q-register pseudos for VLD1LN, one per element size; each supplies the
// vector type and load fragment consumed by VLD1QLNPseudo's pattern.
727def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
728def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
729def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
730
// Floating-point lane loads: inserting a loaded f32 into a v2f32 / v4f32
// vector is selected to the 32-bit integer lane-load instruction (D form) or
// pseudo (Q form) defined above.
731def : Pat<(vector_insert (v2f32 DPR:$src),
732                         (f32 (load addrmode6:$addr)), imm:$lane),
733          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
734def : Pat<(vector_insert (v4f32 QPR:$src),
735                         (f32 (load addrmode6:$addr)), imm:$lane),
736          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
737
738let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
739
740// ...with address register writeback:
// Writeback variant of the VLD1 lane load.  Compared with VLD1LN it adds a
// GPR:$wb output and an am6offset:$Rm input (so Rm is an operand rather than
// a fixed 0b1111), ties $Rn.addr to $wb, uses the IIC_VLD1lnu itinerary and
// carries no selection pattern.
741class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
742  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
743          (ins addrmode6:$Rn, am6offset:$Rm,
744           DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
745          "\\{$Vd[$lane]\\}, $Rn$Rm",
746          "$src = $Vd, $Rn.addr = $wb", []> {
747  let DecoderMethod = "DecodeVLD1LN";
748}
749
// Writeback instances of VLD1LN.  The op11_8/op7_4 arguments and the
// Inst{...} assignments mirror the non-writeback VLD1LNd8/d16/d32 defs.
750def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
751  let Inst{7-5} = lane{2-0};
752}
753def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
754  let Inst{7-6} = lane{1-0};
755  let Inst{4}   = Rn{4};
756}
757def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
758  let Inst{7} = lane{0};
759  let Inst{5} = Rn{4};
760  let Inst{4} = Rn{4};
761}
762
// Q-register writeback pseudos; no pattern is attached to these.
763def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>;
764def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
765def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
766
767//   VLD2LN   : Vector Load (single 2-element structure to one lane)
// NLdStLn-based class for "vld2 {$Vd[$lane], $dst2[$lane]}, $Rn".
// Two D-register results, each tied to a matching source ($src1 = $Vd,
// $src2 = $dst2).  No selection pattern is attached.  Rm is fixed to 0b1111,
// Inst{4} is taken from Rn{4}, and decoding is routed to DecodeVLD2LN.
768class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
769  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
770          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
771          IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
772          "$src1 = $Vd, $src2 = $dst2", []> {
773  let Rm = 0b1111;
774  let Inst{4}   = Rn{4};
775  let DecoderMethod = "DecodeVLD2LN";
776}
777
// VLD2LN instances for 8/16/32-bit elements.  The lane index is written to
// the upper open bits of op7_4 (Inst{7-5}, Inst{7-6} or Inst{7}); the lowest
// open bit is supplied by the class via Inst{4} = Rn{4}.
778def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
779  let Inst{7-5} = lane{2-0};
780}
781def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
782  let Inst{7-6} = lane{1-0};
783}
784def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
785  let Inst{7} = lane{0};
786}
787
// Matching pseudos: the register pair is modelled as a single QPR operand
// (VLDQLNPseudo).
788def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>;
789def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
790def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
791
792// ...with double-spaced registers:
// These differ from VLD2LNd16/d32 only in one op7_4 bit, which is 1 here
// ({?,?,1,?} vs {?,?,0,?} and {?,1,0,?} vs {?,0,0,?}).  No 8-bit form is
// defined in this group.
793def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
794  let Inst{7-6} = lane{1-0};
795}
796def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
797  let Inst{7} = lane{0};
798}
799
// Matching pseudos operate on a QQPR register group.
800def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
801def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
802
803// ...with address register writeback:
// Writeback variant of VLD2LN: adds a GPR:$wb output and an am6offset:$Rm
// input, ties $Rn.addr to $wb in addition to the two register ties, and uses
// the IIC_VLD2lnu itinerary.  Rm is not fixed here.
804class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
805  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
806          (ins addrmode6:$Rn, am6offset:$Rm,
807           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
808          "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
809          "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
810  let Inst{4}   = Rn{4};
811  let DecoderMethod = "DecodeVLD2LN";
812}
813
// Writeback instances of VLD2LN.  Encoding arguments and lane placement
// mirror the non-writeback defs of the same name without the _UPD suffix.
814def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
815  let Inst{7-5} = lane{2-0};
816}
817def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
818  let Inst{7-6} = lane{1-0};
819}
820def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
821  let Inst{7} = lane{0};
822}
823
// Writeback pseudos for the d-forms (single QPR operand).
824def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
825def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
826def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
827
// q-forms with writeback: same op7_4 bit difference as VLD2LNq16/q32.
828def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
829  let Inst{7-6} = lane{1-0};
830}
831def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
832  let Inst{7} = lane{0};
833}
834
// Writeback pseudos for the q-forms (QQPR operand).
835def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
836def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
837
838//   VLD3LN   : Vector Load (single 3-element structure to one lane)
// NLdStLn-based class for "vld3 {$Vd[$lane], $dst2[$lane], $dst3[$lane]}, $Rn".
// Three D-register results, each tied to its corresponding source.  No
// selection pattern.  Rm is fixed to 0b1111; unlike VLD2LN/VLD4LN this class
// does not assign Inst{4} from Rn (the instances pass 0 for that bit).
839class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
840  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
841          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
842          nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
843          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
844          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
845  let Rm = 0b1111;
846  let DecoderMethod = "DecodeVLD3LN";
847}
848
// VLD3LN instances for 8/16/32-bit elements.  Only the lane-index bits are
// left open in op7_4; every other bit is a literal 0.
849def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
850  let Inst{7-5} = lane{2-0};
851}
852def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
853  let Inst{7-6} = lane{1-0};
854}
855def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
856  let Inst{7}   = lane{0};
857}
858
// Matching pseudos: the three registers are carried in a QQPR group.
859def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
860def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
861def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
862
863// ...with double-spaced registers:
// Same as VLD3LNd16/d32 except that one op7_4 bit is 1 instead of 0
// ({?,?,1,0} and {?,1,0,0}).  No 8-bit form is defined in this group.
864def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
865  let Inst{7-6} = lane{1-0};
866}
867def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
868  let Inst{7}   = lane{0};
869}
870
// Matching pseudos operate on a QQQQPR register group.
871def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
872def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
873
874// ...with address register writeback:
// Writeback variant of VLD3LN: adds a GPR:$wb output and an am6offset:$Rm
// input, ties $Rn.addr to $wb alongside the three register ties, and uses the
// IIC_VLD3lnu itinerary.  Rm is an operand here rather than a fixed value.
875class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
876  : NLdStLn<1, 0b10, op11_8, op7_4,
877          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
878          (ins addrmode6:$Rn, am6offset:$Rm,
879           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
880          IIC_VLD3lnu, "vld3", Dt,
881          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
882          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
883          []> {
884  let DecoderMethod = "DecodeVLD3LN";
885}
886
// Writeback instances of VLD3LN.  Encoding arguments and lane placement
// mirror the non-writeback defs of the same name without the _UPD suffix.
887def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
888  let Inst{7-5} = lane{2-0};
889}
890def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
891  let Inst{7-6} = lane{1-0};
892}
893def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
894  let Inst{7}   = lane{0};
895}
896
// Writeback pseudos for the d-forms (QQPR operand).
897def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
898def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
899def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
900
// q-forms with writeback: same op7_4 bit difference as VLD3LNq16/q32.
901def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
902  let Inst{7-6} = lane{1-0};
903}
904def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
905  let Inst{7}   = lane{0};
906}
907
// Writeback pseudos for the q-forms (QQQQPR operand).
908def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
909def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
910
911//   VLD4LN   : Vector Load (single 4-element structure to one lane)
// NLdStLn-based class for
// "vld4 {$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]}, $Rn".
// Four D-register results, each tied to its corresponding source.  No
// selection pattern.  Rm is fixed to 0b1111, Inst{4} is taken from Rn{4},
// and decoding is routed to DecodeVLD4LN.
912class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
913  : NLdStLn<1, 0b10, op11_8, op7_4,
914          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
915          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
916          nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
917          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
918          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
919  let Rm = 0b1111;
920  let Inst{4}   = Rn{4};
921  let DecoderMethod = "DecodeVLD4LN";
922}
923
// VLD4LN instances for 8/16/32-bit elements.  The lane index fills the upper
// open bits of op7_4; Inst{4} comes from the class.  The 32-bit form has one
// more open bit, which it drives from Rn{5}.
924def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
925  let Inst{7-5} = lane{2-0};
926}
927def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
928  let Inst{7-6} = lane{1-0};
929}
930def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
931  let Inst{7}   = lane{0};
932  let Inst{5} = Rn{5};
933}
934
// Matching pseudos: the four registers are carried in a QQPR group.
935def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
936def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
937def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
938
939// ...with double-spaced registers:
// Same as VLD4LNd16/d32 except that one op7_4 bit is 1 instead of 0
// ({?,?,1,?} and {?,1,?,?}).  No 8-bit form is defined in this group.
940def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
941  let Inst{7-6} = lane{1-0};
942}
943def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
944  let Inst{7}   = lane{0};
945  let Inst{5} = Rn{5};
946}
947
// Matching pseudos operate on a QQQQPR register group.
948def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
949def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
950
951// ...with address register writeback:
// Writeback variant of VLD4LN: adds a GPR:$wb output and an am6offset:$Rm
// input, ties $Rn.addr to $wb alongside the four register ties, and uses the
// IIC_VLD4lnu itinerary.  Inst{4} is still taken from Rn{4}; Rm is an operand.
952class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
953  : NLdStLn<1, 0b10, op11_8, op7_4,
954          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
955          (ins addrmode6:$Rn, am6offset:$Rm,
956           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
957          IIC_VLD4lnu, "vld4", Dt,
958"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
959"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
960          []> {
961  let Inst{4}   = Rn{4};
962  let DecoderMethod = "DecodeVLD4LN"  ;
963}
964
// Writeback instances of VLD4LN.  Encoding arguments and Inst{...}
// assignments mirror the non-writeback defs of the same name without _UPD.
965def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
966  let Inst{7-5} = lane{2-0};
967}
968def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
969  let Inst{7-6} = lane{1-0};
970}
971def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
972  let Inst{7}   = lane{0};
973  let Inst{5} = Rn{5};
974}
975
// Writeback pseudos for the d-forms (QQPR operand).
976def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
977def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
978def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
979
// q-forms with writeback: same op7_4 bit difference as VLD4LNq16/q32.
980def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
981  let Inst{7-6} = lane{1-0};
982}
983def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
984  let Inst{7}   = lane{0};
985  let Inst{5} = Rn{5};
986}
987
// Writeback pseudos for the q-forms (QQQQPR operand).
988def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
989def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
990
991} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
992
993//   VLD1DUP  : Vector Load (single element to all lanes)
// NLdSt-based class for "vld1 {$Vd[]}, $Rn" producing one D register.
//   op7_4      : encoding argument forwarded to NLdSt (op11_8 is 0b1100).
//   Dt         : data-type string passed next to the "vld1" mnemonic.
//   Ty, LoadOp : result vector type and load fragment; the pattern matches a
//                NEONvdup of a loaded i32.
// Rm is fixed to 0b1111, Inst{4} is taken from Rn{4}, and decoding is routed
// to DecodeVLD1DupInstruction.
994class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
995  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
996          IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
997          [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
998  let Rm = 0b1111;
999  let Inst{4} = Rn{4};
1000  let DecoderMethod = "DecodeVLD1DupInstruction";
1001}
// Q-register pseudo for VLD1DUP: reuses VLDQPseudo with the IIC_VLD1dup
// itinerary and attaches a pattern matching a NEONvdup of a loaded i32 that
// produces a QPR value of type Ty.
1002class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
1003  let Pattern = [(set QPR:$dst,
1004                      (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
1005}
1006
// D-register VLD1DUP instances.  The top two op7_4 bits distinguish the
// element size (00 / 01 / 10 for "8" / "16" / "32"); the lowest bit is left
// open and is set by the class from Rn{4}.
1007def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
1008def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
1009def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
1010
// Q-register pseudos carrying the selection patterns for the 128-bit types.
1011def VLD1DUPq8Pseudo  : VLD1QDUPPseudo<v16i8, extloadi8>;
1012def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
1013def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
1014
// Floating-point splat loads: a NEONvdup of a loaded f32 is selected to the
// 32-bit integer VLD1DUP instruction (v2f32) or pseudo (v4f32).
// NOTE(review): the source pattern names the address as addrmode6dup while
// the result uses addrmode6 for the same $addr -- confirm this is intended.
1015def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
1016          (VLD1DUPd32 addrmode6:$addr)>;
1017def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
1018          (VLD1DUPq32Pseudo addrmode6:$addr)>;
1019
1020let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
1021
// Two-register form of VLD1DUP: "vld1 {$Vd[], $dst2[]}, $Rn".  Same encoding
// constants as VLD1DUP (op11_8 = 0b1100, Rm = 0b1111, Inst{4} = Rn{4}) but
// with two D-register outputs and no selection pattern.
1022class VLD1QDUP<bits<4> op7_4, string Dt>
1023  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
1024          (ins addrmode6dup:$Rn), IIC_VLD1dup,
1025          "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
1026  let Rm = 0b1111;
1027  let Inst{4} = Rn{4};
1028  let DecoderMethod = "DecodeVLD1DupInstruction";
1029}
1030
// Two-register VLD1DUP instances; op7_4 matches the d-forms except that the
// third bit is 1.  The 8-bit form passes a literal 0 for the lowest bit, but
// the class body still assigns Inst{4} = Rn{4}.
1031def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8">;
1032def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
1033def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
1034
1035// ...with address register writeback:
// Writeback variant of the one-register VLD1DUP: adds a GPR:$wb output and
// an am6offset:$Rm input, ties $Rn.addr to $wb, uses IIC_VLD1dupu and has no
// selection pattern.  Rm is an operand here rather than a fixed value.
1036class VLD1DUPWB<bits<4> op7_4, string Dt>
1037  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
1038          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
1039          "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
1040  let Inst{4} = Rn{4};
1041  let DecoderMethod = "DecodeVLD1DupInstruction";
1042}
// Writeback variant of the two-register VLD1DUP (VLD1QDUP): same additions as
// VLD1DUPWB ($wb output, am6offset:$Rm input, $Rn.addr tied to $wb).
1043class VLD1QDUPWB<bits<4> op7_4, string Dt>
1044  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
1045          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
1046          "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
1047  let Inst{4} = Rn{4};
1048  let DecoderMethod = "DecodeVLD1DupInstruction";
1049}
1050
// Writeback instances of the one-register VLD1DUP.  The 8-bit form passes a
// literal 0 where the non-writeback VLD1DUPd8 leaves '?'; in both cases the
// class body assigns Inst{4} = Rn{4}.
1051def VLD1DUPd8_UPD  : VLD1DUPWB<{0,0,0,0}, "8">;
1052def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
1053def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
1054
// Writeback instances of the two-register form.
1055def VLD1DUPq8_UPD  : VLD1QDUPWB<{0,0,1,0}, "8">;
1056def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
1057def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
1058
// Q-register writeback pseudos; no pattern is attached to these.
1059def VLD1DUPq8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD1dupu>;
1060def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
1061def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
1062
1063//   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
// NLdSt-based class for "vld2 {$Vd[], $dst2[]}, $Rn" with two D-register
// outputs and no selection pattern.  op11_8 is 0b1101, Rm is fixed to 0b1111,
// Inst{4} is taken from Rn{4}; decoded by DecodeVLD2DupInstruction.
1064class VLD2DUP<bits<4> op7_4, string Dt>
1065  : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2),
1066          (ins addrmode6dup:$Rn), IIC_VLD2dup,
1067          "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
1068  let Rm = 0b1111;
1069  let Inst{4} = Rn{4};
1070  let DecoderMethod = "DecodeVLD2DupInstruction";
1071}
1072
// VLD2DUP instances; the top two op7_4 bits select the element size and the
// third bit is 0 for this (non-"x2") group.
1073def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8">;
1074def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
1075def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;
1076
// Matching pseudos: the register pair is modelled as a single QPR result.
1077def VLD2DUPd8Pseudo  : VLDQPseudo<IIC_VLD2dup>;
1078def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
1079def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
1080
1081// ...with double-spaced registers (not used for codegen):
// Identical to VLD2DUPd8/d16/d32 except that the third op7_4 bit is 1.
// No pseudos are defined for these forms.
1082def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8">;
1083def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
1084def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;
1085
1086// ...with address register writeback:
// Writeback variant of VLD2DUP: adds a GPR:$wb output and an am6offset:$Rm
// input, ties $Rn.addr to $wb and uses the IIC_VLD2dupu itinerary.
1087class VLD2DUPWB<bits<4> op7_4, string Dt>
1088  : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
1089          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
1090          "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
1091  let Inst{4} = Rn{4};
1092  let DecoderMethod = "DecodeVLD2DupInstruction";
1093}
1094
// Writeback instances of VLD2DUP.  The 8-bit forms pass a literal 0 for the
// lowest op7_4 bit; the class body assigns Inst{4} = Rn{4} regardless.
1095def VLD2DUPd8_UPD  : VLD2DUPWB<{0,0,0,0}, "8">;
1096def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
1097def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;
1098
// "x2" writeback forms: third op7_4 bit is 1.
1099def VLD2DUPd8x2_UPD  : VLD2DUPWB<{0,0,1,0}, "8">;
1100def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
1101def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;
1102
// Writeback pseudos (single QPR result plus $wb).
1103def VLD2DUPd8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD2dupu>;
1104def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
1105def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
1106
1107//   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
// NLdSt-based class for "vld3 {$Vd[], $dst2[], $dst3[]}, $Rn" with three
// D-register outputs and no selection pattern.  op11_8 is 0b1110 and Rm is
// fixed to 0b1111.  Unlike the VLD1/2/4 DUP classes, Inst{4} is forced to 0
// here instead of being taken from Rn{4}.
1108class VLD3DUP<bits<4> op7_4, string Dt>
1109  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1110          (ins addrmode6dup:$Rn), IIC_VLD3dup,
1111          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
1112  let Rm = 0b1111;
1113  let Inst{4} = 0;
1114  let DecoderMethod = "DecodeVLD3DupInstruction";
1115}
1116
// VLD3DUP instances; the lowest op7_4 bit is written as '?' but the class
// body fixes Inst{4} to 0.
1117def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
1118def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
1119def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
1120
// Matching pseudos: the three registers are carried in a QQPR result.
1121def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
1122def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
1123def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
1124
1125// ...with double-spaced registers (not used for codegen):
// Identical to VLD3DUPd8/d16/d32 except that the third op7_4 bit is 1.
// No pseudos are defined for these forms.
1126def VLD3DUPd8x2  : VLD3DUP<{0,0,1,?}, "8">;
1127def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
1128def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;
1129
1130// ...with address register writeback:
// Writeback variant of VLD3DUP: adds a GPR:$wb output and an am6offset:$Rm
// input, ties $Rn.addr to $wb and uses the IIC_VLD3dupu itinerary.  Inst{4}
// remains forced to 0, as in VLD3DUP.
1131class VLD3DUPWB<bits<4> op7_4, string Dt>
1132  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1133          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
1134          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
1135          "$Rn.addr = $wb", []> {
1136  let Inst{4} = 0;
1137  let DecoderMethod = "DecodeVLD3DupInstruction";
1138}
1139
// Writeback instances of VLD3DUP; element size is selected by the top two
// op7_4 bits, as in the non-writeback defs.
1140def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8">;
1141def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
1142def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
1143
// "x2" writeback forms: third op7_4 bit is 1.
1144def VLD3DUPd8x2_UPD  : VLD3DUPWB<{0,0,1,0}, "8">;
1145def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
1146def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
1147
// Writeback pseudos (QQPR result plus $wb).
1148def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
1149def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
1150def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
1151
1152//   VLD4DUP  : Vector Load (single 4-element structure to all lanes)
// NLdSt-based class for "vld4 {$Vd[], $dst2[], $dst3[], $dst4[]}, $Rn" with
// four D-register outputs and no selection pattern.  op11_8 is 0b1111, Rm is
// fixed to 0b1111, Inst{4} is taken from Rn{4}; decoded by
// DecodeVLD4DupInstruction.
1153class VLD4DUP<bits<4> op7_4, string Dt>
1154  : NLdSt<1, 0b10, 0b1111, op7_4,
1155          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1156          (ins addrmode6dup:$Rn), IIC_VLD4dup,
1157          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
1158  let Rm = 0b1111;
1159  let Inst{4} = Rn{4};
1160  let DecoderMethod = "DecodeVLD4DupInstruction";
1161}
1162
// VLD4DUP instances.  The 32-bit form leaves an extra op7_4 bit open and
// drives Inst{6} from Rn{5}; the 8- and 16-bit forms fix that bit to 0 / 1.
1163def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
1164def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
1165def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1166
// Matching pseudos: the four registers are carried in a QQPR result.
1167def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
1168def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
1169def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
1170
1171// ...with double-spaced registers (not used for codegen):
// Identical to VLD4DUPd8/d16/d32 except that the third op7_4 bit is 1.
// No pseudos are defined for these forms.
1172def VLD4DUPd8x2  : VLD4DUP<{0,0,1,?}, "8">;
1173def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
1174def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1175
1176// ...with address register writeback:
// Writeback variant of VLD4DUP: adds a GPR:$wb output and an am6offset:$Rm
// input, ties $Rn.addr to $wb and uses the IIC_VLD4dupu itinerary.
1177class VLD4DUPWB<bits<4> op7_4, string Dt>
1178  : NLdSt<1, 0b10, 0b1111, op7_4,
1179          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1180          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
1181          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
1182          "$Rn.addr = $wb", []> {
1183  let Inst{4} = Rn{4};
1184  let DecoderMethod = "DecodeVLD4DupInstruction";
1185}
1186
// Writeback instances of VLD4DUP.  As in the non-writeback defs, the 32-bit
// forms drive Inst{6} from Rn{5}.
1187def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
1188def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
1189def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1190
// "x2" writeback forms: third op7_4 bit is 1.
1191def VLD4DUPd8x2_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
1192def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
1193def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1194
// Writeback pseudos (QQPR result plus $wb).
1195def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
1196def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
1197def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
1198
1199} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
1200
1201let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
1202
1203// Classes for VST* pseudo-instructions with multi-register operands.
1204// These are expanded to real instructions after register allocation.
// Store pseudo over a single QPR source: no outputs, no operand constraints.
1205class VSTQPseudo<InstrItinClass itin>
1206  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
// Writeback form of VSTQPseudo: adds an am6offset:$offset input and a GPR:$wb
// output tied to $addr.addr.
1207class VSTQWBPseudo<InstrItinClass itin>
1208  : PseudoNLdSt<(outs GPR:$wb),
1209                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
1210                "$addr.addr = $wb">;
// Store pseudo over a QQPR source group: no outputs, no operand constraints.
1211class VSTQQPseudo<InstrItinClass itin>
1212  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
// Writeback form of VSTQQPseudo: adds an am6offset:$offset input and a
// GPR:$wb output tied to $addr.addr.
1213class VSTQQWBPseudo<InstrItinClass itin>
1214  : PseudoNLdSt<(outs GPR:$wb),
1215                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
1216                "$addr.addr = $wb">;
// Store pseudo over a QQQQPR source group: no outputs, no operand constraints.
1217class VSTQQQQPseudo<InstrItinClass itin>
1218  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
// Writeback form of VSTQQQQPseudo: adds an am6offset:$offset input and a
// GPR:$wb output tied to $addr.addr.
1219class VSTQQQQWBPseudo<InstrItinClass itin>
1220  : PseudoNLdSt<(outs GPR:$wb),
1221                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
1222                "$addr.addr = $wb">;
1223
1224//   VST1     : Vector Store (multiple single elements)
// NLdSt-based class for "vst1 $Vd, $Rn" where $Vd is a VecListOneD operand
// (the assembly string has no explicit braces, unlike the other VST classes
// here).  op11_8 is 0b0111, Rm is fixed to 0b1111, Inst{4} is taken from
// Rn{4}; decoded by DecodeVSTInstruction.  No selection pattern.
1225class VST1D<bits<4> op7_4, string Dt>
1226  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
1227          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
1228  let Rm = 0b1111;
1229  let Inst{4} = Rn{4};
1230  let DecoderMethod = "DecodeVSTInstruction";
1231}
// Two-register VST1: "vst1 {$Vd, $src2}, $Rn".  op11_8 is 0b1010, Rm is fixed
// to 0b1111 and two encoding bits (Inst{5-4}) are taken from Rn{5-4}.
1232class VST1Q<bits<4> op7_4, string Dt>
1233  : NLdSt<0,0b00,0b1010,op7_4, (outs),
1234          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2,
1235          "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
1236  let Rm = 0b1111;
1237  let Inst{5-4} = Rn{5-4};
1238  let DecoderMethod = "DecodeVSTInstruction";
1239}
1240
// One-register VST1 instances.  The top two op7_4 bits select the element
// size (00/01/10/11 for "8"/"16"/"32"/"64"); the lowest bit is left open for
// the class's Inst{4} = Rn{4}.
1241def  VST1d8   : VST1D<{0,0,0,?}, "8">;
1242def  VST1d16  : VST1D<{0,1,0,?}, "16">;
1243def  VST1d32  : VST1D<{1,0,0,?}, "32">;
1244def  VST1d64  : VST1D<{1,1,0,?}, "64">;
1245
// Two-register instances: the low two op7_4 bits are open (Inst{5-4}).
1246def  VST1q8   : VST1Q<{0,0,?,?}, "8">;
1247def  VST1q16  : VST1Q<{0,1,?,?}, "16">;
1248def  VST1q32  : VST1Q<{1,0,?,?}, "32">;
1249def  VST1q64  : VST1Q<{1,1,?,?}, "64">;
1250
// Pseudos for the two-register forms, taking a single QPR source.
1251def  VST1q8Pseudo  : VSTQPseudo<IIC_VST1x2>;
1252def  VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
1253def  VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
1254def  VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
1255
1256// ...with address register writeback:
// Writeback variant of the one-register VST1: produces GPR:$wb tied to
// $Rn.addr and takes am6offset:$Rm.  Note the stored register is a plain
// DPR:$Vd printed inside braces here, whereas VST1D uses VecListOneD.
1257class VST1DWB<bits<4> op7_4, string Dt>
1258  : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
1259          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u,
1260          "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
1261  let Inst{4} = Rn{4};
1262  let DecoderMethod = "DecodeVSTInstruction";
1263}
// Writeback variant of the two-register VST1: produces GPR:$wb tied to
// $Rn.addr, takes am6offset:$Rm, and uses the IIC_VST1x2u itinerary.
1264class VST1QWB<bits<4> op7_4, string Dt>
1265  : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
1266          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
1267          IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
1268          "$Rn.addr = $wb", []> {
1269  let Inst{5-4} = Rn{5-4};
1270  let DecoderMethod = "DecodeVSTInstruction";
1271}
1272
// Writeback instances of VST1; op7_4 arguments match the non-writeback defs
// of the same name without the _UPD suffix.
1273def VST1d8_UPD  : VST1DWB<{0,0,0,?}, "8">;
1274def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">;
1275def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">;
1276def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">;
1277
1278def VST1q8_UPD  : VST1QWB<{0,0,?,?}, "8">;
1279def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">;
1280def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">;
1281def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">;
1282
// Writeback pseudos for the two-register forms (single QPR source).
1283def VST1q8Pseudo_UPD  : VSTQWBPseudo<IIC_VST1x2u>;
1284def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
1285def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
1286def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
1287
1288// ...with 3 registers
// Three-register VST1: "vst1 {$Vd, $src2, $src3}, $Rn".  op11_8 is 0b0110,
// Rm is fixed to 0b1111 and Inst{4} is taken from Rn{4}.
1289class VST1D3<bits<4> op7_4, string Dt>
1290  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
1291          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
1292          IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
1293  let Rm = 0b1111;
1294  let Inst{4} = Rn{4};
1295  let DecoderMethod = "DecodeVSTInstruction";
1296}
// Writeback variant of the three-register VST1: produces GPR:$wb tied to
// $Rn.addr, takes am6offset:$Rm, and uses the IIC_VST1x3u itinerary.
1297class VST1D3WB<bits<4> op7_4, string Dt>
1298  : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
1299          (ins addrmode6:$Rn, am6offset:$Rm,
1300           DPR:$Vd, DPR:$src2, DPR:$src3),
1301          IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
1302          "$Rn.addr = $wb", []> {
1303  let Inst{4} = Rn{4};
1304  let DecoderMethod = "DecodeVSTInstruction";
1305}
1306
// Three-register VST1 instances ("T" suffix), without and with writeback.
1307def VST1d8T      : VST1D3<{0,0,0,?}, "8">;
1308def VST1d16T     : VST1D3<{0,1,0,?}, "16">;
1309def VST1d32T     : VST1D3<{1,0,0,?}, "32">;
1310def VST1d64T     : VST1D3<{1,1,0,?}, "64">;
1311
1312def VST1d8T_UPD  : VST1D3WB<{0,0,0,?}, "8">;
1313def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
1314def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
1315def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
1316
// Pseudos exist only for the 64-bit form; the registers are carried in a
// QQPR source group.
1317def VST1d64TPseudo     : VSTQQPseudo<IIC_VST1x3>;
1318def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
1319
1320// ...with 4 registers
// Four-register VST1: "vst1 {$Vd, $src2, $src3, $src4}, $Rn".  op11_8 is
// 0b0010, Rm is fixed to 0b1111 and Inst{5-4} is taken from Rn{5-4}.
1321class VST1D4<bits<4> op7_4, string Dt>
1322  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
1323          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
1324          IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
1325          []> {
1326  let Rm = 0b1111;
1327  let Inst{5-4} = Rn{5-4};
1328  let DecoderMethod = "DecodeVSTInstruction";
1329}
// Writeback variant of the four-register VST1: produces GPR:$wb tied to
// $Rn.addr, takes am6offset:$Rm, and uses the IIC_VST1x4u itinerary.
1330class VST1D4WB<bits<4> op7_4, string Dt>
1331  : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
1332          (ins addrmode6:$Rn, am6offset:$Rm,
1333           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
1334          "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
1335          "$Rn.addr = $wb", []> {
1336  let Inst{5-4} = Rn{5-4};
1337  let DecoderMethod = "DecodeVSTInstruction";
1338}
1339
// Four-register VST1 instances ("Q" suffix), without and with writeback.
1340def VST1d8Q      : VST1D4<{0,0,?,?}, "8">;
1341def VST1d16Q     : VST1D4<{0,1,?,?}, "16">;
1342def VST1d32Q     : VST1D4<{1,0,?,?}, "32">;
1343def VST1d64Q     : VST1D4<{1,1,?,?}, "64">;
1344
1345def VST1d8Q_UPD  : VST1D4WB<{0,0,?,?}, "8">;
1346def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
1347def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
1348def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
1349
// Pseudos exist only for the 64-bit form; the registers are carried in a
// QQPR source group.
1350def VST1d64QPseudo     : VSTQQPseudo<IIC_VST1x4>;
1351def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
1352
1353//   VST2     : Vector Store (multiple 2-element structures)
// Two-register VST2: "vst2 {$Vd, $src2}, $Rn".  op11_8 is a parameter so the
// same class serves both register spacings used below (0b1000 and 0b1001).
// Rm is fixed to 0b1111 and Inst{5-4} is taken from Rn{5-4}.
1354class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
1355  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
1356          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
1357          IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
1358  let Rm = 0b1111;
1359  let Inst{5-4} = Rn{5-4};
1360  let DecoderMethod = "DecodeVSTInstruction";
1361}
// Four-register VST2: "vst2 {$Vd, $src2, $src3, $src4}, $Rn" with op11_8
// fixed to 0b0011.  Rm is fixed to 0b1111 and Inst{5-4} comes from Rn{5-4}.
1362class VST2Q<bits<4> op7_4, string Dt>
1363  : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
1364          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
1365          IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
1366          "", []> {
1367  let Rm = 0b1111;
1368  let Inst{5-4} = Rn{5-4};
1369  let DecoderMethod = "DecodeVSTInstruction";
1370}
1371
// Two-register VST2 instances (op11_8 = 0b1000); the top two op7_4 bits
// select the element size.
1372def  VST2d8   : VST2D<0b1000, {0,0,?,?}, "8">;
1373def  VST2d16  : VST2D<0b1000, {0,1,?,?}, "16">;
1374def  VST2d32  : VST2D<0b1000, {1,0,?,?}, "32">;
1375
// Four-register VST2 instances.
1376def  VST2q8   : VST2Q<{0,0,?,?}, "8">;
1377def  VST2q16  : VST2Q<{0,1,?,?}, "16">;
1378def  VST2q32  : VST2Q<{1,0,?,?}, "32">;
1379
// Pseudos for the two-register forms (single QPR source).
1380def  VST2d8Pseudo  : VSTQPseudo<IIC_VST2>;
1381def  VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
1382def  VST2d32Pseudo : VSTQPseudo<IIC_VST2>;
1383
// Pseudos for the four-register forms (QQPR source).
1384def  VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
1385def  VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
1386def  VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
1387
1388// ...with address register writeback:
// Writeback variant of VST2D: produces GPR:$wb tied to $Rn.addr, takes
// am6offset:$Rm, and uses the IIC_VST2u itinerary.
1389class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1390  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1391          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
1392          IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
1393          "$Rn.addr = $wb", []> {
1394  let Inst{5-4} = Rn{5-4};
1395  let DecoderMethod = "DecodeVSTInstruction";
1396}
// Writeback variant of VST2Q: produces GPR:$wb tied to $Rn.addr, takes
// am6offset:$Rm, and uses the IIC_VST2x2u itinerary.
1397class VST2QWB<bits<4> op7_4, string Dt>
1398  : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1399          (ins addrmode6:$Rn, am6offset:$Rm,
1400           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
1401          "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
1402          "$Rn.addr = $wb", []> {
1403  let Inst{5-4} = Rn{5-4};
1404  let DecoderMethod = "DecodeVSTInstruction";
1405}
1406
// Writeback instances of VST2; encoding arguments match the non-writeback
// defs of the same name without the _UPD suffix.
1407def VST2d8_UPD  : VST2DWB<0b1000, {0,0,?,?}, "8">;
1408def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
1409def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
1410
1411def VST2q8_UPD  : VST2QWB<{0,0,?,?}, "8">;
1412def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
1413def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
1414
// Writeback pseudos for the two-register forms (single QPR source).
1415def VST2d8Pseudo_UPD  : VSTQWBPseudo<IIC_VST2u>;
1416def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
1417def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
1418
// Writeback pseudos for the four-register forms (QQPR source).
1419def VST2q8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST2x2u>;
1420def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
1421def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
1422
1423// ...with double-spaced registers
// Same classes as VST2d8/d16/d32 (and their _UPD forms) but with
// op11_8 = 0b1001 instead of 0b1000.  No pseudos are defined for these.
1424def VST2b8      : VST2D<0b1001, {0,0,?,?}, "8">;
1425def VST2b16     : VST2D<0b1001, {0,1,?,?}, "16">;
1426def VST2b32     : VST2D<0b1001, {1,0,?,?}, "32">;
1427def VST2b8_UPD  : VST2DWB<0b1001, {0,0,?,?}, "8">;
1428def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
1429def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
1430
1431//   VST3     : Vector Store (multiple 3-element structures)
// Three-register VST3: "vst3 {$Vd, $src2, $src3}, $Rn".  op11_8 is a
// parameter (0b0100 and 0b0101 are used below).  Rm is fixed to 0b1111 and
// Inst{4} is taken from Rn{4}.
1432class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
1433  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
1434          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
1435          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
1436  let Rm = 0b1111;
1437  let Inst{4} = Rn{4};
1438  let DecoderMethod = "DecodeVSTInstruction";
1439}
1440
// VST3 instances with op11_8 = 0b0100; the top two op7_4 bits select the
// element size.
1441def  VST3d8   : VST3D<0b0100, {0,0,0,?}, "8">;
1442def  VST3d16  : VST3D<0b0100, {0,1,0,?}, "16">;
1443def  VST3d32  : VST3D<0b0100, {1,0,0,?}, "32">;
1444
// Matching pseudos: the three registers are carried in a QQPR source group.
1445def  VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
1446def  VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
1447def  VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
1448
1449// ...with address register writeback:
// Writeback variant of VST3D: produces GPR:$wb tied to $Rn.addr, takes
// am6offset:$Rm, and uses the IIC_VST3u itinerary.
1450class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1451  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1452          (ins addrmode6:$Rn, am6offset:$Rm,
1453           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
1454          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
1455          "$Rn.addr = $wb", []> {
1456  let Inst{4} = Rn{4};
1457  let DecoderMethod = "DecodeVSTInstruction";
1458}
1459
// Writeback instances of VST3 (op11_8 = 0b0100) and their QQPR pseudos.
1460def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
1461def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
1462def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
1463
1464def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
1465def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
1466def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
1467
1468// ...with double-spaced registers:
// Same classes as the VST3d* defs but with op11_8 = 0b0101.
1469def VST3q8      : VST3D<0b0101, {0,0,0,?}, "8">;
1470def VST3q16     : VST3D<0b0101, {0,1,0,?}, "16">;
1471def VST3q32     : VST3D<0b0101, {1,0,0,?}, "32">;
1472def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
1473def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
1474def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
1475
// Only writeback pseudos are defined for these forms; they take a QQQQPR
// source group.
1476def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
1477def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
1478def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
1479
1480// ...alternate versions to be allocated odd register numbers:
// "odd" VST3 pseudos over a QQQQPR source group, without and with writeback.
// They use the same pseudo classes and itineraries as the other VST3 q-form
// pseudos; how the odd registers are chosen is not visible in this file chunk.
1481def VST3q8oddPseudo   : VSTQQQQPseudo<IIC_VST3>;
1482def VST3q16oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
1483def VST3q32oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
1484
1485def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
1486def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
1487def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
1488
1489//   VST4     : Vector Store (multiple 4-element structures)
// Four-register VST4: "vst4 {$Vd, $src2, $src3, $src4}, $Rn".  op11_8 is a
// parameter (0b0000 and 0b0001 are used below).  Rm is fixed to 0b1111 and
// Inst{5-4} is taken from Rn{5-4}.
1490class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
1491  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
1492          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
1493          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
1494          "", []> {
1495  let Rm = 0b1111;
1496  let Inst{5-4} = Rn{5-4};
1497  let DecoderMethod = "DecodeVSTInstruction";
1498}
1499
// VST4 instances with op11_8 = 0b0000; the top two op7_4 bits select the
// element size and the low two are left open for Inst{5-4}.
1500def  VST4d8   : VST4D<0b0000, {0,0,?,?}, "8">;
1501def  VST4d16  : VST4D<0b0000, {0,1,?,?}, "16">;
1502def  VST4d32  : VST4D<0b0000, {1,0,?,?}, "32">;
1503
// Matching pseudos: the four registers are carried in a QQPR source group.
1504def  VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
1505def  VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
1506def  VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
1507
1508// ...with address register writeback:
// Writeback variant of VST4D: produces GPR:$wb tied to $Rn.addr, takes
// am6offset:$Rm, and uses the IIC_VST4u itinerary.
1509class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1510  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1511          (ins addrmode6:$Rn, am6offset:$Rm,
1512           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
1513           "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
1514          "$Rn.addr = $wb", []> {
1515  let Inst{5-4} = Rn{5-4};
1516  let DecoderMethod = "DecodeVSTInstruction";
1517}
1518
// Writeback VST4 records; same op11_8/op7_4 values as VST4d8/16/32 but built
// on VST4DWB.
1519def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
1520def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
1521def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
1522
// Matching writeback pseudo records (VSTQQWBPseudo is defined outside this
// view).
1523def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
1524def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
1525def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
1526
1527// ...with double-spaced registers:
// Identical to the VST4d* records except that op11_8 is 0b0001 instead of
// 0b0000.
1528def VST4q8      : VST4D<0b0001, {0,0,?,?}, "8">;
1529def VST4q16     : VST4D<0b0001, {0,1,?,?}, "16">;
1530def VST4q32     : VST4D<0b0001, {1,0,?,?}, "32">;
1531def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
1532def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
1533def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
1534
// Writeback pseudos taking a QQQQ register operand (see VSTQQQQWBPseudo,
// defined outside this view).
1535def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
1536def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
1537def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
1538
1539// ...alternate versions to be allocated odd register numbers:
// Non-writeback forms (IIC_VST4 itinerary).
1540def VST4q8oddPseudo   : VSTQQQQPseudo<IIC_VST4>;
1541def VST4q16oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
1542def VST4q32oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
1543
// Writeback forms (IIC_VST4u itinerary).
1544def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
1545def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
1546def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
1547
1548} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
1549
1550// Classes for VST*LN pseudo-instructions with multi-register operands.
1551// These are expanded to real instructions after register allocation.
//
// Each class takes an itinerary and comes in two flavours:
//   * plain: (addrmode6:$addr, <regs>:$src, nohash_imm:$lane), no results,
//     empty constraint string;
//   * WB:    additionally takes am6offset:$offset, produces GPR:$wb and ties
//     it to the address with "$addr.addr = $wb".
// The only other difference between the pairs is the register class of $src.

// $src is a single Q register.
1552class VSTQLNPseudo<InstrItinClass itin>
1553  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
1554                itin, "">;
1555class VSTQLNWBPseudo<InstrItinClass itin>
1556  : PseudoNLdSt<(outs GPR:$wb),
1557                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
1558                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
// $src is a QQPR operand.
1559class VSTQQLNPseudo<InstrItinClass itin>
1560  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
1561                itin, "">;
1562class VSTQQLNWBPseudo<InstrItinClass itin>
1563  : PseudoNLdSt<(outs GPR:$wb),
1564                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
1565                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
// $src is a QQQQPR operand.
1566class VSTQQQQLNPseudo<InstrItinClass itin>
1567  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
1568                itin, "">;
1569class VSTQQQQLNWBPseudo<InstrItinClass itin>
1570  : PseudoNLdSt<(outs GPR:$wb),
1571                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
1572                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
1573
1574//   VST1LN   : Vector Store (single element from one lane)
// Base class for the non-writeback "vst1" lane stores from a D register.
//   op11_8, op7_4 : encoding fields forwarded to NLdStLn.
//   Dt            : data-type suffix string.
//   Ty            : vector type given to DPR:$Vd in the pattern.
//   StoreOp       : store fragment used at the root of the pattern.
//   ExtractOp     : node that extracts lane $lane from $Vd.
// Selects (StoreOp (ExtractOp $Vd, $lane), $Rn).
1575class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1576             PatFrag StoreOp, SDNode ExtractOp>
1577  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
1578          (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
1579          IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
1580          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
  // Rm is not an operand in this form; it is hard-wired to 0b1111.
1581  let Rm = 0b1111;
1582  let DecoderMethod = "DecodeVST1LN";
1583}
// Same as VST1LN except that the address operand (in both the operand list
// and the pattern) is addrmode6oneL32 instead of addrmode6.  Used below only
// for the 32-bit record VST1LNd32.
1584class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1585             PatFrag StoreOp, SDNode ExtractOp>
1586  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
1587          (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane),
1588          IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
1589          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{
  // Rm is not an operand in this form; it is hard-wired to 0b1111.
1590  let Rm = 0b1111;
1591  let DecoderMethod = "DecodeVST1LN";
1592}
// Q-register pseudo for the single-lane store: a VSTQLNPseudo (IIC_VST1ln)
// whose selection pattern is (StoreOp (ExtractOp (Ty QPR:$src), $lane), $addr).
1593class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
1594  : VSTQLNPseudo<IIC_VST1ln> {
1595  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
1596                          addrmode6:$addr)];
1597}
1598
// D-register single-lane stores.  The '?' bits of op7_4 are filled in by the
// "let Inst{...}" assignments in each record body.

// 8-bit: lane index occupies Inst{7-5}; Inst{4} is fixed to 0 by op7_4.
1599def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
1600                       NEONvgetlaneu> {
1601  let Inst{7-5} = lane{2-0};
1602}
// 16-bit: lane index occupies Inst{7-6}; Inst{5} is fixed to 0 by op7_4.
1603def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
1604                       NEONvgetlaneu> {
1605  let Inst{7-6} = lane{1-0};
  // NOTE(review): Inst{4} is taken from Rn{5} here, whereas class VST2LN
  // feeds the same encoding bit from Rn{4}.  Confirm against the addrmode6
  // operand encoder that bit 5 (not bit 4) is the intended source.
1606  let Inst{4}   = Rn{5};
1607}
1608
// 32-bit: lane index is Inst{7}; Inst{6} is fixed to 0 by op7_4 and
// Inst{5-4} are copied from the addrmode6oneL32 operand value.
1609def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
1610  let Inst{7}   = lane{0};
1611  let Inst{5-4} = Rn{5-4};
1612}
1613
// Q-register pseudos for the single-lane store, one per element size.
1614def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
1615def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
1616def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
1617
// Floating-point lane stores reuse the 32-bit integer records: v2f32 maps to
// VST1LNd32 and v4f32 to VST1LNq32Pseudo.
1618def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
1619          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
1620def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
1621          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1622
1623// ...with address register writeback:
// Writeback counterpart of VST1LN.  Adds an am6offset:$Rm input and a
// GPR:$wb result tied to the address by "$Rn.addr = $wb".  The pattern sets
// $wb from (StoreOp (ExtractOp $Vd, $lane), $Rn, $Rm), so StoreOp must be a
// store fragment that yields a value (the records below pass post_* fragments).
// Rm is an operand here, so it is not forced to 0b1111.
1624class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1625               PatFrag StoreOp, SDNode ExtractOp>
1626  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
1627          (ins addrmode6:$Rn, am6offset:$Rm,
1628           DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
1629          "\\{$Vd[$lane]\\}, $Rn$Rm",
1630          "$Rn.addr = $wb",
1631          [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
1632                                  addrmode6:$Rn, am6offset:$Rm))]> {
1633  let DecoderMethod = "DecodeVST1LN";
1634}
// Q-register writeback pseudo for the single-lane store: a VSTQLNWBPseudo
// (IIC_VST1lnu) whose pattern sets $wb from
// (StoreOp (ExtractOp (Ty QPR:$src), $lane), $addr, $offset).
1635class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
1636  : VSTQLNWBPseudo<IIC_VST1lnu> {
1637  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
1638                                        addrmode6:$addr, am6offset:$offset))];
1639}
1640
// Writeback D-register single-lane stores.  Encoding arguments and lane
// placement mirror VST1LNd8/16/32; the store fragments are the post_*
// variants.

// 8-bit: lane index occupies Inst{7-5}.
1641def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
1642                             NEONvgetlaneu> {
1643  let Inst{7-5} = lane{2-0};
1644}
// 16-bit: lane index occupies Inst{7-6}.
1645def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
1646                             NEONvgetlaneu> {
1647  let Inst{7-6} = lane{1-0};
  // NOTE(review): Inst{4} is taken from Rn{5} here, whereas class VST2LN
  // feeds the same encoding bit from Rn{4}.  Confirm against the addrmode6
  // operand encoder that bit 5 (not bit 4) is the intended source.
1648  let Inst{4}   = Rn{5};
1649}
// 32-bit: lane index is Inst{7}; Inst{5-4} are copied from $Rn.  Unlike
// VST1LNd32, this record's address operand is plain addrmode6 (from
// VST1LNWB), not addrmode6oneL32.
1650def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
1651                             extractelt> {
1652  let Inst{7}   = lane{0};
1653  let Inst{5-4} = Rn{5-4};
1654}
1655
// Q-register writeback pseudos, one per element size.
1656def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
1657def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
1658def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
1659
1660let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
1661
1662//   VST2LN   : Vector Store (single 2-element structure from one lane)
// Base class for the non-writeback "vst2" lane stores.
//   op11_8, op7_4 : encoding fields forwarded to NLdStLn.
//   Dt            : data-type suffix string.
// Takes an addrmode6 address, two D registers and a lane number; no results
// and no selection pattern ([]).
1663class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1664  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
1665          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
1666          IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
1667          "", []> {
  // Rm is not an operand in this form; it is hard-wired to 0b1111.
1668  let Rm = 0b1111;
  // Encoding bit 4 comes from bit 4 of the $Rn operand value.
1669  let Inst{4}   = Rn{4};
1670  let DecoderMethod = "DecodeVST2LN";
1671}
1672
// VST2LN records for 8/16/32-bit elements.  The lane index is placed in the
// top bits of op7_4: three bits for 8-bit, two for 16-bit, one for 32-bit.
// Inst{4} is supplied by the VST2LN class.
1673def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
1674  let Inst{7-5} = lane{2-0};
1675}
1676def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
1677  let Inst{7-6} = lane{1-0};
1678}
1679def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
1680  let Inst{7}   = lane{0};
1681}
1682
// Matching pseudo records taking a single Q register operand.
1683def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
1684def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
1685def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
1686
1687// ...with double-spaced registers:
// Differ from VST2LNd16/VST2LNd32 only in one op7_4 bit: Inst{5} is 1 for the
// 16-bit record and Inst{6} is 1 for the 32-bit record (both 0 in the d
// forms).  The "let Inst{4} = Rn{4}" lines repeat the assignment already made
// by class VST2LN.
1688def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
1689  let Inst{7-6} = lane{1-0};
1690  let Inst{4}   = Rn{4};
1691}
1692def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
1693  let Inst{7}   = lane{0};
1694  let Inst{4}   = Rn{4};
1695}
1696
// Matching pseudo records taking a QQPR operand.
1697def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
1698def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
1699
1700// ...with address register writeback:
// Writeback counterpart of VST2LN.
//   op11_8, op7_4 : encoding fields forwarded to NLdStLn.
//   Dt            : data-type suffix string.
// Adds an am6offset:$Rm input and a GPR:$wb result tied to the address by
// "$Rn.addr = $wb".  No selection pattern ([]).
//
// The operands are named $Rn / $Rm / $Vd, exactly as in VST2LN and in the
// other *LNWB classes (VST1LNWB, VST3LNWB, VST4LNWB), so that the names agree
// with the "Rn" field referenced by the Inst{4} assignment below.  Operand
// order and the printed assembly are unchanged.
1701class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1702  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
1703          (ins addrmode6:$Rn, am6offset:$Rm,
1704           DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
1705          "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
1706          "$Rn.addr = $wb", []> {
  // Encoding bit 4 comes from bit 4 of the $Rn operand value.
1707  let Inst{4}   = Rn{4};
1708  let DecoderMethod = "DecodeVST2LN";
1709}
1710
// Writeback VST2LN records.  Encoding arguments and lane placement mirror
// the non-writeback VST2LNd* / VST2LNq* records; Inst{4} is supplied by the
// VST2LNWB class.
1711def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
1712  let Inst{7-5} = lane{2-0};
1713}
1714def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
1715  let Inst{7-6} = lane{1-0};
1716}
1717def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
1718  let Inst{7}   = lane{0};
1719}
1720
// Writeback pseudo records taking a single Q register operand.
1721def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
1722def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
1723def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
1724
// Writeback forms of VST2LNq16 / VST2LNq32 (same op7_4 patterns).
1725def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
1726  let Inst{7-6} = lane{1-0};
1727}
1728def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
1729  let Inst{7}   = lane{0};
1730}
1731
// Writeback pseudo records taking a QQPR operand.
1732def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
1733def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
1734
1735//   VST3LN   : Vector Store (single 3-element structure from one lane)
// Base class for the non-writeback "vst3" lane stores.
//   op11_8, op7_4 : encoding fields forwarded to NLdStLn.
//   Dt            : data-type suffix string.
// Takes an addrmode6 address, three D registers and a lane number; no results
// and no selection pattern ([]).  Unlike VST2LN and VST4LN, no encoding bit
// is taken from $Rn in this class.
1736class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1737  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
1738          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
1739           nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
1740          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
  // Rm is not an operand in this form; it is hard-wired to 0b1111.
1741  let Rm = 0b1111;
1742  let DecoderMethod = "DecodeVST3LN";
1743}
1744
// VST3LN records for 8/16/32-bit elements.  The lane index fills the top
// bits of op7_4; every remaining op7_4 bit is a fixed constant, with Inst{4}
// always 0.
1745def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
1746  let Inst{7-5} = lane{2-0};
1747}
1748def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
1749  let Inst{7-6} = lane{1-0};
1750}
1751def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
1752  let Inst{7}   = lane{0};
1753}
1754
// Matching pseudo records taking a QQPR operand.
1755def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
1756def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
1757def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
1758
1759// ...with double-spaced registers:
// Same as the d forms except Inst{5} = 1 (16-bit) or Inst{6} = 1 (32-bit).
1760def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
1761  let Inst{7-6} = lane{1-0};
1762}
1763def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
1764  let Inst{7}   = lane{0};
1765}
1766
// Matching pseudo records taking a QQQQPR operand.
1767def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
1768def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
1769
1770// ...with address register writeback:
// Writeback counterpart of VST3LN.  Adds an am6offset:$Rm input and a
// GPR:$wb result tied to the address by "$Rn.addr = $wb".  Rm is an operand
// here, so it is not forced to 0b1111.  No selection pattern ([]).
1771class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1772  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
1773          (ins addrmode6:$Rn, am6offset:$Rm,
1774           DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
1775          IIC_VST3lnu, "vst3", Dt,
1776          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
1777          "$Rn.addr = $wb", []> {
1778  let DecoderMethod = "DecodeVST3LN";
1779}
1780
// Writeback VST3LN records.  Encoding arguments and lane placement mirror
// the non-writeback VST3LNd* / VST3LNq* records.
1781def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
1782  let Inst{7-5} = lane{2-0};
1783}
1784def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
1785  let Inst{7-6} = lane{1-0};
1786}
1787def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
1788  let Inst{7}   = lane{0};
1789}
1790
// Writeback pseudo records taking a QQPR operand.
1791def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
1792def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
1793def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
1794
// Writeback forms of VST3LNq16 / VST3LNq32 (same op7_4 patterns).
1795def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
1796  let Inst{7-6} = lane{1-0};
1797}
1798def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
1799  let Inst{7}   = lane{0};
1800}
1801
// Writeback pseudo records taking a QQQQPR operand.
1802def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
1803def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
1804
1805//   VST4LN   : Vector Store (single 4-element structure from one lane)
// Base class for the non-writeback "vst4" lane stores.
//   op11_8, op7_4 : encoding fields forwarded to NLdStLn.
//   Dt            : data-type suffix string.
// Takes an addrmode6 address, four D registers and a lane number; no results
// and no selection pattern ([]).
1806class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1807  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
1808          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
1809           nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
1810          "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
1811          "", []> {
  // Rm is not an operand in this form; it is hard-wired to 0b1111.
1812  let Rm = 0b1111;
  // Encoding bit 4 comes from bit 4 of the $Rn operand value.
1813  let Inst{4} = Rn{4};
1814  let DecoderMethod = "DecodeVST4LN";
1815}
1816
// VST4LN records for 8/16/32-bit elements.  The lane index fills the top
// bits of op7_4 and Inst{4} is supplied by the VST4LN class.  The 32-bit
// records additionally copy Rn{5} into Inst{5}.
1817def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
1818  let Inst{7-5} = lane{2-0};
1819}
1820def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
1821  let Inst{7-6} = lane{1-0};
1822}
1823def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
1824  let Inst{7}   = lane{0};
1825  let Inst{5} = Rn{5};
1826}
1827
// Matching pseudo records taking a QQPR operand.
1828def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
1829def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
1830def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
1831
1832// ...with double-spaced registers:
// Same as the d forms except Inst{5} = 1 (16-bit) or Inst{6} = 1 (32-bit).
1833def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
1834  let Inst{7-6} = lane{1-0};
1835}
1836def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
1837  let Inst{7}   = lane{0};
1838  let Inst{5} = Rn{5};
1839}
1840
// Matching pseudo records taking a QQQQPR operand.
1841def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
1842def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
1843
1844// ...with address register writeback:
// Writeback counterpart of VST4LN.  Adds an am6offset:$Rm input and a
// GPR:$wb result tied to the address by "$Rn.addr = $wb".  Rm is an operand
// here, so it is not forced to 0b1111.  No selection pattern ([]).
1845class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1846  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
1847          (ins addrmode6:$Rn, am6offset:$Rm,
1848           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
1849          IIC_VST4lnu, "vst4", Dt,
1850  "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
1851          "$Rn.addr = $wb", []> {
  // Encoding bit 4 comes from bit 4 of the $Rn operand value.
1852  let Inst{4} = Rn{4};
1853  let DecoderMethod = "DecodeVST4LN";
1854}
1855
// Writeback VST4LN records.  Encoding arguments and lane placement mirror
// the non-writeback VST4LNd* / VST4LNq* records; Inst{4} is supplied by the
// VST4LNWB class and the 32-bit records also copy Rn{5} into Inst{5}.
1856def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
1857  let Inst{7-5} = lane{2-0};
1858}
1859def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
1860  let Inst{7-6} = lane{1-0};
1861}
1862def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
1863  let Inst{7}   = lane{0};
1864  let Inst{5} = Rn{5};
1865}
1866
// Writeback pseudo records taking a QQPR operand.
1867def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
1868def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
1869def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
1870
// Writeback forms of VST4LNq16 / VST4LNq32 (same op7_4 patterns).
1871def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
1872  let Inst{7-6} = lane{1-0};
1873}
1874def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
1875  let Inst{7}   = lane{0};
1876  let Inst{5} = Rn{5};
1877}
1878
// Writeback pseudo records taking a QQQQPR operand.
1879def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
1880def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
1881
1882} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
1883
1884
1885//===----------------------------------------------------------------------===//
1886// NEON pattern fragments
1887//===----------------------------------------------------------------------===//
1888
1889// Extract D sub-registers of Q registers.
// Each transform turns an immediate into an i32 target constant holding a D
// sub-register index: ARM::dsub_0 plus the immediate divided by a per-type
// divisor (8 for i8, 4 for i16, 2 for i32, 1 for f64).  The divisors look like
// the number of elements of that type per D register, i.e. the immediate is
// presumably a lane index -- confirm at the use sites.
// The assert checks that the dsub_* indices are numbered consecutively, which
// the "dsub_0 + n" arithmetic relies on.
1890def DSubReg_i8_reg  : SDNodeXForm<imm, [{
1891  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
1892  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
1893}]>;
1894def DSubReg_i16_reg : SDNodeXForm<imm, [{
1895  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
1896  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
1897}]>;
1898def DSubReg_i32_reg : SDNodeXForm<imm, [{
1899  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
1900  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
1901}]>;
1902def DSubReg_f64_reg : SDNodeXForm<imm, [{
1903  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
1904  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
1905}]>;
1906
1907// Extract S sub-registers of Q/D registers.
// Turns an immediate n into the i32 target constant ARM::ssub_0 + n.  The
// assert checks that the ssub_* indices are numbered consecutively, which the
// addition relies on.
1908def SSubReg_f32_reg : SDNodeXForm<imm, [{
1909  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
1910  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
1911}]>;
1912
1913// Translate lane numbers from Q registers to D subregs.
// Each transform masks the immediate to keep only its low bits (& 7 for i8,
// & 3 for i16, & 1 for i32) and returns the result as an i32 target constant.
// The masks are one less than the divisors used by the DSubReg_*_reg
// transforms, so these yield the remainder of the same division.
1914def SubReg_i8_lane  : SDNodeXForm<imm, [{
1915  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
1916}]>;
1917def SubReg_i16_lane : SDNodeXForm<imm, [{
1918  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
1919}]>;
1920def SubReg_i32_lane : SDNodeXForm<imm, [{
1921  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
1922}]>;
1923
1924//===----------------------------------------------------------------------===//
1925// Instruction Classes
1926//===----------------------------------------------------------------------===//
1927
1928// Basic 2-register operations: double- and quad-register.
// Both classes forward the op* encoding fields to N2V and select
// (ResTy (OpNode (OpTy $Vm))) into $Vd.
//   N2VD : DPR operands, sixth N2V argument 0, itinerary IIC_VUNAD.
//   N2VQ : QPR operands, sixth N2V argument 1, itinerary IIC_VUNAQ.
1929class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
1930           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
1931           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
1932  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
1933        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
1934        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
1935class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
1936           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
1937           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
1938  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
1939        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
1940        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
1941
1942// Basic 2-register intrinsics, both double- and quad-register.
// Like N2VD / N2VQ, but the operation is an Intrinsic (IntOp) rather than an
// SDNode, and the itinerary is a class parameter instead of being fixed.
//   N2VDInt : DPR operands, sixth N2V argument 0.
//   N2VQInt : QPR operands, sixth N2V argument 1.
1943class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
1944              bits<2> op17_16, bits<5> op11_7, bit op4,
1945              InstrItinClass itin, string OpcodeStr, string Dt,
1946              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
1947  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
1948        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
1949        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
1950class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
1951              bits<2> op17_16, bits<5> op11_7, bit op4,
1952              InstrItinClass itin, string OpcodeStr, string Dt,
1953              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
1954  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
1955        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
1956        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
1957
1958// Narrow 2-register operations.
// Source is a Q register (type TyQ), result is a D register (type TyD).
// op6 is a class parameter here rather than being fixed by the class.
// Selects (TyD (OpNode (TyQ $Vm))) into $Vd.
1959class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
1960           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
1961           InstrItinClass itin, string OpcodeStr, string Dt,
1962           ValueType TyD, ValueType TyQ, SDNode OpNode>
1963  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
1964        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
1965        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
1966
1967// Narrow 2-register intrinsics.
// Same shape as N2VN (Q-register source, D-register result) but the operation
// is an Intrinsic (IntOp).  Selects (TyD (IntOp (TyQ $Vm))) into $Vd.
1968class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
1969              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
1970              InstrItinClass itin, string OpcodeStr, string Dt,
1971              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
1972  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
1973        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
1974        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
1975
1976// Long 2-register operations (currently only used for VMOVL).
// Source is a D register (type TyD), result is a Q register (type TyQ).
// Note the TyQ-before-TyD parameter order, the reverse of N2VN.
// Selects (TyQ (OpNode (TyD $Vm))) into $Vd.
1977class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
1978           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
1979           InstrItinClass itin, string OpcodeStr, string Dt,
1980           ValueType TyQ, ValueType TyD, SDNode OpNode>
1981  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
1982        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
1983        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
1984
1985// Long 2-register intrinsics.
// Same shape as N2VL (D-register source, Q-register result) but the operation
// is an Intrinsic (IntOp).  Selects (TyQ (IntOp (TyD $Vm))) into $Vd.
1986class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
1987              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
1988              InstrItinClass itin, string OpcodeStr, string Dt,
1989              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
1990  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
1991        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
1992        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
1993
1994// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both registers are read and written: the instruction has two results
// ($Vd, $Vm) and two inputs ($src1, $src2), tied pairwise by the constraint
// "$src1 = $Vd, $src2 = $Vm".  Only op19_18 and op11_7 vary; the other N2V
// encoding arguments are fixed.  No selection pattern ([]).
//   N2VDShuffle : DPR operands, fixed itinerary IIC_VPERMD.
//   N2VQShuffle : QPR operands, itinerary passed as a parameter.
1995class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
1996  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
1997        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
1998        OpcodeStr, Dt, "$Vd, $Vm",
1999        "$src1 = $Vd, $src2 = $Vm", []>;
2000class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
2001                  InstrItinClass itin, string OpcodeStr, string Dt>
2002  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
2003        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
2004        "$src1 = $Vd, $src2 = $Vm", []>;
2005
2006// Basic 3-register operations: double- and quad-register.
// D-register form: selects (ResTy (OpNode (OpTy $Vn), (OpTy $Vm))) into $Vd.
// The Commutable parameter is copied into the record's isCommutable flag.
2007class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2008           InstrItinClass itin, string OpcodeStr, string Dt,
2009           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2010  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2011        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2012        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2013        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2014  let isCommutable = Commutable;
2015}
2016// Same as N3VD but no data type.
// (Derives from N3VX instead of N3V and takes no Dt string.)
2017class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2018           InstrItinClass itin, string OpcodeStr,
2019           ValueType ResTy, ValueType OpTy,
2020           SDNode OpNode, bit Commutable>
2021  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
2022         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2023         OpcodeStr, "$Vd, $Vn, $Vm", "",
2024         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
2025  let isCommutable = Commutable;
2026}
2027
// D-register operation whose second source is one lane of $Vm: the pattern
// applies ShOp to $Vn and (NEONvduplane $Vm, $lane).  isCommutable is forced
// to 0 because the two sources are not interchangeable.
// 32-bit-lane form: N3VLane32 base, $Vm in DPR_VFP2, VectorIndex32 lane
// operand, itinerary passed as a parameter.
2028class N3VDSL<bits<2> op21_20, bits<4> op11_8,
2029             InstrItinClass itin, string OpcodeStr, string Dt,
2030             ValueType Ty, SDNode ShOp>
2031  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2032        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2033        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2034        [(set (Ty DPR:$Vd),
2035              (Ty (ShOp (Ty DPR:$Vn),
2036                        (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
2037  let isCommutable = 0;
2038}
// 16-bit-lane form: N3VLane16 base, $Vm in DPR_8, VectorIndex16 lane operand,
// fixed itinerary IIC_VMULi16D.
2039class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
2040               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
2041  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2042        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2043        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
2044        [(set (Ty DPR:$Vd),
2045              (Ty (ShOp (Ty DPR:$Vn),
2046                        (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2047  let isCommutable = 0;
2048}
2049
// Q-register counterpart of N3VD: QPR operands and fifth N3V argument 1.
// Selects (ResTy (OpNode (OpTy $Vn), (OpTy $Vm))) into $Vd; the Commutable
// parameter is copied into isCommutable.
2050class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2051           InstrItinClass itin, string OpcodeStr, string Dt,
2052           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2053  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2054        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2055        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2056        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2057  let isCommutable = Commutable;
2058}
// Q-register counterpart of N3VDX: derives from N3VX and takes no Dt string.
2059class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2060           InstrItinClass itin, string OpcodeStr,
2061           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2062  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
2063         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2064         OpcodeStr, "$Vd, $Vn, $Vm", "",
2065         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
2066  let isCommutable = Commutable;
2067}
// Q-register operation whose second source is one lane of a D register: the
// pattern applies ShOp to $Vn and (NEONvduplane $Vm, $lane).  $Vd and $Vn have
// type ResTy while the D-register lane source has type OpTy.  isCommutable
// is forced to 0.
// 32-bit-lane form: N3VLane32 base, $Vm in DPR_VFP2, VectorIndex32 lane
// operand, itinerary passed as a parameter.
2068class N3VQSL<bits<2> op21_20, bits<4> op11_8,
2069             InstrItinClass itin, string OpcodeStr, string Dt,
2070             ValueType ResTy, ValueType OpTy, SDNode ShOp>
2071  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2072        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2073        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2074        [(set (ResTy QPR:$Vd),
2075              (ResTy (ShOp (ResTy QPR:$Vn),
2076                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2077                                                imm:$lane)))))]> {
2078  let isCommutable = 0;
2079}
// 16-bit-lane form: N3VLane16 base, $Vm in DPR_8, VectorIndex16 lane operand,
// fixed itinerary IIC_VMULi16Q.
2080class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
2081               ValueType ResTy, ValueType OpTy, SDNode ShOp>
2082  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2083        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2084        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
2085        [(set (ResTy QPR:$Vd),
2086              (ResTy (ShOp (ResTy QPR:$Vn),
2087                           (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
2088                                                imm:$lane)))))]> {
2089  let isCommutable = 0;
2090}
2091
2092// Basic 3-register intrinsics, both double- and quad-register.
// D-register form.  Like N3VD but the operation is an Intrinsic (IntOp) and
// the instruction Format is a parameter (f) instead of fixed N3RegFrm.
// The Commutable parameter is copied into isCommutable.
2093class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2094              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2095              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
2096  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2097        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
2098        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2099        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2100  let isCommutable = Commutable;
2101}
// Intrinsic applied to $Vn and one duplicated lane of $Vm (32-bit lanes:
// N3VLane32 base, DPR_VFP2 $Vm, VectorIndex32 lane).  Never commutable.
2102class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2103                string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
2104  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2105        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2106        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2107        [(set (Ty DPR:$Vd),
2108              (Ty (IntOp (Ty DPR:$Vn),
2109                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
2110                                           imm:$lane)))))]> {
2111  let isCommutable = 0;
2112}
// 16-bit-lane variant of N3VDIntSL (N3VLane16 base, DPR_8 $Vm, VectorIndex16
// lane).  Never commutable.
2113class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2114                  string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
2115  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2116        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2117        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2118        [(set (Ty DPR:$Vd),
2119              (Ty (IntOp (Ty DPR:$Vn),
2120                         (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2121  let isCommutable = 0;
2122}
// Variant of N3VDInt with the source operands swapped: the operand list, the
// assembly string and the intrinsic call all take $Vm first and $Vn second.
// Has no Commutable parameter; isCommutable is always 0.
2123class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2124              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2125              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2126  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2127        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
2128        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2129        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
2130  let isCommutable = 0;
2131}
2132
// Q-register 3-register intrinsic: QPR operands, fifth N3V argument 1.
// Selects (ResTy (IntOp (OpTy $Vn), (OpTy $Vm))) into $Vd; the Commutable
// parameter is copied into isCommutable.
2133class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2134              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2135              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
2136  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2137        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
2138        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2139        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2140  let isCommutable = Commutable;
2141}
// Intrinsic applied to Q-register $Vn and one duplicated lane of D-register
// $Vm (32-bit lanes: N3VLane32 base, DPR_VFP2 $Vm, VectorIndex32 lane).
// $Vd/$Vn have type ResTy, the lane source has type OpTy.  Never commutable.
2142class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2143                string OpcodeStr, string Dt,
2144                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2145  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2146        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2147        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2148        [(set (ResTy QPR:$Vd),
2149              (ResTy (IntOp (ResTy QPR:$Vn),
2150                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2151                                                 imm:$lane)))))]> {
2152  let isCommutable = 0;
2153}
// 16-bit-lane variant of N3VQIntSL (N3VLane16 base, DPR_8 $Vm, VectorIndex16
// lane).  Never commutable.
2154class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2155                  string OpcodeStr, string Dt,
2156                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2157  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2158        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2159        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2160        [(set (ResTy QPR:$Vd),
2161              (ResTy (IntOp (ResTy QPR:$Vn),
2162                            (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
2163                                                 imm:$lane)))))]> {
2164  let isCommutable = 0;
2165}
// Variant of N3VQInt with the source operands swapped: the operand list, the
// assembly string and the intrinsic call all take $Vm first and $Vn second.
// Has no Commutable parameter; isCommutable is always 0.
2166class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2167              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2168              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2169  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2170        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
2171        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2172        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
2173  let isCommutable = 0;
2174}
2175
2176// Multiply-Add/Sub operations: double- and quad-register.
// D-register form.  $src1 is an extra input tied to the result by the
// constraint "$src1 = $Vd"; it does not appear in the assembly string.
// Selects (Ty (OpNode $src1, (Ty (MulOp $Vn, $Vm)))) into $Vd, i.e. OpNode
// combines the tied input with the product-like MulOp of the two sources.
2177class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2178                InstrItinClass itin, string OpcodeStr, string Dt,
2179                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
2180  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2181        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2182        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2183        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2184                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
2185
// D-register multiply-accumulate by a 32-bit scalar lane:
//   $Vd = ShOp($src1, MulOp($Vn, NEONvduplane($Vm, $lane)))
// Here ShOp is the outer (accumulating) operator. $src1 is tied to $Vd and
// the scalar register is drawn from DPR_VFP2.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp
                    (Ty DPR:$src1),
                    (Ty (MulOp
                          DPR:$Vn,
                          (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                            imm:$lane)))))))]>;
// D-register multiply-accumulate by a 16-bit scalar lane:
//   $Vd = ShOp($src1, MulOp($Vn, NEONvduplane($Vm, $lane)))
// Here ShOp is the outer (accumulating) operator. $src1 is tied to $Vd and
// the scalar register is drawn from DPR_8.
//
// MulOp and ShOp are declared as SDPatternOperator, matching the 32-bit lane
// sibling N3VDMulOpSL. SDNode is a subclass of SDPatternOperator, so every
// existing instantiation that passes an SDNode remains valid.
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                     imm:$lane)))))))]>;
2212
// Q-register multiply-accumulate form:
//   $Vd = OpNode($src1, MulOp($Vn, $Vm))
// The accumulator input $src1 is tied to the destination $Vd.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (Ty (OpNode QPR:$src1, (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
// Q-register multiply-accumulate by a 32-bit scalar lane:
//   $Vd = ShOp($src1, MulOp($Vn, NEONvduplane($Vm, $lane)))
// The scalar comes from a D register (type OpTy, class DPR_VFP2) and is
// duplicated to the Q-sized ResTy. $src1 is tied to $Vd.
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp
                       (ResTy QPR:$src1),
                       (ResTy (MulOp
                                QPR:$Vn,
                                (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))))]>;
// Q-register multiply-accumulate by a 16-bit scalar lane:
//   $Vd = ShOp($src1, MulOp($Vn, NEONvduplane($Vm, $lane)))
// The scalar comes from a D register (type OpTy, class DPR_8) and is
// duplicated to the Q-sized ResTy. $src1 is tied to $Vd.
//
// MulOp and ShOp are declared as SDPatternOperator, matching the 32-bit lane
// sibling N3VQMulOpSL. SDNode is a subclass of SDPatternOperator, so every
// existing instantiation that passes an SDNode remains valid.
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                   (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                        imm:$lane)))))))]>;
2248
2249// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// D-register "intrinsic then op" form:
//   $Vd = OpNode($src1, IntOp($Vn, $Vm))
// $src1 is tied to $Vd.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty,
                Intrinsic IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (Ty (OpNode DPR:$src1,
                          (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
// Q-register "intrinsic then op" form:
//   $Vd = OpNode($src1, IntOp($Vn, $Vm))
// $src1 is tied to $Vd.
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty,
                Intrinsic IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (Ty (OpNode QPR:$src1,
                          (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
2266
2267// Neon 3-argument intrinsics, both double- and quad-register.
2268// The destination register is also used as the first source operand register.
// D-register 3-argument intrinsic:
//   $Vd = IntOp($src1, $Vn, $Vm)
// $src1 is tied to $Vd; all three intrinsic arguments have type OpTy.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy DPR:$Vm))))]>;
// Q-register 3-argument intrinsic:
//   $Vd = IntOp($src1, $Vn, $Vm)
// $src1 is tied to $Vd; all three intrinsic arguments have type OpTy.
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (ResTy (IntOp (OpTy QPR:$src1),
                            (OpTy QPR:$Vn),
                            (OpTy QPR:$Vm))))]>;
2285
2286// Long Multiply-Add/Sub operations.
// Long multiply-accumulate: D-register sources, Q-register accumulator.
//   $Vd = OpNode($src1, MulOp($Vn, $Vm))
// MulOp takes TyD operands and yields TyQ; $src1 is tied to $Vd.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD,
                SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$src1),
                      (TyQ (MulOp (TyD DPR:$Vn), (TyD DPR:$Vm)))))]>;
// Long multiply-accumulate by a 32-bit scalar lane:
//   $Vd = OpNode($src1, MulOp($Vn, NEONvduplane($Vm, $lane)))
// Sources are D registers (scalar from DPR_VFP2), accumulator is a Q
// register tied to $Vd.
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD,
                  SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode
                (TyQ QPR:$src1),
                (TyQ (MulOp
                       (TyD DPR:$Vn),
                       (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                          imm:$lane))))))]>;
// Long multiply-accumulate by a 16-bit scalar lane:
//   $Vd = OpNode($src1, MulOp($Vn, NEONvduplane($Vm, $lane)))
// Sources are D registers (scalar from DPR_8), accumulator is a Q
// register tied to $Vd.
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD,
                    SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode
                (TyQ QPR:$src1),
                (TyQ (MulOp
                       (TyD DPR:$Vn),
                       (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                          imm:$lane))))))]>;
2320
2321// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// Long "intrinsic, extend, then op" form:
//   $Vd = OpNode($src1, ExtOp(IntOp($Vn, $Vm)))
// IntOp works on TyD values, ExtOp widens its result to TyQ, and the Q
// accumulator $src1 is tied to $Vd.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
                   bit op4, InstrItinClass itin,
                   string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD,
                   Intrinsic IntOp, SDNode ExtOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (OpNode
                (TyQ QPR:$src1),
                (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                        (TyD DPR:$Vm)))))))]>;
2332
2333// Neon Long 3-argument intrinsic.  The destination register is
2334// a quad-register and is also used as the first source operand register.
// Long 3-argument intrinsic:
//   $Vd = IntOp($src1, $Vn, $Vm)
// $src1/$Vd are Q registers (TyQ, tied together); $Vn/$Vm are D registers
// (TyD).
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyQ QPR:$src1),
                          (TyD DPR:$Vn),
                          (TyD DPR:$Vm))))]>;
// Long 3-argument intrinsic with a 32-bit scalar lane as last argument:
//   $Vd = IntOp($src1, $Vn, NEONvduplane($Vm, $lane))
// $src1 (ResTy, Q register) is tied to $Vd; the scalar register is drawn
// from DPR_VFP2.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8,
                 InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp
                       (ResTy QPR:$src1),
                       (OpTy DPR:$Vn),
                       (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                           imm:$lane)))))]>;
// Long 3-argument intrinsic with a 16-bit scalar lane as last argument:
//   $Vd = IntOp($src1, $Vn, NEONvduplane($Vm, $lane))
// $src1 (ResTy, Q register) is tied to $Vd; the scalar register is drawn
// from DPR_8.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin,
                   string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp
                       (ResTy QPR:$src1),
                       (OpTy DPR:$Vn),
                       (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                           imm:$lane)))))]>;
2369
2370// Narrowing 3-register intrinsics.
// Narrowing 3-register intrinsic: two Q-register sources (TyQ) produce a
// D-register result (TyD):
//   $Vd = IntOp($Vn, $Vm)
// The itinerary is fixed to IIC_VBINi4D; commutability is caller-selected.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm),
        N3RegFrm, IIC_VBINi4D, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
2380
2381// Long 3-register operations.
// Long 3-register operation: two D-register sources (TyD) produce a
// Q-register result (TyQ):
//   $Vd = OpNode($Vn, $Vm)
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin,
           string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Long 3-register operation by a 32-bit scalar lane:
//   $Vd = OpNode($Vn, NEONvduplane($Vm, $lane))
// D-register sources (scalar from DPR_VFP2), Q-register result.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
              (TyQ (OpNode
                     (TyD DPR:$Vn),
                     (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                        imm:$lane)))))]>;
// Long 3-register operation by a 16-bit scalar lane:
//   $Vd = OpNode($Vn, NEONvduplane($Vm, $lane))
// D-register sources (scalar from DPR_8), Q-register result.
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin,
               string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
              (TyQ (OpNode
                     (TyD DPR:$Vn),
                     (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                        imm:$lane)))))]>;
2409
2410// Long 3-register operations with explicitly extended operands.
// Long 3-register operation whose D-register operands are each widened
// explicitly before the operation:
//   $Vd = OpNode(ExtOp($Vn), ExtOp($Vm))
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD,
              SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                      (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}
2422
2423// Long 3-register intrinsics with explicit extend (VABDL).
// Long 3-register intrinsic whose D-sized result is widened explicitly:
//   $Vd = ExtOp(IntOp($Vn, $Vm))
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8,
                 bit op4, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD,
                 Intrinsic IntOp, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                      (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}
2435
2436// Long 3-register intrinsics.
// Long 3-register intrinsic: D-register sources (TyD), Q-register result
// (TyQ):
//   $Vd = IntOp($Vn, $Vm)
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin,
              string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm),
        N3RegFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Long 3-register intrinsic by a 32-bit scalar lane:
//   $Vd = IntOp($Vn, NEONvduplane($Vm, $lane))
// D-register sources (scalar from DPR_VFP2), Q-register result.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp
                       (OpTy DPR:$Vn),
                       (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                           imm:$lane)))))]>;
// Long 3-register intrinsic by a 16-bit scalar lane:
//   $Vd = IntOp($Vn, NEONvduplane($Vm, $lane))
// D-register sources (scalar from DPR_8), Q-register result.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp
                       (OpTy DPR:$Vn),
                       (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                           imm:$lane)))))]>;
2466
2467// Wide 3-register operations.
// Wide 3-register operation: a Q-register operand combined with a widened
// D-register operand:
//   $Vd = OpNode($Vn, ExtOp($Vm))
// The itinerary is fixed to IIC_VSUBiD.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vn, DPR:$Vm),
        N3RegFrm, IIC_VSUBiD, OpcodeStr, Dt,
        "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd,
              (OpNode (TyQ QPR:$Vn),
                      (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}
2478
2479// Pairwise long 2-register intrinsics, both double- and quad-register.
// D-register pairwise-long 2-register intrinsic:
//   $Vd = IntOp($Vm)
// with distinct result (ResTy) and operand (OpTy) vector types.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt,
        "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// Q-register pairwise-long 2-register intrinsic:
//   $Vd = IntOp($Vm)
// with distinct result (ResTy) and operand (OpTy) vector types.
// NOTE(review): this Q form uses the same IIC_VSHLiD itinerary as the D
// form N2VDPLInt; confirm whether IIC_VSHLiQ was intended.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt,
        "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2494
2495// Pairwise long 2-register accumulate intrinsics,
2496// both double- and quad-register.
2497// The destination register is also used as the first source operand register.
// D-register pairwise-long accumulate intrinsic:
//   $Vd = IntOp($src1, $Vm)
// $src1 has the result type ResTy and is tied to $Vd; $Vm has type OpTy.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vm),
        IIC_VPALiD, OpcodeStr, Dt,
        "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd,
              (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Q-register pairwise-long accumulate intrinsic:
//   $Vd = IntOp($src1, $Vm)
// $src1 has the result type ResTy and is tied to $Vd; $Vm has type OpTy.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vm),
        IIC_VPALiQ, OpcodeStr, Dt,
        "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
              (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
2514
2515// Shift by immediate,
2516// both double- and quad-register.
// D-register shift by immediate:
//   $Vd = OpNode($Vm, $SIMM)
// The immediate operand class (ImmTy) is supplied by the instantiation.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$Vm, ImmTy:$SIMM),
           f, itin, OpcodeStr, Dt,
           "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
// Q-register shift by immediate:
//   $Vd = OpNode($Vm, $SIMM)
// The immediate operand class (ImmTy) is supplied by the instantiation.
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt,
             ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$Vm, ImmTy:$SIMM),
           f, itin, OpcodeStr, Dt,
           "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
2531
2532// Long shift by immediate.
// Long shift by immediate: D-register source (OpTy), Q-register result
// (ResTy):
//   $Vd = OpNode($Vm, $SIMM)
// Uses a plain i32imm operand, the N2RegVShLFrm format and IIC_VSHLiD.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd),
           (ins DPR:$Vm, i32imm:$SIMM),
           N2RegVShLFrm, IIC_VSHLiD, OpcodeStr, Dt,
           "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (ResTy (OpNode (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
2541
2542// Narrow shift by immediate.
// Narrowing shift by immediate: Q-register source (OpTy), D-register
// result (ResTy):
//   $Vd = OpNode($Vm, $SIMM)
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy,
             Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd),
           (ins QPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, itin, OpcodeStr, Dt,
           "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (ResTy (OpNode (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
2551
2552// Shift right by immediate and accumulate,
2553// both double- and quad-register.
// D-register shift by immediate and accumulate:
//   $Vd = add($src1, ShOp($Vm, $SIMM))
// $src1 is tied to $Vd.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, IIC_VPALiD, OpcodeStr, Dt,
           "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd,
                 (Ty (add DPR:$src1,
                          (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
// Q-register shift by immediate and accumulate:
//   $Vd = add($src1, ShOp($Vm, $SIMM))
// $src1 is tied to $Vd.
// NOTE(review): this Q form uses the same IIC_VPALiD itinerary as the D
// form N2VDShAdd; confirm whether IIC_VPALiQ was intended.
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
           N2RegVShRFrm, IIC_VPALiD, OpcodeStr, Dt,
           "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (add QPR:$src1,
                          (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
2570
2571// Shift by immediate and insert,
2572// both double- and quad-register.
// D-register shift by immediate and insert:
//   $Vd = ShOp($src1, $Vm, $SIMM)
// $src1 is tied to $Vd.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM),
           f, IIC_VSHLiD, OpcodeStr, Dt,
           "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd,
                 (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
// Q-register shift by immediate and insert:
//   $Vd = ShOp($src1, $Vm, $SIMM)
// $src1 is tied to $Vd.
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f,
                string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM),
           f, IIC_VSHLiQ, OpcodeStr, Dt,
           "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd,
                 (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
2587
2588// Convert, with fractional bits immediate,
2589// both double- and quad-register.
// D-register convert taking an immediate operand (neon_vcvt_imm32):
//   $Vd = IntOp($Vm, $SIMM)
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd),
           (ins DPR:$Vm, neon_vcvt_imm32:$SIMM),
           NVCVTFrm, IIC_VUNAD, OpcodeStr, Dt,
           "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd,
                 (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
// Q-register convert taking an immediate operand (neon_vcvt_imm32):
//   $Vd = IntOp($Vm, $SIMM)
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd),
           (ins QPR:$Vm, neon_vcvt_imm32:$SIMM),
           NVCVTFrm, IIC_VUNAQ, OpcodeStr, Dt,
           "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd,
                 (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
2604
2605//===----------------------------------------------------------------------===//
2606// Multiclasses
2607//===----------------------------------------------------------------------===//
2608
2609// Abbreviations used in multiclass suffixes:
2610//   Q = quarter int (8 bit) elements
2611//   H = half int (16 bit) elements
2612//   S = single int (32 bit) elements
2613//   D = double int (64 bit) elements
2614
2615// Neon 2-register vector operations and intrinsics.
2616
2617// Neon 2-register comparisons.
2618//   source operand element sizes of 8, 16 and 32 bits:
// Expands a single-source comparison into eight instructions: 8/16/32-bit
// integer and f32 element types, each in D- and Q-register form.  Integer
// defs append the element size to Dt; the f32 defs use the fixed data type
// "f32", produce an integer vector result, and set Inst{10}.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4,
                       string opc, string Dt, string asm,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8
    : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm),
          NoItinerary, opc, !strconcat(Dt, "8"), asm, "",
          [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16
    : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm),
          NoItinerary, opc, !strconcat(Dt, "16"), asm, "",
          [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32
    : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm),
          NoItinerary, opc, !strconcat(Dt, "32"), asm, "",
          [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  def v2f32
    : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
          (outs DPR:$Vd), (ins DPR:$Vm),
          NoItinerary, opc, "f32", asm, "",
          [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8
    : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm),
          NoItinerary, opc, !strconcat(Dt, "8"), asm, "",
          [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16
    : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm),
          NoItinerary, opc, !strconcat(Dt, "16"), asm, "",
          [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32
    : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm),
          NoItinerary, opc, !strconcat(Dt, "32"), asm, "",
          [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  def v4f32
    : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
          (outs QPR:$Vd), (ins QPR:$Vm),
          NoItinerary, opc, "f32", asm, "",
          [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}
2662
2663
2664// Neon 2-register vector intrinsics,
2665//   element sizes of 8, 16 and 32 bits:
// Expands a 2-register intrinsic over 8/16/32-bit integer elements in both
// D- and Q-register form.  D defs use itinD, Q defs use itinQ, and the
// element size is appended to Dt.
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8
    : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinD,
              OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16
    : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, itinD,
              OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32
    : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, itinD,
              OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8
    : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinQ,
              OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16
    : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, itinQ,
              OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32
    : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, itinQ,
              OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
2686
2687
2688// Neon Narrowing 2-register vector operations,
2689//   source operand element sizes of 16, 32 and 64 bits:
// Expands a narrowing 2-register operation over three element sizes.  Each
// def is named after its result type, while the Dt suffix is the (wider)
// source element size.
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  // 16-bit source elements -> 8-bit result elements.
  def v8i8
    : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, itin,
           OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode>;
  // 32-bit source elements -> 16-bit result elements.
  def v4i16
    : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, itin,
           OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode>;
  // 64-bit source elements -> 32-bit result elements.
  def v2i32
    : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, itin,
           OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode>;
}
2704
2705// Neon Narrowing 2-register vector intrinsics,
2706//   source operand element sizes of 16, 32 and 64 bits:
// Expands a narrowing 2-register intrinsic over three element sizes.  Each
// def is named after its result type, while the Dt suffix is the (wider)
// source element size.
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 16-bit source elements -> 8-bit result elements.
  def v8i8
    : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, itin,
              OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, IntOp>;
  // 32-bit source elements -> 16-bit result elements.
  def v4i16
    : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, itin,
              OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, IntOp>;
  // 64-bit source elements -> 32-bit result elements.
  def v2i32
    : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, itin,
              OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, IntOp>;
}
2721
2722
2723// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
// Expands a lengthening 2-register operation over three element sizes.
// Each def is named after its (wider) result type; the Dt suffix is the
// source element size (8, 16 or 32 bits).  All defs use IIC_VQUNAiD.
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  // 8-bit source elements -> 16-bit result elements.
  def v8i16
    : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  // 16-bit source elements -> 32-bit result elements.
  def v4i32
    : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  // 32-bit source elements -> 64-bit result elements.
  def v2i64
    : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}
2734
2735
2736// Neon 3-register vector operations.
2737
2738// First with only element sizes of 8, 16 and 32 bits:
// Expands a 3-register operation over 8/16/32-bit integer elements in both
// D- and Q-register form.  The 8- and 16-bit defs share the "16"
// itineraries (itinD16/itinQ16); the 32-bit defs use itinD32/itinQ32.
// Commutable defaults to 0 and is forwarded to every def.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8
    : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
           OpcodeStr, !strconcat(Dt, "8"),
           v8i8, v8i8, OpNode, Commutable>;
  def v4i16
    : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
           OpcodeStr, !strconcat(Dt, "16"),
           v4i16, v4i16, OpNode, Commutable>;
  def v2i32
    : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
           OpcodeStr, !strconcat(Dt, "32"),
           v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8
    : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
           OpcodeStr, !strconcat(Dt, "8"),
           v16i8, v16i8, OpNode, Commutable>;
  def v8i16
    : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
           OpcodeStr, !strconcat(Dt, "16"),
           v8i16, v8i16, OpNode, Commutable>;
  def v4i32
    : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
           OpcodeStr, !strconcat(Dt, "32"),
           v4i32, v4i32, OpNode, Commutable>;
}
2766
// 3-register operations built on the N3VDSL*/N3VQSL* classes, element sizes
// of 16 and 32 bits only.
//   The 16-bit defs use the *SL16 classes and pass no itinerary here; the
//   32-bit defs pass IIC_VMULi32D / IIC_VMULi32Q explicitly. The Q-register
//   defs additionally pass the matching 64-bit vector type.
//   NOTE(review): "SL" presumably denotes the by-scalar (lane) forms --
//   confirm against the N3VDSL/N3VQSL class definitions.
2767multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
2768  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
2769                       v4i16, ShOp>;
2770  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
2771                     v2i32, ShOp>;
2772  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
2773                       v8i16, v4i16, ShOp>;
2774  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
2775                     v4i32, v2i32, ShOp>;
2776}
2777
2778// ....then also with element size 64 bits:
//   Inherits the 8/16/32-bit defs from N3V_QHS, passing the single itinD for
//   both D-register itinerary slots and itinQ for both Q-register slots, then
//   adds v1i64 (N3VD) and v2i64 (N3VQ) with the size literal 0b11.
2779multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
2780                    InstrItinClass itinD, InstrItinClass itinQ,
2781                    string OpcodeStr, string Dt,
2782                    SDNode OpNode, bit Commutable = 0>
2783  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
2784            OpcodeStr, Dt, OpNode, Commutable> {
2785  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
2786                   OpcodeStr, !strconcat(Dt, "64"),
2787                   v1i64, v1i64, OpNode, Commutable>;
2788  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
2789                   OpcodeStr, !strconcat(Dt, "64"),
2790                   v2i64, v2i64, OpNode, Commutable>;
2791}
2792
2793
2794// Neon 3-register vector intrinsics.
2795
2796// First with only element sizes of 16 and 32 bits:
//   Defines v4i16/v2i32 through N3VDInt and v8i16/v4i32 through N3VQInt, all
//   with the same Format f and Intrinsic IntOp. Each def receives the
//   itinerary matching its register width and element size
//   (itinD16/itinD32/itinQ16/itinQ32); Commutable (default 0) is forwarded.
2797multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
2798                     InstrItinClass itinD16, InstrItinClass itinD32,
2799                     InstrItinClass itinQ16, InstrItinClass itinQ32,
2800                     string OpcodeStr, string Dt,
2801                     Intrinsic IntOp, bit Commutable = 0> {
2802  // 64-bit vector types.
2803  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
2804                      OpcodeStr, !strconcat(Dt, "16"),
2805                      v4i16, v4i16, IntOp, Commutable>;
2806  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
2807                      OpcodeStr, !strconcat(Dt, "32"),
2808                      v2i32, v2i32, IntOp, Commutable>;
2809
2810  // 128-bit vector types.
2811  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
2812                      OpcodeStr, !strconcat(Dt, "16"),
2813                      v8i16, v8i16, IntOp, Commutable>;
2814  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
2815                      OpcodeStr, !strconcat(Dt, "32"),
2816                      v4i32, v4i32, IntOp, Commutable>;
2817}
// Same layout as N3VInt_HS (16- and 32-bit elements), but built on the
// N3VDIntSh/N3VQIntSh classes and without a Commutable parameter.
//   NOTE(review): the "Sh" classes are presumably the shift-style variants
//   with a different source-operand order -- confirm against N3VDIntSh.
2818multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
2819                     InstrItinClass itinD16, InstrItinClass itinD32,
2820                     InstrItinClass itinQ16, InstrItinClass itinQ32,
2821                     string OpcodeStr, string Dt,
2822                     Intrinsic IntOp> {
2823  // 64-bit vector types.
2824  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
2825                      OpcodeStr, !strconcat(Dt, "16"),
2826                      v4i16, v4i16, IntOp>;
2827  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
2828                      OpcodeStr, !strconcat(Dt, "32"),
2829                      v2i32, v2i32, IntOp>;
2830
2831  // 128-bit vector types.
2832  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
2833                      OpcodeStr, !strconcat(Dt, "16"),
2834                      v8i16, v8i16, IntOp>;
2835  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
2836                      OpcodeStr, !strconcat(Dt, "32"),
2837                      v4i32, v4i32, IntOp>;
2838}
2839
// Intrinsic counterpart of the N3V*SL classes, element sizes of 16 and 32
// bits only.
//   The 16-bit defs use the *SL16 classes and the 32-bit defs the plain *SL
//   classes; unlike N3VSL_HS, every def takes its itinerary from a parameter.
//   The Q-register defs also pass the matching 64-bit vector type.
2840multiclass N3VIntSL_HS<bits<4> op11_8,
2841                       InstrItinClass itinD16, InstrItinClass itinD32,
2842                       InstrItinClass itinQ16, InstrItinClass itinQ32,
2843                       string OpcodeStr, string Dt, Intrinsic IntOp> {
2844  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
2845                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
2846  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
2847                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
2848  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
2849                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
2850  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
2851                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
2852}
2853
2854// ....then also with element size of 8 bits:
//   Inherits the 16/32-bit defs from N3VInt_HS and adds v8i8 (N3VDInt) and
//   v16i8 (N3VQInt) with the size literal 0b00. The 8-bit defs reuse the
//   16-bit itineraries (itinD16/itinQ16).
2855multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
2856                      InstrItinClass itinD16, InstrItinClass itinD32,
2857                      InstrItinClass itinQ16, InstrItinClass itinQ32,
2858                      string OpcodeStr, string Dt,
2859                      Intrinsic IntOp, bit Commutable = 0>
2860  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
2861              OpcodeStr, Dt, IntOp, Commutable> {
2862  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
2863                      OpcodeStr, !strconcat(Dt, "8"),
2864                      v8i8, v8i8, IntOp, Commutable>;
2865  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
2866                      OpcodeStr, !strconcat(Dt, "8"),
2867                      v16i8, v16i8, IntOp, Commutable>;
2868}
// "Sh" counterpart of N3VInt_QHS: inherits the 16/32-bit defs from
// N3VInt_HSSh and adds v8i8 (N3VDIntSh) and v16i8 (N3VQIntSh), which reuse the
// 16-bit itineraries. There is no Commutable parameter.
2869multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
2870                      InstrItinClass itinD16, InstrItinClass itinD32,
2871                      InstrItinClass itinQ16, InstrItinClass itinQ32,
2872                      string OpcodeStr, string Dt,
2873                      Intrinsic IntOp>
2874  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
2875              OpcodeStr, Dt, IntOp> {
2876  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
2877                      OpcodeStr, !strconcat(Dt, "8"),
2878                      v8i8, v8i8, IntOp>;
2879  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
2880                      OpcodeStr, !strconcat(Dt, "8"),
2881                      v16i8, v16i8, IntOp>;
2882}
2883
2884
2885// ....then also with element size of 64 bits:
//   Inherits the 8/16/32-bit defs from N3VInt_QHS and adds v1i64 (N3VDInt) and
//   v2i64 (N3VQInt) with the size literal 0b11. The 64-bit defs reuse the
//   32-bit itineraries (itinD32/itinQ32).
2886multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
2887                       InstrItinClass itinD16, InstrItinClass itinD32,
2888                       InstrItinClass itinQ16, InstrItinClass itinQ32,
2889                       string OpcodeStr, string Dt,
2890                       Intrinsic IntOp, bit Commutable = 0>
2891  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
2892               OpcodeStr, Dt, IntOp, Commutable> {
2893  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
2894                      OpcodeStr, !strconcat(Dt, "64"),
2895                      v1i64, v1i64, IntOp, Commutable>;
2896  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
2897                      OpcodeStr, !strconcat(Dt, "64"),
2898                      v2i64, v2i64, IntOp, Commutable>;
2899}
// "Sh" counterpart of N3VInt_QHSD: inherits the 8/16/32-bit defs from
// N3VInt_QHSSh and adds v1i64 (N3VDIntSh) and v2i64 (N3VQIntSh), which reuse
// the 32-bit itineraries. There is no Commutable parameter.
2900multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
2901                       InstrItinClass itinD16, InstrItinClass itinD32,
2902                       InstrItinClass itinQ16, InstrItinClass itinQ32,
2903                       string OpcodeStr, string Dt,
2904                       Intrinsic IntOp>
2905  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
2906               OpcodeStr, Dt, IntOp> {
2907  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
2908                      OpcodeStr, !strconcat(Dt, "64"),
2909                      v1i64, v1i64, IntOp>;
2910  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
2911                      OpcodeStr, !strconcat(Dt, "64"),
2912                      v2i64, v2i64, IntOp>;
2913}
2914
2915// Neon Narrowing 3-register vector intrinsics,
2916//   source operand element sizes of 16, 32 and 64 bits:
//   Each def is named after the narrow type listed first (v8i8, v4i16, v2i32),
//   while Dt gets the wider element size ("16", "32" or "64") appended.
//   No itinerary is passed at this level; Commutable (default 0) is forwarded.
2917multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
2918                       string OpcodeStr, string Dt,
2919                       Intrinsic IntOp, bit Commutable = 0> {
2920  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
2921                      OpcodeStr, !strconcat(Dt, "16"),
2922                      v8i8, v8i16, IntOp, Commutable>;
2923  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
2924                      OpcodeStr, !strconcat(Dt, "32"),
2925                      v4i16, v4i32, IntOp, Commutable>;
2926  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
2927                      OpcodeStr, !strconcat(Dt, "64"),
2928                      v2i32, v2i64, IntOp, Commutable>;
2929}
2930
2931
2932// Neon Long 3-register vector operations.
2933
// Long operations on an SDNode, narrow element sizes of 8, 16 and 32 bits:
//   Each def is named after the wide type listed first (v8i16, v4i32, v2i64)
//   and Dt gets the narrow element size appended. The 8- and 16-bit variants
//   share itin16; the 32-bit variant uses itin32.
2934multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
2935                    InstrItinClass itin16, InstrItinClass itin32,
2936                    string OpcodeStr, string Dt,
2937                    SDNode OpNode, bit Commutable = 0> {
2938  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
2939                   OpcodeStr, !strconcat(Dt, "8"),
2940                   v8i16, v8i8, OpNode, Commutable>;
2941  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
2942                   OpcodeStr, !strconcat(Dt, "16"),
2943                   v4i32, v4i16, OpNode, Commutable>;
2944  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
2945                   OpcodeStr, !strconcat(Dt, "32"),
2946                   v2i64, v2i32, OpNode, Commutable>;
2947}
2948
// Long operations built on N3VLSL16/N3VLSL, narrow element sizes of 16 and 32
// bits only.
//   Unlike N3VL_QHS, the defs here are named after the narrow type listed
//   second (v4i16, v2i32), not the wide type listed first. Both defs share
//   the single itin parameter.
2949multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
2950                     InstrItinClass itin, string OpcodeStr, string Dt,
2951                     SDNode OpNode> {
2952  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
2953                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
2954  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
2955                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
2956}
2957
// Long operations with an explicit extend node, narrow element sizes of 8, 16
// and 32 bits:
//   Same layout as N3VL_QHS, but each def is built on N3VLExt and receives
//   ExtOp in addition to OpNode. The 8- and 16-bit variants share itin16; the
//   32-bit variant uses itin32.
2958multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
2959                       InstrItinClass itin16, InstrItinClass itin32,
2960                       string OpcodeStr, string Dt,
2961                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
2962  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
2963                      OpcodeStr, !strconcat(Dt, "8"),
2964                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
2965  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
2966                      OpcodeStr, !strconcat(Dt, "16"),
2967                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
2968  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
2969                      OpcodeStr, !strconcat(Dt, "32"),
2970                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
2971}
2972
2973// Neon Long 3-register vector intrinsics.
2974
2975// First with only element sizes of 16 and 32 bits:
//   Defines v4i32 (from v4i16, itin16) and v2i64 (from v2i32, itin32) through
//   N3VLInt. Defs are named after the wide type listed first; Dt gets the
//   narrow element size appended.
2976multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
2977                      InstrItinClass itin16, InstrItinClass itin32,
2978                      string OpcodeStr, string Dt,
2979                      Intrinsic IntOp, bit Commutable = 0> {
2980  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
2981                      OpcodeStr, !strconcat(Dt, "16"),
2982                      v4i32, v4i16, IntOp, Commutable>;
2983  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
2984                      OpcodeStr, !strconcat(Dt, "32"),
2985                      v2i64, v2i32, IntOp, Commutable>;
2986}
2987
// Long intrinsics built on N3VLIntSL16/N3VLIntSL, narrow element sizes of 16
// and 32 bits only.
//   Defs are named after the narrow type listed second (v4i16, v2i32), and
//   both share the single itin parameter.
2988multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
2989                        InstrItinClass itin, string OpcodeStr, string Dt,
2990                        Intrinsic IntOp> {
2991  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
2992                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
2993  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
2994                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
2995}
2996
2997// ....then also with element size of 8 bits:
//   Inherits v4i32/v2i64 from N3VLInt_HS and adds v8i16 (from v8i8) with the
//   size literal 0b00; the 8-bit variant reuses itin16.
2998multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
2999                       InstrItinClass itin16, InstrItinClass itin32,
3000                       string OpcodeStr, string Dt,
3001                       Intrinsic IntOp, bit Commutable = 0>
3002  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
3003               IntOp, Commutable> {
3004  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
3005                      OpcodeStr, !strconcat(Dt, "8"),
3006                      v8i16, v8i8, IntOp, Commutable>;
3007}
3008
3009// ....with explicit extend (VABDL).
//   Defines v8i16/v4i32/v2i64 through N3VLIntExt, passing both IntOp and
//   ExtOp. All three element sizes share the single itin parameter, and this
//   multiclass does not inherit from N3VLInt_HS.
3010multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3011                       InstrItinClass itin, string OpcodeStr, string Dt,
3012                       Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
3013  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
3014                         OpcodeStr, !strconcat(Dt, "8"),
3015                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
3016  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
3017                         OpcodeStr, !strconcat(Dt, "16"),
3018                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
3019  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
3020                         OpcodeStr, !strconcat(Dt, "32"),
3021                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
3022}
3023
3024
3025// Neon Wide 3-register vector operations (parameterized on SDNodes, not on an
//   Intrinsic: OpNode plus an explicit extend node ExtOp),
3026//   source operand element sizes of 8, 16 and 32 bits:
//   Defs are named after the wide type listed first (v8i16, v4i32, v2i64) and
//   Dt gets the narrow element size appended. No itinerary is passed at this
//   level.
3027multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3028                    string OpcodeStr, string Dt,
3029                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
3030  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
3031                   OpcodeStr, !strconcat(Dt, "8"),
3032                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
3033  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
3034                   OpcodeStr, !strconcat(Dt, "16"),
3035                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
3036  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
3037                   OpcodeStr, !strconcat(Dt, "32"),
3038                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
3039}
3040
3041
3042// Neon Multiply-Op vector operations,
3043//   element sizes of 8, 16 and 32 bits:
//   Every def passes the fixed `mul` node followed by the caller's OpNode to
//   N3VDMulOp / N3VQMulOp. The 8- and 16-bit defs share itinD16/itinQ16; the
//   32-bit defs use itinD32/itinQ32.
3044multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3045                        InstrItinClass itinD16, InstrItinClass itinD32,
3046                        InstrItinClass itinQ16, InstrItinClass itinQ32,
3047                        string OpcodeStr, string Dt, SDNode OpNode> {
3048  // 64-bit vector types.
3049  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
3050                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
3051  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
3052                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
3053  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
3054                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
3055
3056  // 128-bit vector types.
3057  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
3058                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
3059  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
3060                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
3061  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
3062                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
3063}
3064
// Multiply-Op operations built on the N3V*MulOpSL classes, element sizes of 16
// and 32 bits only.
//   Every def passes the fixed `mul` node followed by ShOp. The 16-bit defs
//   use the *SL16 classes; the Q-register defs also pass the matching 64-bit
//   vector type.
3065multiclass N3VMulOpSL_HS<bits<4> op11_8,
3066                         InstrItinClass itinD16, InstrItinClass itinD32,
3067                         InstrItinClass itinQ16, InstrItinClass itinQ32,
3068                         string OpcodeStr, string Dt, SDNode ShOp> {
3069  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
3070                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
3071  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
3072                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
3073  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
3074                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
3075                            mul, ShOp>;
3076  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
3077                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
3078                          mul, ShOp>;
3079}
3080
3081// Neon Intrinsic-Op vector operations,
3082//   element sizes of 8, 16 and 32 bits:
//   Every def passes both the Intrinsic IntOp and the SDNode OpNode to
//   N3VDIntOp / N3VQIntOp. All D-register defs share itinD and all Q-register
//   defs share itinQ, regardless of element size.
3083multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3084                        InstrItinClass itinD, InstrItinClass itinQ,
3085                        string OpcodeStr, string Dt, Intrinsic IntOp,
3086                        SDNode OpNode> {
3087  // 64-bit vector types.
3088  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
3089                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
3090  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
3091                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
3092  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
3093                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
3094
3095  // 128-bit vector types.
3096  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
3097                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
3098  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
3099                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
3100  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
3101                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
3102}
3103
3104// Neon 3-argument intrinsics,
3105//   element sizes of 8, 16 and 32 bits:
//   Each def passes the same vector type twice, followed by IntOp, to
//   N3VDInt3 / N3VQInt3. All D-register defs share itinD and all Q-register
//   defs share itinQ.
3106multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3107                       InstrItinClass itinD, InstrItinClass itinQ,
3108                       string OpcodeStr, string Dt, Intrinsic IntOp> {
3109  // 64-bit vector types.
3110  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
3111                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
3112  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
3113                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
3114  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
3115                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
3116
3117  // 128-bit vector types.
3118  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
3119                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
3120  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
3121                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
3122  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
3123                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
3124}
3125
3126
3127// Neon Long Multiply-Op vector operations,
3128//   element sizes of 8, 16 and 32 bits:
//   Unlike N3VMulOp_QHS, the multiply node is a parameter (MulOp) rather than
//   the fixed `mul`. Defs are named after the wide type listed first; the 8-
//   and 16-bit variants share itin16 and the 32-bit variant uses itin32.
3129multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3130                         InstrItinClass itin16, InstrItinClass itin32,
3131                         string OpcodeStr, string Dt, SDNode MulOp,
3132                         SDNode OpNode> {
3133  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
3134                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
3135  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
3136                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
3137  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
3138                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
3139}
3140
// Long Multiply-Op operations built on N3VLMulOpSL16/N3VLMulOpSL, narrow
// element sizes of 16 and 32 bits only.
//   The itineraries are hard-coded (IIC_VMACi16D / IIC_VMACi32D) rather than
//   taken from parameters. Defs are named after the narrow type listed second.
3141multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
3142                          string Dt, SDNode MulOp, SDNode OpNode> {
3143  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
3144                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
3145  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
3146                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
3147}
3148
3149
3150// Neon Long 3-argument intrinsics.
3151
3152// First with only element sizes of 16 and 32 bits:
//   Defines v4i32 (from v4i16, itin16) and v2i64 (from v2i32, itin32) through
//   N3VLInt3. Defs are named after the wide type listed first; Dt gets the
//   narrow element size appended.
3153multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3154                       InstrItinClass itin16, InstrItinClass itin32,
3155                       string OpcodeStr, string Dt, Intrinsic IntOp> {
3156  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
3157                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
3158  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
3159                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3160}
3161
// Long 3-argument intrinsics built on N3VLInt3SL16/N3VLInt3SL, narrow element
// sizes of 16 and 32 bits only.
//   The itineraries are hard-coded (IIC_VMACi16D / IIC_VMACi32D). Defs are
//   named after the narrow type listed second.
3162multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
3163                         string OpcodeStr, string Dt, Intrinsic IntOp> {
3164  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
3165                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
3166  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
3167                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3168}
3169
3170// ....then also with element size of 8 bits:
//   Inherits v4i32/v2i64 from N3VLInt3_HS and adds v8i16 (from v8i8) with the
//   size literal 0b00; the 8-bit variant reuses itin16.
3171multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3172                        InstrItinClass itin16, InstrItinClass itin32,
3173                        string OpcodeStr, string Dt, Intrinsic IntOp>
3174  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
3175  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
3176                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
3177}
3178
3179// ....with explicit extend (VABAL).
//   Defines v8i16/v4i32/v2i64 through N3VLIntExtOp, passing IntOp, ExtOp and
//   OpNode in that order. All three element sizes share the single itin
//   parameter, and this multiclass does not inherit from N3VLInt3_HS.
3180multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3181                            InstrItinClass itin, string OpcodeStr, string Dt,
3182                            Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
3183  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
3184                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
3185                           IntOp, ExtOp, OpNode>;
3186  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
3187                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
3188                           IntOp, ExtOp, OpNode>;
3189  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
3190                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
3191                           IntOp, ExtOp, OpNode>;
3192}
3193
3194
3195// Neon Pairwise long 2-register intrinsics,
3196//   element sizes of 8, 16 and 32 bits:
//   Defs are named after the type listed second (e.g. v8i8); the type listed
//   first has half as many elements, each twice as wide (e.g. v4i16). Dt gets
//   the narrower element size appended. The size literal sits between the
//   op21_20 and op17_16 arguments; no itinerary is passed at this level.
3197multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3198                        bits<5> op11_7, bit op4,
3199                        string OpcodeStr, string Dt, Intrinsic IntOp> {
3200  // 64-bit vector types.
3201  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3202                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3203  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3204                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3205  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3206                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3207
3208  // 128-bit vector types.
3209  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3210                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3211  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3212                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3213  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3214                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3215}
3216
3217
3218// Neon Pairwise long 2-register accumulate intrinsics,
3219//   element sizes of 8, 16 and 32 bits:
//   Same layout and type pairing as N2VPLInt_QHS, but built on the
//   N2VDPLInt2 / N2VQPLInt2 classes. Defs are named after the type listed
//   second; the type listed first has half as many, twice-as-wide elements.
3220multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3221                         bits<5> op11_7, bit op4,
3222                         string OpcodeStr, string Dt, Intrinsic IntOp> {
3223  // 64-bit vector types.
3224  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3225                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3226  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3227                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3228  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3229                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3230
3231  // 128-bit vector types.
3232  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3233                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3234  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3235                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3236  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3237                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3238}
3239
3240
3241// Neon 2-register vector shift by immediate,
3242//   left-shift form: always N2RegVShLFrm with an i32imm shift operand
3243//   element sizes of 8, 16, 32 and 64 bits:
//   For 8/16/32-bit elements the top bits of Inst{21-16} (labelled imm6 in
//   the per-def comments) are pinned by a `let`, so they encode the element
//   size; the 64-bit defs leave all six bits free and instead pass 1 for the
//   argument that follows op11_8.
//   NOTE(review): that argument is presumably the L bit (Inst{7}) -- confirm
//   against the N2VDSh/N2VQSh classes.
3244multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3245                       InstrItinClass itin, string OpcodeStr, string Dt,
3246                       SDNode OpNode> {
3247  // 64-bit vector types.
3248  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3249                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
3250    let Inst{21-19} = 0b001; // imm6 = 001xxx
3251  }
3252  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3253                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
3254    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
3255  }
3256  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3257                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
3258    let Inst{21} = 0b1;      // imm6 = 1xxxxx
3259  }
3260  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
3261                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
3262                             // imm6 = xxxxxx
3263
3264  // 128-bit vector types.
3265  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3266                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
3267    let Inst{21-19} = 0b001; // imm6 = 001xxx
3268  }
3269  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3270                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
3271    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
3272  }
3273  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3274                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
3275    let Inst{21} = 0b1;      // imm6 = 1xxxxx
3276  }
3277  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
3278                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
3279                             // imm6 = xxxxxx
3280}
// Right-shift counterpart of N2VShL_QHSD: every def uses N2RegVShRFrm and a
// per-element-size shift operand (shr_imm8/16/32/64) instead of i32imm.
//   The Inst{21-xx} pinning for 8/16/32-bit elements and the 0/1 argument
//   after op11_8 follow the same pattern as in N2VShL_QHSD.
3281multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3282                       InstrItinClass itin, string OpcodeStr, string Dt,
3283                       SDNode OpNode> {
3284  // 64-bit vector types.
3285  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
3286                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
3287    let Inst{21-19} = 0b001; // imm6 = 001xxx
3288  }
3289  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
3290                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
3291    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
3292  }
3293  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
3294                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
3295    let Inst{21} = 0b1;      // imm6 = 1xxxxx
3296  }
3297  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
3298                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
3299                             // imm6 = xxxxxx
3300
3301  // 128-bit vector types.
3302  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
3303                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
3304    let Inst{21-19} = 0b001; // imm6 = 001xxx
3305  }
3306  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
3307                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
3308    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
3309  }
3310  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
3311                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
3312    let Inst{21} = 0b1;      // imm6 = 1xxxxx
3313  }
3314  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
3315                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
3316                             // imm6 = xxxxxx
3317}
3318
3319// Neon Shift-Accumulate vector operations,
3320//   element sizes of 8, 16, 32 and 64 bits:
//   Every def uses a per-element-size shr_imm8/16/32/64 shift operand and
//   passes ShOp to N2VDShAdd / N2VQShAdd; no format or itinerary is passed at
//   this level. For 8/16/32-bit elements the top bits of Inst{21-16} are
//   pinned by a `let`; the 64-bit defs pass 1 for the argument after op11_8
//   and leave all six bits free.
3321multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3322                         string OpcodeStr, string Dt, SDNode ShOp> {
3323  // 64-bit vector types.
3324  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
3325                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
3326    let Inst{21-19} = 0b001; // imm6 = 001xxx
3327  }
3328  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
3329                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
3330    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
3331  }
3332  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
3333                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
3334    let Inst{21} = 0b1;      // imm6 = 1xxxxx
3335  }
3336  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
3337                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
3338                             // imm6 = xxxxxx
3339
3340  // 128-bit vector types.
3341  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
3342                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
3343    let Inst{21-19} = 0b001; // imm6 = 001xxx
3344  }
3345  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
3346                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
3347    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
3348  }
3349  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
3350                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
3351    let Inst{21} = 0b1;      // imm6 = 1xxxxx
3352  }
3353  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
3354                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
3355                             // imm6 = xxxxxx
3356}
3357
3358// Neon Shift-Insert vector operations,
3359//   with f of either N2RegVShLFrm or N2RegVShRFrm
3360//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // Left shift-insert: every def uses the N2RegVShLFrm format, a plain
  // i32imm shift operand and the NEONvsli node.

  // D-register (64-bit vector) variants.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4,
                        i32imm, N2RegVShLFrm,
                        OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
                        i32imm, N2RegVShLFrm,
                        OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
                        i32imm, N2RegVShLFrm,
                        OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  // 64-bit elements: imm6 = xxxxxx (nothing pinned); fourth argument is 1.
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
                        i32imm, N2RegVShLFrm,
                        OpcodeStr, "64", v1i64, NEONvsli>;

  // Q-register (128-bit vector) variants.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        i32imm, N2RegVShLFrm,
                        OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        i32imm, N2RegVShLFrm,
                        OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        i32imm, N2RegVShLFrm,
                        OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  // 64-bit elements: imm6 = xxxxxx (nothing pinned); fourth argument is 1.
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
                        i32imm, N2RegVShLFrm,
                        OpcodeStr, "64", v2i64, NEONvsli>;
}
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // Right shift-insert: every def uses the N2RegVShRFrm format, the
  // shr_imm<N> operand for its element size and the NEONvsri node.

  // D-register (64-bit vector) variants.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4,
                        shr_imm8, N2RegVShRFrm,
                        OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
                        shr_imm16, N2RegVShRFrm,
                        OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
                        shr_imm32, N2RegVShRFrm,
                        OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  // 64-bit elements: imm6 = xxxxxx (nothing pinned); fourth argument is 1.
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
                        shr_imm64, N2RegVShRFrm,
                        OpcodeStr, "64", v1i64, NEONvsri>;

  // Q-register (128-bit vector) variants.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        shr_imm8, N2RegVShRFrm,
                        OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        shr_imm16, N2RegVShRFrm,
                        OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
                        shr_imm32, N2RegVShRFrm,
                        OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
  // 64-bit elements: imm6 = xxxxxx (nothing pinned); fourth argument is 1.
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
                        shr_imm64, N2RegVShRFrm,
                        OpcodeStr, "64", v2i64, NEONvsri>;
}
3433
3434// Neon Shift Long operations,
3435//   element sizes of 8, 16, 32 bits:
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
  // Each def is named after its (wide) result type; the data-type suffix
  // appended to Dt is the width of the narrow source element.
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
}
3451
3452// Neon Shift Narrow operations,
3453//   element sizes of 16, 32, 64 bits:
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode> {
  // Each def is named after its (narrow) result type; the data-type suffix
  // appended to Dt is the width of the wide source element, and the shift
  // operand class is the one for the narrow element width.
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "16"), v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001;  // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;   // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr,
                     !strconcat(Dt, "64"), v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;       // imm6 = 1xxxxx
  }
}
3473
3474//===----------------------------------------------------------------------===//
3475// Instruction Definitions.
3476//===----------------------------------------------------------------------===//
3477
3478// Vector Add Operations.
3479
//   VADD     : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0,
                         IIC_VBINiD, IIC_VBINiQ,
                         "vadd", "i", add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND,
                     "vadd", "f32", v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ,
                     "vadd", "f32", v4f32, v4f32, fadd, 1>;

//   VADDL    : Vector Add Long (Q = D + D)
//   Signed/unsigned forms pair 'add' with sext/zext respectively.
defm VADDLs   : N3VLExt_QHS<0, 1, 0b0000, 0,
                            IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", add, sext, 1>;
defm VADDLu   : N3VLExt_QHS<1, 1, 0b0000, 0,
                            IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", add, zext, 1>;

//   VADDW    : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VW_QHS<0, 1, 0b0001, 0,
                         "vaddw", "s", add, sext, 0>;
defm VADDWu   : N3VW_QHS<1, 1, 0b0001, 0,
                         "vaddw", "u", add, zext, 0>;

//   VHADD    : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D,
                           IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D,
                           IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;

//   VRHADD   : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D,
                           IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D,
                           IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;

//   VQADD    : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D,
                            IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D,
                            IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", int_arm_neon_vqaddu, 1>;

//   VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN   : N3VNInt_HSD<0, 1, 0b0100, 0,
                            "vaddhn", "i", int_arm_neon_vaddhn, 1>;

//   VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1, 1, 0b0100, 0,
                            "vraddhn", "i", int_arm_neon_vraddhn, 1>;
3522
3523// Vector Multiply Operations.
3524
//   VMUL     : Vector Multiply (integer, polynomial and floating-point)
// Integer forms ("i" data-type prefix), selected from the generic 'mul' node.
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
// Polynomial forms ("p8"), selected from the int_arm_neon_vmulp intrinsic on
// v8i8 (D register) and v16i8 (Q register).
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
// Floating-point forms ("f32"), selected from 'fmul'.
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
// "sl" variants (presumably multiply-by-scalar-lane; the patterns below feed
// them a D sub-register plus a lane index).
// NOTE(review): the f32 lane forms use the IIC_VBIND/IIC_VBINQ itineraries
// while VMULfd/VMULfq use IIC_VFMULD/IIC_VFMULQ -- confirm this is intended.
defm VMULsl   : N3VSL_HS<0b1000, "vmul", "i", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;

// Q-register multiply by a lane duplicated from a Q register ($src2): the
// lane operand is rewritten as a D sub-register of $src2 (chosen by
// DSubReg_*_reg) plus a lane index transformed by SubReg_*_lane, so that the
// D-register-lane instruction can be used.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
// Same rewrite for v4f32 'fmul'; reuses the i32 sub-register/lane transforms.
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;
3559
//   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
// Register forms and "sl" (lane) forms both select the int_arm_neon_vqdmulh
// intrinsic and print with the "s" data-type prefix.
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s",  int_arm_neon_vqdmulh>;
// Q-register intrinsic whose second operand is a lane duplicated from a Q
// register: pass the D sub-register of $src2 (DSubReg_*_reg) and the lane
// index transformed by SubReg_*_lane to the lane instruction.
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;
3581
//   VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
// Register forms and "sl" (lane) forms both select the int_arm_neon_vqrdmulh
// intrinsic and print with the "s" data-type prefix.
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s",  int_arm_neon_vqrdmulh>;
// Q-register intrinsic whose second operand is a lane duplicated from a Q
// register: pass the D sub-register of $src2 (DSubReg_*_reg) and the lane
// index transformed by SubReg_*_lane to the lane instruction.
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;
3603
//   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
//   Signed/unsigned forms select NEONvmulls/NEONvmullu; the polynomial form
//   selects the int_arm_neon_vmullp intrinsic (v8i16 result from v8i8).
defm VMULLs   : N3VL_QHS<0, 1, 0b1100, 0,
                         IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "s", NEONvmulls, 1>;
defm VMULLu   : N3VL_QHS<1, 1, 0b1100, 0,
                         IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "u", NEONvmullu, 1>;
def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D,
                        "vmull", "p8", v8i16, v8i8, int_arm_neon_vmullp, 1>;
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D,
                          "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D,
                          "vmull", "u", NEONvmullu>;

//   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0, 1, 0b1101, 0,
                           IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;
3619
3620// Vector Multiply-Accumulate and Multiply-Subtract Operations.
3621
//   VMLA     : Vector Multiply Accumulate (integer and floating-point)
// Integer forms combine the multiply with 'add'.
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
// Floating-point forms use the fmul_su / fadd_mlx fragments and are
// additionally predicated on UseFPVMLx.
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
// "sl" (lane) variants; the f32 ones carry the same UseFPVMLx predicate.
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

// Q-register add-of-multiply where the multiplier is a lane duplicated from
// a Q register ($src3): pass the D sub-register of $src3 (DSubReg_*_reg) and
// the lane index transformed by SubReg_*_lane to the lane instruction.
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

// Floating-point counterpart; matches fadd_mlx/fmul_su and keeps the
// UseFPVMLx predicate of the instruction it selects.
def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;
3665
//   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
//   Signed/unsigned forms pair NEONvmulls/NEONvmullu with 'add'.
defm VMLALs   : N3VLMulOp_QHS<0, 1, 0b1000, 0,
                              IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "s", NEONvmulls, add>;
defm VMLALu   : N3VLMulOp_QHS<1, 1, 0b1000, 0,
                              IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010,
                               "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010,
                               "vmlal", "u", NEONvmullu, add>;

//   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0,
                            IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlal", "s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011,
                              "vqdmlal", "s", int_arm_neon_vqdmlal>;
3679
//   VMLS     : Vector Multiply Subtract (integer and floating-point)
// Integer forms combine the multiply with 'sub'.
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
// Floating-point forms use the fmul_su / fsub_mlx fragments and are
// additionally predicated on UseFPVMLx.
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
// "sl" (lane) variants; the f32 ones carry the same UseFPVMLx predicate.
defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                            v4f32, v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

// Q-register subtract-of-multiply where the multiplier is a lane duplicated
// from a Q register ($src3): pass the D sub-register of $src3
// (DSubReg_*_reg) and the lane index transformed by SubReg_*_lane to the
// lane instruction.
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                     (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

// Floating-point counterpart; matches fsub_mlx/fmul_su and keeps the
// UseFPVMLx predicate of the instruction it selects.
def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;
3722
//   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
//   Signed/unsigned forms pair NEONvmulls/NEONvmullu with 'sub'.
defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "u", NEONvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;

//   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// The by-scalar opcode is spelled as a full 4-bit literal (0b0111) so it has
// the same width as the other by-scalar opcodes in this file (e.g. 0b0011
// for VQDMLALsl, 0b0110 for VMLSLsl); the value is unchanged.
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
3736
3737// Vector Subtract Operations.
3738
//   VSUB     : Vector Subtract (integer and floating-point)
// Integer forms select 'sub'; the "f32" forms select 'fsub'.
// Every def in this group passes 0 as its final template argument, where the
// corresponding add definitions pass 1 (presumably the commutable flag --
// confirm against the class definitions).
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                     v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                     v4f32, v4f32, fsub, 0>;
//   VSUBL    : Vector Subtract Long (Q = D - D)
//   Signed/unsigned forms pair 'sub' with sext/zext respectively.
defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "s", sub, sext, 0>;
defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "u", sub, zext, 0>;
//   VSUBW    : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
//   VHSUB    : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
//   VQSUB    : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "u", int_arm_neon_vqsubu, 0>;
//   VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
                            int_arm_neon_vsubhn, 0>;
//   VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                            int_arm_neon_vrsubhn, 0>;
3774
3775// Vector Comparisons.
3776
//   VCEQ     : Vector Compare Equal
// Integer forms ("i") and "f32" forms all select NEONvceq; the f32 forms
// produce integer vectors (v2i32/v4i32) from v2f32/v4f32 operands.
defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                     NEONvceq, 1>;
def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                     NEONvceq, 1>;

// Compare-against-zero form: the assembly operand string ends in "#0".
defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                            "$Vd, $Vm, #0", NEONvceqz>;

//   VCGE     : Vector Compare Greater Than or Equal
// Signed integer and f32 forms select NEONvcge; the unsigned form selects
// NEONvcgeu.
defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                     NEONvcge, 0>;
def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                     NEONvcge, 0>;

// Compare-against-zero forms ("#0" operand string): >= 0 and <= 0.
defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                            "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                            "$Vd, $Vm, #0", NEONvclez>;

//   VCGT     : Vector Compare Greater Than
// Signed integer and f32 forms select NEONvcgt; the unsigned form selects
// NEONvcgtu.
defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                     NEONvcgt, 0>;
def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                     NEONvcgt, 0>;

// Compare-against-zero forms ("#0" operand string): > 0 and < 0.
defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                            "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                            "$Vd, $Vm, #0", NEONvcltz>;

//   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
// D and Q forms select separate intrinsics (int_arm_neon_vacged / vacgeq).
def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                        "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                        "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
//   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
// D and Q forms select separate intrinsics (int_arm_neon_vacgtd / vacgtq).
def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                        "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                        "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
//   VTST     : Vector Test Bits
// The data-type prefix is the empty string here; selects NEONvtst.
defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                        IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
3831
3832// Vector Bitwise Operations.
3833
// Bitwise-NOT pattern fragments: XOR of the input with an all-ones vector,
// written as NEONimmAllOnesV typed v8i8 (vnotd) or v16i8 (vnotq) and then
// bitconverted.
def vnotd : PatFrag<
  (ops node:$in),
  (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
def vnotq : PatFrag<
  (ops node:$in),
  (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
3838
3839
//   VAND     : Vector Bitwise AND
//   (patterns for the register-register bitwise ops are typed v2i32 / v4i32)
def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD,
                      "vand", v2i32, v2i32, and, 1>;
def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
                      "vand", v4i32, v4i32, and, 1>;

//   VEOR     : Vector Bitwise Exclusive OR
def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD,
                      "veor", v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ,
                      "veor", v4i32, v4i32, xor, 1>;

//   VORR     : Vector Bitwise OR
def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD,
                      "vorr", v2i32, v2i32, or, 1>;
def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ,
                      "vorr", v4i32, v4i32, or, 1>;
3857
// VORR (immediate), i16 elements, D register.  Selects NEONvorrImm; the
// "$src = $Vd" string ties the source register to the destination.  The '?'
// in the {1,0,?,1} bit pattern is left unset by the template arguments;
// Inst{9} is taken from bit 9 of the $SIMM operand (presumably filling that
// '?' -- confirm against N1ModImm).
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

// VORR (immediate), i32 elements, D register.  Two bits are left as '?' in
// {0,?,?,1}; Inst{10-9} are taken from bits 10-9 of $SIMM.
def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORR (immediate), i16 elements, Q register.
def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

// VORR (immediate), i32 elements, Q register.
def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}
3893
3894
//   VBIC     : Vector Bitwise Bit Clear (AND NOT)
//   Matches 'and' of $Vn with the vnotd/vnotq fragment applied to $Vm.
def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd,
                           (v2i32 (and DPR:$Vn, (vnotd DPR:$Vm))))]>;
def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd,
                           (v4i32 (and QPR:$Vn, (vnotq QPR:$Vm))))]>;
3906
// VBIC (immediate), i16 elements, D register.  Selects NEONvbicImm; the
// "$src = $Vd" string ties the source register to the destination.  The '?'
// in the {1,0,?,1} bit pattern is left unset by the template arguments;
// Inst{9} is taken from bit 9 of the $SIMM operand (presumably filling that
// '?' -- confirm against N1ModImm).
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

// VBIC (immediate), i32 elements, D register.  Two bits are left as '?' in
// {0,?,?,1}; Inst{10-9} are taken from bits 10-9 of $SIMM.
def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VBIC (immediate), i16 elements, Q register.
def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

// VBIC (immediate), i32 elements, Q register.
def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}
3942
3943//   VORN     : Vector Bitwise OR NOT
// Register forms of VORN.  The selection pattern is (or $Vn, (not $Vm)),
// using the vnotd / vnotq fragments (defined elsewhere).
// VORNd: DPR operands, pattern typed v2i32, itinerary IIC_VBINiD.
// VORNq: QPR operands, pattern typed v4i32, itinerary IIC_VBINiQ.
3944def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
3945                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
3946                     "vorn", "$Vd, $Vn, $Vm", "",
3947                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
3948                                                (vnotd DPR:$Vm))))]>;
3949def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
3950                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
3951                     "vorn", "$Vd, $Vn, $Vm", "",
3952                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
3953                                                (vnotq QPR:$Vm))))]>;
3954
3955//   VMVN     : Vector Bitwise NOT (Immediate)
3956
// All four immediate VMVN records below are flagged isReMaterializable.
// Their only input is the immediate operand, and each is selected from
// NEONvmvnImm with a target-constant (timm) immediate.
3957let isReMaterializable = 1 in {
3958
// i16 element forms (DPR / QPR): operand class nImmSplatI16; only SIMM bit 9
// is forwarded into the encoding.
3959def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
3960                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
3961                         "vmvn", "i16", "$Vd, $SIMM", "",
3962                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
3963  let Inst{9} = SIMM{9};
3964}
3965
3966def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
3967                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
3968                         "vmvn", "i16", "$Vd, $SIMM", "",
3969                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
3970  let Inst{9} = SIMM{9};
3971}
3972
// i32 element forms (DPR / QPR): operand class nImmVMOVI32.  The 4-bit field
// passed to N1ModImm is left entirely unspecified ({?,?,?,?}) and encoding
// bits 11-8 are taken from SIMM bits 11-8 instead.
3973def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
3974                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
3975                         "vmvn", "i32", "$Vd, $SIMM", "",
3976                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
3977  let Inst{11-8} = SIMM{11-8};
3978}
3979
3980def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
3981                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
3982                         "vmvn", "i32", "$Vd, $SIMM", "",
3983                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
3984  let Inst{11-8} = SIMM{11-8};
3985}
3986}
3987
3988//   VMVN     : Vector Bitwise NOT
// Register VMVN, DPR form: selected from the vnotd fragment (defined
// elsewhere) on v2i32.  Single source operand, nothing tied.
3989def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
3990                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
3991                     "vmvn", "$Vd, $Vm", "",
3992                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
// Register VMVN, QPR form: selected from the vnotq fragment (defined
// elsewhere) on v4i32.  Single source operand, nothing tied.
// Uses the quad-register itinerary IIC_VSUBiQ so that the D/Q pair follows
// the same ...D / ...Q itinerary pairing as the other bitwise register
// instructions in this file (e.g. VBICd/VBICq with IIC_VBINiD/IIC_VBINiQ);
// the record previously reused the double-register itinerary IIC_VSUBiD.
3993def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
3994                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiQ,
3995                     "vmvn", "$Vd, $Vm", "",
3996                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
// Stand-alone selection patterns mapping vnotd / vnotq to VMVNd / VMVNq.
// NOTE(review): these match the same DAG shapes as the patterns already
// attached to the VMVNd / VMVNq records -- confirm whether they are still
// needed.
3997def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
3998def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
3999
4000//   VBSL     : Vector Bitwise Select
// VBSL, DPR form.  Takes three register inputs; $src1 is tied to $Vd, so the
// assembly string lists only $Vd, $Vn, $Vm.  Selected from the NEONvbsl node
// on v2i32.
4001def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
4002                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
4003                     N3RegFrm, IIC_VCNTiD,
4004                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
4005                     [(set DPR:$Vd,
4006                           (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
4007
// Also select VBSLd from the open-coded form ($Vn & $Vd) | ($Vm & ~$Vd):
// $Vd acts as the per-bit selector and becomes the tied first operand.
4008def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
4009                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
4010          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
4011
// VBSL, QPR form: same shape as VBSLd, on v4i32 with itinerary IIC_VCNTiQ.
4012def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
4013                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
4014                     N3RegFrm, IIC_VCNTiQ,
4015                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
4016                     [(set QPR:$Vd,
4017                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
4018
// QPR counterpart of the open-coded select pattern above.
4019def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
4020                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
4021          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
4022
4023//   VBIF     : Vector Bitwise Insert if False
4024//              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
4025// FIXME: This instruction's encoding MAY NOT BE correct.
// VBIF, DPR and QPR forms.  Operand lists and the "$src1 = $Vd" tie mirror
// the VBSL records; the third N3VX argument is 0b11 here.  The pattern list
// is empty, so these records carry no instruction-selection pattern.
4026def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
4027                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
4028                     N3RegFrm, IIC_VBINiD,
4029                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
4030                     []>;
4031def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
4032                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
4033                     N3RegFrm, IIC_VBINiQ,
4034                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
4035                     []>;
4036
4037//   VBIT     : Vector Bitwise Insert if True
4038//              like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
4039// FIXME: This instruction's encoding MAY NOT BE correct.
// VBIT, DPR and QPR forms.  Operand lists and the "$src1 = $Vd" tie mirror
// the VBSL records; the third N3VX argument is 0b10 here.  The pattern list
// is empty, so these records carry no instruction-selection pattern.
4040def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
4041                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
4042                     N3RegFrm, IIC_VBINiD,
4043                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
4044                     []>;
4045def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
4046                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
4047                     N3RegFrm, IIC_VBINiQ,
4048                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
4049                     []>;
4050
4051// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
4052// for equivalent operations with different register constraints; it just
4053// inserts copies.
4054
4055// Vector Absolute Differences.
4056
4057//   VABD     : Vector Absolute Difference
// Integer VABD is generated through the N3VInt_QHS multiclass (defined
// elsewhere): signed variants wrap int_arm_neon_vabds, unsigned variants
// wrap int_arm_neon_vabdu.  The trailing 1 is presumably the commutable
// flag -- confirm against the multiclass.
4058defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
4059                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4060                           "vabd", "s", int_arm_neon_vabds, 1>;
4061defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
4062                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4063                           "vabd", "u", int_arm_neon_vabdu, 1>;
// f32 forms on v2f32 (D) and v4f32 (Q); both reuse int_arm_neon_vabds.
4064def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
4065                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
4066def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
4067                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
4068
4069//   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
// Long absolute difference via N3VLIntExt_QHS (defined elsewhere), combining
// the vabds / vabdu intrinsic with an extension node.
// NOTE(review): zext is passed for the signed variant as well; presumably
// intentional because an absolute difference is never negative -- confirm.
4070defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
4071                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
4072defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
4073                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
4074
4075//   VABA     : Vector Absolute Difference and Accumulate
// Absolute difference and accumulate via N3VIntOp_QHS (defined elsewhere):
// pairs the vabds / vabdu intrinsic with the `add` node.  Separate D and Q
// itineraries (IIC_VABAD / IIC_VABAQ) are supplied.
4076defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
4077                             "vaba", "s", int_arm_neon_vabds, add>;
4078defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
4079                             "vaba", "u", int_arm_neon_vabdu, add>;
4080
4081//   VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
// Long absolute difference and accumulate via N3VLIntExtOp_QHS (defined
// elsewhere): vabds / vabdu intrinsic, extended with zext, combined with
// `add`.  A single itinerary (IIC_VABAD) is passed for all variants.
4082defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
4083                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
4084defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
4085                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;
4086
4087// Vector Maximum and Minimum.
4088
4089//   VMAX     : Vector Maximum
// Integer VMAX through N3VInt_QHS (defined elsewhere): signed variants wrap
// int_arm_neon_vmaxs, unsigned variants wrap int_arm_neon_vmaxu.
4090defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
4091                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4092                           "vmax", "s", int_arm_neon_vmaxs, 1>;
4093defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
4094                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4095                           "vmax", "u", int_arm_neon_vmaxu, 1>;
// f32 forms on v2f32 (D) and v4f32 (Q); both reuse int_arm_neon_vmaxs.
4096def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
4097                        "vmax", "f32",
4098                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
4099def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
4100                        "vmax", "f32",
4101                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;
4102
4103//   VMIN     : Vector Minimum
// Integer VMIN through N3VInt_QHS (defined elsewhere): signed variants wrap
// int_arm_neon_vmins, unsigned variants wrap int_arm_neon_vminu.
4104defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
4105                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4106                           "vmin", "s", int_arm_neon_vmins, 1>;
4107defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
4108                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4109                           "vmin", "u", int_arm_neon_vminu, 1>;
// f32 forms on v2f32 (D) and v4f32 (Q); both reuse int_arm_neon_vmins.
4110def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
4111                        "vmin", "f32",
4112                        v2f32, v2f32, int_arm_neon_vmins, 1>;
4113def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
4114                        "vmin", "f32",
4115                        v4f32, v4f32, int_arm_neon_vmins, 1>;
4116
4117// Vector Pairwise Operations.
4118
4119//   VPADD    : Vector Pairwise Add
// VPADD is defined only through N3VDInt (D-register class); there are no Q
// records in this group.  All four variants wrap int_arm_neon_vpadd and pass
// 0 as the final argument (presumably "not commutable" -- confirm against
// N3VDInt).  Element types: v8i8, v4i16, v2i32 and v2f32.
4120def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
4121                        "vpadd", "i8",
4122                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
4123def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
4124                        "vpadd", "i16",
4125                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
4126def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
4127                        "vpadd", "i32",
4128                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
4129def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
4130                        IIC_VPBIND, "vpadd", "f32",
4131                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
4132
4133//   VPADDL   : Vector Pairwise Add Long
// Signed / unsigned VPADDL via N2VPLInt_QHS (defined elsewhere), wrapping
// int_arm_neon_vpaddls and int_arm_neon_vpaddlu respectively.  The two
// records differ in the 5-bit field (0b00100 vs 0b00101).
4134defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
4135                             int_arm_neon_vpaddls>;
4136defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
4137                             int_arm_neon_vpaddlu>;
4138
4139//   VPADAL   : Vector Pairwise Add and Accumulate Long
// Signed / unsigned VPADAL via N2VPLInt2_QHS (defined elsewhere), wrapping
// int_arm_neon_vpadals and int_arm_neon_vpadalu respectively.  The two
// records differ in the 5-bit field (0b01100 vs 0b01101).
4140defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
4141                              int_arm_neon_vpadals>;
4142defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
4143                              int_arm_neon_vpadalu>;
4144
4145//   VPMAX    : Vector Pairwise Maximum
// VPMAX, D-register class only (N3VDInt).  Signed integer variants wrap
// int_arm_neon_vpmaxs, unsigned ones int_arm_neon_vpmaxu; the f32 variant
// also uses int_arm_neon_vpmaxs.  The 2-bit size argument steps through
// 0b00 / 0b01 / 0b10 for 8 / 16 / 32-bit elements.
4146def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4147                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
4148def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4149                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
4150def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4151                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
4152def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4153                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
4154def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4155                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
4156def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4157                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
4158def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
4159                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
4160
4161//   VPMIN    : Vector Pairwise Minimum
// VPMIN, D-register class only (N3VDInt).  Signed integer variants wrap
// int_arm_neon_vpmins, unsigned ones int_arm_neon_vpminu; the f32 variant
// also uses int_arm_neon_vpmins.  The integer records differ from the VPMAX
// ones in the fifth argument (1 here, 0 there).
4162def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4163                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
4164def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4165                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
4166def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4167                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
4168def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4169                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
4170def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4171                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
4172def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4173                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
4174def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
4175                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
4176
4177// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
4178
4179//   VRECPE   : Vector Reciprocal Estimate
// VRECPE in four flavours, all wrapping int_arm_neon_vrecpe:
//   "u32" on v2i32 (D) / v4i32 (Q)   -- 5-bit field 0b01000
//   "f32" on v2f32 (D) / v4f32 (Q)   -- 5-bit field 0b01010
// D forms use itinerary IIC_VUNAD, Q forms IIC_VUNAQ.
4180def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
4181                        IIC_VUNAD, "vrecpe", "u32",
4182                        v2i32, v2i32, int_arm_neon_vrecpe>;
4183def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
4184                        IIC_VUNAQ, "vrecpe", "u32",
4185                        v4i32, v4i32, int_arm_neon_vrecpe>;
4186def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
4187                        IIC_VUNAD, "vrecpe", "f32",
4188                        v2f32, v2f32, int_arm_neon_vrecpe>;
4189def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
4190                        IIC_VUNAQ, "vrecpe", "f32",
4191                        v4f32, v4f32, int_arm_neon_vrecpe>;
4192
4193//   VRECPS   : Vector Reciprocal Step
// VRECPS, f32 only: v2f32 (D, IIC_VRECSD) and v4f32 (Q, IIC_VRECSQ), both
// wrapping int_arm_neon_vrecps.
4194def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
4195                        IIC_VRECSD, "vrecps", "f32",
4196                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
4197def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
4198                        IIC_VRECSQ, "vrecps", "f32",
4199                        v4f32, v4f32, int_arm_neon_vrecps, 1>;
4200
4201//   VRSQRTE  : Vector Reciprocal Square Root Estimate
// VRSQRTE in four flavours, all wrapping int_arm_neon_vrsqrte:
//   "u32" on v2i32 (D) / v4i32 (Q)   -- 5-bit field 0b01001
//   "f32" on v2f32 (D) / v4f32 (Q)   -- 5-bit field 0b01011
// D forms use itinerary IIC_VUNAD, Q forms IIC_VUNAQ.
4202def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
4203                         IIC_VUNAD, "vrsqrte", "u32",
4204                         v2i32, v2i32, int_arm_neon_vrsqrte>;
4205def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
4206                         IIC_VUNAQ, "vrsqrte", "u32",
4207                         v4i32, v4i32, int_arm_neon_vrsqrte>;
4208def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
4209                         IIC_VUNAD, "vrsqrte", "f32",
4210                         v2f32, v2f32, int_arm_neon_vrsqrte>;
4211def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
4212                         IIC_VUNAQ, "vrsqrte", "f32",
4213                         v4f32, v4f32, int_arm_neon_vrsqrte>;
4214
4215//   VRSQRTS  : Vector Reciprocal Square Root Step
// VRSQRTS, f32 only: v2f32 (D, IIC_VRECSD) and v4f32 (Q, IIC_VRECSQ), both
// wrapping int_arm_neon_vrsqrts.
4216def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
4217                        IIC_VRECSD, "vrsqrts", "f32",
4218                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
4219def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
4220                        IIC_VRECSQ, "vrsqrts", "f32",
4221                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
4222
4223// Vector Shifts.
4224
4225//   VSHL     : Vector Shift
// Register-operand VSHL through N3VInt_QHSDSh (defined elsewhere), using the
// N3RegVShFrm format.  Signed variants wrap int_arm_neon_vshifts, unsigned
// variants int_arm_neon_vshiftu.
4226defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
4227                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
4228                            "vshl", "s", int_arm_neon_vshifts>;
4229defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
4230                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
4231                            "vshl", "u", int_arm_neon_vshiftu>;
4232
4233//   VSHL     : Vector Shift Left (Immediate)
// Immediate left shift via N2VShL_QHSD (defined elsewhere), selected from the
// NEONvshl node; data-type suffix prefix is "i".
4234defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
4235
4236//   VSHR     : Vector Shift Right (Immediate)
// Immediate right shift via N2VShR_QHSD (defined elsewhere): the signed form
// selects from NEONvshrs, the unsigned form from NEONvshru.
4237defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>;
4238defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>;
4239
4240//   VSHLL    : Vector Shift Left Long
// Long left shift via N2VLSh_QHS (defined elsewhere): the signed form selects
// from NEONvshlls, the unsigned form from NEONvshllu.
4241defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
4242defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
4243
4244//   VSHLL    : Vector Shift Left Long (with maximum shift count)
// Helper class for the "maximum shift count" VSHLL encodings.  It derives
// from N2VLSh, then pins encoding bits 21-16 to the op21_16 argument and
// routes decoding through DecodeVSHLMaxInstruction.
//   op21_16  : fixed value for Inst{21-16}
//   ResTy    : result vector type;  OpTy : source vector type
//   OpNode   : SelectionDAG node the instruction is selected from
// The remaining arguments are forwarded to N2VLSh unchanged.
4245class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
4246                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
4247                ValueType OpTy, SDNode OpNode>
4248  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
4249           ResTy, OpTy, OpNode> {
4250  let Inst{21-16} = op21_16;
4251  let DecoderMethod = "DecodeVSHLMaxInstruction";
4252}
// One record per source element size; each widens to the next larger element
// type (v8i8 -> v8i16, v4i16 -> v4i32, v2i32 -> v2i64) and is selected from
// NEONvshlli.
4253def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
4254                          v8i16, v8i8, NEONvshlli>;
4255def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
4256                          v4i32, v4i16, NEONvshlli>;
4257def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
4258                          v2i64, v2i32, NEONvshlli>;
4259
4260//   VSHRN    : Vector Shift Right and Narrow
// Narrowing right shift via N2VNSh_HSD (defined elsewhere), selected from
// NEONvshrn; data-type suffix prefix is "i".
4261defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
4262                           NEONvshrn>;
4263
4264//   VRSHL    : Vector Rounding Shift
// Register-operand rounding shift through N3VInt_QHSDSh (defined elsewhere):
// signed variants wrap int_arm_neon_vrshifts, unsigned variants
// int_arm_neon_vrshiftu.
4265defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
4266                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4267                            "vrshl", "s", int_arm_neon_vrshifts>;
4268defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
4269                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4270                            "vrshl", "u", int_arm_neon_vrshiftu>;
4271//   VRSHR    : Vector Rounding Shift Right
// Immediate rounding right shift via N2VShR_QHSD (defined elsewhere): signed
// form selects from NEONvrshrs, unsigned form from NEONvrshru.
4272defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>;
4273defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>;
4274
4275//   VRSHRN   : Vector Rounding Shift Right and Narrow
// Narrowing rounding right shift via N2VNSh_HSD (defined elsewhere), selected
// from NEONvrshrn.
4276defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
4277                           NEONvrshrn>;
4278
4279//   VQSHL    : Vector Saturating Shift
// Register-operand saturating shift through N3VInt_QHSDSh (defined
// elsewhere): signed variants wrap int_arm_neon_vqshifts, unsigned variants
// int_arm_neon_vqshiftu.
4280defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
4281                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4282                            "vqshl", "s", int_arm_neon_vqshifts>;
4283defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
4284                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4285                            "vqshl", "u", int_arm_neon_vqshiftu>;
4286//   VQSHL    : Vector Saturating Shift Left (Immediate)
// Immediate saturating left shift via N2VShL_QHSD (defined elsewhere): signed
// form selects from NEONvqshls, unsigned form from NEONvqshlu.
4287defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
4288defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
4289
4290//   VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
// Printed with the "vqshlu" mnemonic and an "s" data-type prefix; selected
// from NEONvqshlsu.
4291defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
4292
4293//   VQSHRN   : Vector Saturating Shift Right and Narrow
// Narrowing saturating right shift via N2VNSh_HSD (defined elsewhere): signed
// form selects from NEONvqshrns, unsigned form from NEONvqshrnu.
4294defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
4295                           NEONvqshrns>;
4296defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
4297                           NEONvqshrnu>;
4298
4299//   VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
// Printed with the "vqshrun" mnemonic and an "s" data-type prefix; selected
// from NEONvqshrnsu.
4300defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
4301                           NEONvqshrnsu>;
4302
4303//   VQRSHL   : Vector Saturating Rounding Shift
// Register-operand saturating rounding shift through N3VInt_QHSDSh (defined
// elsewhere): signed variants wrap int_arm_neon_vqrshifts, unsigned variants
// int_arm_neon_vqrshiftu.
4304defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
4305                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4306                            "vqrshl", "s", int_arm_neon_vqrshifts>;
4307defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
4308                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4309                            "vqrshl", "u", int_arm_neon_vqrshiftu>;
4310
4311//   VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
// Narrowing saturating rounding right shift via N2VNSh_HSD (defined
// elsewhere): signed form selects from NEONvqrshrns, unsigned form from
// NEONvqrshrnu.
4312defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
4313                           NEONvqrshrns>;
4314defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
4315                           NEONvqrshrnu>;
4316
4317//   VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
// Printed with the "vqrshrun" mnemonic and an "s" data-type prefix; selected
// from NEONvqrshrnsu.
4318defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
4319                           NEONvqrshrnsu>;
4320
4321//   VSRA     : Vector Shift Right and Accumulate
// Shift-right-and-accumulate via N2VShAdd_QHSD (defined elsewhere), built on
// the plain right-shift nodes NEONvshrs (signed) and NEONvshru (unsigned).
4322defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
4323defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
4324//   VRSRA    : Vector Rounding Shift Right and Accumulate
// Rounding variant of the above, built on NEONvrshrs (signed) and NEONvrshru
// (unsigned).
4325defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
4326defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
4327
4328//   VSLI     : Vector Shift Left and Insert
// Expands N2VShInsL_QHSD (defined elsewhere); no SelectionDAG node or
// intrinsic is passed at this level.
4329defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
4330
4331//   VSRI     : Vector Shift Right and Insert
// Expands N2VShInsR_QHSD (defined elsewhere); no SelectionDAG node or
// intrinsic is passed at this level.
4332defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
4333
4334// Vector Absolute and Saturating Absolute.
4335
4336//   VABS     : Vector Absolute Value
// Integer VABS through N2VInt_QHS (defined elsewhere) with an "s" data-type
// prefix, wrapping int_arm_neon_vabs.
4337defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
4338                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
4339                           int_arm_neon_vabs>;
// f32 forms on v2f32 (D) and v4f32 (Q), using the same intrinsic.
4340def  VABSfd   : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
4341                        IIC_VUNAD, "vabs", "f32",
4342                        v2f32, v2f32, int_arm_neon_vabs>;
4343def  VABSfq   : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
4344                        IIC_VUNAQ, "vabs", "f32",
4345                        v4f32, v4f32, int_arm_neon_vabs>;
4346
4347//   VQABS    : Vector Saturating Absolute Value
// Integer-only; expands N2VInt_QHS (defined elsewhere) around
// int_arm_neon_vqabs with an "s" data-type prefix.
4348defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
4349                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
4350                           int_arm_neon_vqabs>;
4351
4352// Vector Negate.
4353
// Pattern fragments that express integer negation as (0 - $in): an all-zeros
// vector (NEONimmAllZerosV typed v2i32 for the D form, v4i32 for the Q form)
// is bitconverted to the operand type and $in is subtracted from it.
4354def vnegd  : PatFrag<(ops node:$in),
4355                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
4356def vnegq  : PatFrag<(ops node:$in),
4357                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
4358
// Instruction classes for integer VNEG.
//   size      : 2-bit element-size field forwarded to N2V
//   OpcodeStr : mnemonic;  Dt : data-type suffix
//   Ty        : vector type the selection pattern is typed with
// VNEGD is the DPR form (selects the vnegd fragment, itinerary IIC_VSHLiD);
// VNEGQ is the QPR form (selects vnegq, itinerary IIC_VSHLiQ).
4359class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
4360  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
4361        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
4362        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
4363class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
4364  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
4365        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
4366        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
4367
4368//   VNEG     : Vector Negate (integer)
// One record per element size (size field 0b00 / 0b01 / 0b10 for s8 / s16 /
// s32) and register width (VNEGD for D, VNEGQ for Q).
4369def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
4370def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
4371def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
4372def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
4373def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
4374def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
4375
4376//   VNEG     : Vector Negate (floating-point)
// f32 negate, selected directly from the generic fneg node: VNEGfd on v2f32
// (DPR, IIC_VUNAD) and VNEGf32q on v4f32 (QPR, IIC_VUNAQ).
// Note the two record names do not follow the same naming scheme.
4377def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
4378                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
4379                    "vneg", "f32", "$Vd, $Vm", "",
4380                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
4381def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
4382                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
4383                    "vneg", "f32", "$Vd, $Vm", "",
4384                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
4385
// Stand-alone patterns mapping the vnegd / vnegq fragments, per vector type,
// onto the matching integer VNEG record.
// NOTE(review): the VNEGD / VNEGQ classes already attach a pattern of the
// same shape to each record -- confirm whether these are still required.
4386def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
4387def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
4388def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
4389def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
4390def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
4391def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
4392
4393//   VQNEG    : Vector Saturating Negate
// Integer-only; expands N2VInt_QHS (defined elsewhere) around
// int_arm_neon_vqneg with an "s" data-type prefix.
4394defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
4395                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
4396                           int_arm_neon_vqneg>;
4397
4398// Vector Bit Counting Operations.
4399
4400//   VCLS     : Vector Count Leading Sign Bits
// Expands N2VInt_QHS (defined elsewhere) around int_arm_neon_vcls with an "s"
// data-type prefix.
4401defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
4402                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
4403                           int_arm_neon_vcls>;
4404//   VCLZ     : Vector Count Leading Zeros
// Expands N2VInt_QHS (defined elsewhere) around int_arm_neon_vclz with an "i"
// data-type prefix.
4405defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
4406                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
4407                           int_arm_neon_vclz>;
4408//   VCNT     : Vector Count One Bits
// Byte-element only: v8i8 (D, IIC_VCNTiD) and v16i8 (Q, IIC_VCNTiQ), both
// wrapping int_arm_neon_vcnt.  The data-type suffix is the bare "8".
4409def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
4410                        IIC_VCNTiD, "vcnt", "8",
4411                        v8i8, v8i8, int_arm_neon_vcnt>;
4412def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
4413                        IIC_VCNTiQ, "vcnt", "8",
4414                        v16i8, v16i8, int_arm_neon_vcnt>;
4415
4416// Vector Swap
// VSWP for DPR and QPR.  Both records have an empty pattern list and use
// NoItinerary, so they carry no selection pattern or scheduling information.
// NOTE(review): only $Vd is declared as an output and no operand is tied,
// although the mnemonic suggests both registers are modified -- confirm that
// the operand model is adequate for the users of these records.
4417def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
4418                     (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
4419                     "vswp", "$Vd, $Vm", "", []>;
4420def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
4421                     (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
4422                     "vswp", "$Vd, $Vm", "", []>;
4423
4424// Vector Move Operations.
4425
4426//   VMOV     : Vector Move (Register)
// Register-to-register "vmov" is provided as an assembler alias for VORR with
// the same source register supplied for both inputs ($Vm, $Vm); one alias for
// DPR (VORRd) and one for QPR (VORRq).  ${p} carries the predicate operand.
4427def : InstAlias<"vmov${p} $Vd, $Vm",
4428                (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
4429def : InstAlias<"vmov${p} $Vd, $Vm",
4430                (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
4431
4432//   VMOV     : Vector Move (Immediate)
4433
// All immediate VMOV records below are flagged isReMaterializable.  Each has
// the immediate as its only input and is selected from NEONvmovImm with a
// target-constant (timm) operand.
4434let isReMaterializable = 1 in {
// i8 elements (v8i8 / v16i8): the 4-bit field is fully fixed at 0b1110.
4435def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
4436                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
4437                         "vmov", "i8", "$Vd, $SIMM", "",
4438                         [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
4439def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
4440                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
4441                         "vmov", "i8", "$Vd, $SIMM", "",
4442                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
4443
// i16 elements (v4i16 / v8i16): only SIMM bit 9 is forwarded into the
// encoding.
4444def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
4445                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
4446                         "vmov", "i16", "$Vd, $SIMM", "",
4447                         [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
4448  let Inst{9} = SIMM{9};
4449}
4450
4451def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
4452                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
4453                         "vmov", "i16", "$Vd, $SIMM", "",
4454                         [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
4455 let Inst{9} = SIMM{9};
4456}
4457
// i32 elements (v2i32 / v4i32): operand class nImmVMOVI32; the 4-bit field is
// left unspecified and encoding bits 11-8 come from SIMM bits 11-8.
4458def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
4459                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
4460                         "vmov", "i32", "$Vd, $SIMM", "",
4461                         [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
4462  let Inst{11-8} = SIMM{11-8};
4463}
4464
4465def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
4466                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
4467                         "vmov", "i32", "$Vd, $SIMM", "",
4468                         [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
4469  let Inst{11-8} = SIMM{11-8};
4470}
4471
// i64 elements (v1i64 / v2i64): 4-bit field fixed at 0b1110 like the i8
// forms; these differ from the i8 records in the second-to-last bit argument
// (1 here, 0 there).
4472def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
4473                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
4474                         "vmov", "i64", "$Vd, $SIMM", "",
4475                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
4476def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
4477                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
4478                         "vmov", "i64", "$Vd, $SIMM", "",
4479                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
4480} // isReMaterializable
4481
4482//   VMOV     : Vector Get Lane (move scalar to ARM core register)
4483
// Lane reads from a D register into a core register ($R is GPR, $V is DPR).
// The lane operand is printed directly after the register ("$V$lane") and is
// matched as an `imm` in the patterns.  Each record scatters the lane index
// into the encoding as noted below.

// Signed 8-bit lane (NEONvgetlanes on v8i8): 3-bit lane index, bit 2 goes to
// Inst{21} and bits 1-0 to Inst{6-5}.
4484def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
4485                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
4486                          IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
4487                          [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
4488                                           imm:$lane))]> {
4489  let Inst{21}  = lane{2};
4490  let Inst{6-5} = lane{1-0};
4491}
// Signed 16-bit lane (NEONvgetlanes on v4i16): 2-bit lane index, bit 1 goes
// to Inst{21} and bit 0 to Inst{6}.
4492def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
4493                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
4494                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
4495                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
4496                                           imm:$lane))]> {
4497  let Inst{21} = lane{1};
4498  let Inst{6}  = lane{0};
4499}
// Unsigned 8-bit lane (NEONvgetlaneu on v8i8): same lane-bit placement as
// VGETLNs8.
4500def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
4501                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
4502                          IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
4503                          [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
4504                                           imm:$lane))]> {
4505  let Inst{21}  = lane{2};
4506  let Inst{6-5} = lane{1-0};
4507}
// Unsigned 16-bit lane (NEONvgetlaneu on v4i16): same lane-bit placement as
// VGETLNs16.
4508def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
4509                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
4510                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
4511                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
4512                                           imm:$lane))]> {
4513  let Inst{21} = lane{1};
4514  let Inst{6}  = lane{0};
4515}
// 32-bit lane, selected from the generic extractelt node on v2i32 (no
// signed/unsigned distinction; data type printed as "32").  The single lane
// bit goes to Inst{21}.
4516def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
4517                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
4518                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
4519                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
4520                                           imm:$lane))]> {
4521  let Inst{21} = lane{0};
4522}
4523// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Lane reads from Q registers.  Each pattern extracts the D sub-register
// chosen by the DSubReg_*_reg transform of the lane immediate and then uses
// the D-register VGETLN instruction with the lane immediate rewritten by the
// matching SubReg_*_lane transform.

// Signed 8-bit and 16-bit lanes.
def : Pat<(NEONvgetlanes (v16i8 QPR:$Qm), imm:$idx),
          (VGETLNs8
            (v8i8 (EXTRACT_SUBREG QPR:$Qm, (DSubReg_i8_reg imm:$idx))),
            (SubReg_i8_lane imm:$idx))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$Qm), imm:$idx),
          (VGETLNs16
            (v4i16 (EXTRACT_SUBREG QPR:$Qm, (DSubReg_i16_reg imm:$idx))),
            (SubReg_i16_lane imm:$idx))>;

// Unsigned 8-bit and 16-bit lanes.
def : Pat<(NEONvgetlaneu (v16i8 QPR:$Qm), imm:$idx),
          (VGETLNu8
            (v8i8 (EXTRACT_SUBREG QPR:$Qm, (DSubReg_i8_reg imm:$idx))),
            (SubReg_i8_lane imm:$idx))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$Qm), imm:$idx),
          (VGETLNu16
            (v4i16 (EXTRACT_SUBREG QPR:$Qm, (DSubReg_i16_reg imm:$idx))),
            (SubReg_i16_lane imm:$idx))>;

// 32-bit lanes.
def : Pat<(extractelt (v4i32 QPR:$Qm), imm:$idx),
          (VGETLNi32
            (v2i32 (EXTRACT_SUBREG QPR:$Qm, (DSubReg_i32_reg imm:$idx))),
            (SubReg_i32_lane imm:$idx))>;
// Floating-point element extraction is expressed purely as sub-register
// extraction.  For f32 elements the vector is first copied to the
// DPR_VFP2 / QPR_VFP2 register class and the S sub-register picked by
// SSubReg_f32_reg is taken.
def : Pat<(extractelt (v2f32 DPR:$vec), imm:$idx),
          (EXTRACT_SUBREG
            (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$vec), DPR_VFP2)),
            (SSubReg_f32_reg imm:$idx))>;
def : Pat<(extractelt (v4f32 QPR:$vec), imm:$idx),
          (EXTRACT_SUBREG
            (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$vec), QPR_VFP2)),
            (SSubReg_f32_reg imm:$idx))>;

// Disabled: the equivalent v2i64 pattern.
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;

// f64 elements map directly onto the D sub-registers of a Q register.
def : Pat<(extractelt (v2f64 QPR:$vec), imm:$idx),
          (EXTRACT_SUBREG QPR:$vec, (DSubReg_f64_reg imm:$idx))>;
4554
4555
4556//   VMOV     : Vector Set Lane (move ARM core register to scalar)
4557
// Set-lane instructions ("vmov.<size> Dd[x], Rt").  The incoming vector
// $src1 is tied to the result $V by the constraint below.
let Constraints = "$src1 = $V" in {

// 8-bit lanes: lane{2} goes to Inst{21}, lane{1-0} to Inst{6-5}.
def VSETLNi8
  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?},
              (outs DPR:$V),
              (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
              IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
              [(set DPR:$V,
                    (vector_insert (v8i8 DPR:$src1), GPR:$R, imm:$lane))]> {
  let Inst{6-5} = lane{1-0};
  let Inst{21}  = lane{2};
}

// 16-bit lanes: lane{1} goes to Inst{21}, lane{0} to Inst{6}.
def VSETLNi16
  : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1},
              (outs DPR:$V),
              (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
              IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
              [(set DPR:$V,
                    (vector_insert (v4i16 DPR:$src1), GPR:$R, imm:$lane))]> {
  let Inst{6}  = lane{0};
  let Inst{21} = lane{1};
}

// 32-bit lanes: lane{0} goes to Inst{21}.
def VSETLNi32
  : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00,
              (outs DPR:$V),
              (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
              IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
              [(set DPR:$V,
                    (insertelt (v2i32 DPR:$src1), GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{0};
}

} // Constraints = "$src1 = $V"
// Lane writes into Q registers.  The D sub-register selected by the lane
// immediate is extracted, updated with the D-register VSETLN instruction,
// and inserted back into the same sub-register position of the Q register.
def : Pat<(vector_insert (v16i8 QPR:$Qd), GPR:$Rt, imm:$idx),
          (v16i8
            (INSERT_SUBREG
              QPR:$Qd,
              (v8i8
                (VSETLNi8
                  (v8i8 (EXTRACT_SUBREG QPR:$Qd, (DSubReg_i8_reg imm:$idx))),
                  GPR:$Rt,
                  (SubReg_i8_lane imm:$idx))),
              (DSubReg_i8_reg imm:$idx)))>;
def : Pat<(vector_insert (v8i16 QPR:$Qd), GPR:$Rt, imm:$idx),
          (v8i16
            (INSERT_SUBREG
              QPR:$Qd,
              (v4i16
                (VSETLNi16
                  (v4i16 (EXTRACT_SUBREG QPR:$Qd, (DSubReg_i16_reg imm:$idx))),
                  GPR:$Rt,
                  (SubReg_i16_lane imm:$idx))),
              (DSubReg_i16_reg imm:$idx)))>;
def : Pat<(insertelt (v4i32 QPR:$Qd), GPR:$Rt, imm:$idx),
          (v4i32
            (INSERT_SUBREG
              QPR:$Qd,
              (v2i32
                (VSETLNi32
                  (v2i32 (EXTRACT_SUBREG QPR:$Qd, (DSubReg_i32_reg imm:$idx))),
                  GPR:$Rt,
                  (SubReg_i32_lane imm:$idx))),
              (DSubReg_i32_reg imm:$idx)))>;
4601
// Floating-point element insertion is expressed purely as sub-register
// insertion.  For f32 elements the vector is first copied to the
// DPR_VFP2 / QPR_VFP2 register class.
def : Pat<(v2f32 (insertelt DPR:$vec, SPR:$elt, imm:$idx)),
          (INSERT_SUBREG
            (v2f32 (COPY_TO_REGCLASS DPR:$vec, DPR_VFP2)),
            SPR:$elt,
            (SSubReg_f32_reg imm:$idx))>;
def : Pat<(v4f32 (insertelt QPR:$vec, SPR:$elt, imm:$idx)),
          (INSERT_SUBREG
            (v4f32 (COPY_TO_REGCLASS QPR:$vec, QPR_VFP2)),
            SPR:$elt,
            (SSubReg_f32_reg imm:$idx))>;

// Disabled: the equivalent v2i64 pattern.
//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// f64 elements are inserted as D sub-registers of the Q register.
def : Pat<(v2f64 (insertelt QPR:$vec, DPR:$elt, imm:$idx)),
          (INSERT_SUBREG QPR:$vec, DPR:$elt, (DSubReg_f64_reg imm:$idx))>;
4613
// scalar_to_vector for floating-point scalars: insert the scalar into the
// first sub-register (ssub_0 / dsub_0) of an IMPLICIT_DEF vector.
def : Pat<(v2f32 (scalar_to_vector SPR:$elt)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$elt, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$elt))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$elt, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$elt)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$elt, ssub_0)>;
4620
// scalar_to_vector from a core register into a D register: set lane 0 of an
// IMPLICIT_DEF vector with the matching VSETLN instruction.
def : Pat<(v8i8 (scalar_to_vector GPR:$Rt)),
          (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$Rt, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$Rt)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$Rt, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$Rt)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$Rt, (i32 0))>;
4627
// scalar_to_vector from a core register into a Q register: build the D
// value with VSETLN on lane 0 and insert it as dsub_0 of an IMPLICIT_DEF
// Q register.
def : Pat<(v16i8 (scalar_to_vector GPR:$Rt)),
          (INSERT_SUBREG
            (v16i8 (IMPLICIT_DEF)),
            (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$Rt, (i32 0)),
            dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$Rt)),
          (INSERT_SUBREG
            (v8i16 (IMPLICIT_DEF)),
            (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$Rt, (i32 0)),
            dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$Rt)),
          (INSERT_SUBREG
            (v4i32 (IMPLICIT_DEF)),
            (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$Rt, (i32 0)),
            dsub_0)>;
4640
4641//   VDUP     : Vector Duplicate (from ARM core register to all elements)
4642
// VDUP from a core register into a D register.
//   opcod1, opcod3 - encoding fields forwarded to NVDup
//   Dt             - data-type suffix used in the assembly string
//   Ty             - result vector type produced from NEONvdup
class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3,
          (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
// VDUP from a core register into a Q register.
//   opcod1, opcod3 - encoding fields forwarded to NVDup
//   Dt             - data-type suffix used in the assembly string
//   Ty             - result vector type produced from NEONvdup
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3,
          (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
4651
// D-register forms.
def VDUP8d  : VDUPD<0b11101100, 0b00, "8",  v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;

// Q-register forms.
def VDUP8q  : VDUPQ<0b11101110, 0b00, "8",  v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// An f32 value that is a bitconvert of a core register is duplicated with
// the 32-bit integer VDUP directly from that core register.
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$Rt)))),
          (VDUP32d GPR:$Rt)>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$Rt)))),
          (VDUP32q GPR:$Rt)>;
4661
4662//   VDUP     : Vector Duplicate Lane (from scalar to all elements)
4663
// VDUP (scalar) producing a D register from one lane of a D register.
//   op19_16   - Inst{19-16} template; the lane bits are filled in by each def
//   OpcodeStr - mnemonic
//   Dt        - data-type suffix
//   Ty        - vector type of both the source and the result
//   IdxTy     - operand class used for the lane index
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0,
              (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd,
                    (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
4669
// VDUP (scalar) producing a Q register from one lane of a D register.
//   op19_16   - Inst{19-16} template; the lane bits are filled in by each def
//   OpcodeStr - mnemonic
//   Dt        - data-type suffix
//   ResTy     - Q-register result vector type
//   OpTy      - D-register source vector type
//   IdxTy     - operand class used for the lane index
//
// The selection pattern matches the lane with a plain "imm:$lane", exactly
// as VDUPLND does.  It previously named VectorIndex32 here regardless of
// IdxTy, which disagreed with the declared operand for the 8-bit and 16-bit
// instantiations.
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
                                      imm:$lane)))]>;
4676
4677// Inst{19-16} is partially specified depending on the element size.
4678
// D-register results.  The lane index occupies the '?' bits of Inst{19-16}.
def VDUPLN8d
  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}

def VDUPLN16d
  : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}

def VDUPLN32d
  : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
// Q-register results.  The lane index occupies the '?' bits of Inst{19-16}.
def VDUPLN8q
  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}

def VDUPLN16q
  : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}

def VDUPLN32q
  : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
4703
// f32 vectors reuse the 32-bit integer lane-duplicate instructions.
def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;
def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Duplicating a lane of a Q register: extract the D sub-register chosen by
// the DSubReg_*_reg transform and use the Q-result instruction with the lane
// immediate rewritten by the matching SubReg_*_lane transform.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$Qm), imm:$idx)),
          (v16i8
            (VDUPLN8q
              (v8i8 (EXTRACT_SUBREG QPR:$Qm, (DSubReg_i8_reg imm:$idx))),
              (SubReg_i8_lane imm:$idx)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$Qm), imm:$idx)),
          (v8i16
            (VDUPLN16q
              (v4i16 (EXTRACT_SUBREG QPR:$Qm, (DSubReg_i16_reg imm:$idx))),
              (SubReg_i16_lane imm:$idx)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$Qm), imm:$idx)),
          (v4i32
            (VDUPLN32q
              (v2i32 (EXTRACT_SUBREG QPR:$Qm, (DSubReg_i32_reg imm:$idx))),
              (SubReg_i32_lane imm:$idx)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$Qm), imm:$idx)),
          (v4f32
            (VDUPLN32q
              (v2f32 (EXTRACT_SUBREG QPR:$Qm, (DSubReg_i32_reg imm:$idx))),
              (SubReg_i32_lane imm:$idx)))>;
4726
// Pseudo-instructions (empty constraint string) matching NEONvdup of an f32
// held in an S register, for D and Q results respectively.
def VDUPfdf
  : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
def VDUPfqf
  : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
4731
4732//   VMOVN    : Vector Narrowing Move
// Narrowing move, selected from the generic trunc node.
defm VMOVN
  : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN, "vmovn", "i", trunc>;

//   VQMOVN   : Vector Saturating Narrowing Move
// Three variants, each bound to its own NEON intrinsic; the last one is
// printed as "vqmovun".
defm VQMOVNs
  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu
  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu
  : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                "vqmovun", "s", int_arm_neon_vqmovnsu>;

//   VMOVL    : Vector Lengthening Move
// Selected from the generic sext / zext nodes.
defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
4745
4746// Vector Conversions.
4747
4748//   VCVT     : Vector Convert Between Floating-Point and Integers
// D-register forms: v2f32 <-> v2i32.
def VCVTf2sd
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
         "vcvt", "s32.f32", v2i32, v2f32, fp_to_sint>;
def VCVTf2ud
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
         "vcvt", "u32.f32", v2i32, v2f32, fp_to_uint>;
def VCVTs2fd
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
         "vcvt", "f32.s32", v2f32, v2i32, sint_to_fp>;
def VCVTu2fd
  : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
         "vcvt", "f32.u32", v2f32, v2i32, uint_to_fp>;

// Q-register forms: v4f32 <-> v4i32.
def VCVTf2sq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
         "vcvt", "s32.f32", v4i32, v4f32, fp_to_sint>;
def VCVTf2uq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
         "vcvt", "u32.f32", v4i32, v4f32, fp_to_uint>;
def VCVTs2fq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
         "vcvt", "f32.s32", v4f32, v4i32, sint_to_fp>;
def VCVTu2fq
  : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
         "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>;
4766
4767//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
// D-register forms, each bound to a NEON fixed-point conversion intrinsic.
def VCVTf2xsd
  : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
            v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud
  : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
            v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd
  : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
            v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd
  : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
            v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;

// Q-register forms, using the same intrinsics on 4-element vectors.
def VCVTf2xsq
  : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
            v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq
  : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
            v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq
  : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
            v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq
  : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
            v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
4785
4786//   VCVT     : Vector Convert Between Half-Precision and Single-Precision.
// Both directions require HasNEON and HasFP16.  The half-precision side is
// typed as v4i16, the single-precision side as v4f32.
def VCVTf2h
  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
            IIC_VUNAQ, "vcvt", "f16.f32",
            v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
    Requires<[HasNEON, HasFP16]>;
def VCVTh2f
  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
            IIC_VUNAQ, "vcvt", "f32.f16",
            v4f32, v4i16, int_arm_neon_vcvthf2fp>,
    Requires<[HasNEON, HasFP16]>;
4795
4796// Vector Reverse.
4797
4798//   VREV64   : Vector Reverse elements within 64-bit doublewords
4799
// VREV64 on a D register.  op19_18 is the only encoding field that varies
// between instantiations; Ty is the vector type of source and result.
class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;

// VREV64 on a Q register.
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
4810
// D-register forms; v2f32 reuses the 32-bit integer instruction.
def VREV64d8  : VREV64D<0b00, "vrev64", "8",  v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))),
          (VREV64d32 DPR:$Vm)>;

// Q-register forms; v4f32 reuses the 32-bit integer instruction.
def VREV64q8  : VREV64Q<0b00, "vrev64", "8",  v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))),
          (VREV64q32 QPR:$Vm)>;
4820
4821//   VREV32   : Vector Reverse elements within 32-bit words
4822
// VREV32 on a D register.  op19_18 is the only encoding field that varies
// between instantiations; Ty is the vector type of source and result.
class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;

// VREV32 on a Q register.
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
4833
// D-register forms (8-bit and 16-bit elements).
def VREV32d8  : VREV32D<0b00, "vrev32", "8",  v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

// Q-register forms (8-bit and 16-bit elements).
def VREV32q8  : VREV32Q<0b00, "vrev32", "8",  v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
4839
4840//   VREV16   : Vector Reverse elements within 16-bit halfwords
4841
// VREV16 on a D register.  op19_18 is the only encoding field that varies
// between instantiations; Ty is the vector type of source and result.
class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0,
        (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;

// VREV16 on a Q register.
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
4852
// Only 8-bit elements are instantiated for VREV16.
def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
4855
4856// Other Vector Shuffles.
4857
4858//  Aligned extractions: really just dropping registers
4859
// Pattern helper: a vector_extract_subvec of a Q register becomes an
// EXTRACT_SUBREG whose sub-register index is computed from the start
// immediate by LaneCVT.
//   DestTy  - type of the extracted sub-vector
//   SrcTy   - type of the Q-register source
//   LaneCVT - transform applied to the start immediate
class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
4863
// One instantiation per element type; v2f32 shares the i32 transform and
// v1i64 uses the f64 transform.
def : AlignedVEXTq<v8i8,  v16i8, DSubReg_i8_reg>;
def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
4873
4874
4875//   VEXT     : Vector Extract
4876
// VEXT on D registers.  By default the 4-bit index is placed in Inst{11-8};
// individual defs may override that placement.
//   OpcodeStr - mnemonic
//   Dt        - data-type suffix
//   Ty        - vector type of both sources and the result
class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
  : N3V<0,1,0b11,{?,?,?,?},0,0,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, i32imm:$index),
        NVExtFrm, IIC_VEXTD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd,
              (Ty (NEONvext (Ty DPR:$Vn), (Ty DPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
4886
// VEXT on Q registers.  By default the 4-bit index is placed in Inst{11-8};
// individual defs may override that placement.
//   OpcodeStr - mnemonic
//   Dt        - data-type suffix
//   Ty        - vector type of both sources and the result
class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
  : N3V<0,1,0b11,{?,?,?,?},1,0,
        (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, i32imm:$index),
        NVExtFrm, IIC_VEXTQ,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd,
              (Ty (NEONvext (Ty QPR:$Vn), (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
4896
// D-register VEXT.  For wider elements the index is placed in the upper
// bits of Inst{11-8} and the remaining low bits are fixed to zero.
def VEXTd8 : VEXTd<"vext", "8", v8i8> {
  let Inst{11-8} = index{3-0};
}

def VEXTd16 : VEXTd<"vext", "16", v4i16> {
  let Inst{11-9} = index{2-0};
  let Inst{8} = 0b0;
}

def VEXTd32 : VEXTd<"vext", "32", v2i32> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8} = 0b00;
}

// v2f32 reuses the 32-bit integer instruction.
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
4912
// Q-register VEXT.  For wider elements the index is placed in the upper
// bits of Inst{11-8} and the remaining low bits are fixed to zero.
def VEXTq8 : VEXTq<"vext", "8", v16i8> {
  let Inst{11-8} = index{3-0};
}

def VEXTq16 : VEXTq<"vext", "16", v8i16> {
  let Inst{11-9} = index{2-0};
  let Inst{8} = 0b0;
}

def VEXTq32 : VEXTq<"vext", "32", v4i32> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8} = 0b00;
}

// v4f32 reuses the 32-bit integer instruction.
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
4928
4929//   VTRN     : Vector Transpose
4930
// D-register forms.
def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

// Q-register forms (itinerary IIC_VPERMQ).
def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
4938
4939//   VUZP     : Vector Unzip (Deinterleave)
4940
// D-register forms.
def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;

// Q-register forms (itinerary IIC_VPERMQ3).
def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
4948
4949//   VZIP     : Vector Zip (Interleave)
4950
// D-register forms.
def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;

// Q-register forms (itinerary IIC_VPERMQ3).
def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
4958
4959// Vector Table Lookup and Table Extension.
4960
4961//   VTBL     : Vector Table Lookup
// Every table lookup / table extension record below, including the pseudos,
// is given the "DecodeTBLInstruction" decoder method.
let DecoderMethod = "DecodeTBLInstruction" in {

// Single-register table: the only VTBL form with a selection pattern.
def VTBL1
  : N3V<1,1,0b11,0b1000,0,0,
        (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd,
              (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;

// Multi-register tables: the table registers are separate DPR operands and
// the records carry no selection pattern.
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
  : N3V<1,1,0b11,0b1001,0,0,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm),
        NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
def VTBL3
  : N3V<1,1,0b11,0b1010,0,0,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
        NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
def VTBL4
  : N3V<1,1,0b11,0b1011,0,0,
        (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudos that take the whole table as a single QPR / QQPR operand.
def VTBL2Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src),
                IIC_VTB2, "", []>;
def VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src),
                IIC_VTB3, "", []>;
def VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src),
                IIC_VTB4, "", []>;

//   VTBX     : Vector Table Extension
// Same layout as VTBL, with an extra $orig input tied to the result.
def VTBX1
  : N3V<1,1,0b11,0b1000,1,0,
        (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd,
              (v8i8 (int_arm_neon_vtbx1
                      DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;

let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
  : N3V<1,1,0b11,0b1001,1,0,
        (outs DPR:$Vd),
        (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm),
        NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
def VTBX3
  : N3V<1,1,0b11,0b1010,1,0,
        (outs DPR:$Vd),
        (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
        "$orig = $Vd", []>;
def VTBX4
  : N3V<1,1,0b11,0b1011,1,0,
        (outs DPR:$Vd),
        (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
        NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudos that take the whole table as a single QPR / QQPR operand.
def VTBX2Pseudo
  : PseudoNeonI<(outs DPR:$dst),
                (ins DPR:$orig, QPR:$tbl, DPR:$src),
                IIC_VTBX2, "$orig = $dst", []>;
def VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst),
                (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst),
                (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;

} // DecoderMethod = "DecodeTBLInstruction"
5026
5027//===----------------------------------------------------------------------===//
5028// NEON instructions for single-precision FP math
5029//===----------------------------------------------------------------------===//
5030
// Scalar f32 unary operation implemented with a D-register NEON instruction.
// The S-register operand is inserted as ssub_0 of an IMPLICIT_DEF v2f32
// (in DPR_VFP2), Inst is applied, and ssub_0 of the result is extracted.
//   OpNode - scalar DAG node to match
//   Inst   - NEON instruction to apply
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<
      (f32 (OpNode SPR:$a)),
      (EXTRACT_SUBREG
        (v2f32
          (COPY_TO_REGCLASS
            (Inst
              (INSERT_SUBREG
                (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                SPR:$a, ssub_0)),
            DPR_VFP2)),
        ssub_0)>;
5038
// Scalar f32 binary operation implemented with a D-register NEON
// instruction.  Each S-register operand is inserted as ssub_0 of its own
// IMPLICIT_DEF v2f32 (in DPR_VFP2), Inst is applied, and ssub_0 of the
// result is extracted.
//   OpNode - scalar DAG node to match
//   Inst   - NEON instruction to apply
class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<
      (f32 (OpNode SPR:$a, SPR:$b)),
      (EXTRACT_SUBREG
        (v2f32
          (COPY_TO_REGCLASS
            (Inst
              (INSERT_SUBREG
                (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                SPR:$a, ssub_0),
              (INSERT_SUBREG
                (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                SPR:$b, ssub_0)),
            DPR_VFP2)),
        ssub_0)>;
5049
// Scalar f32 "acc OpNode (a MulNode b)" implemented with a three-operand
// D-register NEON instruction.  $acc, $a and $b are each inserted as ssub_0
// of their own IMPLICIT_DEF v2f32 (in DPR_VFP2), Inst is applied in that
// operand order, and ssub_0 of the result is extracted.
//   MulNode - inner DAG node combining $a and $b
//   OpNode  - outer DAG node combining $acc with the inner result
//   Inst    - NEON instruction to apply
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<
      (f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
      (EXTRACT_SUBREG
        (v2f32
          (COPY_TO_REGCLASS
            (Inst
              (INSERT_SUBREG
                (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                SPR:$acc, ssub_0),
              (INSERT_SUBREG
                (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                SPR:$a, ssub_0),
              (INSERT_SUBREG
                (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                SPR:$b, ssub_0)),
            DPR_VFP2)),
        ssub_0)>;
5063
// Arithmetic.
def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;

// Multiply-accumulate / multiply-subtract, additionally gated on UseFPVMLx.
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;

// Absolute value and negation.
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;

// Maximum and minimum.
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;

// Conversions between f32 and 32-bit integers.
def : N2VSPat<arm_ftosi, VCVTf2sd>;
def : N2VSPat<arm_ftoui, VCVTf2ud>;
def : N2VSPat<arm_sitof, VCVTs2fd>;
def : N2VSPat<arm_uitof, VCVTu2fd>;
5079
5080//===----------------------------------------------------------------------===//
5081// Non-Instruction Patterns
5082//===----------------------------------------------------------------------===//
5083
5084// bit_convert
// D-register bitconverts: the result pattern is the source register itself,
// retyped; no instruction appears in the output.

// -> v1i64
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;

// -> v2i32
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;

// -> v4i16
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;

// -> v8i8
def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;

// -> f64
def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;

// -> v2f32
def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
5115
// Q-register bitconverts: the result pattern is the source register itself,
// retyped; no instruction appears in the output.

// -> v2i64
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;

// -> v4i32
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;

// -> v8i16
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;

// -> v16i8
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;

// -> v4f32
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;

// -> v2f64
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
5146