//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//
// Generic NEON modified-immediate operand (printed via the encoded form).
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// Splat immediates: the same value replicated into every 8/16/32-bit lane.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
// Bitwise-inverted splat immediates (for VBIC/VORN-style aliases).
def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}

// Byte-replicate immediates: a 16/32-bit immediate whose bytes are all equal,
// accepted for VMOV/VMVN and rendered as the equivalent i8 splat.
def nImmVMOVI16AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi16vmovByteReplicate";
  let PredicateMethod = "isNEONi16ByteReplicate";
  let RenderMethod = "addNEONvmovByteReplicateOperands";
}
def nImmVMOVI32AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi32vmovByteReplicate";
  let PredicateMethod = "isNEONi32ByteReplicate";
  let RenderMethod = "addNEONvmovByteReplicateOperands";
}
def nImmVMVNI16AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi16invByteReplicate";
  let PredicateMethod = "isNEONi16ByteReplicate";
  let RenderMethod = "addNEONinvByteReplicateOperands";
}
def nImmVMVNI32AsmOperandByteReplicate :
  AsmOperandClass {
  let Name = "NEONi32invByteReplicate";
  let PredicateMethod = "isNEONi32ByteReplicate";
  let RenderMethod = "addNEONinvByteReplicateOperands";
}

def nImmVMOVI16ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI16AsmOperandByteReplicate;
}
def nImmVMOVI32ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperandByteReplicate;
}
def nImmVMVNI16ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMVNI16AsmOperandByteReplicate;
}
def nImmVMVNI32ByteReplicate : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMVNI32AsmOperandByteReplicate;
}

def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Floating-point immediate for VMOV.f32.
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Vector lane-index operands. The ImmLeaf predicates bound the index by the
// number of lanes of the given element size in a 64-bit D register.
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers),
// with "all lanes" subscripting.
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
                                         "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                         "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                         "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Load/store PatFrags selecting on the access's known alignment (in bytes).
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                             (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Vector comparisons: two-operand form and compare-against-zero form.
def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ   : SDTypeProfile<1, 1, []>;

def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz     : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez     : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez     : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz     : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Lane extraction to a GPR, with unsigned or signed extension.
def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Vector move of an encoded modified-immediate (and its bitwise inverse).
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

// VORR/VBIC with an encoded modified-immediate second operand.
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                           SDTCisVT<2, i32>]>;
def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

def NEONvbsl      : SDNode<"ARMISD::VBSL",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;

def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-result shuffles (VZIP/VUZP/VTRN produce both halves).
def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisSameAs<1, 2>]>;
def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>]>;
def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// Match a VMOVIMM whose decoded element is a 32-bit all-zeros splat.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Match a VMOVIMM whose decoded element is an 8-bit all-ones splat.
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                    IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                    IIC_fpStore_m, "",
                   [(store (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;


class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

//   VLD1     : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def  VLD1d8   : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
def  VLD1d16  : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
def  VLD1d32  : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
def  VLD1d64  : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

def  VLD1q8   : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def  VLD1q16  : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def  VLD1q32  : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def  VLD1q64  : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                    (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
// VLD1 loading three consecutive D registers.
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // no-writeback form
  let Inst{4} = Rn{4}; // alignment bit from the address operand
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// ...and the writeback variants (fixed post-increment / register offset).
// NOTE(review): both variants use the IIC_VLD1x2u itinerary even though this
// is a 3-register load; confirm whether a dedicated x3 update itinerary was
// intended or simply does not exist in the schedule model.
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                    (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T      : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16T     : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T     : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T     : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

// Pseudos used before register allocation for the 64-bit-element triple.
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;
783
// ...with 4 registers
// VLD1 loading four consecutive D registers; two alignment bits allow
// 64/128/256-bit alignment.
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // no-writeback form
  let Inst{5-4} = Rn{5-4}; // alignment bits from the address operand
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// Writeback variants (fixed post-increment / register offset).
// NOTE(review): as with VLD1D3WB, these reuse IIC_VLD1x2u for a 4-register
// load; verify against the schedule model.
multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                    (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8Q      : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VLD1d16Q     : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VLD1d32Q     : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VLD1d64Q     : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VLD1d8Qwb   : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VLD1d16Qwb  : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb  : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb  : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

// Pseudos used before register allocation for the 64-bit-element quad.
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>;
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;
824
//   VLD2     : Vector Load (multiple 2-element structures)
// Parameterized over op11_8 as well because VLD2 has several register-list
// layouts (adjacent D pair = 0b1000, spaced D pair = 0b1001, four D
// registers = 0b0011), each with its own VdTy register-list operand.
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // no-writeback form
  let Inst{5-4} = Rn{5-4}; // alignment bits from the address operand
  let DecoderMethod = "DecodeVLDST2Instruction";
}

// Adjacent D-register pair forms.
def  VLD2d8   : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
                     addrmode6align64or128>;
def  VLD2d16  : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
                     addrmode6align64or128>;
def  VLD2d32  : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
                     addrmode6align64or128>;

// Four-D-register (Q-pair) forms.
def  VLD2q8   : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
                     addrmode6align64or128or256>;
def  VLD2q16  : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
                     addrmode6align64or128or256>;
def  VLD2q32  : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
                     addrmode6align64or128or256>;

def  VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>;
def  VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def  VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
853
// ...with address register writeback:
// _fixed uses the reserved Rm = 0b1101 encoding (post-increment by access
// size); _register takes an explicit increment register $Rm.  $wb is the
// updated base, tied to $Rn.addr.
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4}; // alignment bits from the address operand
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>;

def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;

// ...with double-spaced registers
def  VLD2b8    : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
                      addrmode6align64or128>;
def  VLD2b16   : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
                      addrmode6align64or128>;
def  VLD2b32   : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
                      addrmode6align64or128>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>;
908
//   VLD3     : Vector Load (multiple 3-element structures)
// Uses three separate DPR outs (not a register-list operand); op11_8
// distinguishes consecutive (0b0100) from double-spaced (0b0101) registers.
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn), IIC_VLD3,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
  let Rm = 0b1111; // no-writeback form
  let Inst{4} = Rn{4}; // alignment bit from the address operand
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def  VLD3d8   : VLD3D<0b0100, {0,0,0,?}, "8">;
def  VLD3d16  : VLD3D<0b0100, {0,1,0,?}, "16">;
def  VLD3d32  : VLD3D<0b0100, {1,0,0,?}, "32">;

def  VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>;
def  VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
def  VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
926
// ...with address register writeback:
// Unlike the VLD1/VLD2 writeback multiclasses, VLD3 uses a single class with
// an am6offset $Rm operand that covers both the fixed and register-offset
// update forms.
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4}; // alignment bit from the address operand
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;

// ...with double-spaced registers:
def VLD3q8      : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16     : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32     : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo   : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q16oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
def VLD3q32oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;

def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
966
//   VLD4     : Vector Load (multiple 4-element structures)
// Four separate DPR outs; op11_8 = 0b0000 for consecutive registers,
// 0b0001 for double-spaced (see defs below).
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD4,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
  let Rm = 0b1111; // no-writeback form
  let Inst{5-4} = Rn{5-4}; // alignment bits from the address operand
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def  VLD4d8   : VLD4D<0b0000, {0,0,?,?}, "8">;
def  VLD4d16  : VLD4D<0b0000, {0,1,?,?}, "16">;
def  VLD4d32  : VLD4D<0b0000, {1,0,?,?}, "32">;

def  VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>;
def  VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
def  VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
985
// ...with address register writeback:
// Single class covering both update forms via the am6offset $Rm operand,
// mirroring VLD3DWB.
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4}; // alignment bits from the address operand
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;

// ...with double-spaced registers:
def VLD4q8      : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16     : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32     : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo   : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q16oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
def VLD4q32oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;

def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
1025
1026} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1027
// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Each comes in Q / QQ / QQQQ register widths, with and without writeback.
// The $src operand (tied to $dst) carries the lanes that the load does not
// overwrite; $lane is the lane index (nohash_imm: printed without '#').
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
// ...Q-width with address writeback ($wb is the updated base).
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
1054
//   VLD1LN   : Vector Load (single element to one lane)
// Loads one element into lane $lane of $Vd; the remaining lanes come from
// the tied $src input.  The ISel pattern matches vector_insert of a loaded
// scalar.
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
          (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
          "$src = $Vd",
          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                         (i32 (LoadOp addrmode6:$Rn)),
                                         imm:$lane))]> {
  let Rm = 0b1111; // no-writeback form
  let DecoderMethod = "DecodeVLD1LN";
}
// Same as VLD1LN but with the addrmode6oneL32 address operand, used for the
// 32-bit-element form (see VLD1LNd32 below).
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
          (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
          "$src = $Vd",
          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                         (i32 (LoadOp addrmode6oneL32:$Rn)),
                                         imm:$lane))]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
// Q-register lane load is a pseudo (expanded after register allocation);
// it carries the same vector_insert pattern at Q width.
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
}

// Lane-index bit count depends on element size: 3 bits for 8-bit elements,
// 2 for 16-bit, 1 for 32-bit; remaining low op7_4 bits carry size/alignment.
def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;

// Float lane inserts reuse the 32-bit integer lane-load instructions.
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1108
1109let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1110
// ...with address register writeback:
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
          "\\{$Vd[$lane]\\}, $Rn$Rm",
          "$src = $Vd, $Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVLD1LN";
}

def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
// NOTE(review): the non-writeback VLD1LNd32 maps Inst{5-4} = Rn{5-4}, but
// here both bits are driven from Rn{4}.  Verify against DecodeVLD1LN and the
// architected encoding that this asymmetry is intentional.
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
1137
//   VLD2LN   : Vector Load (single 2-element structure to one lane)
// Loads one 2-element structure into lane $lane of two D registers; other
// lanes are preserved via the tied $src1/$src2 inputs.  No ISel pattern --
// these are selected from intrinsics elsewhere.
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
          "$src1 = $Vd, $src2 = $dst2", []> {
  let Rm = 0b1111; // no-writeback form
  let Inst{4}   = Rn{4}; // alignment bit from the address operand
  let DecoderMethod = "DecodeVLD2LN";
}

// Lane-index bit count per element size: 3 / 2 / 1 bits for 8 / 16 / 32.
def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;

// ...with double-spaced registers:
// (spacing is selected by the fixed '1' in op7_4; no 8-bit spaced form)
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
          "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
          "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
1208
//   VLD3LN   : Vector Load (single 3-element structure to one lane)
// Loads one 3-element structure into lane $lane of three D registers;
// other lanes are preserved through the tied $src1..$src3 inputs.
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
          nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
  let Rm = 0b1111; // no-writeback form; VLD3LN takes no alignment bits
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VLD3lnu, "vld3", Dt,
          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
          []> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
1281
//   VLD4LN   : Vector Load (single 4-element structure to one lane)
// Loads one 4-element structure into lane $lane of four D registers; other
// lanes are preserved through the tied $src1..$src4 inputs.
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
          nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
  let Rm = 0b1111; // no-writeback form
  let Inst{4} = Rn{4}; // alignment bit from the address operand
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5}; // extra alignment bit for the 32-bit form
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
1321
// ...with address register writeback:
// Same as VLD4LN plus the am6offset update operand and a $wb result tied to
// the updated base address.
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VLD4lnu, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
            []> {
  let Inst{4} = Rn{4}; // alignment bit from the address operand
  let DecoderMethod = "DecodeVLD4LN";
}
1335
// Lane-index bit count per element size: 3 / 2 / 1 bits for 8 / 16 / 32.
def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5}; // extra alignment bit for the 32-bit form
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;

// ...double-spaced-register writeback forms:
def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
1361
1362} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1363
//   VLD1DUP  : Vector Load (single element to all lanes)
// Loads one element and replicates it to every lane of a single D register;
// matched from NEONvdup of a loaded scalar.
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
              Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
          (ins AddrMode:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let Rm = 0b1111; // no-writeback form
  let Inst{4} = Rn{4}; // alignment bit from the address operand
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
                         addrmode6dupalignNone>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
                         addrmode6dupalign16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
                         addrmode6dupalign32>;

// f32 dup reuses the 32-bit integer instruction.
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;
1385
// Q-register variant: replicates the loaded element across both D registers
// of the pair.
class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
               Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let Rm = 0b1111; // no-writeback form
  let Inst{4} = Rn{4}; // alignment bit from the address operand
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
                          addrmode6dupalignNone>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
                          addrmode6dupalign16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
                          addrmode6dupalign32>;

// f32 dup reuses the 32-bit integer instruction.
def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;
1407
1408let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
// _fixed post-increments by the access size (reserved Rm = 0b1101); _register
// post-increments by $Rm.  $wb is the updated base, tied to $Rn.addr.
multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
// Q-register (D-pair) writeback variants.
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
1448
// Writeback dup loads; 8-bit forms have no alignment bit (fixed 0).
defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
1456
//   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
// Loads one 2-element structure and replicates it to all lanes of two D
// registers (adjacent or spaced, per VdTy).  No ISel pattern here.
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // no-writeback form
  let Inst{4} = Rn{4}; // alignment bit from the address operand
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes,
                         addrmode6dupalign16>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
                         addrmode6dupalign32>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
                         addrmode6dupalign64>;

// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or
// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes,
                           addrmode6dupalign16>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                           addrmode6dupalign32>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                           addrmode6dupalign64>;
1483
// ...with address register writeback:
// Same _fixed / _register split as VLD1QDUPWB above: _fixed post-increments
// by the transfer size (Rm = 0b1101), _register by rGPR:$Rm.
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                     Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
}
1505
// Writeback VLD2DUP instantiations: adjacent (d) and double-spaced (x2)
// D-register pair lists, one per element size.
defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
                                addrmode6dupalign64>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                                addrmode6dupalign64>;
1519
//   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
// Loads one 3-element structure and replicates it into all lanes of three
// D registers. The register list is spelled out in the asm string rather
// than via a VecList operand.
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
  let Rm = 0b1111; // No writeback.
  let Inst{4} = 0; // Alignment bit must be zero for VLD3DUP.
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

// Pseudos carrying the three D regs as a single QQ super-register until
// register allocation.
def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
1542
// ...with address register writeback:
// Single class (not a _fixed/_register multiclass): am6offset:$Rm covers
// both the fixed-increment and register-increment forms ("$Rn$Rm").
class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = 0; // Alignment bit must be zero for VLD3DUP.
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8",  addrmode6dupalign64>;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;

// ...double-spaced register versions:
def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8",  addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
1564
//   VLD4DUP  : Vector Load (single 4-element structure to all lanes)
// Loads one 4-element structure and replicates it into all lanes of four
// D registers.
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111; // No writeback.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
// 32-bit form additionally routes an address bit into Inst{6} (size bit
// becomes a second alignment/size selector for vld4.32).
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1588
// ...with address register writeback:
// am6offset:$Rm covers both the fixed-increment and register-increment
// writeback forms ("$Rn$Rm").
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
// 32-bit form routes an extra address bit into Inst{6}, as in VLD4DUP.
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

// ...double-spaced register versions:
def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
1611
1612} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1613
1614let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
1615
// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Naming: Q/QQ/QQQQ = size of the super-register source operand;
// WB = address writeback ("$addr.addr = $wb"); WBfixed / WBregister =
// writeback by fixed increment vs. by register offset.
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;

class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;
1653
//   VST1     : Vector Store (multiple single elements)
// VST1D stores a single D register; VST1Q stores a D-register pair.
class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // No writeback.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // No writeback.
  let Inst{5-4} = Rn{5-4}; // Two alignment bits for the pair form.
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def  VST1d8   : VST1D<{0,0,0,?}, "8",  addrmode6align64>;
def  VST1d16  : VST1D<{0,1,0,?}, "16", addrmode6align64>;
def  VST1d32  : VST1D<{1,0,0,?}, "32", addrmode6align64>;
def  VST1d64  : VST1D<{1,1,0,?}, "64", addrmode6align64>;

def  VST1q8   : VST1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def  VST1q16  : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def  VST1q32  : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def  VST1q64  : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
1679
// ...with address register writeback:
// _fixed post-increments by the transfer size (Rm = 0b1101); _register
// post-increments by rGPR:$Rm.
// NOTE(review): these store instructions use VLD itineraries (IIC_VLD1u /
// IIC_VLD1x2u) rather than VST ones — possibly a copy-paste remnant; confirm
// against the scheduling model before changing.
multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VLD1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                    (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VLD1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
1727
// ...with 3 registers
// VST1 of a three-D-register list, plus writeback variants.
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins AddrMode:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // No writeback.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// NOTE(review): writeback variants use the VLD itinerary IIC_VLD1x3u for a
// store — confirm against the scheduling model before changing.
multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                    (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VLD1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8T     : VST1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16T    : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32T    : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64T    : VST1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
1769
// ...with 4 registers
// VST1 of a four-D-register list, plus writeback variants.
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins AddrMode:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []> {
  let Rm = 0b1111; // No writeback.
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// NOTE(review): writeback variants use the VLD itinerary IIC_VLD1x4u for a
// store — confirm against the scheduling model before changing.
multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                    (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8Q     : VST1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VST1d16Q    : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VST1d32Q    : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VST1d64Q    : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
1812
//   VST2     : Vector Store (multiple 2-element structures)
// op11_8 distinguishes the register-list form (pair / four-D / spaced pair);
// VdTy selects the matching operand class.
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
            InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // No writeback.
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def  VST2d8   : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2,
                     addrmode6align64or128>;
def  VST2d16  : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
                     addrmode6align64or128>;
def  VST2d32  : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
                     addrmode6align64or128>;

def  VST2q8   : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2,
                     addrmode6align64or128or256>;
def  VST2q16  : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
                     addrmode6align64or128or256>;
def  VST2q32  : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
                     addrmode6align64or128or256>;

def  VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
def  VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def  VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
1840
// ...with address register writeback:
// _fixed post-increments by the transfer size (Rm = 0b1101); _register by
// rGPR:$Rm.
// NOTE(review): both multiclasses use the VLD itinerary IIC_VLD1u for a
// store — confirm against the scheduling model before changing.
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}
// Q form: fixed op11_8 = 0b0011 (four-D-register list).
multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}
1878
// Writeback VST2 instantiations (adjacent pairs and four-D lists), plus
// double-spaced (b) variants via op11_8 = 0b1001.
defm VST2d8wb    : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair,
                           addrmode6align64or128>;
defm VST2d16wb   : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
                           addrmode6align64or128>;
defm VST2d32wb   : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
                           addrmode6align64or128>;

defm VST2q8wb    : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
defm VST2q16wb   : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb   : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;

// ...with double-spaced registers
def VST2b8      : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2,
                      addrmode6align64or128>;
def VST2b16     : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
                      addrmode6align64or128>;
def VST2b32     : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
                      addrmode6align64or128>;
defm VST2b8wb   : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced,
                          addrmode6align64or128>;
defm VST2b16wb  : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
                          addrmode6align64or128>;
defm VST2b32wb  : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
                          addrmode6align64or128>;
1910
//   VST3     : Vector Store (multiple 3-element structures)
// Stores three D registers as interleaved 3-element structures; the
// register list is spelled out in the asm string.
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  let Rm = 0b1111; // No writeback.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def  VST3d8   : VST3D<0b0100, {0,0,0,?}, "8">;
def  VST3d16  : VST3D<0b0100, {0,1,0,?}, "16">;
def  VST3d32  : VST3D<0b0100, {1,0,0,?}, "32">;

def  VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
def  VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def  VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
1928
// ...with address register writeback:
// am6offset:$Rm covers both fixed-increment and register-increment forms.
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
1947
// ...with double-spaced registers (op11_8 = 0b0101):
def VST3q8      : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16     : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32     : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo   : VSTQQQQPseudo<IIC_VST3>;
def VST3q16oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
def VST3q32oddPseudo  : VSTQQQQPseudo<IIC_VST3>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
1968
//   VST4     : Vector Store (multiple 4-element structures)
// Stores four D registers as interleaved 4-element structures.
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []> {
  let Rm = 0b1111; // No writeback.
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def  VST4d8   : VST4D<0b0000, {0,0,?,?}, "8">;
def  VST4d16  : VST4D<0b0000, {0,1,?,?}, "16">;
def  VST4d32  : VST4D<0b0000, {1,0,?,?}, "32">;

def  VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
def  VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
def  VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
1987
// ...with address register writeback:
// am6offset:$Rm covers both fixed-increment and register-increment forms.
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
           "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
2006
// ...with double-spaced registers (op11_8 = 0b0001):
def VST4q8      : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16     : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32     : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo   : VSTQQQQPseudo<IIC_VST4>;
def VST4q16oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
def VST4q32oddPseudo  : VSTQQQQPseudo<IIC_VST4>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
2027
2028} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2029
// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// LN variants carry an extra nohash_imm:$lane operand selecting the lane
// to store; WB variants add address writeback ("$addr.addr = $wb").
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2053
//   VST1LN   : Vector Store (single element from one lane)
// The selection pattern extracts lane imm:$lane from a Ty-typed D register
// via ExtractOp and stores it with StoreOp (e.g. a truncating store for
// sub-word elements).
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
          IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
  let Rm = 0b1111; // No writeback.
  let DecoderMethod = "DecodeVST1LN";
}
// Q-register version: lowered through a pseudo so the lane index can pick
// either D half; the pattern is attached here.
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}
2069
// Per-size VST1LN defs; each routes the lane index into the appropriate
// Inst bits (more lane bits for smaller elements).
def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                       NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}

def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Float lane stores reuse the i32 instructions/pseudos.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2094
// ...with address register writeback:
// The pattern uses the post-indexed StoreOp form (base + am6offset) and
// binds the updated address to GPR:$wb.
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins AdrMode:$Rn, am6offset:$Rm,
           DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
          "\\{$Vd[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb",
          [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                  AdrMode:$Rn, am6offset:$Rm))]> {
  let DecoderMethod = "DecodeVST1LN";
}
// Q-register writeback version, lowered through a pseudo.
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}
2112
// Per-size writeback VST1LN defs, using post-indexed store PatFrags.
def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                             NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7}   = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
2131
2132let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
2133
//   VST2LN   : Vector Store (single 2-element structure from one lane)
// Stores lane $lane of two D registers as one interleaved 2-element
// structure.
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
          IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
          "", []> {
  let Rm = 0b1111; // No writeback.
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

// Lane bits go into Inst{7-5} / {7-6} / {7} depending on element size.
def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7}   = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{4}   = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
2171
2172// ...with address register writeback:
2173class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2174  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2175          (ins addrmode6:$Rn, am6offset:$Rm,
2176           DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
2177          "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
2178          "$Rn.addr = $wb", []> {
2179  let Inst{4}   = Rn{4};
2180  let DecoderMethod = "DecodeVST2LN";
2181}
2182
2183def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
2184  let Inst{7-5} = lane{2-0};
2185}
2186def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
2187  let Inst{7-6} = lane{1-0};
2188}
2189def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
2190  let Inst{7}   = lane{0};
2191}
2192
2193def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
2194def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
2195def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
2196
2197def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
2198  let Inst{7-6} = lane{1-0};
2199}
2200def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
2201  let Inst{7}   = lane{0};
2202}
2203
2204def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
2205def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
2206
//   VST3LN   : Vector Store (single 3-element structure from one lane)
// Stores the same lane of three D registers as one 3-element structure.
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
           nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
  let Rm = 0b1111;             // no register offset: the non-writeback form
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};   // 8-bit elements: 3-bit lane index
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};   // 16-bit elements: 2-bit lane index
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};     // 32-bit elements: 1-bit lane index
}

// Pseudos for sources held in Q registers.
def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
// Same as VST3LN but yields the updated base address in $wb.
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
          IIC_VST3lnu, "vst3", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;

// ...double-spaced registers with writeback:
def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
2276
//   VST4LN   : Vector Store (single 4-element structure from one lane)
// Stores the same lane of four D registers as one 4-element structure.
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
           nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
          "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
          "", []> {
  let Rm = 0b1111;             // no register offset: the non-writeback form
  let Inst{4} = Rn{4};         // NOTE(review): presumably the addrmode6 alignment bit — confirm
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};   // 8-bit elements: 3-bit lane index
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};   // 16-bit elements: 2-bit lane index
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7}   = lane{0};     // 32-bit elements: 1-bit lane index
  let Inst{5} = Rn{5};         // NOTE(review): presumably a second alignment bit for 32-bit
}

// Pseudos for sources held in Q registers.
def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;

// ...with address register writeback:
// Same as VST4LN but yields the updated base address in $wb.
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
          IIC_VST4lnu, "vst4", Dt,
  "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;

// ...double-spaced registers with writeback:
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7}   = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
2353
2354} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2355
// Use vld1/vst1 for unaligned f64 load / store.
// On little-endian targets an under-aligned f64 can be accessed with a
// smaller element size (vld1.16/vst1.16 or vld1.8/vst1.8); on big-endian
// targets the element-sized forms would reorder the bytes, so only the
// 64-bit form is used there.
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
          (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
          (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
          (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
          (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;

// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
// load / store if it's legal.
// The 64-bit-element form works for any endianness; the narrower-element
// forms are again little-endian only.
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
          (VLD1q64 addrmode6:$addr)>;
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q64 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
          (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
2388
2389//===----------------------------------------------------------------------===//
2390// NEON pattern fragments
2391//===----------------------------------------------------------------------===//
2392
// Extract D sub-registers of Q registers.
// Each transform maps a Q-register lane index to the dsub_N index holding
// that lane; the divisor is the number of elements per D register.
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
// The mask keeps the lane's position within one D register (mask value is
// elements-per-D-register minus one).
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;
2427
2428//===----------------------------------------------------------------------===//
2429// Instruction Classes
2430//===----------------------------------------------------------------------===//
2431
// Basic 2-register operations: double- and quad-register.
// OpNode supplies the selection DAG node; Dt is the assembly data-type
// suffix; ResTy/OpTy give the pattern's result and operand vector types.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Same shape as N2VD/N2VQ but the pattern node is a SDPatternOperator
// (typically an intrinsic) and the itinerary is a parameter.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2461
// Same as above, but not predicated (N2Vnp-based encodings).
class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<0b10, op17_16, op10_8, op7, 0,  (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<0b10, op17_16, op10_8, op7, 1,  (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Similar to NV2VQIntnp with some more encoding bits exposed (crypto).
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
              bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,  (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as N2VQIntXnp but with Vd as a src register (accumulating form):
// $src is tied to $Vd and fed to the intrinsic as its first operand.
class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
              bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}
2495
// Narrow 2-register operations: Q-register source, D-register result
// (TyQ is the wide operand type, TyD the narrowed result type).
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL):
// D-register source widened to a Q-register result.
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
2531
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both operands are read and written in place, so each output is tied to
// the corresponding input ($src1 = $Vd, $src2 = $Vm). No pattern.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
2543
// Basic 3-register operations: double- and quad-register.
// Commutable marks the DAG node as commutative for isel; every variant
// also declares the standard two-operand assembly alias ($Vn = $Vd).
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr,
           ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Scalar (by-lane) variant: $Vm is a lane of a restricted D register
// duplicated across the vector via NEONvduplane.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
// 16-bit by-lane variant (DPR_8 scalar source, VectorIndex16 lane).
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
2595
// Quad-register counterparts of N3VD/N3VDX/N3VDSL/N3VDSL16.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VQ but no data type.
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Q-register by-lane variant: scalar comes from a D register, result/first
// operand are Q registers (OpTy is the scalar vector type, ResTy the wide one).
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
// 16-bit by-lane variant.
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
2645
// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Non-predicated (N3Vnp-based) form.
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// By-lane (scalar) intrinsic variants.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// "Shifted"/swapped-operand form: $Vm is the first assembly and pattern
// operand, and the two-operand alias ties $Vm (not $Vn) to $Vd.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}
2699
// Quad-register counterparts of the 3-register intrinsic classes.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Non-predicated (N3Vnp-based) form.
class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Same as N3VQIntnp but with Vd as a src register (accumulating form):
// $src is tied to $Vd and passed as the intrinsic's first operand.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
          f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
                                       (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}

// By-lane (scalar) intrinsic variants.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
// "Shifted"/swapped-operand form: $Vm first, alias ties $Vm to $Vd.
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}
2767
2768// Multiply-Add/Sub operations: double- and quad-register.
2769class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2770                InstrItinClass itin, string OpcodeStr, string Dt,
2771                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
2772  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2773        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2774        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2775        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2776                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
2777
// Double-register multiply-accumulate by a scalar: the second multiply
// operand is one lane of Vm, duplicated across the vector via NEONvduplane.
// 32-bit element version: Vm is restricted to DPR_VFP2 and the lane is a
// VectorIndex32.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                     imm:$lane)))))))]>;
// 16-bit element version of the above: Vm is restricted to DPR_8 and the
// lane is a VectorIndex16.
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                     imm:$lane)))))))]>;
2804
// Quad-register form: Vd = OpNode(src1, MulOp(Vn, Vm)), accumulator tied to
// the destination.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
// Quad-register multiply-accumulate by scalar, 32-bit elements: the scalar
// comes from a lane of a DPR_VFP2 double register, duplicated to OpTy and
// widened into the ResTy pattern.
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                   (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                        imm:$lane)))))))]>;
// 16-bit element version: scalar from a lane of a DPR_8 register.
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                   (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                        imm:$lane)))))))]>;
2840
2841// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// Double-register form: Vd = OpNode(src1, IntOp(Vn, Vm)), accumulator tied
// to the destination.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
// Quad-register form of the above.
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
2858
2859// Neon 3-argument intrinsics, both double- and quad-register.
2860// The destination register is also used as the first source operand register.
// Double-register form: Vd = IntOp(src1, Vn, Vm) — the intrinsic itself takes
// the tied destination as its first argument.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                      (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
// Quad-register form of the above.
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                      (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2877
2878// Long Multiply-Add/Sub operations.
// Long multiply-accumulate: a quad-register accumulator is combined with the
// widening product of two double-register operands:
//   Qd = OpNode(Qsrc1, MulOp(Dn, Dm)).
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                                (TyQ (MulOp (TyD DPR:$Vn),
                                            (TyD DPR:$Vm)))))]>;
// Scalar (by-lane) variant, 32-bit elements: second multiply operand is a
// duplicated lane of a DPR_VFP2 register.
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
          (OpNode (TyQ QPR:$src1),
                  (TyQ (MulOp (TyD DPR:$Vn),
                              (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                 imm:$lane))))))]>;
// Scalar variant, 16-bit elements: lane taken from a DPR_8 register.
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
          (OpNode (TyQ QPR:$src1),
                  (TyQ (MulOp (TyD DPR:$Vn),
                              (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                 imm:$lane))))))]>;
2912
2913// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// Pattern shape: Qd = OpNode(Qsrc1, ExtOp(IntOp(Dn, Dm))) — the intrinsic
// produces a double-width-element result in TyD which ExtOp widens to TyQ
// before accumulation (e.g. VABAL: abs-diff, extend, then add).
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                                (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                        (TyD DPR:$Vm)))))))]>;
2924
2925// Neon Long 3-argument intrinsic.  The destination register is
2926// a quad-register and is also used as the first source operand register.
// Qd = IntOp(Qsrc1, Dn, Dm): the intrinsic handles the widening itself and
// takes the tied quad-register accumulator as its first argument.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// Scalar (by-lane) variant, 32-bit elements.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
// Scalar variant, 16-bit elements.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;
2961
2962// Narrowing 3-register intrinsics.
// Dd = IntOp(Qn, Qm): two quad-register sources narrowed to a double-register
// result. Commutability is caller-selected per instruction.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
2972
2973// Long 3-register operations.
// Qd = OpNode(Dn, Dm): double-register sources, quad-register (widened)
// result; OpNode performs the widening itself.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
2983
// Long operation by scalar, 32-bit elements: second operand is a duplicated
// lane of a DPR_VFP2 register.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
          (TyQ (OpNode (TyD DPR:$Vn),
                       (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
// Long operation by scalar, 16-bit elements: lane taken from a DPR_8
// register.
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
          (TyQ (OpNode (TyD DPR:$Vn),
                       (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
3002
3003// Long 3-register operations with explicitly extended operands.
// Qd = OpNode(ExtOp(Dn), ExtOp(Dm)): both double-register operands are
// explicitly widened to TyQ by ExtOp before the (element-width-preserving)
// OpNode is applied.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                                (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}
3015
3016// Long 3-register intrinsics with explicit extend (VABDL).
// Qd = ExtOp(IntOp(Dn, Dm)): the intrinsic produces a TyD result that ExtOp
// then widens to TyQ (e.g. VABDL: abs-diff then extend).
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}
3028
3029// Long 3-register intrinsics.
// Qd = IntOp(Dn, Dm): the intrinsic performs the widening itself.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
3039
3040// Same as above, but not predicated.
// Non-predicated variant (N3Vnp encoding) of the long 3-register intrinsic.
// NOTE(review): Commutable is accepted but not bound to isCommutable here —
// presumably handled by the instantiation site or base class; confirm.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
3048
// Long intrinsic by scalar, 32-bit elements: second operand is a duplicated
// lane of a DPR_VFP2 register.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
// Long intrinsic by scalar, 16-bit elements: lane from a DPR_8 register.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;
3069
3070// Wide 3-register operations.
// Qd = OpNode(Qn, ExtOp(Dm)): first operand is already quad-width, the
// double-register second operand is explicitly widened by ExtOp.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                                (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
3083
3084// Pairwise long 2-register intrinsics, both double- and quad-register.
// Double-register form: Dd = IntOp(Dm), a single-source pairwise-long
// intrinsic.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// Quad-register form of the above.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
3099
3100// Pairwise long 2-register accumulate intrinsics,
3101// both double- and quad-register.
3102// The destination register is also used as the first source operand register.
// Double-register accumulate form: Dd = IntOp(src1, Dm), with the accumulator
// tied to the destination.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Quad-register form of the above.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
3119
3120// Shift by immediate,
3121// both double- and quad-register.
// Vd = OpNode(Vm, #imm); both classes share the two-operand assembly alias
// "opc Vd, #imm" (Vm defaults to Vd).
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Double-register shift by immediate.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
// Quad-register shift by immediate.
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}
3138
3139// Long shift by immediate.
// Qd = OpNode(Dm, #imm): double-register source widened to a quad-register
// result. Note the immediate is matched as ImmTy (not wrapped in i32).
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;
3148
3149// Narrow shift by immediate.
// Dd = OpNode(Qm, #imm): quad-register source narrowed to a double-register
// result.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                          (i32 ImmTy:$SIMM))))]>;
3159
3160// Shift right by immediate and accumulate,
3161// both double- and quad-register.
// Vd = src1 + ShOp(Vm, #imm) (e.g. VSRA); accumulator tied to the
// destination, with a two-operand assembly alias.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Double-register shift-right-and-accumulate.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
// Quad-register shift-right-and-accumulate.
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}
3180
3181// Shift by immediate and insert,
3182// both double- and quad-register.
// Vd = ShOp(src1, Vm, #imm) (e.g. VSLI/VSRI): the three-operand insert node
// receives the tied destination so unshifted bits are preserved.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Double-register shift-and-insert.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
// Quad-register shift-and-insert.
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}
3199
3200// Convert, with fractional bits immediate,
3201// both double- and quad-register.
// Double-register fixed-point convert: Vd = IntOp(Vm, #fbits), where the
// immediate (neon_vcvt_imm32) gives the number of fractional bits.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
// Quad-register form of the above.
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
3216
3217//===----------------------------------------------------------------------===//
3218// Multiclasses
3219//===----------------------------------------------------------------------===//
3220
3221// Abbreviations used in multiclass suffixes:
3222//   Q = quarter int (8 bit) elements
3223//   H = half int (16 bit) elements
3224//   S = single int (32 bit) elements
3225//   D = double int (64 bit) elements
3226
3227// Neon 2-register vector operations and intrinsics.
3228
3229// Neon 2-register comparisons.
3230//   source operand element sizes of 8, 16 and 32 bits:
// Instantiates one comparison instruction per element type (D and Q forms of
// i8/i16/i32 plus an f32 form). "asm" carries the trailing assembly operand
// string (e.g. the ", #0" of a compare-against-zero); integer forms get the
// Dt-based type suffix, float forms hard-code "f32" and set Inst{10}.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  // f32 compare produces an all-ones/all-zeros i32 mask per element.
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}
3274
3275
3276// Neon 2-register vector intrinsics,
3277//   element sizes of 8, 16 and 32 bits:
// Instantiates D and Q intrinsic instructions for 8/16/32-bit elements; the
// size field (op19_18 position) selects 0b00/0b01/0b10 respectively.
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}
3298
3299
3300// Neon Narrowing 2-register vector operations,
3301//   source operand element sizes of 16, 32 and 64 bits:
// Narrowing: v8i16->v8i8, v4i32->v4i16, v2i64->v2i32; the Dt suffix names the
// SOURCE element size (16/32/64).
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "16"),
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}
3316
3317// Neon Narrowing 2-register vector intrinsics,
3318//   source operand element sizes of 16, 32 and 64 bits:
// Intrinsic variant of N2VN_HSD: same type pairs, same Dt suffix convention
// (suffix names the source element size).
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}
3333
3334
3335// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
//   source operand element sizes of 8, 16 and 32 bits:
// Lengthening: v8i8->v8i16, v4i16->v4i32, v2i32->v2i64; the Dt suffix names
// the SOURCE element size (8/16/32), matching the _QHS naming.
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}
3346
3347
3348// Neon 3-register vector operations.
3349
3350// First with only element sizes of 8, 16 and 32 bits:
// Instantiates the six integer forms (D and Q x 8/16/32-bit elements) of a
// 3-register operation; 16-bit-and-below and 32-bit element forms can use
// different itineraries.
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}
3378
// Scalar ("by lane") 3-register variants for 16- and 32-bit elements only.
// The Q-register forms take their scalar lane from a D-register vector type
// (v4i16 / v2i32 — the second value type passed to N3VQSL16 / N3VQSL).
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}
3386
3387// ....then also with element size 64 bits:
// Inherits the 8/16/32-bit defs from N3V_QHS (reusing itinD/itinQ for both
// the 16- and 32-bit itinerary slots) and adds the 64-bit element D and Q
// forms, which use size field 0b11 and Dt suffix "64".
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}
3401
3402
3403// Neon 3-register vector intrinsics.
3404
3405// First with only element sizes of 16 and 32 bits:
// Intrinsic-based counterpart of N3V_QHS, restricted to 16- and 32-bit
// elements.  IntOp is the target intrinsic matched by the pattern; f is the
// instruction Format forwarded to N3VDInt/N3VQInt.
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
// Same structure as N3VInt_HS but built on the N3VDIntSh/N3VQIntSh classes
// (the "Sh" shift-instruction variants — see those class definitions for the
// operand-order difference).  No Commutable parameter is provided.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp>;
}
3448
// Scalar ("by lane") intrinsic variants for 16- and 32-bit elements; as in
// N3VSL_HS, the Q-register forms take the lane from a D-register vector.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}
3462
3463// ....then also with element size of 8 bits:
// Extends N3VInt_HS with the 8-bit element D and Q forms (size field 0b00,
// Dt suffix "8"); both reuse the 16-bit itineraries.
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
// "Sh" (shift-variant) counterpart of N3VInt_QHS: extends N3VInt_HSSh with
// the 8-bit element D and Q forms (size field 0b00, Dt suffix "8").
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp>;
}
3492
3493
3494// ....then also with element size of 64 bits:
// Extends N3VInt_QHS with the 64-bit element D and Q forms (size field 0b11,
// Dt suffix "64"); both reuse the 32-bit itineraries.
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
// "Sh" (shift-variant) counterpart of N3VInt_QHSD: extends N3VInt_QHSSh with
// the 64-bit element D and Q forms (size field 0b11, Dt suffix "64").
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp>;
}
3523
3524// Neon Narrowing 3-register vector intrinsics,
3525//   source operand element sizes of 16, 32 and 64 bits:
// Narrowing: the destination element is half the source width (e.g. v8i8
// result from a v8i16 source).  The Dt suffix ("16"/"32"/"64") names the
// SOURCE element size, per the NEON assembly convention.
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}
3539
3540
3541// Neon Long 3-register vector operations.
3542
// Long: the destination element is twice the source width (e.g. v8i16 result
// from v8i8 sources).  The Dt suffix ("8"/"16"/"32") names the SOURCE
// element size.
multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}
3557
// Scalar ("by lane") long variants for 16- and 32-bit source elements; the
// result element is twice the source width (v4i32 / v2i64).
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}
3566
// Long operations expressed with an explicit extend node: the pattern applies
// ExtOp (sign/zero extension) to each source before OpNode, instead of
// matching a dedicated long SDNode.  Dt suffix names the source element size.
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
3581
3582// Neon Long 3-register vector intrinsics.
3583
3584// First with only element sizes of 16 and 32 bits:
// Long intrinsic variants for 16- and 32-bit source elements; the result
// element is twice the source width.
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}
3596
// Scalar ("by lane") long intrinsic variants for 16- and 32-bit source
// elements.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
3605
3606// ....then also with element size of 8 bits:
// Extends N3VLInt_HS with the 8-bit source element form (v8i8 -> v8i16,
// size field 0b00, Dt suffix "8").
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}
3617
3618// ....with explicit extend (VABDL).
// Long intrinsic with an explicit extend node (used for VABDL): the pattern
// applies IntOp at the narrow width and then ExtOp to widen the result.
// All three element sizes share a single itinerary.
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}
3632
3633
3634// Neon Wide 3-register vector intrinsics,
3635//   source operand element sizes of 8, 16 and 32 bits:
// Wide: one wide (Q) operand and one narrow (D) operand; the narrow operand
// is widened via ExtOp before OpNode is applied.  Dt suffix names the narrow
// (source) element size.
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
3649
3650
3651// Neon Multiply-Op vector operations,
3652//   element sizes of 8, 16 and 32 bits:
// Fused multiply-then-op forms (e.g. VMLA/VMLS): each def multiplies two
// operands with `mul` and combines the product with the accumulator via
// OpNode.  One D and one Q form per element size (8/16/32).
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}
3673
// Scalar ("by lane") multiply-then-op forms for 16- and 32-bit elements;
// the Q-register forms take the scalar lane from a D-register vector.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDPatternOperator ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}
3689
3690// Neon Intrinsic-Op vector operations,
3691//   element sizes of 8, 16 and 32 bits:
// Intrinsic-then-op forms: like N3VMulOp_QHS but the inner operation is an
// arbitrary intrinsic IntOp rather than `mul`, combined with the accumulator
// via OpNode.  One D and one Q form per element size (8/16/32); all D forms
// share itinD and all Q forms share itinQ.
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}
3712
3713// Neon 3-argument intrinsics,
3714//   element sizes of 16 and 32 bits:
// Three-argument intrinsic forms (accumulator + two sources) for 16- and
// 32-bit elements, one D and one Q instruction per size.
multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
3731
3732//   element sizes of 8, 16 and 32 bits:
// Extends N3VInt3_HS with the 8-bit element D and Q forms (size field 0b00,
// Dt suffix "8"); both reuse the 16-bit itineraries.
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp>
           :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32,
                        itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
}
3746
3747// Neon Long Multiply-Op vector operations,
3748//   element sizes of 8, 16 and 32 bits:
// Long multiply-then-op forms (e.g. VMLAL/VMLSL): narrow sources are
// multiplied via MulOp into a double-width product which is combined with
// the wide accumulator via OpNode.  Dt suffix names the source element size.
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}
3760
// Scalar ("by lane") long multiply-then-op forms for 16- and 32-bit source
// elements, using the D-register multiply-accumulate itineraries.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}
3768
3769
3770// Neon Long 3-argument intrinsics.
3771
3772// First with only element sizes of 16 and 32 bits:
// Long three-argument intrinsic forms (wide accumulator + two narrow
// sources) for 16- and 32-bit source elements.
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
3781
// Scalar ("by lane") long three-argument intrinsic forms for 16- and 32-bit
// source elements, using the D-register multiply-accumulate itineraries.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
3789
3790// ....then also with element size of 8 bits:
// Extends N3VLInt3_HS with the 8-bit source element form (v8i8 -> v8i16,
// size field 0b00, Dt suffix "8").
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}
3798
3799// ....with explicit extend (VABAL).
// Long three-argument form with explicit extend (used for VABAL): IntOp is
// applied at the narrow width, ExtOp widens the result, and OpNode combines
// it with the wide accumulator.  All sizes share a single itinerary.
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}
3813
3814
3815// Neon Pairwise long 2-register intrinsics,
3816//   element sizes of 8, 16 and 32 bits:
// Pairwise-long 2-register intrinsics (e.g. VPADDL): adjacent element pairs
// are combined into double-width results, so the destination has half as
// many elements at twice the width (v8i8 -> v4i16, v16i8 -> v8i16, ...).
// Dt suffix names the SOURCE element size.
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}
3836
3837
3838// Neon Pairwise long 2-register accumulate intrinsics,
3839//   element sizes of 8, 16 and 32 bits:
// Pairwise-long accumulate variant of N2VPLInt_QHS (e.g. VPADAL): same
// narrow-source / double-width-destination type pairing, but the *Int2
// instruction classes additionally take the destination as an accumulator.
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}
3859
3860
3861// Neon 2-register vector shift by immediate,
3862//   with f of either N2RegVShLFrm or N2RegVShRFrm
3863//   element sizes of 8, 16, 32 and 64 bits:
// Left-shift-by-immediate for all four element sizes.  The element size is
// encoded in the high bits of the imm6 field (set via the `let Inst{...}`
// overrides below); the 64-bit forms instead set the L bit (the fourth
// template argument, 1 instead of 0) and use the full imm6 for the shift
// amount.  Shift amounts use a plain i32imm operand (range 0..size-1).
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
// Right-shift-by-immediate counterpart of N2VShL_QHSD: same imm6 size
// encoding, but uses N2RegVShRFrm and per-size shr_imm8/16/32/64 operand
// classes (right shifts count 1..size, unlike left shifts' 0..size-1).
// The baseOpc parameter is accepted but unused within this multiclass.
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
3938
3939// Neon Shift-Accumulate vector operations,
3940//   element sizes of 8, 16, 32 and 64 bits:
// Shift-and-accumulate (e.g. VSRA/VRSRA): shift by immediate via ShOp, then
// add to the destination register.  Uses the same imm6 size encoding and
// shr_imm* operand classes as N2VShR_QHSD; the 64-bit forms set the L bit.
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}
3977
// Neon Shift-Insert vector operations,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
// Left-insert (VSLI) form: the shift amount is a left shift in the range
// 0..(element size - 1).  Use the range-checked imm0_* operand classes rather
// than a raw i32imm so the assembler rejects out-of-range shift amounts
// instead of silently mis-encoding them (mirrors the shr_imm* operands used
// by the right-shift multiclasses above/below).
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, imm0_7,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, imm0_15,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, imm0_31,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, imm0_63,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, imm0_7,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, imm0_15,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, imm0_31,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, imm0_63,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
                             // imm6 = xxxxxx
}
// Right-insert (VSRI) form of the shift-insert multiclass: the shift amount
// is a right shift taken from the range-checked shr_imm* operands, and the
// N2RegVShRFrm format / NEONvsri node are used.
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
                             // imm6 = xxxxxx
}
4053
// Neon Shift Long operations,
//   element sizes of 8, 16, 32 bits:
// Result elements are twice the width of the source elements (D -> Q); the
// imm1_7/imm1_15/imm1_31 operands bound the shift amount per element size.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
               OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
               OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}
4072
// Neon Shift Narrow operations,
//   element sizes of 16, 32, 64 bits:
// Result elements are half the width of the source elements (Q -> D); note
// the Dt suffix names the *source* element size while the shr_imm* operand is
// sized by the narrowed result.
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                    OpcodeStr, !strconcat(Dt, "16"),
                    v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}
4094
4095//===----------------------------------------------------------------------===//
4096// Instruction Definitions.
4097//===----------------------------------------------------------------------===//
4098
4099// Vector Add Operations.
4100
4101//   VADD     : Vector Add (integer and floating-point)
4102defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
4103                         add, 1>;
4104def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
4105                     v2f32, v2f32, fadd, 1>;
4106def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
4107                     v4f32, v4f32, fadd, 1>;
4108//   VADDL    : Vector Add Long (Q = D + D)
4109defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4110                            "vaddl", "s", add, sext, 1>;
4111defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4112                            "vaddl", "u", add, zext, 1>;
4113//   VADDW    : Vector Add Wide (Q = Q + D)
4114defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
4115defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
4116//   VHADD    : Vector Halving Add
4117defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
4118                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4119                           "vhadd", "s", int_arm_neon_vhadds, 1>;
4120defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
4121                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4122                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
4123//   VRHADD   : Vector Rounding Halving Add
4124defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
4125                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4126                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
4127defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
4128                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4129                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
4130//   VQADD    : Vector Saturating Add
4131defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
4132                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4133                            "vqadd", "s", int_arm_neon_vqadds, 1>;
4134defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
4135                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4136                            "vqadd", "u", int_arm_neon_vqaddu, 1>;
4137//   VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
4138defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
4139//   VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
4140defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
4141                            int_arm_neon_vraddhn, 1>;
4142
4143def : Pat<(v8i8  (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
4144          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
4145def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
4146          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
4147def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
4148          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
4149
// Vector Multiply Operations.

//   VMUL     : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
// By-lane (scalar) forms of VMUL.
defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;

// Multiply-by-lane patterns: when one multiplicand is a duplicated lane of a
// Q register, extract the D subregister that contains the lane and use the
// by-lane form of the multiply.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;


// Multiplication by a duplicated FP scalar: insert the scalar into lane 0 of
// an undefined D register and use the by-lane multiply with lane index 0.
def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
          (VMULslfd DPR:$Rn,
            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
            (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
          (VMULslfq QPR:$Rn,
            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
            (i32 0))>;


//   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s",  int_arm_neon_vqdmulh>;
// By-lane patterns for the Q-register forms (see the VMUL patterns above).
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

//   VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s",  int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

//   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "NEONData" in {
  defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vmull", "s", NEONvmulls, 1>;
  defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vmull", "u", NEONvmullu, 1>;
  def  VMULLp8   :  N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                            v8i16, v8i8, int_arm_neon_vmullp, 1>;
  // 64-bit polynomial multiply requires the v8 Crypto extensions.
  def  VMULLp64  : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                          "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
                    Requires<[HasV8, HasCrypto]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;

//   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;
4263
// Vector Multiply-Accumulate and Multiply-Subtract Operations.

//   VMLA     : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
// FP forms are only used when VMLx is profitable and fused MAC is not in use.
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

// Multiply-accumulate-by-lane patterns: extract the D subregister holding the
// duplicated lane and use the by-lane VMLA form.
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

//   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "s", NEONvmulls, add>;
defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
4318
let Predicates = [HasNEON, HasV8_1a] in {
  // v8.1a Neon Rounding Double Multiply-Op vector operations,
  // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate
  //            returning High Half.  Non-widening: operands, the accumulator
  //            and the result all have the same element size (see the
  //            same-width vqadds/vqrdmulh patterns below).
  // Defined with null_frag; selection is driven entirely by the explicit
  // patterns that follow.
  defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                             null_frag>;
  // Match saturating-add of a rounding-doubling-multiply-high as VQRDMLAH.
  def : Pat<(v4i16 (int_arm_neon_vqadds
                     (v4i16 DPR:$src1),
                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
                                                   (v4i16 DPR:$Vm))))),
            (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v2i32 (int_arm_neon_vqadds
                     (v2i32 DPR:$src1),
                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
                                                   (v2i32 DPR:$Vm))))),
            (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v8i16 (int_arm_neon_vqadds
                     (v8i16 QPR:$src1),
                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
                                                   (v8i16 QPR:$Vm))))),
            (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  def : Pat<(v4i32 (int_arm_neon_vqadds
                     (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
                                                   (v4i32 QPR:$Vm))))),
            (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

  // By-lane forms of VQRDMLAH, also selected only through patterns.
  defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                                  null_frag>;
  def : Pat<(v4i16 (int_arm_neon_vqadds
                     (v4i16 DPR:$src1),
                     (v4i16 (int_arm_neon_vqrdmulh
                              (v4i16 DPR:$Vn),
                              (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                   imm:$lane)))))),
            (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
                                    imm:$lane))>;
  def : Pat<(v2i32 (int_arm_neon_vqadds
                     (v2i32 DPR:$src1),
                     (v2i32 (int_arm_neon_vqrdmulh
                              (v2i32 DPR:$Vn),
                              (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                   imm:$lane)))))),
            (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                    imm:$lane))>;
  // Q-register by-lane forms: extract the D subregister holding the lane.
  def : Pat<(v8i16 (int_arm_neon_vqadds
                     (v8i16 QPR:$src1),
                     (v8i16 (int_arm_neon_vqrdmulh
                              (v8i16 QPR:$src2),
                              (v8i16 (NEONvduplane (v8i16 QPR:$src3),
                                                   imm:$lane)))))),
            (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
                                    (v8i16 QPR:$src2),
                                    (v4i16 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i16_reg imm:$lane))),
                                    (SubReg_i16_lane imm:$lane)))>;
  def : Pat<(v4i32 (int_arm_neon_vqadds
                     (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqrdmulh
                              (v4i32 QPR:$src2),
                              (v4i32 (NEONvduplane (v4i32 QPR:$src3),
                                                   imm:$lane)))))),
            (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
                                    (v4i32 QPR:$src2),
                                    (v2i32 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i32_reg imm:$lane))),
                                    (SubReg_i32_lane imm:$lane)))>;

  //   VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract
  //              returning High Half.  Non-widening, like VQRDMLAH above but
  //              with a saturating subtract (vqsubs) of the product.
  defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                             null_frag>;
  // Match saturating-subtract of a rounding-doubling-multiply-high.
  def : Pat<(v4i16 (int_arm_neon_vqsubs
                     (v4i16 DPR:$src1),
                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
                                                   (v4i16 DPR:$Vm))))),
            (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v2i32 (int_arm_neon_vqsubs
                     (v2i32 DPR:$src1),
                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
                                                   (v2i32 DPR:$Vm))))),
            (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v8i16 (int_arm_neon_vqsubs
                     (v8i16 QPR:$src1),
                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
                                                   (v8i16 QPR:$Vm))))),
            (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  def : Pat<(v4i32 (int_arm_neon_vqsubs
                     (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
                                                   (v4i32 QPR:$Vm))))),
            (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

  // By-lane forms of VQRDMLSH.
  defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                                  null_frag>;
  def : Pat<(v4i16 (int_arm_neon_vqsubs
                     (v4i16 DPR:$src1),
                     (v4i16 (int_arm_neon_vqrdmulh
                              (v4i16 DPR:$Vn),
                              (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                   imm:$lane)))))),
            (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
  def : Pat<(v2i32 (int_arm_neon_vqsubs
                     (v2i32 DPR:$src1),
                     (v2i32 (int_arm_neon_vqrdmulh
                              (v2i32 DPR:$Vn),
                              (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                   imm:$lane)))))),
            (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                    imm:$lane))>;
  // Q-register by-lane forms: extract the D subregister holding the lane.
  def : Pat<(v8i16 (int_arm_neon_vqsubs
                     (v8i16 QPR:$src1),
                     (v8i16 (int_arm_neon_vqrdmulh
                              (v8i16 QPR:$src2),
                              (v8i16 (NEONvduplane (v8i16 QPR:$src3),
                                                   imm:$lane)))))),
            (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
                                    (v8i16 QPR:$src2),
                                    (v4i16 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i16_reg imm:$lane))),
                                    (SubReg_i16_lane imm:$lane)))>;
  def : Pat<(v4i32 (int_arm_neon_vqsubs
                     (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqrdmulh
                              (v4i32 QPR:$src2),
                              (v4i32 (NEONvduplane (v4i32 QPR:$src3),
                                                    imm:$lane)))))),
            (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
                                    (v4i32 QPR:$src2),
                                    (v2i32 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i32_reg imm:$lane))),
                                    (SubReg_i32_lane imm:$lane)))>;
}
4460//   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
4461defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4462                            "vqdmlal", "s", null_frag>;
4463defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
4464
4465def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
4466                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4467                                                  (v4i16 DPR:$Vm))))),
4468          (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4469def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
4470                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4471                                                  (v2i32 DPR:$Vm))))),
4472          (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4473def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
4474                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4475                                (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
4476                                                     imm:$lane)))))),
4477          (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4478def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
4479                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4480                                (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
4481                                                     imm:$lane)))))),
4482          (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4483
//   VMLS     : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
// Floating-point VMLS is only used when a separate multiply + subtract is
// wanted instead of a fused MAC (UseFPVMLx && DontUseFusedMAC).
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
// By-scalar (lane) forms.
defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                            v4f32, v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

// Match a Q-register multiply-subtract where the multiplier is a duplicated
// lane of a Q register: extract the D sub-register that holds the lane so the
// by-scalar instruction can be used.
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                     (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

//   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "u", NEONvmullu, sub>;

// By-scalar (lane) forms of VMLSL.
defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
4535
//   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
// The instruction definitions use null_frag; selection is done by the
// explicit vqsubs-of-vqdmull patterns below.
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", null_frag>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;

// Plain (vector x vector) forms.
def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                  (v4i16 DPR:$Vm))))),
          (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                  (v2i32 DPR:$Vm))))),
          (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
// By-lane forms.
def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                     imm:$lane)))))),
          (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                     imm:$lane)))))),
          (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4559
// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
// These require VFP4 and are only selected when fused MACs are enabled.
def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

def  VFMAfq   : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

//   Fused Vector Multiply Subtract (floating-point)
def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

// Match @llvm.fma.* intrinsics
// Note the operand reordering: the fma node is (Vn, Vm, accumulator) while
// the instruction takes the accumulator as its first operand.  A negated
// first multiplicand selects the VFMS form.
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
          Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
          Requires<[HasVFP4]>;
def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasVFP4]>;
4590
// Vector Subtract Operations.

//   VSUB     : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                     v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                     v4f32, v4f32, fsub, 0>;
//   VSUBL    : Vector Subtract Long (Q = D - D)
defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "s", sub, sext, 0>;
defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "u", sub, zext, 0>;
//   VSUBW    : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
//   VHSUB    : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
//   VQSUB    : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "u", int_arm_neon_vqsubu, 0>;
//   VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
// Defined with null_frag; selected via the trunc(shr(sub)) patterns below.
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
//   VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                            int_arm_neon_vrsubhn, 0>;

// VSUBHN keeps the high half of each lane: subtract, shift right by half the
// element width, then truncate.
def : Pat<(v8i8  (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
4634
// Vector Comparisons.
// The floating-point forms produce integer result vectors (e.g. v2i32 from
// v2f32 operands).  The "...z" variants compare against immediate #0.

//   VCEQ     : Vector Compare Equal
defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                     NEONvceq, 1>;
def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                     NEONvceq, 1>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                            "$Vd, $Vm, #0", NEONvceqz>;

//   VCGE     : Vector Compare Greater Than or Equal
defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                     NEONvcge, 0>;
def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                     NEONvcge, 0>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                            "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                            "$Vd, $Vm, #0", NEONvclez>;
}

//   VCGT     : Vector Compare Greater Than
defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                     NEONvcgt, 0>;
def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                     NEONvcgt, 0>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                            "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                            "$Vd, $Vm, #0", NEONvcltz>;
}
4682
//   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                        "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                        "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
//   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                        "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                        "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
//   VTST     : Vector Test Bits
defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                        IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// vaclt/vacle are assembler aliases for vacgt/vacge with the two source
// operands swapped.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;

// Two-operand spellings: the destination register doubles as a source.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
4714
// Vector Bitwise Operations.

// Bitwise NOT expressed as xor with an all-ones vector, for D and Q widths.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


//   VAND     : Vector Bitwise AND
def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                      v2i32, v2i32, and, 1>;
def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                      v4i32, v4i32, and, 1>;

//   VEOR     : Vector Bitwise Exclusive OR
def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                      v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                      v4i32, v4i32, xor, 1>;

//   VORR     : Vector Bitwise OR
def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                      v2i32, v2i32, or, 1>;
def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                      v4i32, v4i32, or, 1>;

// VORR with a modified-immediate splat operand; the variable cmode bits are
// copied from the encoded immediate.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}
4776

//   VBIC     : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                                 (vnotd DPR:$Vm))))]>;
def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                                 (vnotq QPR:$Vm))))]>;
}

// VBIC with a modified-immediate splat operand.
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

//   VORN     : Vector Bitwise OR NOT
def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                                (vnotd DPR:$Vm))))]>;
def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                                (vnotq QPR:$Vm))))]>;
4839
//   VMVN     : Vector Bitwise NOT (Immediate)

// Immediate forms are rematerializable since they depend only on the
// encoded immediate.
let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

//   VMVN     : Vector Bitwise NOT
def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
4884
//   VBSL     : Vector Bitwise Select
def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VCNTiD,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set DPR:$Vd,
                           (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
// Map the vbsl intrinsic onto VBSLd for all 64-bit element types.
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

// Recognize the expanded select form (Vn & Vd) | (Vm & ~Vd) as a VBSL with
// Vd as the select mask.
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VCNTiQ,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set QPR:$Vd,
                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

// Map the vbsl intrinsic onto VBSLq for all 128-bit element types.
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                   (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;

// Expanded select form for Q registers.
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
4959
//   VBIF     : Vector Bitwise Insert if False
//              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
// No selection patterns ([]) — assembler/disassembler only; see note below.
def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

//   VBIT     : Vector Bitwise Insert if True
//              like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.
4991
// Vector Absolute Differences.

//   VABD     : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

//   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
// The absolute difference is zero-extended to the wider result type, so
// zext is used for both the signed and unsigned variants.
defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

//   VABA     : Vector Absolute Difference and Accumulate
defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "u", int_arm_neon_vabdu, add>;

//   VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;
5023
// Vector Maximum and Minimum.

//   VMAX     : Vector Maximum
defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "u", int_arm_neon_vmaxu, 1>;
def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f32",
                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f32",
                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMAXNM — ARMv8-only maximum-number instruction.
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMAXNMND  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                            N3RegFrm, NoItinerary, "vmaxnm", "f32",
                            v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
                            Requires<[HasV8, HasNEON]>;
  def VMAXNMNQ  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                            N3RegFrm, NoItinerary, "vmaxnm", "f32",
                            v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
                            Requires<[HasV8, HasNEON]>;
}

//   VMIN     : Vector Minimum
defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "u", int_arm_neon_vminu, 1>;
def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmin", "f32",
                        v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmin", "f32",
                        v4f32, v4f32, int_arm_neon_vmins, 1>;

// VMINNM — ARMv8-only minimum-number instruction.
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMINNMND  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                            N3RegFrm, NoItinerary, "vminnm", "f32",
                            v2f32, v2f32, int_arm_neon_vminnm, 1>,
                            Requires<[HasV8, HasNEON]>;
  def VMINNMNQ  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                            N3RegFrm, NoItinerary, "vminnm", "f32",
                            v4f32, v4f32, int_arm_neon_vminnm, 1>,
                            Requires<[HasV8, HasNEON]>;
}
5077
// Vector Pairwise Operations.

//   VPADD    : Vector Pairwise Add (D registers only)
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;

//   VPADDL   : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

//   VPADAL   : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;
5105
//   VPMAX    : Vector Pairwise Maximum (D registers only)
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

//   VPMIN    : Vector Pairwise Minimum (D registers only)
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
5135def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
5136                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
5137
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

//   VRECPE   : Vector Reciprocal Estimate
// The unsigned fixed-point (u32) and floating-point (f32) estimates are
// distinguished by a single opcode bit: 0b01000 vs 0b01010 here, and
// 0b01001 vs 0b01011 for VRSQRTE below.
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;

//   VRECPS   : Vector Reciprocal Step
// Refinement step used in Newton-Raphson reciprocal iteration; marked
// commutable (trailing 1 argument).
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;

//   VRSQRTE  : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;

//   VRSQRTS  : Vector Reciprocal Square Root Step
// Same shape as VRECPS but with size bits 0b10; also commutable.
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
5183
// Vector Shifts.

//   VSHL     : Vector Shift
// Register-shift forms: the per-element shift amount comes from the second
// register operand; signed/unsigned variants lower from distinct intrinsics.
defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "u", int_arm_neon_vshiftu>;

//   VSHL     : Vector Shift Left (Immediate)
defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;

//   VSHR     : Vector Shift Right (Immediate)
defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                            NEONvshrs>;
defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                            NEONvshru>;

//   VSHLL    : Vector Shift Left Long
// The PatFrags fuse a widening extend followed by an immediate left shift,
// i.e. (sext/zext x) << amt, into a single instruction.
defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>;
defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>;

//   VSHLL    : Vector Shift Left Long (with maximum shift count)
// A distinct encoding exists when the shift amount equals the source element
// size: bits 21-16 are fixed (op21_16) and a custom decoder is needed
// because the immediate is implied by the encoding rather than stored in it.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, null_frag> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32>;

// Select the max-shift encodings for a shift by exactly the element size.
// sext and zext give the same result here: the extended bits are entirely
// shifted out and replaced by zeros.
def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;

//   VSHRN    : Vector Shift Right and Narrow
defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                           PatFrag<(ops node:$Rn, node:$amt),
                                   (trunc (NEONvshrs node:$Rn, node:$amt))>>;

// Unsigned shift-right-narrow maps onto the same VSHRN instructions: after
// truncation the retained low bits are identical for arithmetic and logical
// right shifts.
def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))),
          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))),
          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))),
          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
5249
//   VRSHL    : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
//   VRSHR    : Vector Rounding Shift Right
defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                            NEONvrshrs>;
defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                            NEONvrshru>;

//   VRSHRN   : Vector Rounding Shift Right and Narrow
defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                           NEONvrshrn>;

//   VQSHL    : Vector Saturating Shift
// Register-shift forms; the op4 bit (1 here, vs 0 for VSHL above)
// distinguishes the saturating encodings.
defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
//   VQSHL    : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;

//   VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
// Signed input with unsigned saturated result (NEONvqshlsu); immediate only.
defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;

//   VQSHRN   : Vector Saturating Shift Right and Narrow
defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                           NEONvqshrns>;
defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                           NEONvqshrnu>;

//   VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
// Signed input narrowed with unsigned saturation (NEONvqshrnsu).
defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                           NEONvqshrnsu>;

//   VQRSHL   : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "u", int_arm_neon_vqrshiftu>;

//   VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnu>;

//   VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsu>;

//   VSRA     : Vector Shift Right and Accumulate
defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
//   VRSRA    : Vector Rounding Shift Right and Accumulate
defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

//   VSLI     : Vector Shift Left and Insert
defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

//   VSRI     : Vector Shift Right and Insert
defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
5321
// Vector Absolute and Saturating Absolute.

//   VABS     : Vector Absolute Value
defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
                           int_arm_neon_vabs>;
def  VABSfd   : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v2f32, v2f32, fabs>;
def  VABSfq   : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                      v4f32, v4f32, fabs>;

// Fold the branchless abs idiom "(x + (x >>s (bits-1))) ^ (x >>s (bits-1))"
// into a single VABS. The i8/i16 element cases go through bitconverts
// because the xor is matched at v2i32/v4i32 granularity.
def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
               (v2i32 (bitconvert (v8i8 (add DPR:$src,
                                             (NEONvshrs DPR:$src, (i32 7))))))),
          (VABSv8i8 DPR:$src)>;
def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))),
               (v2i32 (bitconvert (v4i16 (add DPR:$src,
                                            (NEONvshrs DPR:$src, (i32 15))))))),
          (VABSv4i16 DPR:$src)>;
def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))),
               (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))),
          (VABSv2i32 DPR:$src)>;
def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))),
               (v4i32 (bitconvert (v16i8 (add QPR:$src,
                                             (NEONvshrs QPR:$src, (i32 7))))))),
          (VABSv16i8 QPR:$src)>;
def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))),
               (v4i32 (bitconvert (v8i16 (add QPR:$src,
                                            (NEONvshrs QPR:$src, (i32 15))))))),
          (VABSv8i16 QPR:$src)>;
def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))),
               (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))),
          (VABSv4i32 QPR:$src)>;

//   VQABS    : Vector Saturating Absolute Value
defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                           int_arm_neon_vqabs>;
5362
// Vector Negate.

// PatFrags recognizing "0 - x" (with the all-zeros vector bitconverted to
// the element type in use) so a plain integer subtraction from zero selects
// VNEG.
def vnegd  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

// Helper classes for the D- and Q-register integer negate encodings; only
// the size field and value type vary between element widths.
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

//   VNEG     : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

//   VNEG     : Vector Negate (floating-point)
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;

// NOTE(review): these appear to restate the instruction patterns attached in
// VNEGD/VNEGQ above; presumably kept for selection coverage — confirm.
def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;

//   VQNEG    : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                           int_arm_neon_vqneg>;
5408
// Vector Bit Counting Operations.

//   VCLS     : Vector Count Leading Sign Bits
defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                           int_arm_neon_vcls>;
//   VCLZ     : Vector Count Leading Zeros
// Selected from the generic ctlz DAG node rather than an ARM intrinsic.
defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                           ctlz>;
//   VCNT     : Vector Count One Bits
// Population count; byte elements only, selected from the generic ctpop node.
def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiD, "vcnt", "8",
                        v8i8, v8i8, ctpop>;
def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiQ, "vcnt", "8",
                        v16i8, v16i8, ctpop>;
5426
// Vector Swap
// Both registers are read and written: the constraint string ties each input
// to the corresponding output. No selection pattern is attached here (empty
// pattern list).
def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                     (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;
def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                     (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;
5436
// Vector Move Operations.

//   VMOV     : Vector Move (Register)
// Register-to-register vmov is encoded as VORR with both sources equal.
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

//   VMOV     : Vector Move (Immediate)

// Immediate moves are rematerializable: reloading the constant is always
// cheaper than spilling it.
let isReMaterializable = 1 in {
def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

// For the i16 forms the unresolved encoding bit ('?' in the cmode field) is
// filled in from the encoded immediate.
def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
 let Inst{9} = SIMM{9};
}

// For the i32 forms the whole 4-bit field comes from the encoded immediate,
// covering all of the i32 modified-immediate variants.
def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable
5503
// Add support for byte replication of immediates, for GAS compatibility.
// E.g. the instructions below:
// "vmov.i32 d0, 0xffffffff"
// "vmov.i32 d0, 0xabababab"
// "vmov.i16 d0, 0xabab"
// have no i32/i16 modified-immediate encoding as written, but can be handled
// by rewriting them. For the last two, for example, we emit:
// "vmov.i8 d0, 0xab"
def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>;

// Also add the same support for VMVN instructions. So the instruction:
// "vmvn.i32 d0, 0xabababab"
// actually means:
// "vmov.i8 d0, 0x54"
def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
                    (VMOVv8i8 DPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
                    (VMOVv16i8 QPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>;
5533
// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
// require zero cycles to execute so they should be used wherever possible for
// setting a register to zero.

// Even without these pseudo-insts we would probably end up with the correct
// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
// since they are sometimes rather expensive (in general).

// The (ops 14, zero_reg) operand pair is the always-executed predicate
// (condition code AL, no condition register).
let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
                               (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
  def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
                               (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
}
5552
//   VMOV     : Vector Get Lane (move scalar to ARM core register)

// The lane number is scattered across non-contiguous encoding bits; each def
// below places its lane bits into Inst{21} and (for sub-word lanes) Inst{6-5}.
def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                          IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                          IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
// The 32-bit get-lane needs no sign/zero distinction; only offered when the
// target reports VMOV-from-lane as fast.
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                           imm:$lane))]>,
                Requires<[HasVFP2, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Lane extracts from Q registers: select the D subregister that holds the
// lane, then extract the remapped lane within it.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                     (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                     (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
// On targets where VMOV-from-lane is slow, go through an S-subregister copy
// instead of VGETLNi32.
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
// f32 lanes are read directly as S subregisters (no instruction needed).
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
5635
5636
//   VMOV     : Vector Set Lane (move ARM core register to scalar)

// The destination vector is also a source: lanes other than the written one
// pass through unchanged, hence the tied-operand constraint.
let Constraints = "$src1 = $V" in {
def VSETLNi8  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                          IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                           GPR:$R, imm:$lane))]>,
                Requires<[HasVFP2]> {
  let Inst{21} = lane{0};
  // This instruction is equivalent to
  // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
  let isInsertSubreg = 1;
}
}
// Lane inserts into Q registers: pull out the D subregister containing the
// lane, set the remapped lane within it, and put it back.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                   (DSubReg_i8_reg imm:$lane))),
                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i16_reg imm:$lane))),
                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i32_reg imm:$lane))),
                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

// f32 lane inserts are plain S-subregister writes; no instruction needed.
def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector: only lane 0 is defined, so an IMPLICIT_DEF vector with
// the scalar inserted (or a plain subreg insert for FP) suffices.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
5725
//   VDUP     : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;

def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
// The 32-bit D-register form is only selected on uarchs where VDUP.32 is
// fast; the slow-VDUP.32 patterns below use VMOVDRR instead.
def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>,
                Requires<[HasNEON, HasFastVDUP32]>;
def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// NEONvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;

// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;
5755
//   VDUP     : Vector Duplicate Lane (from scalar to all elements)

class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;

class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
                                      VectorIndex32:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.
// The low bits form a size tag (xxx1 = 8-bit, xx10 = 16-bit, x100 = 32-bit)
// and the remaining '?' bits carry the lane number, filled in per-def below.

def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Duplicating a lane of a Q register: the instruction only reads a D source,
// so extract the D subregister containing the lane first.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                  (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;

// Splatting an f32 already held in an S register: place it in lane 0 of an
// undefined vector and duplicate that lane.
def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))),
          (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                             SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))),
          (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                             SPR:$src, ssub_0), (i32 0)))>;
5827
//   VMOVN    : Vector Narrowing Move
defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                         "vmovn", "i", trunc>;
//   VQMOVN   : Vector Saturating Narrowing Move
defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                            "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                            "vqmovn", "u", int_arm_neon_vqmovnu>;
// VQMOVUN: signed input, unsigned saturated output.
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
//   VMOVL    : Vector Lengthening Move
defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
// anyext leaves the high bits unspecified, so the zero-extending form is
// a valid (and arbitrary) choice for it.
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
5844
// Vector Conversions.

//   VCVT     : Vector Convert Between Floating-Point and Integers
def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v2i32, v2f32, fp_to_sint>;
def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v2i32, v2f32, fp_to_uint>;
def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v2f32, v2i32, sint_to_fp>;
def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v2f32, v2i32, uint_to_fp>;

def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v4i32, v4f32, fp_to_sint>;
def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v4i32, v4f32, fp_to_uint>;
def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v4f32, v4i32, sint_to_fp>;
def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v4f32, v4i32, uint_to_fp>;

// VCVT{A, N, P, M}
// ARMv8 directed-rounding conversions.  Each variant expands to signed and
// unsigned D- and Q-register forms, distinguished by op10_8 and the U bit.
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                    SDPatternOperator IntU> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                       "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
    def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                       "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
    def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                       "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
    def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                       "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
  }
}

defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
5885
//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
}

let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}

// Accept the fixed-point spelling with zero fraction bits ("..., #0") as an
// alias for the plain integer <-> FP conversions.
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
5926
5927
//   VCVT     : Vector Convert Between Half-Precision and Single-Precision.
// Narrowing (f32 -> f16) and lengthening (f16 -> f32) forms; the f16 vector
// is carried as v4i16.  Both require the half-precision extension.
def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                        IIC_VUNAQ, "vcvt", "f16.f32",
                        v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
                Requires<[HasNEON, HasFP16]>;
def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                        IIC_VUNAQ, "vcvt", "f32.f16",
                        v4f32, v4i16, int_arm_neon_vcvthf2fp>,
                Requires<[HasNEON, HasFP16]>;
5937
// Vector Reverse.
// op19_18 selects the element size: 0b00 = 8-bit, 0b01 = 16-bit,
// 0b10 = 32-bit.  Only element sizes smaller than the reversal region
// are defined.

//   VREV64   : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;

//   VREV32   : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

//   VREV16   : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;

def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;
5997
// Other Vector Shuffles.

//  Aligned extractions: really just dropping registers

// Extracting a D-sized subvector at an aligned start index needs no
// instruction: it is an EXTRACT_SUBREG of the matching D subregister.
// LaneCVT maps the start element index to the dsub_0/dsub_1 subreg index.
class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
      : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
             (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
6015
6016
//   VEXT     : Vector Extract


// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
// 64-bit VEXT: selects a contiguous result starting at element $index of
// the pair $Vm:$Vn.  The immediate operand class is a class parameter so
// each element-size instantiation gets its own asm-parser range checking.
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  let Inst{11} = 0b0;
  let Inst{10-8} = index{2-0};
}

// 128-bit VEXT.  Note: use the immTy class parameter for the $index
// operand (it was previously hard-coded to imm0_15, which defeated the
// narrower immediate ranges passed in by VEXTq16/VEXTq32/VEXTq64).
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}
6043
// The VEXT hardware immediate is a byte offset; each per-size def places the
// element index in the upper bits of the Inst{11-8} field and forces the low
// bits to zero, i.e. byte offset = index * element size.
def VEXTd8  : VEXTd<"vext", "8",  v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{10-9} = index{1-0};
  let Inst{8}    = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{10}     = index{0};
  let Inst{9-8}    = 0b00;
}
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
                           (v2f32 DPR:$Vm),
                           (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;

def VEXTq8  : VEXTq<"vext", "8",  v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}    = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11} = index{0};
  let Inst{10-8}    = 0b000;
}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
                           (v4f32 QPR:$Vm),
                           (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
6079
//   VTRN     : Vector Transpose

def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

//   VUZP     : Vector Unzip (Deinterleave)

def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
// (There is no 32-bit D-register VUZP encoding; VTRN.32 has the same effect.)
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

//   VZIP     : Vector Zip (Interleave)

def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
6113
// Vector Table Lookup and Table Extension.

//   VTBL     : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
// Multi-register table lists consume consecutive D registers, hence
// hasExtraSrcRegAllocReq.  These forms carry no patterns; selection goes
// through the QQPR-based pseudos below.
let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudos taking the table as a QQPR super-register.
def  VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def  VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

//   VTBX     : Vector Table Extension
// Like VTBL, but out-of-range indices leave the corresponding destination
// byte unchanged, so the destination is also a tied source ($orig = $Vd).
def  VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def  VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def  VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def  VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def  VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"
6176
// VRINT      : Vector Rounding
// ARMv8 round-to-integral-in-FP.  Inst{9-7} selects the rounding mode; each
// variant gets D and Q forms plus ".f32.f32" asm aliases.
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f32",
                      v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f32",
                      v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
  }

  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                  (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
                  (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
6204
// Cryptography instructions
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  // "2Op" variants tie the destination to a source operand.
  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator Int>
    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator Int>
    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
                !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
      Requires<[HasV8, HasCrypto]>;
}

def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;

// SHA1H and SHA1C/M/P use null_frag here because their intrinsics take or
// return a scalar i32 hash value; the explicit patterns below shuttle it
// through an S register.
def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;

def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
              (SHA1H (SUBREG_TO_REG (i64 0),
                                    (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
                                    ssub_0)),
              ssub_0)), GPR)>;

def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1C v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1M v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1P v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;
6275
6276//===----------------------------------------------------------------------===//
6277// NEON instructions for single-precision FP math
6278//===----------------------------------------------------------------------===//
6279
6280class N2VSPat<SDNode OpNode, NeonI Inst>
6281  : NEONFPPat<(f32 (OpNode SPR:$a)),
6282              (EXTRACT_SUBREG
6283               (v2f32 (COPY_TO_REGCLASS (Inst
6284                (INSERT_SUBREG
6285                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6286                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
6287
6288class N3VSPat<SDNode OpNode, NeonI Inst>
6289  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
6290              (EXTRACT_SUBREG
6291               (v2f32 (COPY_TO_REGCLASS (Inst
6292                (INSERT_SUBREG
6293                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6294                 SPR:$a, ssub_0),
6295                (INSERT_SUBREG
6296                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6297                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
6298
6299class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
6300  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
6301              (EXTRACT_SUBREG
6302               (v2f32 (COPY_TO_REGCLASS (Inst
6303                (INSERT_SUBREG
6304                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6305                 SPR:$acc, ssub_0),
6306                (INSERT_SUBREG
6307                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6308                 SPR:$a, ssub_0),
6309                (INSERT_SUBREG
6310                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
6311                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
6312
6313class NVCVTIFPat<SDNode OpNode, NeonI Inst>
6314  : NEONFPPat<(f32 (OpNode GPR:$a)),
6315              (f32 (EXTRACT_SUBREG
6316                     (v2f32 (Inst
6317                       (INSERT_SUBREG
6318                         (v2f32 (IMPLICIT_DEF)),
6319                         (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
6320                     ssub_0))>;
6321class NVCVTFIPat<SDNode OpNode, NeonI Inst>
6322  : NEONFPPat<(i32 (OpNode SPR:$a)),
6323              (i32 (EXTRACT_SUBREG
6324                     (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6325                                                 SPR:$a, ssub_0))),
6326                     ssub_0))>;
6327
6328def : N3VSPat<fadd, VADDfd>;
6329def : N3VSPat<fsub, VSUBfd>;
6330def : N3VSPat<fmul, VMULfd>;
6331def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
6332      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
6333def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
6334      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
6335def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
6336      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
6337def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
6338      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
6339def : N2VSPat<fabs, VABSfd>;
6340def : N2VSPat<fneg, VNEGfd>;
6341def : N3VSPat<NEONfmax, VMAXfd>;
6342def : N3VSPat<NEONfmin, VMINfd>;
6343def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
6344def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
6345def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
6346def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
6347
// NEON doesn't have any f64 conversions, so provide patterns to make
// sure the VFP conversions match when extracting from a vector.
// Signed i32 lane -> f64: extract the lane's S sub-register, use VSITOD.
def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
// Unsigned i32 lane -> f64: same, with VUITOD.
def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
6358
6359
// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
// The GPR value is written into both halves of a D register and the low
// S sub-register is then extracted as the f32 result.
def : Pat<(f32 (bitconvert GPR:$a)),
          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
        Requires<[HasNEON, DontUseVMOVSR]>;
6364
6365//===----------------------------------------------------------------------===//
6366// Non-Instruction Patterns
6367//===----------------------------------------------------------------------===//
6368
// bit_convert
// 64-bit (D-register) bitconverts.  These are plain register
// reinterpretations.  Casts between types with the same element size
// (f64 <-> v1i64, v2i32 <-> v2f32) have the same lane layout in either
// endianness and so carry no predicate; the big-endian forms of the
// remaining casts are defined with VREV further down.
let Predicates = [IsLE] in {
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
}
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
}
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
}
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
}
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;
}
6410
// 128-bit (Q-register) bitconverts.  As for the 64-bit versions above,
// only the element-size-preserving casts (v2i64 <-> v2f64,
// v4i32 <-> v4f32) are endian-neutral; the rest are little-endian here,
// with VREV-based big-endian patterns below.
let Predicates = [IsLE] in {
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
}
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
}
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
}
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
}
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
}
6451
let Predicates = [IsBE] in {
  // Big-endian bitconverts between types with different element sizes
  // must reorder the lanes.  Throughout this table the VREV width is the
  // larger of the two element sizes and the reversal granule is the
  // smaller (e.g. v2i32 <-> v4i16 uses VREV32d16).
  // 64 bit conversions
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (VREV64d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (VREV32d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (VREV16d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (VREV64d8  DPR:$src)>;
  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (VREV32d8  DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;

  // 128 bit conversions
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8  QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8  QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
}
6509
// Fold extracting an element out of a v2i32 into a vfp register:
// reinterpret the i32 lane as f32 by taking the lane's S sub-register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
6513
// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load. Example:
// Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr))
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
//                              (f64 (IMPLICIT_DEF)), (i32 0)))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  // Prefer the combined load+lengthen patterns over separate selection.
  let AddedComplexity = 10 in {
  // Any-extending load: extended bits are don't-care, so the unsigned
  // lengthening move is used.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
                  (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  // Zero-extending load: unsigned lengthening move (VMOVLu).
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
                (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  // Sign-extending load: signed lengthening move (VMOVLs).
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
                (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
  }
}
6539
// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                      (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  // Any-extending load: extended bits are don't-care, use the unsigned form.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  // Zero-extending load.
  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  // Sign-extending load.
  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
}
6565
// The following class definition is basically a copy of the
// Lengthen_HalfSingle definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy, string RevLanes> {
  // Any-extending load: extended bits are don't-care, use the unsigned form.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>;
  // Zero-extending load.
  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>;
  // Sign-extending load.
  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>;
}
6591
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0)),
//           dsub_0)>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  // Any-extending load: extended bits are don't-care, use the unsigned forms.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
  // Zero-extending load: two unsigned lengthening moves.
  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
  // Sign-extending load: two signed lengthening moves.
  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
}
6625
// The following class definition is basically a copy of the
// Lengthen_Double definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty, string RevLanes> {
  // Any-extending load: extended bits are don't-care, use the unsigned forms.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
            (!cast<Instruction>("VREV32d" # RevLanes)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>;
  // Zero-extending load.
  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
            (!cast<Instruction>("VREV32d" # RevLanes)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>;
  // Sign-extending load.
  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
            (!cast<Instruction>("VREV32d" # RevLanes)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>;
}
6655
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
// Pat<(v2i32 (extloadvi8 addrmode6:$addr))
//     (EXTRACT_SUBREG (VMOVLuv4i32
//       (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                               (f64 (IMPLICIT_DEF)), (i32 0))),
//                       dsub_0)),
//       dsub_0)>;
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  // Any-extending load: extended bits are don't-care, use the unsigned forms.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
          dsub_0)>;
  // Zero-extending load.
  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
          dsub_0)>;
  // Sign-extending load.
  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
          dsub_0)>;
}
6692
// The following class definition is basically a copy of the
// Lengthen_HalfDouble definition above, however with an additional VREV16d8
// instruction to convert data loaded by VLD1LN into proper vector format
// in big endian mode.  Unlike the other Big_Endian multiclasses the VREV
// instruction is fixed (always VREV16d8), so it is referenced directly
// rather than via !cast of a concatenated name.
multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy,
                           string SrcTy, string Insn1Lanes, string Insn1Ty,
                           string Insn2Lanes, string Insn2Ty> {
  // Any-extending load: extended bits are don't-care, use the unsigned forms.
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
            (VREV16d8
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
          dsub_0)>;
  // Zero-extending load.
  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
            (VREV16d8
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
          dsub_0)>;
  // Sign-extending load.
  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
            (VREV16d8
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0)),
          dsub_0)>;
}
6725
// Instantiations of the lengthening-load pattern multiclasses above.
defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

let Predicates = [IsLE] in {
  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
}

// Big-endian versions; the trailing "RevLanes" argument selects the
// VREV32d variant used to put the VLD1LN result into lane order
// (Lengthen_HalfDouble_Big_Endian always uses VREV16d8, so it takes none).
let Predicates = [IsBE] in {
  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
}
6753
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
let Predicates = [IsLE] in {
  // Any-extend: load one 16-bit lane, then lengthen three times (unsigned).
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
           (VLD1LNd16 addrmode6:$addr,
                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  // Zero-extend.
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
           (VLD1LNd16 addrmode6:$addr,
                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  // Sign-extend: signed lengthening moves.
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
           (VLD1LNd16 addrmode6:$addr,
                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
}
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d8 instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
let Predicates = [IsBE] in {
  // Any-extend: load one 16-bit lane, put it into lane order with VREV16d8,
  // then lengthen three times (unsigned forms).  VREV16d8 is a fixed,
  // statically known instruction, so it is referenced directly rather than
  // through a redundant !cast of its name.
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
           (VREV16d8
             (VLD1LNd16 addrmode6:$addr,
                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  // Zero-extend.
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
           (VREV16d8
             (VLD1LNd16 addrmode6:$addr,
                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  // Sign-extend: signed lengthening moves.
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
           (VREV16d8
             (VLD1LNd16 addrmode6:$addr,
                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
}
6789
6790//===----------------------------------------------------------------------===//
6791// Assembler aliases
6792//
6793
// Pre-UAL VFP mnemonics: move a GPR into the high (lane 1) or low (lane 0)
// half of a D register, via VMOV (VSETLNi32).
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                         (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                         (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases: destination register doubles as first source.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                         (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                         (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                         (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                         (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                         (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                         (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// ... immediates: "vand" with an immediate is accepted by matching the
// bitwise complement of the immediate (nImmSplatNot*) and encoding the
// operation as VBIC.
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
6838
6839
// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No-writeback forms, one per element size.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                      pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;

// Writeback, fixed increment ("$addr!").
def VLD1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                      pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
// Writeback, register increment ("$addr, $Rm").
def VLD1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
6876
6877
// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No-writeback forms, one per element size.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                      pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;

// Writeback, fixed increment ("$addr!").
def VST1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                      pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
// Writeback, register increment ("$addr, $Rm").
def VST1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
6914
// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// The alignment operand covers both elements of the structure: .8 pairs with
// addrmode6align16, .16 with addrmode6align32, .32 with addrmode6align64.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;

// Fixed-increment writeback forms ("$addr!").
def VLD2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
6972
6973
// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Alignment operands mirror the VLD2 single-lane pseudos above: align16 for
// .8, align32 for .16, align64 for .32.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;

// Fixed-increment writeback forms ("$addr!").
def VST2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                      pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VST2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
7032
// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Every form uses addrmode6dupalignNone, so no alignment qualifier is
// accepted on the address for these pseudos.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;

// Fixed-increment writeback forms ("$addr!").
def VLD3DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD3DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
7102
7103
// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// All forms use addrmode6alignNone: no alignment qualifier is accepted.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;

// Fixed-increment writeback forms ("$addr!").
def VLD3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeDHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
7162
// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// All forms take addrmode6align64 regardless of element size.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Fixed-increment writeback forms ("$addr!").
def VLD3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
7221
// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// All forms use addrmode6alignNone, matching the VLD3 single-lane pseudos.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;

// Fixed-increment writeback forms ("$addr!").
def VST3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                    pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VST3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeDHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
7280
7281
// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// All forms take addrmode6align64, matching the VLD3 multiple-structure
// pseudos above.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Fixed-increment writeback forms ("$addr!").
def VST3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VST3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeD:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
7340
// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Alignment varies with element size: .8 pairs with addrmode6dupalign32,
// .16 with addrmode6dupalign64, .32 with addrmode6dupalign64or128.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;

// Fixed-increment writeback forms ("$addr!").
def VLD4DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD4DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourDAllLanes:$list,
                       addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQAllLanes:$list,
                       addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
7410
7411
// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Alignment varies with element size: .8 pairs with addrmode6align32,
// .16 with addrmode6align64, .32 with addrmode6align64or128.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

// Fixed-increment writeback forms ("$addr!").
def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
7454def VLD4LNdWB_register_Asm_16 :
7455        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
7456                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
7457                       rGPR:$Rm, pred:$p)>;
7458def VLD4LNdWB_register_Asm_32 :
7459        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
7460                  (ins VecListFourDWordIndexed:$list,
7461                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
7462def VLD4LNqWB_register_Asm_16 :
7463        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
7464                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
7465                       rGPR:$Rm, pred:$p)>;
7466def VLD4LNqWB_register_Asm_32 :
7467        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
7468                  (ins VecListFourQWordIndexed:$list,
7469                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
7470
7471
7472
7473// VLD4 multiple structure pseudo-instructions. These need special handling for
7474// the vector operands that the normal instructions don't yet model.
7475// FIXME: Remove these when the register classes and instructions are updated.
7476def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
7477               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7478                pred:$p)>;
7479def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
7480               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7481                pred:$p)>;
7482def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
7483               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7484                pred:$p)>;
7485def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
7486               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7487                pred:$p)>;
7488def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
7489               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7490                pred:$p)>;
7491def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
7492               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7493                pred:$p)>;
7494
7495def VLD4dWB_fixed_Asm_8 :
7496        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
7497               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7498                pred:$p)>;
7499def VLD4dWB_fixed_Asm_16 :
7500        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
7501               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7502                pred:$p)>;
7503def VLD4dWB_fixed_Asm_32 :
7504        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
7505               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7506                pred:$p)>;
7507def VLD4qWB_fixed_Asm_8 :
7508        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
7509               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7510                pred:$p)>;
7511def VLD4qWB_fixed_Asm_16 :
7512        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
7513               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7514                pred:$p)>;
7515def VLD4qWB_fixed_Asm_32 :
7516        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
7517               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7518                pred:$p)>;
7519def VLD4dWB_register_Asm_8 :
7520        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
7521                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7522                       rGPR:$Rm, pred:$p)>;
7523def VLD4dWB_register_Asm_16 :
7524        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
7525                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7526                       rGPR:$Rm, pred:$p)>;
7527def VLD4dWB_register_Asm_32 :
7528        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
7529                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7530                       rGPR:$Rm, pred:$p)>;
7531def VLD4qWB_register_Asm_8 :
7532        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
7533                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7534                       rGPR:$Rm, pred:$p)>;
7535def VLD4qWB_register_Asm_16 :
7536        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
7537                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7538                       rGPR:$Rm, pred:$p)>;
7539def VLD4qWB_register_Asm_32 :
7540        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
7541                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7542                       rGPR:$Rm, pred:$p)>;
7543
7544// VST4 single-lane pseudo-instructions. These need special handling for
7545// the lane index that an InstAlias can't handle, so we use these instead.
7546def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
7547               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
7548                    pred:$p)>;
7549def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
7550               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
7551                    pred:$p)>;
7552def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
7553               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
7554                    pred:$p)>;
7555def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
7556               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
7557                    pred:$p)>;
7558def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
7559               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
7560                    pred:$p)>;
7561
7562def VST4LNdWB_fixed_Asm_8 :
7563        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
7564               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
7565                    pred:$p)>;
7566def VST4LNdWB_fixed_Asm_16 :
7567        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
7568               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
7569                    pred:$p)>;
7570def VST4LNdWB_fixed_Asm_32 :
7571        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
7572               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
7573                    pred:$p)>;
7574def VST4LNqWB_fixed_Asm_16 :
7575        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
7576               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
7577                    pred:$p)>;
7578def VST4LNqWB_fixed_Asm_32 :
7579        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
7580               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
7581                    pred:$p)>;
7582def VST4LNdWB_register_Asm_8 :
7583        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
7584                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
7585                       rGPR:$Rm, pred:$p)>;
7586def VST4LNdWB_register_Asm_16 :
7587        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
7588                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
7589                       rGPR:$Rm, pred:$p)>;
7590def VST4LNdWB_register_Asm_32 :
7591        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
7592                  (ins VecListFourDWordIndexed:$list,
7593                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
7594def VST4LNqWB_register_Asm_16 :
7595        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
7596                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
7597                       rGPR:$Rm, pred:$p)>;
7598def VST4LNqWB_register_Asm_32 :
7599        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
7600                  (ins VecListFourQWordIndexed:$list,
7601                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
7602
7603
7604// VST4 multiple structure pseudo-instructions. These need special handling for
7605// the vector operands that the normal instructions don't yet model.
7606// FIXME: Remove these when the register classes and instructions are updated.
7607def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
7608               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7609                    pred:$p)>;
7610def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
7611               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7612                    pred:$p)>;
7613def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
7614               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7615                    pred:$p)>;
7616def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
7617               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7618                    pred:$p)>;
7619def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
7620               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7621                    pred:$p)>;
7622def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
7623               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7624                    pred:$p)>;
7625
7626def VST4dWB_fixed_Asm_8 :
7627        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
7628               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7629                    pred:$p)>;
7630def VST4dWB_fixed_Asm_16 :
7631        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
7632               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7633                    pred:$p)>;
7634def VST4dWB_fixed_Asm_32 :
7635        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
7636               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7637                    pred:$p)>;
7638def VST4qWB_fixed_Asm_8 :
7639        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
7640               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7641                    pred:$p)>;
7642def VST4qWB_fixed_Asm_16 :
7643        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
7644               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7645                    pred:$p)>;
7646def VST4qWB_fixed_Asm_32 :
7647        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
7648               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7649                    pred:$p)>;
7650def VST4dWB_register_Asm_8 :
7651        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
7652                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7653                       rGPR:$Rm, pred:$p)>;
7654def VST4dWB_register_Asm_16 :
7655        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
7656                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7657                       rGPR:$Rm, pred:$p)>;
7658def VST4dWB_register_Asm_32 :
7659        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
7660                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
7661                       rGPR:$Rm, pred:$p)>;
7662def VST4qWB_register_Asm_8 :
7663        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
7664                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7665                       rGPR:$Rm, pred:$p)>;
7666def VST4qWB_register_Asm_16 :
7667        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
7668                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7669                       rGPR:$Rm, pred:$p)>;
7670def VST4qWB_register_Asm_32 :
7671        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
7672                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
7673                       rGPR:$Rm, pred:$p)>;
7674
7675// VMOV/VMVN takes an optional datatype suffix
7676defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
7677                         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
7678defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
7679                         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
7680
7681defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
7682                         (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
7683defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
7684                         (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
7685
7686// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
7687// D-register versions.
7688def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
7689                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
7690def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
7691                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
7692def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
7693                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
7694def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
7695                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
7696def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
7697                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
7698def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
7699                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
7700def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
7701                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
7702// Q-register versions.
7703def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
7704                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
7705def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
7706                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
7707def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
7708                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
7709def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
7710                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
7711def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
7712                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
7713def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
7714                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
7715def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
7716                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
7717
7718// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
7719// D-register versions.
7720def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
7721                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
7722def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
7723                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
7724def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
7725                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
7726def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
7727                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
7728def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
7729                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
7730def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
7731                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
7732def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
7733                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
7734// Q-register versions.
7735def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
7736                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
7737def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
7738                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
7739def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
7740                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
7741def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
7742                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
7743def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
7744                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
7745def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
7746                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
7747def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
7748                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
7749
7750// VSWP allows, but does not require, a type suffix.
7751defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
7752                         (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
7753defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
7754                         (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
7755
7756// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
7757defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
7758                         (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7759defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
7760                         (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7761defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
7762                         (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
7763defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
7764                         (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7765defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
7766                         (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7767defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
7768                         (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
7769
7770// "vmov Rd, #-imm" can be handled via "vmvn".
7771def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
7772                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
7773def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
7774                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
7775def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
7776                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
7777def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
7778                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
7779
7780// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
7781// these should restrict to just the Q register variants, but the register
7782// classes are enough to match correctly regardless, so we keep it simple
7783// and just use MnemonicAlias.
7784def : NEONMnemonicAlias<"vbicq", "vbic">;
7785def : NEONMnemonicAlias<"vandq", "vand">;
7786def : NEONMnemonicAlias<"veorq", "veor">;
7787def : NEONMnemonicAlias<"vorrq", "vorr">;
7788
7789def : NEONMnemonicAlias<"vmovq", "vmov">;
7790def : NEONMnemonicAlias<"vmvnq", "vmvn">;
7791// Explicit versions for floating point so that the FPImm variants get
7792// handled early. The parser gets confused otherwise.
7793def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
7794def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
7795
7796def : NEONMnemonicAlias<"vaddq", "vadd">;
7797def : NEONMnemonicAlias<"vsubq", "vsub">;
7798
7799def : NEONMnemonicAlias<"vminq", "vmin">;
7800def : NEONMnemonicAlias<"vmaxq", "vmax">;
7801
7802def : NEONMnemonicAlias<"vmulq", "vmul">;
7803
7804def : NEONMnemonicAlias<"vabsq", "vabs">;
7805
7806def : NEONMnemonicAlias<"vshlq", "vshl">;
7807def : NEONMnemonicAlias<"vshrq", "vshr">;
7808
7809def : NEONMnemonicAlias<"vcvtq", "vcvt">;
7810
7811def : NEONMnemonicAlias<"vcleq", "vcle">;
7812def : NEONMnemonicAlias<"vceqq", "vceq">;
7813
7814def : NEONMnemonicAlias<"vzipq", "vzip">;
7815def : NEONMnemonicAlias<"vswpq", "vswp">;
7816
7817def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
7818def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
7819
7820
7821// Alias for loading floating point immediates that aren't representable
7822// using the vmov.f32 encoding but the bitpattern is representable using
7823// the .i32 encoding.
7824def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
7825                     (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
7826def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
7827                     (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
7828