//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
//
def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                               AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON          : Predicate<"Subtarget->hasNEON()">,
                                 AssemblerPredicate<"FeatureNEON", "neon">;
def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
                                 AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                                 AssemblerPredicate<"FeatureCRC", "crc">;
def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;

//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//

// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                              [SDTCisSameAs<0, 2>,
                                               SDTCisSameAs<0, 3>,
                                               SDTCisInt<0>, SDTCisVT<1, i32>]>;

// SDTBinaryArithWithFlagsIn - RES = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                            [SDTCisSameAs<0, 1>,
                                             SDTCisSameAs<0, 2>,
                                             SDTCisInt<0>,
                                             SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                            [SDTCisSameAs<0, 2>,
                                             SDTCisSameAs<0, 3>,
                                             SDTCisInt<0>,
                                             SDTCisVT<1, i32>,
                                             SDTCisVT<4, i32>]>;

def SDT_AArch64Brcond  : SDTypeProfile<0, 3,
                                     [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                      SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                        SDTCisVT<2, OtherVT>]>;


def SDT_AArch64CSel  : SDTypeProfile<1, 4,
                                   [SDTCisSameAs<0, 1>,
                                    SDTCisSameAs<0, 2>,
                                    SDTCisInt<3>,
                                    SDTCisVT<4, i32>]>;
def SDT_AArch64FCmp   : SDTypeProfile<0, 2,
                                   [SDTCisFP<0>,
                                    SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup   : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane   : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Zip   : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                          SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp  : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisSameAs<0,2>,
                                           SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF  : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                 SDTCisPtrTy<1>]>;
def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                        [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                         SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                         SDTCisSameAs<1, 4>]>;


// Node definitions.
def AArch64adrp          : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64addlow        : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot       : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                SDCallSeqStart<[ SDTCisVT<0, i32> ]>,
                                [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end   : SDNode<"ISD::CALLSEQ_END",
                                SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                               SDTCisVT<1, i32> ]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call          : SDNode<"AArch64ISD::CALL",
                                SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                 SDNPVariadic]>;
def AArch64brcond        : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                                [SDNPHasChain]>;
def AArch64cbz           : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                                [SDNPHasChain]>;
def AArch64cbnz          : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
                                [SDNPHasChain]>;
def AArch64tbz           : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
                                [SDNPHasChain]>;
def AArch64tbnz          : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
                                [SDNPHasChain]>;


def AArch64csel          : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv         : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg         : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc         : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retflag       : SDNode<"AArch64ISD::RET_FLAG", SDTNone,
                                [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc       : SDNode<"AArch64ISD::ADC",  SDTBinaryArithWithFlagsIn>;
def AArch64sbc       : SDNode<"AArch64ISD::SBC",  SDTBinaryArithWithFlagsIn>;
def AArch64add_flag  : SDNode<"AArch64ISD::ADDS",  SDTBinaryArithWithFlagsOut,
                            [SDNPCommutative]>;
def AArch64sub_flag  : SDNode<"AArch64ISD::SUBS",  SDTBinaryArithWithFlagsOut>;
def AArch64and_flag  : SDNode<"AArch64ISD::ANDS",  SDTBinaryArithWithFlagsOut,
                            [SDNPCommutative]>;
def AArch64adc_flag  : SDNode<"AArch64ISD::ADCS",  SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag  : SDNode<"AArch64ISD::SBCS",  SDTBinaryArithWithFlagsInOut>;

def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;

def AArch64fcmp      : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;

def AArch64fmax      : SDNode<"AArch64ISD::FMAX", SDTFPBinOp>;
def AArch64fmin      : SDNode<"AArch64ISD::FMIN", SDTFPBinOp>;

def AArch64dup       : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8  : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;

def AArch64zip1      : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2      : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1      : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2      : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1      : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2      : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;

def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;

def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;

def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;

def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>;
def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>;

def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                        (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
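// In other words, CMTST(a, b) sets a lane to all-ones exactly when (a & b) is
// non-zero, which is why it can be built from the nodes above as
// not(cmeqz(and(a, b))).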

def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;

def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>;

def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def AArch64Prefetch        : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                               [SDNPHasChain, SDNPSideEffect]>;

def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
                                 SDT_AArch64TLSDescCall,
                                 [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
                                  SDNPVariadic]>;

def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                 SDT_AArch64WrapperLarge>;


//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

// AArch64 Instruction Predicate Definitions.
//
def HasZCZ    : Predicate<"Subtarget->hasZeroCycleZeroing()">;
def NoZCZ     : Predicate<"!Subtarget->hasZeroCycleZeroing()">;
def IsDarwin  : Predicate<"Subtarget->isTargetDarwin()">;
def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">;
def ForCodeSize   : Predicate<"ForCodeSize">;
def NotForCodeSize   : Predicate<"!ForCodeSize">;

include "AArch64InstrFormats.td"

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//

let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
                              [(AArch64callseq_start timm:$amt)]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1

let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions.  When that changes, they can be
// removed, along with the AArch64Wrapper node.

let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr),
                     [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
              Sched<[WriteLDAdr]>;

// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                            tglobaladdr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrJT
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
                                             tjumptable:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrCP
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
                                             tconstpool:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrBA
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                             tblockaddress:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrTLS
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                            tglobaltlsaddr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrEXT
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                            texternalsym:$low))]>,
      Sched<[WriteAdrAdr]>;

} // isReMaterializable, isCodeGenOnly

def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
          (LOADgot tglobaltlsaddr:$addr)>;

def : Pat<(AArch64LOADgot texternalsym:$addr),
          (LOADgot texternalsym:$addr)>;

def : Pat<(AArch64LOADgot tconstpool:$addr),
          (LOADgot tconstpool:$addr)>;

//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//

def HINT : HintI<"hint">;
def : InstAlias<"nop",  (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe",  (HINT 0b010)>;
def : InstAlias<"wfi",  (HINT 0b011)>;
def : InstAlias<"sev",  (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;

// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;

def DMB   : CRmSystemI<barrier_op, 0b101, "dmb">;
def DSB   : CRmSystemI<barrier_op, 0b100, "dsb">;
def ISB   : CRmSystemI<barrier_op, 0b110, "isb">;
def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;

def MRS    : MRSI;
def MSR    : MSRI;
def MSRpstate: MSRpstateI;

// The thread pointer (on Linux, at least, where this has been implemented) is
// TPIDR_EL0.
def : Pat<(AArch64threadpointer), (MRS 0xde82)>;

// Generic system instructions
def SYSxt  : SystemXtI<0, "sys">;
def SYSLxt : SystemLXtI<1, "sysl">;

def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
                (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
                 sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
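
// The alias above lets the trailing Xt register be omitted, substituting XZR;
// e.g. "sys #0, c7, c5, #0" is accepted as shorthand for
// "sys #0, c7, c5, #0, xzr".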

//===----------------------------------------------------------------------===//
// Move immediate instructions.
//===----------------------------------------------------------------------===//

defm MOVK : InsertImmediate<0b11, "movk">;
defm MOVN : MoveImmediate<0b00, "movn">;

let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;

// First group of aliases covers an implicit "lsl #0".
def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>;
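
// For example, "movz w0, #42" is accepted and encoded exactly as
// "movz w0, #42, lsl #0".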

// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48)>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32)>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16)>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0)>;

def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16)>;
def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0)>;

// Final group of aliases covers true "mov $Rd, $imm" cases.
multiclass movw_mov_alias<string basename, Instruction INST, RegisterClass GPR,
                          int width, int shift> {
  def _asmoperand : AsmOperandClass {
    let Name = basename # width # "_lsl" # shift # "MovAlias";
    let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
                               # shift # ">";
    let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
  }

  def _movimm : Operand<i32> {
    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
  }

  def : InstAlias<"mov $Rd, $imm",
                  (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
}
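
// For illustration: with the instantiations below, "mov w0, #0x20000" is
// accepted by the assembler and rendered as "movz w0, #2, lsl #16"
// (0x2 << 16 == 0x20000).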

defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;

defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;

let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
    isAsCheapAsAMove = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions.  When that changes, we can select
// directly to the real instructions and get rid of these pseudos.

def MOVi32imm
    : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
             [(set GPR32:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
def MOVi64imm
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
             [(set GPR64:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
} // isReMaterializable, isCodeGenOnly

// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
// eventual expansion code fewer bits to worry about getting right. Marshalling
// the types is a little tricky though:
def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def trunc_imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i32);
}]>;

def : Pat<(i64 i64imm_32bit:$src),
          (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
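
// e.g. the 64-bit constant 0x12345678 (upper 32 bits clear) is materialized
// with the 32-bit expansion and then widened via SUBREG_TO_REG, which models
// the implicit zeroing of an X register's upper half by any W-register write.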

// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
// sequences.
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
                             tglobaladdr:$g1, tglobaladdr:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48),
                                  tglobaladdr:$g2, 32),
                          tglobaladdr:$g1, 16),
                  tglobaladdr:$g0, 0)>;

def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
                             tblockaddress:$g1, tblockaddress:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48),
                                  tblockaddress:$g2, 32),
                          tblockaddress:$g1, 16),
                  tblockaddress:$g0, 0)>;

def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
                             tconstpool:$g1, tconstpool:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48),
                                  tconstpool:$g2, 32),
                          tconstpool:$g1, 16),
                  tconstpool:$g0, 0)>;

def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
                             tjumptable:$g1, tjumptable:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g3, 48),
                                  tjumptable:$g2, 32),
                          tjumptable:$g1, 16),
                  tjumptable:$g0, 0)>;
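
// Schematically, each of the patterns above emits a four-instruction sequence
// such as:
//   movz x0, #:abs_g3:sym
//   movk x0, #:abs_g2_nc:sym
//   movk x0, #:abs_g1_nc:sym
//   movk x0, #:abs_g0_nc:sym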


//===----------------------------------------------------------------------===//
// Arithmetic instructions.
//===----------------------------------------------------------------------===//

// Add/subtract with carry.
defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;

def : InstAlias<"ngc $dst, $src",  (SBCWr  GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngc $dst, $src",  (SBCXr  GPR64:$dst, XZR, GPR64:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;

// Add/subtract
defm ADD : AddSub<0, "add", add>;
defm SUB : AddSub<1, "sub">;

def : InstAlias<"mov $dst, $src",
                (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;

defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn">;
defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp">;

// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
          (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
          (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
          (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
          (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
          (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
          (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3),
          (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>;
def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3),
          (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>;

// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
// These patterns capture that transformation.
let AddedComplexity = 1 in {
def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}
// The same transformation applies to the flag-setting variants, so
// e.g. (adds x, -1) becomes (SUBS{W,X}ri x, 1) and vice versa.
let AddedComplexity = 1 in {
def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}

def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;

def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;


// Unsigned/Signed divide
defm UDIV : Div<0, "udiv", udiv>;
defm SDIV : Div<1, "sdiv", sdiv>;
let isCodeGenOnly = 1 in {
defm UDIV_Int : Div<0, "udiv", int_aarch64_udiv>;
defm SDIV_Int : Div<1, "sdiv", int_aarch64_sdiv>;
}

// Variable shift
defm ASRV : Shift<0b10, "asr", sra>;
defm LSLV : Shift<0b00, "lsl", shl>;
defm LSRV : Shift<0b01, "lsr", srl>;
defm RORV : Shift<0b11, "ror", rotr>;

def : ShiftAlias<"asrv", ASRVWr, GPR32>;
def : ShiftAlias<"asrv", ASRVXr, GPR64>;
def : ShiftAlias<"lslv", LSLVWr, GPR32>;
def : ShiftAlias<"lslv", LSLVXr, GPR64>;
def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
def : ShiftAlias<"rorv", RORVWr, GPR32>;
def : ShiftAlias<"rorv", RORVXr, GPR64>;

// Multiply-add
let AddedComplexity = 7 in {
defm MADD : MulAccum<0, "madd", add>;
defm MSUB : MulAccum<1, "msub", sub>;

def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
          (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
          (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;

def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
} // AddedComplexity = 7

let AddedComplexity = 5 in {
def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;

def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
          (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
          (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
          (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
          (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
} // AddedComplexity = 5

def : MulAccumWAlias<"mul", MADDWrrr>;
def : MulAccumXAlias<"mul", MADDXrrr>;
def : MulAccumWAlias<"mneg", MSUBWrrr>;
def : MulAccumXAlias<"mneg", MSUBXrrr>;
def : WideMulAccumAlias<"smull", SMADDLrrr>;
def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
def : WideMulAccumAlias<"umull", UMADDLrrr>;
def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;
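
// e.g. "mul w0, w1, w2" assembles as "madd w0, w1, w2, wzr", and
// "smull x0, w1, w2" as "smaddl x0, w1, w2, xzr".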

// Multiply-high
def SMULHrr : MulHi<0b010, "smulh", mulhs>;
def UMULHrr : MulHi<0b110, "umulh", mulhu>;

// CRC32
def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;

def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;


//===----------------------------------------------------------------------===//
// Logical instructions.
//===----------------------------------------------------------------------===//

// (immediate)
defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
defm AND  : LogicalImm<0b00, "and", and, "bic">;
defm EOR  : LogicalImm<0b10, "eor", xor, "eon">;
defm ORR  : LogicalImm<0b01, "orr", or, "orn">;

// FIXME: these aliases *are* canonical sometimes (when movz can't be
// used). Actually, it seems to be working right now, but putting logical_immXX
// here is a bit dodgy on the AsmParser side too.
def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
                                          logical_imm32:$imm), 0>;
def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
                                          logical_imm64:$imm), 0>;


// (register)
defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
defm BICS : LogicalRegS<0b11, 1, "bics",
                        BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
defm AND  : LogicalReg<0b00, 0, "and", and>;
defm BIC  : LogicalReg<0b00, 1, "bic",
                       BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
defm EON  : LogicalReg<0b10, 1, "eon",
                       BinOpFrag<(xor node:$LHS, (not node:$RHS))>>;
defm EOR  : LogicalReg<0b10, 0, "eor", xor>;
defm ORN  : LogicalReg<0b01, 1, "orn",
                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
defm ORR  : LogicalReg<0b01, 0, "orr", or>;

def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;

def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;

def : InstAlias<"mvn $Wd, $Wm$sh",
                (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
def : InstAlias<"mvn $Xd, $Xm$sh",
                (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;

def : InstAlias<"tst $src1, $src2",
                (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
def : InstAlias<"tst $src1, $src2",
                (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;

def : InstAlias<"tst $src1, $src2",
                        (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
def : InstAlias<"tst $src1, $src2",
                        (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;

def : InstAlias<"tst $src1, $src2$sh",
               (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
def : InstAlias<"tst $src1, $src2$sh",
               (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;


def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;


//===----------------------------------------------------------------------===//
// One operand data processing instructions.
//===----------------------------------------------------------------------===//

defm CLS    : OneOperandData<0b101, "cls">;
defm CLZ    : OneOperandData<0b100, "clz", ctlz>;
defm RBIT   : OneOperandData<0b000, "rbit">;

def : Pat<(int_aarch64_rbit GPR32:$Rn), (RBITWr $Rn)>;
def : Pat<(int_aarch64_rbit GPR64:$Rn), (RBITXr $Rn)>;

def  REV16Wr : OneWRegData<0b001, "rev16",
                                  UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def  REV16Xr : OneXRegData<0b001, "rev16", null_frag>;

def : Pat<(cttz GPR32:$Rn),
          (CLZWr (RBITWr GPR32:$Rn))>;
def : Pat<(cttz GPR64:$Rn),
          (CLZXr (RBITXr GPR64:$Rn))>;
def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
                (i32 1))),
          (CLSWr GPR32:$Rn)>;
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
                (i64 1))),
          (CLSXr GPR64:$Rn)>;
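
// The CLS patterns above match the standard expansion of "count leading sign
// bits": with y = (x >>s 31) ^ x, the leading bits of x that match the sign
// bit become leading zeros of y, so cls(x) == clz((y << 1) | 1); the "| 1"
// keeps the clz argument non-zero for x == 0 and x == -1.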

// Unlike the other one operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit, but actually use different
// opcode bits for the different sizes.
def REVWr   : OneWRegData<0b010, "rev", bswap>;
def REVXr   : OneXRegData<0b011, "rev", bswap>;
def REV32Xr : OneXRegData<0b010, "rev32",
                                 UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;

// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;

//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
            (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
def : InstAlias<"ror $dst, $src, $shift",
            (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;

def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;

//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in {
defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
}

def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

// min(7, 31 - shift_amt)
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

// min(15, 31 - shift_amt)
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

// min(7, 63 - shift_amt)
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

// min(15, 63 - shift_amt)
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

// min(31, 63 - shift_amt)
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 31 ? 31 : enc;
  return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;

def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
                              (i64 (i32shift_b imm0_31:$imm)))>;
def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                              (i64 (i64shift_b imm0_63:$imm)))>;
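
// Worked example: for (shl x, 7) on a W register, i32shift_a yields
// (32 - 7) & 31 == 25 and i32shift_b yields 31 - 7 == 24, so the shift is
// selected as "ubfm wd, wn, #25, #24", the canonical encoding of
// "lsl wd, wn, #7".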

let AddedComplexity = 10 in {
def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
}

def : InstAlias<"asr $dst, $src, $shift",
                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"asr $dst, $src, $shift",
                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;

def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;

def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;

//===----------------------------------------------------------------------===//
// Conditionally set flags instructions.
//===----------------------------------------------------------------------===//
defm CCMN : CondSetFlagsImm<0, "ccmn">;
defm CCMP : CondSetFlagsImm<1, "ccmp">;

defm CCMN : CondSetFlagsReg<0, "ccmn">;
defm CCMP : CondSetFlagsReg<1, "ccmp">;

//===----------------------------------------------------------------------===//
// Conditional select instructions.
//===----------------------------------------------------------------------===//
defm CSEL  : CondSelect<0, 0b00, "csel">;

def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;

def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;

def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
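
// With both sources tied to the zero register, these materialize a condition
// directly: e.g. "csinc w0, wzr, wzr, cc" produces cc ? 0 : 1 and
// "csinv w0, wzr, wzr, cc" produces cc ? 0 : -1.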

// The inverse of the condition code from the alias instruction is what is used
// in the aliased instruction. The parser already inverts the condition code
// for these aliases.
def : InstAlias<"cset $dst, $cc",
                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"cset $dst, $cc",
                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"csetm $dst, $cc",
                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

//===----------------------------------------------------------------------===//
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let neverHasSideEffects = 1, mayStore = 0, mayLoad = 0 in {
def ADR  : ADRI<0, "adr", adrlabel, []>;
} // neverHasSideEffects = 1

def ADRP : ADRI<1, "adrp", adrplabel,
                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
} // isReMaterializable = 1

// page address of a constant pool entry, block address
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;

//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//

let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET  : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1

// Default to the LR register.
def : InstAlias<"ret", (RET LR)>;

let isCall = 1, Defs = [LR], Uses = [SP] in {
def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>;
} // isCall

let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR  : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

// Create a separate pseudo-instruction for codegen to use so that we don't
// flag lr as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]> {
  let isTerminator = 1;
  let isBarrier = 1;
  let isReturn = 1;
}

// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> {
  let AsmString = ".tlsdesccall $sym";
}

// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It
// gets expanded to two MCInsts during lowering.
let isCall = 1, Defs = [LR] in
def TLSDESC_BLR
    : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym),
             [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>;

def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym),
          (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>;
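
// Schematically, the full TLSDESC sequence this participates in looks like:
//   adrp x0, :tlsdesc:var
//   ldr  x1, [x0, :tlsdesc_lo12:var]
//   add  x0, x0, :tlsdesc_lo12:var
//   .tlsdesccall var
//   blr  x1
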
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
def Bcc : BranchCond;

//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
//===----------------------------------------------------------------------===//
defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;

//===----------------------------------------------------------------------===//
// Test-bit-and-branch instructions.
//===----------------------------------------------------------------------===//
defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;

//===----------------------------------------------------------------------===//
// Unconditional branch (immediate) instructions.
//===----------------------------------------------------------------------===//
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def B  : BranchImm<0, "b", [(br bb:$addr)]>;
} // isBranch, isTerminator, isBarrier

let isCall = 1, Defs = [LR], Uses = [SP] in {
def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
} // isCall
def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;

//===----------------------------------------------------------------------===//
// Exception generation instructions.
//===----------------------------------------------------------------------===//
def BRK   : ExceptionGeneration<0b001, 0b00, "brk">;
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">;
def HLT   : ExceptionGeneration<0b010, 0b00, "hlt">;
def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
def SMC   : ExceptionGeneration<0b000, 0b11, "smc">;
def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;

// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>;

//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
defm LDPW : LoadPairOffset<0b00, 0, GPR32, simm7s4, "ldp">;
defm LDPX : LoadPairOffset<0b10, 0, GPR64, simm7s8, "ldp">;
defm LDPS : LoadPairOffset<0b00, 1, FPR32, simm7s4, "ldp">;
defm LDPD : LoadPairOffset<0b01, 1, FPR64, simm7s8, "ldp">;
defm LDPQ : LoadPairOffset<0b10, 1, FPR128, simm7s16, "ldp">;

defm LDPSW : LoadPairOffset<0b01, 0, GPR64, simm7s4, "ldpsw">;

// Pair (pre-indexed)
def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32, simm7s4, "ldp">;
def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64, simm7s8, "ldp">;
def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32, simm7s4, "ldp">;
def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64, simm7s8, "ldp">;
def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128, simm7s16, "ldp">;

def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64, simm7s4, "ldpsw">;

// Pair (post-indexed)
def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32, simm7s4, "ldp">;
def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64, simm7s8, "ldp">;
def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32, simm7s4, "ldp">;
def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64, simm7s8, "ldp">;
def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128, simm7s16, "ldp">;

def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64, simm7s4, "ldpsw">;


// Pair (no allocate)
defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32, simm7s4, "ldnp">;
defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64, simm7s8, "ldnp">;
defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128, simm7s16, "ldnp">;

//---
// (register offset)
//---

// Integer
defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;

// Floating-point
defm LDRB : Load8RO<0b00,   1, 0b01, FPR8,   "ldr", untyped, load>;
defm LDRH : Load16RO<0b01,  1, 0b01, FPR16,  "ldr", f16, load>;
defm LDRS : Load32RO<0b10,  1, 0b01, FPR32,  "ldr", f32, load>;
defm LDRD : Load64RO<0b11,  1, 0b01, FPR64,  "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128, "ldr", f128, load>;

// Load sign-extended half-word
defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;

// Load sign-extended byte
defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;

// Load sign-extended word
defm LDRSW  : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;

// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;

// For regular loads, we do not have any alignment requirement, so it is safe
// to map vector loads with interesting addressing modes directly onto these
// instructions.
// FIXME: We could do the same for bitconvert to floating point vectors.
multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
                              ValueType ScalTy, ValueType VecTy,
                              Instruction LOADW, Instruction LOADX,
                              SubRegIndex sub> {
  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
                           sub)>;

  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
                           sub)>;
}
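
// For illustration, the v8i8/extloadi8 instantiation below turns a byte load
// feeding scalar_to_vector into a single LDRBroW/LDRBroX that writes the bsub
// sub-register of an otherwise undefined vector register.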

let AddedComplexity = 10 in {
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;

defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro32, load,       i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       i32, v4i32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro32, load,       f32, v2f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       f32, v4f32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro64, load,       i64, v2i64, LDRDroW, LDRDroX, dsub>;

defm : ScalToVecROLoadPat<ro64, load,       f64, v2f64, LDRDroW, LDRDroX, dsub>;


def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
                                           ro_Wextend64:$extend))))),
           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;

def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
                                           ro_Xextend64:$extend))))),
           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}

1188// Match all load 64 bits width whose type is compatible with FPR64
1189multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
1190                        Instruction LOADW, Instruction LOADX> {
1191
1192  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
1193            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
1194
1195  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
1196            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
1197}
1198
1199let AddedComplexity = 10 in {
1200let Predicates = [IsLE] in {
1201  // We must do vector loads with LD1 in big-endian.
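  // (LDR treats the whole register as one integer, which reverses the
  // element order on big-endian targets; LD1 loads element by element and
  // preserves the lane numbering the vector types expect.)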
  defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v8i8,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
}

defm : VecROLoadPat<ro64, v1i64,  LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v1f64,  LDRDroW, LDRDroX>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads on big-endian targets.
  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10

// zextload -> i64
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                           sub_32)>;

  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                           sub_32)>;
}
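// A write to a W register implicitly zeroes bits [63:32] of the full X
// register, so the 32-bit load already produces the zero-extended value;
// SUBREG_TO_REG (i64 0) merely rewraps the result as a 64-bit value.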

let AddedComplexity = 10 in {
  defm : ExtLoadTo64ROPat<ro8,  zextloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  zextloadi1,  LDRBBroW, LDRBBroX>;

  // extload -> zextload
  defm : ExtLoadTo64ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;

  // extloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  extloadi1,   LDRBBroW, LDRBBroX>;
}

// extload/zextload -> i32
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // extload -> zextload
  defm : ExtLoadTo32ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo32ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo32ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
}

//---
// (unsigned immediate)
//---
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64, uimm12s8, "ldr",
                   [(set GPR64:$Rt,
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32, uimm12s4, "ldr",
                   [(set GPR32:$Rt,
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8, uimm12s1, "ldr",
                   [(set FPR8:$Rt,
                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16, uimm12s2, "ldr",
                   [(set (f16 FPR16:$Rt),
                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32, uimm12s4, "ldr",
                   [(set (f32 FPR32:$Rt),
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64, uimm12s8, "ldr",
                   [(set (f64 FPR64:$Rt),
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128, uimm12s16, "ldr",
                 [(set (f128 FPR128:$Rt),
                       (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;

// Regular loads impose no alignment requirement, so it is safe to map
// vector loads directly onto these interesting addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
def : Pat <(v8i8 (scalar_to_vector (i32
               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v16i8 (scalar_to_vector (i32
               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v4i16 (scalar_to_vector (i32
               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v8i16 (scalar_to_vector (i32
               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v2i32 (scalar_to_vector (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v4i32 (scalar_to_vector (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v1i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat <(v2i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                          (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;

// Match all 64-bit-wide loads whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads on big-endian targets.
  def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads on big-endian targets.
  def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128  (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;

defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
                    [(set GPR32:$Rt,
                          (zextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                     uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
                    [(set GPR32:$Rt,
                          (zextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                   uimm12s1:$offset)))]>;
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;

// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// extload -> zextload
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// load sign-extended half-word
defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
                     [(set GPR32:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;
defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
                     [(set GPR64:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;

// load sign-extended byte
defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
                     [(set GPR32:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;
defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
                     [(set GPR64:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;

// load sign-extended word
defm LDRSW  : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
                     [(set GPR64:$Rt,
                           (sextloadi32 (am_indexed32 GPR64sp:$Rn,
                                                      uimm12s4:$offset)))]>;

// load zero-extended word
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;

// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                        [(AArch64Prefetch imm:$Rt,
                                        (am_indexed64 GPR64sp:$Rn,
                                                      uimm12s8:$offset))]>;

def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;

//---
// (literal)
def LDRWl : LoadLiteral<0b00, 0, GPR32, "ldr">;
def LDRXl : LoadLiteral<0b01, 0, GPR64, "ldr">;
def LDRSl : LoadLiteral<0b00, 1, FPR32, "ldr">;
def LDRDl : LoadLiteral<0b01, 1, FPR64, "ldr">;
def LDRQl : LoadLiteral<0b10, 1, FPR128, "ldr">;
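// Literal loads are PC-relative, e.g. "ldr x0, .Lconst" (illustrative); the
// 19-bit scaled immediate gives a +/-1MB range.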

// load sign-extended word
def LDRSWl : LoadLiteral<0b10, 0, GPR64, "ldrsw">;

// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
//                   [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;

//---
// (unscaled immediate)
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64, "ldur",
                    [(set GPR64:$Rt,
                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32, "ldur",
                    [(set GPR32:$Rt,
                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8, "ldur",
                    [(set FPR8:$Rt,
                          (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16, "ldur",
                    [(set FPR16:$Rt,
                          (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32, "ldur",
                    [(set (f32 FPR32:$Rt),
                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64, "ldur",
                    [(set (f64 FPR64:$Rt),
                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128, "ldur",
                    [(set (f128 FPR128:$Rt),
                          (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;

defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
             [(set GPR32:$Rt,
                    (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
             [(set GPR32:$Rt,
                    (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// Match all 64-bit-wide loads whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;

// Match all 128-bit-wide loads whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}

// anyext -> zext
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
// unscaled zext
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;

//---
// LDR mnemonics fall back to LDUR for negative or unaligned offsets.

// Define new assembler match classes as we want to only match these when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
// associate a DiagnosticType either, as we want the diagnostic for the
// canonical form (the scaled operand) to take precedence.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}
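// For example (illustrative): "ldr x0, [x1, #8]" matches the scaled LDRXui
// form, while "ldr x0, [x1, #-8]" and "ldr x0, [x1, #3]" only match the
// simm9 fallback operands below and are encoded as LDUR.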

def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}

def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
               (LDURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

// load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
               [(set GPR32:$Rt,
                    (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
              [(set GPR64:$Rt,
                    (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended byte
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
                [(set GPR32:$Rt,
                      (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
                [(set GPR64:$Rt,
                      (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended word
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
              [(set GPR64:$Rt,
                    (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;

// Zero- and sign-extending aliases from the generic LDR* mnemonics to LDUR*.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
                (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
                (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
                (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
                  [(AArch64Prefetch imm:$Rt,
                                  (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;

//---
// (unscaled immediate, unprivileged)
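// The LDTR* forms perform their access with EL0 (unprivileged) permissions
// even when executed at a higher exception level.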
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;

defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// load sign-extended word
defm LDTRSW  : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;

//---
// (immediate pre-indexed)
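// Pre-indexed addressing updates the base register before the access,
// e.g. "ldr x0, [x1, #8]!" loads from x1+8 and writes x1+8 back to x1
// (register names illustrative).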
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64, "ldr">;
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8,  "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128, "ldr">;

// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64, "ldrsh">;

// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32, "ldrh">;

// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64, "ldrsw">;

//---
// (immediate post-indexed)
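// Post-indexed addressing updates the base register after the access,
// e.g. "ldr x0, [x1], #8" loads from x1 and then adds 8 to x1
// (register names illustrative).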
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64, "ldr">;
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8,  "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128, "ldr">;

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64, "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32, "ldrh">;

// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64, "ldrsw">;

//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64, simm7s8, "stp">;
defm STPS : StorePairOffset<0b00, 1, FPR32, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128, simm7s16, "stp">;

// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64, simm7s8, "stp">;
def STPSpre : StorePairPreIdx<0b00, 1, FPR32, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128, simm7s16, "stp">;

// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64, simm7s8, "stp">;
def STPSpost : StorePairPostIdx<0b00, 1, FPR32, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128, simm7s16, "stp">;

// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64, simm7s8, "stnp">;
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128, simm7s16, "stnp">;

//---
// (Register offset)

// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str",  i32, store>;
defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str",  i64, store>;

// Floating-point
defm STRB : Store8RO< 0b00,  1, 0b00, FPR8,   "str", untyped, store>;
defm STRH : Store16RO<0b01,  1, 0b00, FPR16,  "str", f16,     store>;
defm STRS : Store32RO<0b10,  1, 0b00, FPR32,  "str", f32,     store>;
defm STRD : Store64RO<0b11,  1, 0b00, FPR64,  "str", f64,     store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128, "str", f128,    store>;

multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // truncstore i64
  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
}

multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
// Match all 64-bit-wide stores whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors on big-endian targets.
  defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
}

defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;

// Match all 128-bit-wide stores whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors on big-endian targets.
  defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10

//---
// (unsigned immediate)
defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str",
                   [(store GPR64:$Rt,
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUI<0b10, 0, 0b00, GPR32, uimm12s4, "str",
                    [(store GPR32:$Rt,
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRB : StoreUI<0b00, 1, 0b00, FPR8, uimm12s1, "str",
                    [(store FPR8:$Rt,
                            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
defm STRH : StoreUI<0b01, 1, 0b00, FPR16, uimm12s2, "str",
                    [(store (f16 FPR16:$Rt),
                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRS : StoreUI<0b10, 1, 0b00, FPR32, uimm12s4, "str",
                    [(store (f32 FPR32:$Rt),
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRD : StoreUI<0b11, 1, 0b00, FPR64, uimm12s8, "str",
                    [(store (f64 FPR64:$Rt),
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128, uimm12s16, "str", []>;

defm STRHH : StoreUI<0b01, 0, 0b00, GPR32, uimm12s2, "strh",
                     [(truncstorei16 GPR32:$Rt,
                                     (am_indexed16 GPR64sp:$Rn,
                                                   uimm12s2:$offset))]>;
defm STRBB : StoreUI<0b00, 0, 0b00, GPR32, uimm12s1,  "strb",
                     [(truncstorei8 GPR32:$Rt,
                                    (am_indexed8 GPR64sp:$Rn,
                                                 uimm12s1:$offset))]>;
// Match all 64-bit-wide stores whose type is compatible with FPR64.
let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors on big-endian targets.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(store (v1f64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;

// Match all 128-bit-wide stores whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors on big-endian targets.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(store (f128  FPR128:$Rt),
                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;

// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
  (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
                         (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
  (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
  (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;

} // AddedComplexity = 10

//---
// (unscaled immediate)
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64, "stur",
                         [(store GPR64:$Rt,
                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32, "stur",
                         [(store GPR32:$Rt,
                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8, "stur",
                         [(store FPR8:$Rt,
                                 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16, "stur",
                         [(store (f16 FPR16:$Rt),
                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32, "stur",
                         [(store (f32 FPR32:$Rt),
                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64, "stur",
                         [(store (f64 FPR64:$Rt),
                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128, "stur",
                         [(store (f128 FPR128:$Rt),
                                 (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32, "sturh",
                         [(truncstorei16 GPR32:$Rt,
                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32, "sturb",
                         [(truncstorei8 GPR32:$Rt,
                                  (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;

// Match all 64-bit-wide stores whose type is compatible with FPR64.
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors on big-endian targets.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;

// Match all 128-bit-wide stores whose type is compatible with FPR128.
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors on big-endian targets.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

// unscaled i64 truncating stores
def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
  (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
  (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
  (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;

//---
// STR mnemonics fall back to STUR for negative or unaligned offsets.
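// For example (illustrative), "str x0, [x1, #-8]" cannot be encoded as the
// scaled STRXui form, so it is accepted here and encoded as STUR.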
2018def : InstAlias<"str $Rt, [$Rn, $offset]",
2019                (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
2020def : InstAlias<"str $Rt, [$Rn, $offset]",
2021                (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
2022def : InstAlias<"str $Rt, [$Rn, $offset]",
2023                (STURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
2024def : InstAlias<"str $Rt, [$Rn, $offset]",
2025                (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
2026def : InstAlias<"str $Rt, [$Rn, $offset]",
2027                (STURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
2028def : InstAlias<"str $Rt, [$Rn, $offset]",
2029                (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
2030def : InstAlias<"str $Rt, [$Rn, $offset]",
2031                (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
2032
2033def : InstAlias<"strb $Rt, [$Rn, $offset]",
2034                (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
2035def : InstAlias<"strh $Rt, [$Rn, $offset]",
2036                (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
2037
2038//---
2039// (unscaled immediate, unprivileged)
2040defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
2041defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;
2042
2043defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
2044defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
2045
//---
// (immediate pre-indexed)
def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32, "str",  pre_store, i32>;
def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64, "str",  pre_store, i64>;
def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8,  "str",  pre_store, untyped>;
def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16, "str",  pre_store, f16>;
def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32, "str",  pre_store, f32>;
def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64, "str",  pre_store, f64>;
def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128, "str", pre_store, f128>;

def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32, "strb", pre_truncsti8,  i32>;
def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32, "strh", pre_truncsti16, i32>;

// truncstore i64
def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
           simm9:$off)>;
def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;
def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;

def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//---
// (immediate post-indexed)
def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32,  "str", post_store, i32>;
def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64,  "str", post_store, i64>;
def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8,   "str", post_store, untyped>;
def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16,  "str", post_store, f16>;
def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32,  "str", post_store, f32>;
def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64,  "str", post_store, f64>;
def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128, "str", post_store, f128>;

def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32, "strb", post_truncsti8, i32>;
def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32, "strh", post_truncsti16, i32>;

// truncstore i64
def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
            simm9:$off)>;
def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
             simm9:$off)>;
def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
  (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
             simm9:$off)>;

def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;

//===----------------------------------------------------------------------===//
// Load/store exclusive instructions.
//===----------------------------------------------------------------------===//
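// A typical use is an atomic read-modify-write loop (illustrative,
// register choices are arbitrary):
//   loop: ldaxr w8, [x0]        // load-acquire exclusive
//         add   w8, w8, #1
//         stlxr w9, w8, [x0]    // store-release exclusive, w9 = status
//         cbnz  w9, loop        // status is 0 on success, 1 if lost the
//                               // exclusive monitor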

def LDARW  : LoadAcquire   <0b10, 1, 1, 0, 1, GPR32, "ldar">;
def LDARX  : LoadAcquire   <0b11, 1, 1, 0, 1, GPR64, "ldar">;
def LDARB  : LoadAcquire   <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
def LDARH  : LoadAcquire   <0b01, 1, 1, 0, 1, GPR32, "ldarh">;

def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;

def LDXRW  : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
def LDXRX  : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
def LDXRB  : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
def LDXRH  : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;

def STLRW  : StoreRelease  <0b10, 1, 0, 0, 1, GPR32, "stlr">;
def STLRX  : StoreRelease  <0b11, 1, 0, 0, 1, GPR64, "stlr">;
def STLRB  : StoreRelease  <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
def STLRH  : StoreRelease  <0b01, 1, 0, 0, 1, GPR32, "stlrh">;

def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;

def STXRW  : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
def STXRX  : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
def STXRB  : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
def STXRH  : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;

def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;

def LDXPW  : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
def LDXPX  : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;

def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;

def STXPW  : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX  : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;

//===----------------------------------------------------------------------===//
// Scaled and unscaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//
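// The scaled forms convert to fixed-point: e.g. "fcvtzs w0, s0, #16"
// (illustrative) multiplies by 2^16 before converting, producing a Q16.16
// fixed-point result.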

defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
let isCodeGenOnly = 1 in {
defm FCVTZS_Int : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>;
defm FCVTZU_Int : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>;
defm FCVTZS_Int : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>;
defm FCVTZU_Int : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>;
}

//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//

defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>;
defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;

//===----------------------------------------------------------------------===//
// Unscaled integer to floating point conversion instruction.
//===----------------------------------------------------------------------===//

defm FMOV : UnscaledConversion<"fmov">;

def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>;
def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
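// These materialize +0.0 with an integer-to-FP move from the zero register
// (e.g. "fmov s0, wzr"); the NoZCZ guard keeps the pattern off subtargets
// with zero-cycle zeroing, which presumably prefer their own zeroing idiom.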
2230
2231//===----------------------------------------------------------------------===//
2232// Floating point conversion instruction.
2233//===----------------------------------------------------------------------===//
2234
2235defm FCVT : FPConversion<"fcvt">;
2236
2237def : Pat<(f32_to_f16 FPR32:$Rn),
2238          (i32 (COPY_TO_REGCLASS
2239                   (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
2240                   GPR32))>;
2241
2242def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn),
2243                          [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>;
2244
// When converting from f16 coming directly from a load, make sure we
// load into the FPR16 registers rather than going through the GPRs.
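// For example (illustrative register choices), a load-then-convert should
// select to something like:
//   ldr  h0, [x0]      // load straight into an FPR16
//   fcvt s0, h0        // convert on the FP unit
// rather than an integer ldrh followed by a transfer into the vector regs.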
//   f16->f32
def : Pat<(f32 (f16_to_f32 (i32
                (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
                                    ro_Wextend16:$extend))))),
          (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
def : Pat<(f32 (f16_to_f32 (i32
                (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
                                    ro_Xextend16:$extend))))),
          (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
def : Pat <(f32 (f16_to_f32 (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
def : Pat <(f32 (f16_to_f32 (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;

//   f16->f64
def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
                (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
                                    ro_Wextend16:$extend))))))),
          (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
                (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
                                    ro_Xextend16:$extend))))))),
          (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
           (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
           (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;

// When converting to f16 going directly to a store, make sure we use the
// appropriate direct conversion instructions and store via the FPR16
// registers rather than going through the GPRs.
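// For example (illustrative), storing a truncated f32 should select to:
//   fcvt h0, s0
//   str  h0, [x0]
// instead of converting, transferring to a GPR, and using an integer strh.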
let AddedComplexity = 10 in {
// f32->f16
def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
                          (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
                                         ro_Wextend16:$extend)),
           (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm,
                                         ro_Wextend16:$extend)>;
def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
                          (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
                                         ro_Xextend16:$extend)),
           (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm,
                                         ro_Xextend16:$extend)>;
def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
              (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
           (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
              (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
           (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>;
// f64->f16
def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
                          (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
                                         ro_Wextend16:$extend)),
           (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm,
                                         ro_Wextend16:$extend)>;
def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
                          (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
                                         ro_Xextend16:$extend)),
           (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm,
                                         ro_Xextend16:$extend)>;
def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
              (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
           (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
              (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
           (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>;
}


//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//

defm FABS   : SingleOperandFPData<0b0001, "fabs", fabs>;
defm FMOV   : SingleOperandFPData<0b0000, "fmov">;
defm FNEG   : SingleOperandFPData<0b0010, "fneg", fneg>;
defm FRINTA : SingleOperandFPData<0b1100, "frinta", frnd>;
defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>;
defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;

def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))),
          (FRINTNDr FPR64:$Rn)>;

// FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior
// in the C spec. Setting hasSideEffects ensures it is not DCE'd.
// <rdar://problem/13715968>
// TODO: We should really model the FPSR flags correctly. This is really ugly.
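// (FRINTX implements rint(): it rounds using the current FPCR rounding mode
// and raises the Inexact exception when the result differs from its input,
// which is exactly the state FENV_ACCESS ON is allowed to observe.)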
let hasSideEffects = 1 in {
defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
}

defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;

let SchedRW = [WriteFDiv] in {
defm FSQRT  : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
}

//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
//===----------------------------------------------------------------------===//

defm FADD   : TwoOperandFPData<0b0010, "fadd", fadd>;
let SchedRW = [WriteFDiv] in {
defm FDIV   : TwoOperandFPData<0b0001, "fdiv", fdiv>;
}
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_aarch64_neon_fmaxnm>;
defm FMAX   : TwoOperandFPData<0b0100, "fmax", AArch64fmax>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_aarch64_neon_fminnm>;
defm FMIN   : TwoOperandFPData<0b0101, "fmin", AArch64fmin>;
let SchedRW = [WriteFMul] in {
defm FMUL   : TwoOperandFPData<0b0000, "fmul", fmul>;
defm FNMUL  : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
}
defm FSUB   : TwoOperandFPData<0b0011, "fsub", fsub>;

def : Pat<(v1f64 (AArch64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (AArch64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (int_aarch64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (int_aarch64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;

//===----------------------------------------------------------------------===//
// Floating point three operand instructions.
//===----------------------------------------------------------------------===//

defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", fma>;
defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
     TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
     TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
     TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.

// N.b. FMSUB etc have the accumulator at the *end* of (ins), unlike
// the NEON variant.
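// As a worked sanity check: FMSUB computes Ra - Rn*Rm, which is exactly
// (fma (fneg Rn), Rm, Ra), i.e. the patterns immediately below.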
def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

// We handled -(a + b*c) for FNMADD above; now it's time for "(-a) + (-b)*c"
// and "(-a) + b*(-c)".
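// Algebraically, (-a) + (-b)*c == (-a) + b*(-c) == -(a + b*c), which matches
// FNMADD's -Ra - Rn*Rm.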
def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//

defm FCMPE : FPComparison<1, "fcmpe">;
defm FCMP  : FPComparison<0, "fcmp", AArch64fcmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//

defm FCCMPE : FPCondComparison<1, "fccmpe">;
defm FCCMP  : FPCondComparison<0, "fccmp">;

//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
//===----------------------------------------------------------------------===//

defm FCSEL : FPCondSelect<"fcsel">;

// CSEL instructions providing f128 types need to be handled by a
// pseudo-instruction since the eventual code will need to introduce basic
// blocks and control flow.
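// Conceptually the expansion is "Rd = cond ? Rn : Rm" lowered as a
// conditional branch plus two copies, since there is no 128-bit FCSEL.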
def F128CSEL : Pseudo<(outs FPR128:$Rd),
                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
                      [(set (f128 FPR128:$Rd),
                            (AArch64csel FPR128:$Rn, FPR128:$Rm,
                                       (i32 imm:$cond), NZCV))]> {
  let Uses = [NZCV];
  let usesCustomInserter = 1;
}


//===----------------------------------------------------------------------===//
// Floating point immediate move.
//===----------------------------------------------------------------------===//

let isReMaterializable = 1 in {
defm FMOV : FPMoveImmediate<"fmov">;
}

//===----------------------------------------------------------------------===//
// Advanced SIMD two vector instructions.
//===----------------------------------------------------------------------===//

defm ABS    : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>;
defm CLS    : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
defm CLZ    : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
defm CMEQ   : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
defm CNT    : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
defm FABS   : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;

defm FCMEQ  : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
          (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
                                                              (i64 4)))),
          (FCVTLv8i16 V128:$Rn)>;
def : Pat<(v2f64 (fextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
                                                    (i64 2))))),
          (FCVTLv4i32 V128:$Rn)>;

defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
          (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd,
                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))),
          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
                                        int_aarch64_neon_fcvtxn>;
defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;
let isCodeGenOnly = 1 in {
defm FCVTZS_Int : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs",
                                       int_aarch64_neon_fcvtzs>;
defm FCVTZU_Int : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu",
                                       int_aarch64_neon_fcvtzu>;
}
defm FNEG   : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>;
defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", frnd>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
defm FSQRT  : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
defm NEG    : SIMDTwoVectorBHSD<1, 0b01011, "neg",
                               UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm NOT    : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
// Aliases for MVN -> NOT.
def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
                (NOTv8i8 V64:$Vd, V64:$Vn)>;
def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
                (NOTv16i8 V128:$Vd, V128:$Vn)>;

def : Pat<(AArch64neg (v8i8  V64:$Rn)),  (NEGv8i8  V64:$Rn)>;
def : Pat<(AArch64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>;
def : Pat<(AArch64neg (v4i16 V64:$Rn)),  (NEGv4i16 V64:$Rn)>;
def : Pat<(AArch64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>;
def : Pat<(AArch64neg (v2i32 V64:$Rn)),  (NEGv2i32 V64:$Rn)>;
def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>;
def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>;

def : Pat<(AArch64not (v8i8 V64:$Rn)),   (NOTv8i8  V64:$Rn)>;
def : Pat<(AArch64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(AArch64not (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(AArch64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(AArch64not (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(AArch64not (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(AArch64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(AArch64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;

def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;

defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>;
defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
       BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >;
defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>;
defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>;
defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
       BinOpFrag<(add node:$LHS, (int_aarch64_neon_uaddlp node:$RHS))> >;
defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp",
                    int_aarch64_neon_uaddlp>;
defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>;
defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;

def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;

// Patterns for vector long shift (by element width). These need to match all
// three of zext, sext and anyext so it's easier to pull the patterns out of the
// definition.
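// For example (illustrative), (AArch64vshl (zext (v8i8 ...)), 8) selects to
// "shll v0.8h, v1.8b, #8": the shift amount equals the source element width,
// so the extension flavour cannot affect the result.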
multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
            (SHLLv8i8 V64:$Rn)>;
  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
            (SHLLv16i8 V128:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
            (SHLLv4i16 V64:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
            (SHLLv8i16 V128:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
            (SHLLv2i32 V64:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
            (SHLLv4i32 V128:$Rn)>;
}

defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//

defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>;
defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
defm FABD    : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_aarch64_neon_fabd>;
defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>;
defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>;
defm FADDP   : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>;
defm FADD    : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>;
defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>;
defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
defm FDIV    : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_aarch64_neon_fmaxnm>;
defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>;
defm FMAX    : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", AArch64fmax>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_aarch64_neon_fminnm>;
defm FMINP   : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>;
defm FMIN    : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", AArch64fmin>;

// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
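// For example, "fmla v0.4s, v1.4s, v2.4s" computes v0 + v1*v2, so the fma's
// addend operand ends up in the tied destination register.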
defm FMLA     : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla",
            TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
defm FMLS     : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls",
            TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.
def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)),
          (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
          (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>;

def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
          (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;

defm FMULX    : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>;
defm FMUL     : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>;
defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>;
defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>;
defm FSUB     : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>;
defm MLA      : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla",
                      TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >;
defm MLS      : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
                      TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >;
defm MUL      : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL     : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA     : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
      TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
defm SABD     : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
defm SHADD    : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
defm SHSUB    : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP    : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX     : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_aarch64_neon_smax>;
defm SMINP    : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
defm SMIN     : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_aarch64_neon_smin>;
defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>;
defm SRSHL    : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA     : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
      TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
defm UHADD    : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
defm UHSUB    : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP    : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX     : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_aarch64_neon_umax>;
defm UMINP    : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
defm UMIN     : SIMDThreeSameVectorBHS<1,0b01101,"umin", int_aarch64_neon_umin>;
defm UQADD    : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
defm URSHL    : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;

defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">;
defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl",
    TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;

def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(AArch64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;

def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;

def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;

def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmls.8b\t$dst, $src1, $src2}",
                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmls.16b\t$dst, $src1, $src2}",
                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmls.4h\t$dst, $src1, $src2}",
                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmls.8h\t$dst, $src1, $src2}",
                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmls.2s\t$dst, $src1, $src2}",
                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmls.4s\t$dst, $src1, $src2}",
                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmls.2d\t$dst, $src1, $src2}",
                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlo.8b\t$dst, $src1, $src2}",
                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlo.16b\t$dst, $src1, $src2}",
                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlo.4h\t$dst, $src1, $src2}",
                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlo.8h\t$dst, $src1, $src2}",
                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlo.2s\t$dst, $src1, $src2}",
                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlo.4s\t$dst, $src1, $src2}",
                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlo.2d\t$dst, $src1, $src2}",
                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmle.8b\t$dst, $src1, $src2}",
                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmle.16b\t$dst, $src1, $src2}",
                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmle.4h\t$dst, $src1, $src2}",
                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmle.8h\t$dst, $src1, $src2}",
                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmle.2s\t$dst, $src1, $src2}",
                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmle.4s\t$dst, $src1, $src2}",
                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmle.2d\t$dst, $src1, $src2}",
                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlt.8b\t$dst, $src1, $src2}",
                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlt.16b\t$dst, $src1, $src2}",
                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlt.4h\t$dst, $src1, $src2}",
                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlt.8h\t$dst, $src1, $src2}",
                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlt.2s\t$dst, $src1, $src2}",
                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlt.4s\t$dst, $src1, $src2}",
                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlt.2d\t$dst, $src1, $src2}",
                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmle.2s\t$dst, $src1, $src2}",
                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmle.4s\t$dst, $src1, $src2}",
                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmle.2d\t$dst, $src1, $src2}",
                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmlt.2s\t$dst, $src1, $src2}",
                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmlt.4s\t$dst, $src1, $src2}",
                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmlt.2d\t$dst, $src1, $src2}",
                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
                "|facle.2s\t$dst, $src1, $src2}",
                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
                "|facle.4s\t$dst, $src1, $src2}",
                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
                "|facle.2d\t$dst, $src1, $src2}",
                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
                "|faclt.2s\t$dst, $src1, $src2}",
                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
                "|faclt.4s\t$dst, $src1, $src2}",
                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
                "|faclt.2d\t$dst, $src1, $src2}",
                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions.
//===----------------------------------------------------------------------===//

defm ADD      : SIMDThreeScalarD<0, 0b10000, "add", add>;
defm CMEQ     : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE     : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT     : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI     : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS     : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST    : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
defm FABD     : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_aarch64_sisd_fabd>;
def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge",
                                     int_aarch64_neon_facge>;
defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt",
                                     int_aarch64_neon_facgt>;
defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>;
defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
defm FMULX    : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_aarch64_neon_fmulx>;
defm FRECPS   : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_aarch64_neon_frecps>;
defm FRSQRTS  : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_aarch64_neon_frsqrts>;
defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeScalarHS<  0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS<  1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
defm SRSHL    : SIMDThreeScalarD<   0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeScalarD<   0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeScalarD<   1, 0b10000, "sub", sub>;
defm UQADD    : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL    : SIMDThreeScalarD<   1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeScalarD<   1, 0b01000, "ushl", int_aarch64_neon_ushl>;

def : InstAlias<"cmls $dst, $src1, $src2",
                (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmle $dst, $src1, $src2",
                (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlo $dst, $src1, $src2",
                (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlt $dst, $src1, $src2",
                (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions (mixed operands).
//===----------------------------------------------------------------------===//
defm SQDMULL  : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
                                       int_aarch64_neon_sqdmulls_scalar>;
defm SQDMLAL  : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL  : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;

def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                        (i32 FPR32:$Rm))))),
          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                        (i32 FPR32:$Rm))))),
          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;

//===----------------------------------------------------------------------===//
// Advanced SIMD two scalar instructions.
//===----------------------------------------------------------------------===//

defm ABS    : SIMDTwoScalarD<    0, 0b01011, "abs", int_aarch64_neon_abs>;
defm CMEQ   : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
defm FCMEQ  : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDTwoScalarSD<   0, 0, 0b11100, "fcvtas">;
defm FCVTAU : SIMDTwoScalarSD<   1, 0, 0b11100, "fcvtau">;
defm FCVTMS : SIMDTwoScalarSD<   0, 0, 0b11011, "fcvtms">;
defm FCVTMU : SIMDTwoScalarSD<   1, 0, 0b11011, "fcvtmu">;
defm FCVTNS : SIMDTwoScalarSD<   0, 0, 0b11010, "fcvtns">;
defm FCVTNU : SIMDTwoScalarSD<   1, 0, 0b11010, "fcvtnu">;
defm FCVTPS : SIMDTwoScalarSD<   0, 1, 0b11010, "fcvtps">;
defm FCVTPU : SIMDTwoScalarSD<   1, 1, 0b11010, "fcvtpu">;
def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDTwoScalarSD<   0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDTwoScalarSD<   1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDTwoScalarSD<   0, 1, 0b11101, "frecpe">;
defm FRECPX : SIMDTwoScalarSD<   0, 1, 0b11111, "frecpx">;
defm FRSQRTE : SIMDTwoScalarSD<  1, 1, 0b11101, "frsqrte">;
defm NEG    : SIMDTwoScalarD<    1, 0b01011, "neg",
                                 UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF  : SIMDTwoScalarCVTSD<   0, 0, 0b11101, "scvtf", AArch64sitof>;
defm SQABS  : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
                                     int_aarch64_neon_suqadd>;
defm UCVTF  : SIMDTwoScalarCVTSD<   1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
                                    int_aarch64_neon_usqadd>;

def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>;

def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
          (FCVTASv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
          (FCVTAUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
          (FCVTMSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
          (FCVTMUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
          (FCVTNSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
          (FCVTNUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
          (FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
          (FCVTPUv1i64 FPR64:$Rn)>;

def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
          (FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
          (FRECPXv1i64 FPR64:$Rn)>;

def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;

// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8- and 16-bit integers to float.
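// For example (illustrative), "(float)*(uint8_t *)p" can be selected as:
//   ldr   b0, [x0]     // byte load zero-extends into the FPR
//   ucvtf s0, s0       // AdvSIMD scalar convert, no GPR involved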
// 8-bits -> float.
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
                             SDPatternOperator loadop, Instruction UCVTF,
                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
                             SubRegIndex sub> {
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                                 sub))>;

  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                                 sub))>;
}

defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f32 (uint_to_fp (i32
                     (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> float.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bit conversions are handled in a target-specific DAG combine:
// performIntToFpCombine.
// Converting a 64-bit integer to a 32-bit floating point value is not
// possible with UCVTF on floating point registers (both source and
// destination must have the same size).

// Here are the patterns for 8-, 16-, 32-, and 64-bit integers to double.
// 8-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f64 (uint_to_fp (i32
                    (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, load,
                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bit -> double conversions are handled in the target-specific DAG
// combine: performIntToFpCombine.

//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
//===----------------------------------------------------------------------===//

defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
                                             int_aarch64_neon_sabd>;
defm SABDL   : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
                                          int_aarch64_neon_sabd>;
defm SADDL   : SIMDLongThreeVectorBHS<   0, 0b0000, "saddl",
            BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW   : SIMDWideThreeVectorBHS<   0, 0b0001, "saddw",
                 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
defm SMLAL   : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMLSL   : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMULL   : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>;
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
                                               int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
                                               int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
                                     int_aarch64_neon_sqdmull>;
defm SSUBL   : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
                 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW   : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
                 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL   : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
                                              int_aarch64_neon_uabd>;
defm UABDL   : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
                                          int_aarch64_neon_uabd>;
defm UADDL   : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
                 BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
defm UADDW   : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
                 BinOpFrag<(add node:$LHS, (zext node:$RHS))>>;
defm UMLAL   : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMLSL   : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMULL   : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>;
defm USUBL   : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
                 BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>;
defm USUBW   : SIMDWideThreeVectorBHS<   1, 0b0011, "usubw",
                 BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>;

// Patterns for 64-bit pmull
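// (PMULLv2i64 is "pmull2": it reads the high 64-bit halves of its sources,
// which is why the lane-1 extracts below map onto it.)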
3178def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
3179          (PMULLv1i64 V64:$Rn, V64:$Rm)>;
3180def : Pat<(int_aarch64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)),
3181                                  (vector_extract (v2i64 V128:$Rm), (i64 1))),
3182          (PMULLv2i64 V128:$Rn, V128:$Rm)>;
3183
3184// CodeGen patterns for addhn and subhn instructions, which can actually be
3185// written in LLVM IR without too much difficulty.
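//
// For illustration (a sketch, with invented value names): an addhn of two
// v8i16 values is roughly
//   %sum   = add <8 x i16> %a, %b
//   %shift = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8,
//                                  i16 8, i16 8, i16 8, i16 8>
//   %res   = trunc <8 x i16> %shift to <8 x i8>
// which is exactly the (trunc (AArch64vlshr (add ...))) shape matched below.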

// ADDHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                           (i32 16))))),
          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                           (i32 32))))),
          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                    (i32 8))))),
          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                    (i32 16))))),
          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                    (i32 32))))),
          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

// SUBHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                           (i32 16))))),
          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                           (i32 32))))),
          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                    (i32 8))))),
          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                    (i32 16))))),
          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                    (i32 32))))),
          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;

def : Pat<(v4i16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
          (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
def : Pat<(v8i16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
def : Pat<(v2i32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
          (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
def : Pat<(v2f32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
          (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
def : Pat<(v4i32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
def : Pat<(v4f32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;

// We use EXT to handle extract_subvector to copy the upper 64 bits of a
// 128-bit vector.
def : Pat<(v8i8  (extract_subvector V128:$Rn, (i64 8))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
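//
// For illustration (a sketch, with invented value names): extracting the high
// half of a v16i8 is
//   %hi = shufflevector <16 x i8> %v, <16 x i8> undef,
//                       <8 x i32> <i32 8, i32 9, i32 10, i32 11,
//                                  i32 12, i32 13, i32 14, i32 15>
// and "ext vD.16b, vN.16b, vN.16b, #8" rotates those bytes into the low half,
// which the dsub extraction then reads out.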


//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
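// As a reminder of the lane semantics (architectural background, not from
// this file): with Vn.4h = [n0,n1,n2,n3] and Vm.4h = [m0,m1,m2,m3],
//   zip1 -> [n0,m0,n1,m1]   zip2 -> [n2,m2,n3,m3]
//   uzp1 -> [n0,n2,m0,m2]   uzp2 -> [n1,n3,m1,m3]
//   trn1 -> [n0,m0,n2,m2]   trn2 -> [n1,m1,n3,m3]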

//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------

defm TBL : SIMDTableLookup<    0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;

def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBLv16i8One V128:$Ri, V128:$Rn)>;

def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
                  (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                   (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
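//
// N.b. (architectural background, not from this file): for TBL an
// out-of-range index byte produces 0 in the result, while TBX leaves the
// corresponding destination byte unchanged, which is why the TBX patterns
// above are tied to $Rd.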


//----------------------------------------------------------------------------
// AdvSIMD scalar CPY instruction
//----------------------------------------------------------------------------

defm CPY : SIMDScalarCPY<"cpy">;

//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
//----------------------------------------------------------------------------

defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
defm FADDP   : SIMDPairwiseScalarSD<1, 0, 0b01101, "faddp">;
defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">;
defm FMAXP   : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">;
defm FMINP   : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">;
def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))),
          (ADDPv2i64p V128:$Rn)>;
def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))),
          (ADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))),
          (FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))),
          (FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))),
          (FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))),
          (FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))),
          (FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))),
          (FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))),
          (FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
          (FMINPv2i64p V128:$Rn)>;
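//
// For illustration (a sketch): the v4f32 faddv mapping above expands to
// "faddp vT.4s, vN.4s, vN.4s" followed by "faddp sD, vT.2s", computing
// ((s0+s1) + (s2+s3)) with nothing but pairwise adds.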

//----------------------------------------------------------------------------
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------

def DUPv8i8gpr  : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>;

def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;

def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
            (i64 0)))>;

def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;

// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
def VecIndex_x2 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(2 * N->getZExtValue(), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(4 * N->getZExtValue(), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 * N->getZExtValue(), MVT::i64);
}]>;
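
// Worked example (not from the source): duplicating the truncation of lane 3
// of a v8i16 into a v16i8 reuses DUPv16i8lane with the index rescaled by
// VecIndex_x2 to byte lane 2*3 = 6, the low byte of halfword 3 on
// little-endian.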

multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
                            ValueType Src128VT, ValueType ScalVT,
                            Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
                                                     imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
                                                     imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTruncPats<v8i8,   v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,   v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16,  v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

defm : DUPWithTruncPats<v16i8,  v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8,  v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16,  v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;

multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
                               SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn),
                                                         imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn),
                                                         imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,   VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane,  VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane,  VecIndex_x2>;

defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;

// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;

def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;

// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 are legal types
// for AArch64. Match these patterns here since UMOV already zeroes out the high
// bits of the destination register.
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
               (i32 0xff)),
          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
               (i32 0xffff)),
          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;
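//
// For illustration (a sketch, with invented value names): after legalization
//   %elt = extractelement <8 x i16> %v, i32 5
//   %ext = zext i16 %elt to i32
// appears as (and (vector_extract ...), 0xffff), which the patterns above
// fold into a single UMOV.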

defm INS : SIMDIns;

def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
            (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                                  (i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
            (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                  (i32 FPR32:$Rn), ssub))>;
def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
            (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                  (i64 FPR64:$Rn), dsub))>;

def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;

def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
              (i64 0)),
            dsub)>;
def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (INSvi32lane
            V128:$Rn, VectorIndexS:$imm,
            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
            (i64 0))>;
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
            (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
          (INSvi64lane
            V128:$Rn, VectorIndexD:$imm,
            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
            (i64 0))>;
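//
// For illustration (a sketch): the v4f32 pattern above turns
//   %r = insertelement <4 x float> %vec, float %s, i32 2
// into a single "ins vD.s[2], vS.s[0]" once %s has been placed in an FPR32,
// i.e. in the ssub subregister of a V-register.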

// Copy an element at a constant index in one vector into a constant-indexed
// element of another.
// FIXME refactor to a shared class/dev parameterized on vector type, vector
// index type and INS extension
def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
                   (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
                   VectorIndexB:$idx2)),
          (v16i8 (INSvi8lane
                   V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
          )>;
def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
                   (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
                   VectorIndexH:$idx2)),
          (v8i16 (INSvi16lane
                   V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
          )>;
def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
                   (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
                   VectorIndexS:$idx2)),
          (v4i32 (INSvi32lane
                   V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
          )>;
def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
                   (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
                   VectorIndexD:$idx2)),
          (v2i64 (INSvi64lane
                   V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
          )>;

multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
                                ValueType VTScal, Instruction INS> {
  def : Pat<(VT128 (vector_insert V128:$src,
                        (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;

  def : Pat<(VT128 (vector_insert V128:$src,
                        (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (INS V128:$src, imm:$Immd,
                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                        (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
                                 imm:$Immd, V128:$Rn, imm:$Immn),
                            dsub)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                        (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                        imm:$Immd)),
            (EXTRACT_SUBREG
                (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
                     (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
                dsub)>;
}

defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
defm : Neon_INS_elt_pattern<v16i8, v8i8,  i32, INSvi8lane>;
defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;


// Floating point vector extractions are codegen'd as a sequence of
// subregister extractions, possibly fed by an INS if the lane number is
// anything other than zero.
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
          (f64 (EXTRACT_SUBREG
            (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0,
                         V128:$Rn, VectorIndexD:$idx),
            dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
          (f32 (EXTRACT_SUBREG
            (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0,
                         V128:$Rn, VectorIndexS:$idx),
            ssub))>;
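//
// For illustration (a sketch): extracting lane 1 of a v2f64 becomes roughly
// "ins vT.d[0], vN.d[1]" followed by reading dT as a plain FPR64; lane 0
// needs no INS at all, just the dsub subregister copy.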

// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which might as well
// be INS.
class ConcatPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;

def : ConcatPat<v2i64, v1i64>;
def : ConcatPat<v2f64, v1f64>;
def : ConcatPat<v4i32, v2i32>;
def : ConcatPat<v4f32, v2f32>;
def : ConcatPat<v8i16, v4i16>;
def : ConcatPat<v16i8, v8i8>;
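//
// For illustration (a sketch, with invented value names): concatenating two
// v2f32 halves,
//   %r = shufflevector <2 x float> %lo, <2 x float> %hi,
//                      <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// becomes a single "ins vLo.d[1], vHi.d[0]" once both halves live in
// V-registers.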

// If the high lanes are undef, though, we can just ignore them:
class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;

def : ConcatUndefPat<v2i64, v1i64>;
def : ConcatUndefPat<v2f64, v1f64>;
def : ConcatUndefPat<v4i32, v2i32>;
def : ConcatUndefPat<v4f32, v2f32>;
def : ConcatUndefPat<v8i16, v4i16>;
def : ConcatUndefPat<v16i8, v8i8>;

//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
//----------------------------------------------------------------------------

defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
defm FMAXV   : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV   : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
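
// For illustration (a sketch): vaddv_s16 reaches here as
//   %r = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %v)
// and maps to "addv h0, v0.4h"; the multiclasses below add the SMOV/UMOV or
// subregister copy needed to move the reduced lane back into a GPR.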

multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> {
// If there is a sign extension after this intrinsic, consume it, as SMOV
// already performed it.
  def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)),
        (i32 (SMOVvi8to32
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
          (i64 0)))>;
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
        (i32 (SMOVvi8to32
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
          (i64 0)))>;
// If there is a sign extension after this intrinsic, consume it, as SMOV
// already performed it.
def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)),
        (i32 (SMOVvi8to32
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
          (i64 0)))>;
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
        (i32 (SMOVvi8to32
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
          (i64 0)))>;
// If there is a sign extension after this intrinsic, consume it, as SMOV
// already performed it.
def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)),
          (i32 (SMOVvi16to32
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
           (i64 0)))>;
def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
          (i32 (SMOVvi16to32
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
           (i64 0)))>;
// If there is a sign extension after this intrinsic, consume it, as SMOV
// already performed it.
def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)),
        (i32 (SMOVvi16to32
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
          (i64 0)))>;
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
        (i32 (SMOVvi16to32
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
          (i64 0)))>;

def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
          ssub))>;
}

multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> {
// If there is a masking operation keeping only what was actually generated,
// consume it.
  def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
          ssub))>;
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
          ssub))>;
// If there is a masking operation keeping only what was actually generated,
// consume it.
def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
          ssub))>;
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
          ssub))>;

// If there is a masking operation keeping only what was actually generated,
// consume it.
def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            ssub))>;
def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            ssub))>;
// If there is a masking operation keeping only what was actually generated,
// consume it.
def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
          ssub))>;
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
          ssub))>;

def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
          ssub))>;

}

multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
        (i32 (SMOVvi16to32
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
          (i64 0)))>;
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
        (i32 (SMOVvi16to32
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
          (i64 0)))>;

def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
           ssub))>;
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
          ssub))>;

def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
        (i64 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
          dsub))>;
}

multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
                                                Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
          ssub))>;
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
          ssub))>;

def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
            ssub))>;
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
          ssub))>;

def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
        (i64 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
          dsub))>;
}

defm : SIMDAcrossLanesSignedIntrinsic<"ADDV",  int_aarch64_neon_saddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
          (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV",  int_aarch64_neon_uaddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
          (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>;
def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
           (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>;
def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))),
           (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>;
def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))),
           (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>;
def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
           (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;

defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;

// The vaddlv_s32 intrinsic gets mapped to SADDLP.
def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (SADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;
// The vaddlv_u32 intrinsic gets mapped to UADDLP.
def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (UADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;

//------------------------------------------------------------------------------
// AdvSIMD modified immediate instructions
//------------------------------------------------------------------------------

// AdvSIMD BIC
defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
// AdvSIMD ORR
defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;

def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

// AdvSIMD FMOV
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8,
                                              "fmov", ".2d",
                       [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64,  fpimm8,
                                              "fmov", ".2s",
                       [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8,
                                              "fmov", ".4s",
                       [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;

// AdvSIMD MOVI

// EDIT byte mask: scalar
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVID      : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
                    [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 here.
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
          (MOVID imm0_255:$shift)>;

def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v8i8  immAllZerosV), (MOVID (i32 0))>;

def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>;
def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>;
def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>;
def : Pat<(v8i8  immAllOnesV), (MOVID (i32 255))>;
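//
// N.b. (architectural background, not from this file): in the byte-mask form
// each bit i of the 8-bit immediate expands to byte i of the 64-bit result
// being 0x00 or 0xff, so imm8 = 0 yields all-zeros and imm8 = 255 yields
// all-ones, as exploited above.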

// EDIT byte mask: 2d

// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 in the pattern
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVIv2d_ns   : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128,
                                                simdimmtype10,
                                                "movi", ".2d",
                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;


// Use movi.2d to materialize 0.0 if the HW does zero-cycle zeroing.
// Complexity is added to break a tie with a plain MOVI.
let AddedComplexity = 1 in {
def : Pat<(f32   fpimm0),
          (f32 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), ssub))>,
      Requires<[HasZCZ]>;
def : Pat<(f64   fpimm0),
          (f64 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), dsub))>,
      Requires<[HasZCZ]>;
}

def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;

def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;

def : Pat<(v2f64 (AArch64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4f32 (AArch64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>;

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
defm MOVI      : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;

def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;

// EDIT per word: 2s & 4s with MSL shifter
def MOVIv2s_msl  : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
                      [(set (v2i32 V64:$Rd),
                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MOVIv4s_msl  : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
                      [(set (v4i32 V128:$Rd),
                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
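//
// N.b. (architectural background, not from this file): the MSL shifter
// shifts ones, not zeroes, into the low bits, so e.g. imm8 = 0xAB with
// "msl #8" produces 0x0000abff in each 32-bit lane.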

// Per byte: 8b & 16b
def MOVIv8b_ns   : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64,  imm0_255,
                                                 "movi", ".8b",
                       [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
def MOVIv16b_ns  : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255,
                                                 "movi", ".16b",
                       [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;

// AdvSIMD MVNI

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
defm MVNI      : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;

def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;

// EDIT per word: 2s & 4s with MSL shifter
def MVNIv2s_msl   : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
                      [(set (v2i32 V64:$Rd),
                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MVNIv4s_msl   : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
                      [(set (v4i32 V128:$Rd),
                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;

//----------------------------------------------------------------------------
// AdvSIMD indexed element
//----------------------------------------------------------------------------

let neverHasSideEffects = 1 in {
  defm FMLA  : SIMDFPIndexedSDTied<0, 0b0001, "fmla">;
  defm FMLS  : SIMDFPIndexedSDTied<0, 0b0101, "fmls">;
}

// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
// instruction expects the addend first, while the intrinsic expects it last.

// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
           TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
           TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;

defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
           TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
           TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
           TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
           TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
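//
// Worked example (not from the source): "fmls v0.2s, v1.2s, v2.s[1]" computes
// Rd = Rd - Rn * Rm[1], i.e. fma(-Rn, Rm[1], Rd); the four FMLS PatFrags
// above cover the fneg landing on either multiplicand, in either operand
// order.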

multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                           VectorIndexS:$idx))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (v2f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i32 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                           VectorIndexS:$idx))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (v4f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i32 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
  // (DUPLANE from 64-bit would be trivial).
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
                                           VectorIndexD:$idx))),
            (FMLSv2i64_indexed
                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
                (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;

  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v2f32 (fneg V64:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;

  // 1 variant for 64-bit scalar version: extract from .1d or from .2d
  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
                         (vector_extract (v2f64 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
                V128:$Rm, VectorIndexS:$idx)>;
}

defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;

defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL  : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>;

def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv2i32_indexed V64:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv4i32_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
          (FMULv2i64_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
            (i64 0))>;

defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm MLA   : SIMDVectorIndexedHSTied<1, 0b0000, "mla",
              TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>;
defm MLS   : SIMDVectorIndexedHSTied<1, 0b0100, "mls",
              TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>;
defm MUL   : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull",
                int_aarch64_neon_smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
                                           int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
                                           int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL   : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMLSL   : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMULL   : SIMDVectorIndexedLongSD<1, 0b1010, "umull",
                int_aarch64_neon_umull>;

// A scalar sqdmull with the second operand being a vector lane can be
// handled directly with the indexed instruction encoding.
def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                          (vector_extract (v4i32 V128:$Vm),
                                                           VectorIndexS:$idx)),
          (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;

//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">;
defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">;
defm SCVTF  : SIMDScalarRShiftSD<0, 0b11100, "scvtf">;
defm UCVTF  : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">;
// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference can't handle the truth.
// Having the same base pattern for fp <--> int totally freaks it out.
def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
                                            vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
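//
// N.b. (architectural background, not from this file): these are fixed-point
// conversions, so "scvtf d0, d1, #16" computes x * 2^-16 as a double, and
// "fcvtzs d0, d1, #16" computes (i64)(x * 2^16), rounding toward zero and
// saturating on overflow.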

defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
                                     int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
                                     int_aarch64_neon_sqrshrun>;
defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN   : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
                                     int_aarch64_neon_sqshrn>;
defm SQSHRUN  : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
                                     int_aarch64_neon_sqshrun>;
defm SRI      : SIMDScalarRShiftDTied<   1, 0b01000, "sri">;
defm SRSHR    : SIMDScalarRShiftD<   0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA    : SIMDScalarRShiftDTied<   0, 0b00110, "srsra",
    TriOpFrag<(add node:$LHS,
                   (AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHR     : SIMDScalarRShiftD<   0, 0b00000, "sshr", AArch64vashr>;
defm SSRA     : SIMDScalarRShiftDTied<   0, 0b00010, "ssra",
    TriOpFrag<(add node:$LHS,
                   (AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN  : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
                                     int_aarch64_neon_uqrshrn>;
defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN   : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
                                     int_aarch64_neon_uqshrn>;
defm URSHR    : SIMDScalarRShiftD<   1, 0b00100, "urshr", AArch64urshri>;
defm URSRA    : SIMDScalarRShiftDTied<   1, 0b00110, "ursra",
    TriOpFrag<(add node:$LHS,
                   (AArch64urshri node:$MHS, node:$RHS))>>;
defm USHR     : SIMDScalarRShiftD<   1, 0b00000, "ushr", AArch64vlshr>;
defm USRA     : SIMDScalarRShiftDTied<   1, 0b00010, "usra",
    TriOpFrag<(add node:$LHS,
                   (AArch64vlshr node:$MHS, node:$RHS))>>;
4265
4266//----------------------------------------------------------------------------
4267// AdvSIMD vector shift instructions
4268//----------------------------------------------------------------------------
4269defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
4270defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
4271defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf",
4272                                   int_aarch64_neon_vcvtfxs2fp>;
4273defm RSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
4274                                         int_aarch64_neon_rshrn>;
4275defm SHL     : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
4276defm SHRN    : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
4277                          BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
4278defm SLI     : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>;
4279def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
4280                                      (i32 vecshiftL64:$imm))),
4281          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
4282defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
4283                                         int_aarch64_neon_sqrshrn>;
4284defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
4285                                         int_aarch64_neon_sqrshrun>;
4286defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
4287defm SQSHL  : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
4288defm SQSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
4289                                         int_aarch64_neon_sqshrn>;
4290defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
4291                                         int_aarch64_neon_sqshrun>;
4292defm SRI     : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>;
4293def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
4294                                      (i32 vecshiftR64:$imm))),
4295          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
4296defm SRSHR   : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
4297defm SRSRA   : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
4298                 TriOpFrag<(add node:$LHS,
4299                                (AArch64srshri node:$MHS, node:$RHS))> >;
4300defm SSHLL   : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
4301                BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;
4302
4303defm SSHR    : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
4304defm SSRA    : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
4305                TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
4306defm UCVTF   : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf",
4307                        int_aarch64_neon_vcvtfxu2fp>;
4308defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
4309                                         int_aarch64_neon_uqrshrn>;
4310defm UQSHL   : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
4311defm UQSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
4312                                         int_aarch64_neon_uqshrn>;
4313defm URSHR   : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
4314defm URSRA   : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
4315                TriOpFrag<(add node:$LHS,
4316                               (AArch64urshri node:$MHS, node:$RHS))> >;
4317defm USHLL   : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
4318                BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
4319defm USHR    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
4320defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
4321                TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
4322
4323// SHRN patterns for when a logical right shift was used instead of arithmetic
4324// (the immediate guarantees no sign bits actually end up in the result so it
4325// doesn't matter).
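// Concretely, for a v8i16 lane shifted right by imm (1 <= imm <= 8, as
// vecshiftR16Narrow guarantees), logical and arithmetic shifts differ only
// in bits [16-imm, 15] of the shifted value, while the truncate keeps bits
// [0, 7]; the two ranges never overlap, so SHRN is safe for both.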
4326def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
4327          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
4328def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
4329          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
4330def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
4331          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
4332
4333def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
4334                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
4335                                                    vecshiftR16Narrow:$imm)))),
4336          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
4337                           V128:$Rn, vecshiftR16Narrow:$imm)>;
4338def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
4339                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
4340                                                    vecshiftR32Narrow:$imm)))),
4341          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
4342                           V128:$Rn, vecshiftR32Narrow:$imm)>;
4343def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
4344                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
4345                                                    vecshiftR64Narrow:$imm)))),
          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;
4348
// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
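// For example, (v8i16 (zext (v8i8 V64:$Rn))) below is selected to
// "ushll v0.8h, v0.8b, #0": a shift-left-long by zero is exactly a
// lane-wise extension, and the signed variant "sshll" likewise performs
// the sext.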
4351def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
4352def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
4353def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
4354def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
4355def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
4356def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
4357def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
4358def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
4359def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
// Also match an extend from the upper half of a 128-bit source register.
4361def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
4362          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
4363def : Pat<(v8i16 (zext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
4364          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
4365def : Pat<(v8i16 (sext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
4366          (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
4367def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
4368          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
4369def : Pat<(v4i32 (zext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
4370          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
4371def : Pat<(v4i32 (sext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
4372          (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
4373def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
4374          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
4375def : Pat<(v2i64 (zext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
4376          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
4377def : Pat<(v2i64 (sext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
4378          (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
4379
4380// Vector shift sxtl aliases
4381def : InstAlias<"sxtl.8h $dst, $src1",
4382                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
4383def : InstAlias<"sxtl $dst.8h, $src1.8b",
4384                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
4385def : InstAlias<"sxtl.4s $dst, $src1",
4386                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
4387def : InstAlias<"sxtl $dst.4s, $src1.4h",
4388                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
4389def : InstAlias<"sxtl.2d $dst, $src1",
4390                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
4391def : InstAlias<"sxtl $dst.2d, $src1.2s",
4392                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
4393
4394// Vector shift sxtl2 aliases
4395def : InstAlias<"sxtl2.8h $dst, $src1",
4396                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
4397def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
4398                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
4399def : InstAlias<"sxtl2.4s $dst, $src1",
4400                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
4401def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
4402                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
4403def : InstAlias<"sxtl2.2d $dst, $src1",
4404                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
4405def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
4406                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
4407
4408// Vector shift uxtl aliases
4409def : InstAlias<"uxtl.8h $dst, $src1",
4410                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
4411def : InstAlias<"uxtl $dst.8h, $src1.8b",
4412                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
4413def : InstAlias<"uxtl.4s $dst, $src1",
4414                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
4415def : InstAlias<"uxtl $dst.4s, $src1.4h",
4416                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
4417def : InstAlias<"uxtl.2d $dst, $src1",
4418                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
4419def : InstAlias<"uxtl $dst.2d, $src1.2s",
4420                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
4421
4422// Vector shift uxtl2 aliases
4423def : InstAlias<"uxtl2.8h $dst, $src1",
4424                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
4425def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
4426                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
4427def : InstAlias<"uxtl2.4s $dst, $src1",
4428                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
4429def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
4430                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
4431def : InstAlias<"uxtl2.2d $dst, $src1",
4432                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
4433def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
4434                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
4435
4436// If an integer is about to be converted to a floating point value,
4437// just load it on the floating point unit.
4438// These patterns are more complex because floating point loads do not
4439// support sign extension.
4440// The sign extension has to be explicitly added and is only supported for
4441// one step: byte-to-half, half-to-word, word-to-doubleword.
4442// SCVTF GPR -> FPR is 9 cycles.
// SCVTF FPR -> FPR is 4 cycles.
// SXTL (sign extension with lengthening) FPR -> FPR is 2 cycles.
// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
// and still be faster.
4447// However, this is not good for code size.
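// As a rough, hand-written sketch: for an i8 load converted to f32, the FPR
// route built by the patterns below is approximately
//   ldr   b0, [x0]
//   sshll v0.8h, v0.8b, #0
//   sshll v0.4s, v0.4h, #0
//   scvtf s0, s0
// whereas the GPR route would be
//   ldrsb w8, [x0]
//   scvtf s0, w8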
// 8-bit -> float: 2 size steps up.
4449class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
4450  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
4451        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
4452                            (SSHLLv4i16_shift
4453                              (f64
4454                                (EXTRACT_SUBREG
4455                                  (SSHLLv8i8_shift
4456                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
4457                                        INST,
4458                                        bsub),
4459                                    0),
4460                                  dsub)),
4461                               0),
4462                             ssub)))>, Requires<[NotForCodeSize]>;
4463
4464def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
4465                          (LDRBroW  GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
4466def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
4467                          (LDRBroX  GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
4468def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
4469                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
4470def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
4471                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;
4472
// 16-bit -> float: 1 size step up.
4474class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
4475  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
4476        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
4477                            (SSHLLv4i16_shift
4478                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
4479                                  INST,
4480                                  hsub),
4481                                0),
4482                            ssub)))>, Requires<[NotForCodeSize]>;
4483
4484def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
4485                           (LDRHroW   GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
4486def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
4487                           (LDRHroX   GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
4488def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
4489                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
4490def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
4491                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
4492
// 32-bit to 32-bit conversions are handled in the target-specific dag
// combine: performIntToFpCombine.
// 64-bit integer to 32-bit floating point is not possible with SCVTF on
// floating point registers (source and destination must be the same size).
4498
// Here are the patterns for 8-, 16-, 32-, and 64-bit integers to double.
// 8-bit -> double: 3 size steps up, so give up.
// 16-bit -> double: 2 size steps up.
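// Roughly (hand-written sketch), the 16-bit case below expands to
//   ldr   h0, [x0]
//   sshll v0.4s, v0.4h, #0
//   sshll v0.2d, v0.2s, #0
//   scvtf d0, d0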
4502class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
4503  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
4504           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
4505                              (SSHLLv2i32_shift
4506                                 (f64
4507                                  (EXTRACT_SUBREG
4508                                    (SSHLLv4i16_shift
4509                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
4510                                        INST,
4511                                        hsub),
4512                                     0),
4513                                   dsub)),
4514                               0),
4515                             dsub)))>, Requires<[NotForCodeSize]>;
4516
4517def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
4518                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
4519def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
4520                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
4521def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
4522                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
4523def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
4524                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
// 32-bit -> double: 1 size step up.
4526class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
4527  : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
4528           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
4529                              (SSHLLv2i32_shift
4530                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
4531                                  INST,
4532                                  ssub),
4533                               0),
4534                             dsub)))>, Requires<[NotForCodeSize]>;
4535
4536def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
4537                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
4538def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
4539                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
4540def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
4541                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
4542def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
4543                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;
4544
// 64-bit -> double is handled in the target-specific dag combine:
// performIntToFpCombine.
4547
4548
4549//----------------------------------------------------------------------------
4550// AdvSIMD Load-Store Structure
4551//----------------------------------------------------------------------------
4552defm LD1 : SIMDLd1Multiple<"ld1">;
4553defm LD2 : SIMDLd2Multiple<"ld2">;
4554defm LD3 : SIMDLd3Multiple<"ld3">;
4555defm LD4 : SIMDLd4Multiple<"ld4">;
4556
4557defm ST1 : SIMDSt1Multiple<"st1">;
4558defm ST2 : SIMDSt2Multiple<"st2">;
4559defm ST3 : SIMDSt3Multiple<"st3">;
4560defm ST4 : SIMDSt4Multiple<"st4">;
4561
4562class Ld1Pat<ValueType ty, Instruction INST>
4563  : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;
4564
4565def : Ld1Pat<v16i8, LD1Onev16b>;
4566def : Ld1Pat<v8i16, LD1Onev8h>;
4567def : Ld1Pat<v4i32, LD1Onev4s>;
4568def : Ld1Pat<v2i64, LD1Onev2d>;
4569def : Ld1Pat<v8i8,  LD1Onev8b>;
4570def : Ld1Pat<v4i16, LD1Onev4h>;
4571def : Ld1Pat<v2i32, LD1Onev2s>;
4572def : Ld1Pat<v1i64, LD1Onev1d>;
4573
4574class St1Pat<ValueType ty, Instruction INST>
4575  : Pat<(store ty:$Vt, GPR64sp:$Rn),
4576        (INST ty:$Vt, GPR64sp:$Rn)>;
4577
4578def : St1Pat<v16i8, ST1Onev16b>;
4579def : St1Pat<v8i16, ST1Onev8h>;
4580def : St1Pat<v4i32, ST1Onev4s>;
4581def : St1Pat<v2i64, ST1Onev2d>;
4582def : St1Pat<v8i8,  ST1Onev8b>;
4583def : St1Pat<v4i16, ST1Onev4h>;
4584def : St1Pat<v2i32, ST1Onev2s>;
4585def : St1Pat<v1i64, ST1Onev1d>;
4586
4587//---
4588// Single-element
4589//---
4590
4591defm LD1R          : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
4592defm LD2R          : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
4593defm LD3R          : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
4594defm LD4R          : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
4595let mayLoad = 1, neverHasSideEffects = 1 in {
4596defm LD1 : SIMDLdSingleBTied<0, 0b000,       "ld1", VecListOneb,   GPR64pi1>;
4597defm LD1 : SIMDLdSingleHTied<0, 0b010, 0,    "ld1", VecListOneh,   GPR64pi2>;
4598defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes,   GPR64pi4>;
4599defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned,   GPR64pi8>;
4600defm LD2 : SIMDLdSingleBTied<1, 0b000,       "ld2", VecListTwob,   GPR64pi2>;
4601defm LD2 : SIMDLdSingleHTied<1, 0b010, 0,    "ld2", VecListTwoh,   GPR64pi4>;
4602defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos,   GPR64pi8>;
4603defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod,   GPR64pi16>;
4604defm LD3 : SIMDLdSingleBTied<0, 0b001,       "ld3", VecListThreeb, GPR64pi3>;
4605defm LD3 : SIMDLdSingleHTied<0, 0b011, 0,    "ld3", VecListThreeh, GPR64pi6>;
4606defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
4607defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
4608defm LD4 : SIMDLdSingleBTied<1, 0b001,       "ld4", VecListFourb,  GPR64pi4>;
4609defm LD4 : SIMDLdSingleHTied<1, 0b011, 0,    "ld4", VecListFourh,  GPR64pi8>;
4610defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours,  GPR64pi16>;
4611defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd,  GPR64pi32>;
4612}
4613
4614def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
4615          (LD1Rv8b GPR64sp:$Rn)>;
4616def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
4617          (LD1Rv16b GPR64sp:$Rn)>;
4618def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
4619          (LD1Rv4h GPR64sp:$Rn)>;
4620def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
4621          (LD1Rv8h GPR64sp:$Rn)>;
4622def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
4623          (LD1Rv2s GPR64sp:$Rn)>;
4624def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
4625          (LD1Rv4s GPR64sp:$Rn)>;
4626def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
4627          (LD1Rv2d GPR64sp:$Rn)>;
4628def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
4629          (LD1Rv1d GPR64sp:$Rn)>;
4630// Grab the floating point version too
4631def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
4632          (LD1Rv2s GPR64sp:$Rn)>;
4633def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
4634          (LD1Rv4s GPR64sp:$Rn)>;
4635def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
4636          (LD1Rv2d GPR64sp:$Rn)>;
4637def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
4638          (LD1Rv1d GPR64sp:$Rn)>;
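// For example, splatting a loaded f32 across a v4f32,
// (v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), is the single instruction
// "ld1r { v0.4s }, [x0]".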
4639
4640class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
4641                    ValueType VTy, ValueType STy, Instruction LD1>
4642  : Pat<(vector_insert (VTy VecListOne128:$Rd),
4643           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
4644        (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;
4645
4646def : Ld1Lane128Pat<extloadi8,  VectorIndexB, v16i8, i32, LD1i8>;
4647def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
4648def : Ld1Lane128Pat<load,       VectorIndexS, v4i32, i32, LD1i32>;
4649def : Ld1Lane128Pat<load,       VectorIndexS, v4f32, f32, LD1i32>;
4650def : Ld1Lane128Pat<load,       VectorIndexD, v2i64, i64, LD1i64>;
4651def : Ld1Lane128Pat<load,       VectorIndexD, v2f64, f64, LD1i64>;
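// E.g. inserting a loaded f32 into lane 1 of a v4f32 via the pattern above
// becomes "ld1 { v0.s }[1], [x0]", loading directly into the lane.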
4652
4653class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
4654                   ValueType VTy, ValueType STy, Instruction LD1>
4655  : Pat<(vector_insert (VTy VecListOne64:$Rd),
4656           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
4657        (EXTRACT_SUBREG
4658            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
4659                          VecIndex:$idx, GPR64sp:$Rn),
4660            dsub)>;
4661
4662def : Ld1Lane64Pat<extloadi8,  VectorIndexB, v8i8,  i32, LD1i8>;
4663def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
4664def : Ld1Lane64Pat<load,       VectorIndexS, v2i32, i32, LD1i32>;
4665def : Ld1Lane64Pat<load,       VectorIndexS, v2f32, f32, LD1i32>;
4666
4667
4668defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
4669defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
4670defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
4671defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
4672
4673// Stores
4674defm ST1 : SIMDStSingleB<0, 0b000,       "st1", VecListOneb, GPR64pi1>;
4675defm ST1 : SIMDStSingleH<0, 0b010, 0,    "st1", VecListOneh, GPR64pi2>;
4676defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
4677defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
4678
4679let AddedComplexity = 15 in
4680class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
4681                    ValueType VTy, ValueType STy, Instruction ST1>
4682  : Pat<(scalar_store
4683             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
4684             GPR64sp:$Rn),
4685        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;
4686
4687def : St1Lane128Pat<truncstorei8,  VectorIndexB, v16i8, i32, ST1i8>;
4688def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
4689def : St1Lane128Pat<store,         VectorIndexS, v4i32, i32, ST1i32>;
4690def : St1Lane128Pat<store,         VectorIndexS, v4f32, f32, ST1i32>;
4691def : St1Lane128Pat<store,         VectorIndexD, v2i64, i64, ST1i64>;
4692def : St1Lane128Pat<store,         VectorIndexD, v2f64, f64, ST1i64>;
4693
4694let AddedComplexity = 15 in
4695class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
4696                   ValueType VTy, ValueType STy, Instruction ST1>
4697  : Pat<(scalar_store
4698             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
4699             GPR64sp:$Rn),
4700        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
4701             VecIndex:$idx, GPR64sp:$Rn)>;
4702
4703def : St1Lane64Pat<truncstorei8,  VectorIndexB, v8i8, i32, ST1i8>;
4704def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
4705def : St1Lane64Pat<store,         VectorIndexS, v2i32, i32, ST1i32>;
4706def : St1Lane64Pat<store,         VectorIndexS, v2f32, f32, ST1i32>;
4707
4708multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
4709                             ValueType VTy, ValueType STy, Instruction ST1,
4710                             int offset> {
4711  def : Pat<(scalar_store
4712              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
4713              GPR64sp:$Rn, offset),
4714        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
4715             VecIndex:$idx, GPR64sp:$Rn, XZR)>;
4716
4717  def : Pat<(scalar_store
4718              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
4719              GPR64sp:$Rn, GPR64:$Rm),
4720        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
4721             VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
4722}
4723
4724defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
4725defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
4726                        2>;
4727defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
4728defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
4729defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
4730defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
4731
4732multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
4733                             ValueType VTy, ValueType STy, Instruction ST1,
4734                             int offset> {
4735  def : Pat<(scalar_store
4736              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
4737              GPR64sp:$Rn, offset),
4738        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;
4739
4740  def : Pat<(scalar_store
4741              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
4742              GPR64sp:$Rn, GPR64:$Rm),
4743        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
4744}
4745
4746defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
4747                         1>;
4748defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
4749                         2>;
4750defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
4751defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
4752defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
4753defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
4754
4755let mayStore = 1, neverHasSideEffects = 1 in {
4756defm ST2 : SIMDStSingleB<1, 0b000,       "st2", VecListTwob,   GPR64pi2>;
4757defm ST2 : SIMDStSingleH<1, 0b010, 0,    "st2", VecListTwoh,   GPR64pi4>;
4758defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos,   GPR64pi8>;
4759defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod,   GPR64pi16>;
4760defm ST3 : SIMDStSingleB<0, 0b001,       "st3", VecListThreeb, GPR64pi3>;
4761defm ST3 : SIMDStSingleH<0, 0b011, 0,    "st3", VecListThreeh, GPR64pi6>;
4762defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
4763defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
4764defm ST4 : SIMDStSingleB<1, 0b001,       "st4", VecListFourb,  GPR64pi4>;
4765defm ST4 : SIMDStSingleH<1, 0b011, 0,    "st4", VecListFourh,  GPR64pi8>;
4766defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours,  GPR64pi16>;
4767defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd,  GPR64pi32>;
4768}
4769
4770defm ST1 : SIMDLdSt1SingleAliases<"st1">;
4771defm ST2 : SIMDLdSt2SingleAliases<"st2">;
4772defm ST3 : SIMDLdSt3SingleAliases<"st3">;
4773defm ST4 : SIMDLdSt4SingleAliases<"st4">;
4774
4775//----------------------------------------------------------------------------
4776// Crypto extensions
4777//----------------------------------------------------------------------------
4778
4779def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
4780def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
4781def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
4782def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
4783
4784def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
4785def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
4786def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
4787def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
4788def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
4789def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>;
4790def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>;
4791
4792def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
4793def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
4794def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>;
4795
4796//----------------------------------------------------------------------------
4797// Compiler-pseudos
4798//----------------------------------------------------------------------------
4799// FIXME: Like for X86, these should go in their own separate .td file.
4800
// Any instruction that defines a 32-bit result zeroes the high half of the
// 64-bit register. A truncate can be lowered to EXTRACT_SUBREG, and
// CopyFromReg may be copying from a truncate, so neither guarantees zeroed
// high bits; but any other 32-bit operation will zero-extend up to 64 bits.
4805// FIXME: X86 also checks for CMOV here. Do we need something similar?
4806def def32 : PatLeaf<(i32 GPR32:$src), [{
4807  return N->getOpcode() != ISD::TRUNCATE &&
4808         N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
4809         N->getOpcode() != ISD::CopyFromReg;
4810}]>;
4811
4812// In the case of a 32-bit def that is known to implicitly zero-extend,
4813// we can use a SUBREG_TO_REG.
4814def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
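// For example, if $src is produced by a 32-bit ADD, the ADD itself already
// zeroed bits [63:32], so the zext needs no extra instruction: the
// SUBREG_TO_REG node emits no code of its own.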
4815
// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
4818def : Pat<(i64 (anyext GPR32:$src)),
4819          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
4820
4821// When we need to explicitly zero-extend, we use an unsigned bitfield move
4822// instruction (UBFM) on the enclosing super-reg.
4823def : Pat<(i64 (zext GPR32:$src)),
4824 (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
4825
4826// To sign extend, we use a signed bitfield move instruction (SBFM) on the
4827// containing super-reg.
4828def : Pat<(i64 (sext GPR32:$src)),
4829   (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
4830def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
4831def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
4832def : Pat<(i64 (sext_inreg GPR64:$src, i8)),  (SBFMXri GPR64:$src, 0, 7)>;
4833def : Pat<(i64 (sext_inreg GPR64:$src, i1)),  (SBFMXri GPR64:$src, 0, 0)>;
4834def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
4835def : Pat<(i32 (sext_inreg GPR32:$src, i8)),  (SBFMWri GPR32:$src, 0, 7)>;
4836def : Pat<(i32 (sext_inreg GPR32:$src, i1)),  (SBFMWri GPR32:$src, 0, 0)>;
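// In assembly these SBFM forms are the usual sign-extension aliases, e.g.
// (i32 (sext_inreg GPR32:$src, i16)) above is SBFMWri $src, 0, 15, printed
// as "sxth w0, w0", and the (i64 (sext GPR32:$src)) form prints as
// "sxtw x0, w0".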
4837
4838def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
4839          (SBFMWri GPR32:$Rn, (i64 (i32shift_a       imm0_31:$imm)),
4840                              (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
4841def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
4842          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
4843                              (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
4844
4845def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
4846          (SBFMWri GPR32:$Rn, (i64 (i32shift_a        imm0_31:$imm)),
4847                              (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
4848def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
4849          (SBFMXri GPR64:$Rn, (i64 (i64shift_a        imm0_63:$imm)),
4850                              (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
4851
4852def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
4853          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
4854                   (i64 (i64shift_a        imm0_63:$imm)),
4855                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
4856
4857// sra patterns have an AddedComplexity of 10, so make sure we have a higher
4858// AddedComplexity for the following patterns since we want to match sext + sra
4859// patterns before we attempt to match a single sra node.
4860let AddedComplexity = 20 in {
// We support all sext + sra combinations that preserve at least one bit of
// the original value being sign extended, i.e. shifts of up to bitwidth-1
// bits.
4864def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
4865          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
4866def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
4867          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;
4868
4869def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
4870          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
4871def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
4872          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;
4873
4874def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
4875          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
4876                   (i64 imm0_31:$imm), 31)>;
4877} // AddedComplexity = 20
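// For instance, (sra (sext_inreg GPR32:$Rn, i8), (i64 3)) above becomes
// SBFMWri $Rn, 3, 7, printed as "sbfx w0, w0, #3, #5": extract bits [7:3]
// and sign-extend from bit 7, which is exactly the sext + sra semantics.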
4878
4879// To truncate, we can simply extract from a subregister.
4880def : Pat<(i32 (trunc GPR64sp:$src)),
4881          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;
4882
4883// __builtin_trap() uses the BRK instruction on AArch64.
4884def : Pat<(trap), (BRK 1)>;
4885
4886// Conversions within AdvSIMD types in the same register size are free.
4887// But because we need a consistent lane ordering, in big endian many
4888// conversions require one or more REV instructions.
4889//
// Consider a simple memory load followed by a bitconvert then a store.
//   v0 = load v2i32
//   v1 = BITCAST v2i32 v0 to v4i16
//        store v4i16 v1
4894//
4895// In big endian mode every memory access has an implicit byte swap. LDR and
4896// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
4897// is, they treat the vector as a sequence of elements to be byte-swapped.
4898// The two pairs of instructions are fundamentally incompatible. We've decided
4899// to use LD1/ST1 only to simplify compiler implementation.
4900//
4901// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
4902// the original code sequence:
4903//   v0 = load v2i32
4904//   v1 = REV v2i32                  (implicit)
4905//   v2 = BITCAST v2i32 v1 to v4i16
4906//   v3 = REV v4i16 v2               (implicit)
4907//        store v4i16 v3
4908//
// But this is now broken - the value stored differs from the value loaded
4910// due to lane reordering. To fix this, on every BITCAST we must perform two
4911// other REVs:
4912//   v0 = load v2i32
4913//   v1 = REV v2i32                  (implicit)
4914//   v2 = REV v2i32
4915//   v3 = BITCAST v2i32 v2 to v4i16
4916//   v4 = REV v4i16
4917//   v5 = REV v4i16 v4               (implicit)
4918//        store v4i16 v5
4919//
4920// This means an extra two instructions, but actually in most cases the two REV
4921// instructions can be combined into one. For example:
4922//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
4923//
4924// There is also no 128-bit REV instruction. This must be synthesized with an
4925// EXT instruction.
4926//
4927// Most bitconverts require some sort of conversion. The only exceptions are:
4928//   a) Identity conversions -  vNfX <-> vNiX
4929//   b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
4930//
4931
4932let Predicates = [IsLE] in {
4933def : Pat<(v8i8  (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
4934def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
4935def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
4936def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
4937
4938def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
4939          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
4940def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
4941          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
4942def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
4943          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
4944def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
4945          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
4946def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
4947          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
4948}
4949let Predicates = [IsBE] in {
4950def : Pat<(v8i8  (bitconvert GPR64:$Xn)),
4951                 (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
4952def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
4953                 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
4954def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
4955                 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
4956def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
4957                 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
4958
4959def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
4960          (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
4961def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
4962          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
4963def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
4964          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
4965def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
4966          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
4967}
4968def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
4969def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
4970def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
4971          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
4972def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
4973          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
4974def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
4975          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
4976def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
4977
4978def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
4979          (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
4980def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
4981          (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
4982def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
4983          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
4984def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
4985          (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
4986def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
4987          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
4988
4989let Predicates = [IsLE] in {
4990def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
4991def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
4992def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))), (v1i64 FPR64:$src)>;
4993def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
4994}
4995let Predicates = [IsBE] in {
4996def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
4997                             (v1i64 (REV64v2i32 FPR64:$src))>;
4998def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
4999                             (v1i64 (REV64v4i16 FPR64:$src))>;
5000def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))),
5001                             (v1i64 (REV64v8i8 FPR64:$src))>;
5002def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
5003                             (v1i64 (REV64v2i32 FPR64:$src))>;
5004}
5005def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
5006def : Pat<(v1i64 (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;
5007
5008let Predicates = [IsLE] in {
5009def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
5010def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
5011def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))), (v2i32 FPR64:$src)>;
5012def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
5013def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
5014}
5015let Predicates = [IsBE] in {
5016def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
5017                             (v2i32 (REV64v2i32 FPR64:$src))>;
5018def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
5019                             (v2i32 (REV32v4i16 FPR64:$src))>;
5020def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))),
5021                             (v2i32 (REV32v8i8 FPR64:$src))>;
5022def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))),
5023                             (v2i32 (REV64v2i32 FPR64:$src))>;
5024def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
5025                             (v2i32 (REV64v2i32 FPR64:$src))>;
5026}
5027def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
5028
5029let Predicates = [IsLE] in {
5030def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
5031def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
5032def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))), (v4i16 FPR64:$src)>;
5033def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
5034def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
5035def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
5036}
5037let Predicates = [IsBE] in {
5038def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
5039                             (v4i16 (REV64v4i16 FPR64:$src))>;
5040def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
5041                             (v4i16 (REV32v4i16 FPR64:$src))>;
5042def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))),
5043                             (v4i16 (REV16v8i8 FPR64:$src))>;
5044def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))),
5045                             (v4i16 (REV64v4i16 FPR64:$src))>;
5046def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
5047                             (v4i16 (REV32v4i16 FPR64:$src))>;
5048def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
5049                             (v4i16 (REV64v4i16 FPR64:$src))>;
5050}
5051
5052let Predicates = [IsLE] in {
5053def : Pat<(v8i8  (bitconvert (v1i64 FPR64:$src))), (v8i8  FPR64:$src)>;
5054def : Pat<(v8i8  (bitconvert (v2i32 FPR64:$src))), (v8i8  FPR64:$src)>;
5055def : Pat<(v8i8  (bitconvert (v4i16 FPR64:$src))), (v8i8  FPR64:$src)>;
5056def : Pat<(v8i8  (bitconvert (f64   FPR64:$src))), (v8i8  FPR64:$src)>;
5057def : Pat<(v8i8  (bitconvert (v2f32 FPR64:$src))), (v8i8  FPR64:$src)>;
5058def : Pat<(v8i8  (bitconvert (v1f64 FPR64:$src))), (v8i8  FPR64:$src)>;
5059}
5060let Predicates = [IsBE] in {
5061def : Pat<(v8i8  (bitconvert (v1i64 FPR64:$src))),
5062                             (v8i8 (REV64v8i8 FPR64:$src))>;
5063def : Pat<(v8i8  (bitconvert (v2i32 FPR64:$src))),
5064                             (v8i8 (REV32v8i8 FPR64:$src))>;
5065def : Pat<(v8i8  (bitconvert (v4i16 FPR64:$src))),
5066                             (v8i8 (REV16v8i8 FPR64:$src))>;
5067def : Pat<(v8i8  (bitconvert (f64   FPR64:$src))),
5068                             (v8i8 (REV64v8i8 FPR64:$src))>;
5069def : Pat<(v8i8  (bitconvert (v2f32 FPR64:$src))),
5070                             (v8i8 (REV32v8i8 FPR64:$src))>;
5071def : Pat<(v8i8  (bitconvert (v1f64 FPR64:$src))),
5072                             (v8i8 (REV64v8i8 FPR64:$src))>;
5073}
5074
5075let Predicates = [IsLE] in {
5076def : Pat<(f64   (bitconvert (v2i32 FPR64:$src))), (f64   FPR64:$src)>;
5077def : Pat<(f64   (bitconvert (v4i16 FPR64:$src))), (f64   FPR64:$src)>;
5078def : Pat<(f64   (bitconvert (v2f32 FPR64:$src))), (f64   FPR64:$src)>;
5079def : Pat<(f64   (bitconvert (v8i8  FPR64:$src))), (f64   FPR64:$src)>;
5080}
5081let Predicates = [IsBE] in {
5082def : Pat<(f64   (bitconvert (v2i32 FPR64:$src))),
5083                             (f64 (REV64v2i32 FPR64:$src))>;
5084def : Pat<(f64   (bitconvert (v4i16 FPR64:$src))),
5085                             (f64 (REV64v4i16 FPR64:$src))>;
5086def : Pat<(f64   (bitconvert (v2f32 FPR64:$src))),
5087                             (f64 (REV64v2i32 FPR64:$src))>;
5088def : Pat<(f64   (bitconvert (v8i8  FPR64:$src))),
5089                             (f64 (REV64v8i8 FPR64:$src))>;
5090}
5091def : Pat<(f64   (bitconvert (v1i64 FPR64:$src))), (f64   FPR64:$src)>;
5092def : Pat<(f64   (bitconvert (v1f64 FPR64:$src))), (f64   FPR64:$src)>;
5093
5094let Predicates = [IsLE] in {
5095def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
5096def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
5097def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))), (v1f64 FPR64:$src)>;
5098def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
5099}
5100let Predicates = [IsBE] in {
5101def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
5102                             (v1f64 (REV64v2i32 FPR64:$src))>;
5103def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
5104                             (v1f64 (REV64v4i16 FPR64:$src))>;
5105def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))),
5106                             (v1f64 (REV64v8i8 FPR64:$src))>;
5107def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
5108                             (v1f64 (REV64v2i32 FPR64:$src))>;
5109}
5110def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
5111def : Pat<(v1f64 (bitconvert (f64   FPR64:$src))), (v1f64 FPR64:$src)>;
5112
5113let Predicates = [IsLE] in {
5114def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
5115def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
5116def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))), (v2f32 FPR64:$src)>;
5117def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
5118def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
5119}
5120let Predicates = [IsBE] in {
5121def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
5122                             (v2f32 (REV64v2i32 FPR64:$src))>;
5123def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
5124                             (v2f32 (REV32v4i16 FPR64:$src))>;
5125def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))),
5126                             (v2f32 (REV32v8i8 FPR64:$src))>;
5127def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
5128                             (v2f32 (REV64v2i32 FPR64:$src))>;
5129def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))),
5130                             (v2f32 (REV64v2i32 FPR64:$src))>;
5131}
5132def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
5133
5134let Predicates = [IsLE] in {
5135def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
5136def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
5137def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
5138def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
5139def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
5140def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
5141}
5142let Predicates = [IsBE] in {
5143def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
5144                            (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
5145def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
5146                            (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
5147                                            (REV64v4i32 FPR128:$src), (i32 8)))>;
5148def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
5149                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
5150                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
5151def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
5152                            (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
5153def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
5154                            (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
5155                                            (REV64v4i32 FPR128:$src), (i32 8)))>;
5156def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
5157                            (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
5158                                            (REV64v16i8 FPR128:$src), (i32 8)))>;
5159}
5160
5161let Predicates = [IsLE] in {
5162def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))), (v2f64 FPR128:$src)>;
5163def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
5164def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
5165def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
5166def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
5167}
5168let Predicates = [IsBE] in {
5169def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))),
5170                             (v2f64 (EXTv16i8 FPR128:$src,
5171                                              FPR128:$src, (i32 8)))>;
5172def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
5173                             (v2f64 (REV64v4i32 FPR128:$src))>;
5174def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
5175                             (v2f64 (REV64v8i16 FPR128:$src))>;
5176def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
5177                             (v2f64 (REV64v16i8 FPR128:$src))>;
5178def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
5179                             (v2f64 (REV64v4i32 FPR128:$src))>;
5180}
5181def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
5182
5183let Predicates = [IsLE] in {
5184def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))), (v4f32 FPR128:$src)>;
5185def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
5186def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
5187def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
5188def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
5189}
5190let Predicates = [IsBE] in {
5191def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))),
5192                             (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
5193                                    (REV64v4i32 FPR128:$src), (i32 8)))>;
5194def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
5195                             (v4f32 (REV32v8i16 FPR128:$src))>;
5196def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
5197                             (v4f32 (REV32v16i8 FPR128:$src))>;
5198def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
5199                             (v4f32 (REV64v4i32 FPR128:$src))>;
5200def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
5201                             (v4f32 (REV64v4i32 FPR128:$src))>;
5202}
5203def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
5204
5205let Predicates = [IsLE] in {
5206def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))), (v2i64 FPR128:$src)>;
5207def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
5208def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
5209def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
5210def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
5211}
5212let Predicates = [IsBE] in {
5213def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))),
5214                             (v2i64 (EXTv16i8 FPR128:$src,
5215                                              FPR128:$src, (i32 8)))>;
5216def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
5217                             (v2i64 (REV64v4i32 FPR128:$src))>;
5218def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
5219                             (v2i64 (REV64v8i16 FPR128:$src))>;
5220def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
5221                             (v2i64 (REV64v16i8 FPR128:$src))>;
5222def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
5223                             (v2i64 (REV64v4i32 FPR128:$src))>;
5224}
5225def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
5226
5227let Predicates = [IsLE] in {
5228def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))), (v4i32 FPR128:$src)>;
5229def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
5230def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
5231def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
5232def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
5233}
5234let Predicates = [IsBE] in {
5235def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))),
5236                             (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
5237                                              (REV64v4i32 FPR128:$src),
5238                                              (i32 8)))>;
5239def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
5240                             (v4i32 (REV64v4i32 FPR128:$src))>;
5241def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
5242                             (v4i32 (REV32v8i16 FPR128:$src))>;
5243def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
5244                             (v4i32 (REV32v16i8 FPR128:$src))>;
5245def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
5246                             (v4i32 (REV64v4i32 FPR128:$src))>;
5247}
5248def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
5249
let Predicates = [IsLE] in {
def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))),
                             (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                              (REV64v8i16 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
                             (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
                             (v8i16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
                             (v8i16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
                             (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
                             (v8i16 (REV32v8i16 FPR128:$src))>;
}

let Predicates = [IsLE] in {
def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))),
                             (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
                                              (REV64v16i8 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
                             (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
                             (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
                             (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
                             (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
                             (v16i8 (REV32v16i8 FPR128:$src))>;
}

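// Extract the high 64-bit half of a 128-bit vector: DUP broadcasts the high
// doubleword into every lane, and taking the D subregister then yields it as
// a 64-bit vector.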
def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;

// Inserting a 64-bit subvector into the first (low) half of a 128-bit vector
// is a subregister copy that needs no instruction.
def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (i32 0)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (i32 0)),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (i32 0)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (i32 0)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
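// For example, widening the v2f32 in d0 to a v4f32 is just a renaming of d0
// to the low half of q0, so no move is emitted.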

// Use pairwise add instructions when summing up the two lanes of a v2f64,
// v2i64, or v2f32.
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
           (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
                     (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
           (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
// vector_extract on 64-bit vectors gets promoted to a 128-bit vector,
// so we match on v4f32 here, not v2f32. This will also catch adding
// the low two lanes of a true v4f32 vector.
def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
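// For example, summing the two lanes of a v2f64 held in q0 selects to a
// single "faddp d0, v0.2d" instead of two extracts and a scalar fadd.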

// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
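// Selecting these directly on FPR64 avoids a round trip through the GPRs;
// e.g. int_aarch64_neon_ushl on i64 values already in a D register becomes a
// single "ushl d0, d1, d2".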

// Tail call return handling. These are all compiler pseudo-instructions,
// so they carry no encoding or other machine-level information.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>;
}
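// These pseudos are turned into an ordinary branch (direct for TCRETURNdi,
// indirect for TCRETURNri) late in code generation, once the epilogue's
// stack adjustments are in place.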

def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;

include "AArch64InstrAtomics.td"