1//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the itinerary class data for the Swift processor.
11//
12//===----------------------------------------------------------------------===//
13
14// ===---------------------------------------------------------------------===//
15// This section contains legacy support for itineraries. This is
16// required until SD and PostRA schedulers are replaced by MachineScheduler.
17
18def SW_DIS0 : FuncUnit; // SW_DIS0-2: presumably dispatch slots (TODO confirm);
19def SW_DIS1 : FuncUnit; // the itineraries below name them in their leading
20def SW_DIS2 : FuncUnit; // InstrStage(s), always with a cycle-advance of 0.
21
22def SW_ALU0 : FuncUnit; // Sole post-dispatch unit of IIC_iMUL16/32, IIC_fpALU*.
23def SW_ALU1 : FuncUnit; // Sole post-dispatch unit of IIC_fpCVT*, IIC_fpMUL32/64.
24def SW_LS   : FuncUnit; // Named by the IIC_iLoad_* / IIC_iStore_* stages.
25def SW_IDIV : FuncUnit; // Occupied for 14 cycles by IIC_iDIV.
26def SW_FDIV : FuncUnit; // Occupied 15 or 30 cycles by IIC_fpDIV* / IIC_fpSQRT*.
27
28// FIXME: Need bypasses.
29// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
30//        IIC_iMOVix2ld better.
31// FIXME: Model the special immediate shifts that are not microcoded.
32// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
33//        to issue on pipe 1?
34// FIXME: Model the pipelined behavior of CMP / TST instructions.
35// FIXME: Better model the microcode stages of multiply instructions, especially
36//        conditional variants.
37// FIXME: Add preload instruction when it is documented.
38// FIXME: Model non-pipelined nature of FP div / sqrt unit.
39
40def SwiftItineraries : ProcessorItineraries<
41  [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
42  //
43  // Move instructions, unconditional
44  InstrItinData<IIC_iMOVi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
45                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
46                              [1]>,
47  InstrItinData<IIC_iMOVr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
48                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
49                              [1]>,
50  InstrItinData<IIC_iMOVsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
51                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
52                              [1]>,
53  InstrItinData<IIC_iMOVsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
54                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
55                              [1]>,
56  InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
57                               InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
58                               InstrStage<1, [SW_ALU0, SW_ALU1]>,
59                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
60                              [2]>,
61  InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
62                                  InstrStage<1, [SW_ALU0, SW_ALU1]>,
63                                  InstrStage<1, [SW_ALU0, SW_ALU1]>,
64                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
65                                 [3]>,
66  InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
67                               InstrStage<1, [SW_ALU0, SW_ALU1]>,
68                               InstrStage<1, [SW_ALU0, SW_ALU1]>,
69                               InstrStage<1, [SW_LS]>],
70                              [5]>,
71  //
72  // MVN instructions
73  InstrItinData<IIC_iMVNi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
74                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
75                              [1]>,
76  InstrItinData<IIC_iMVNr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
77                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
78                              [1]>,
79  InstrItinData<IIC_iMVNsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
80                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
81                              [1]>,
82  InstrItinData<IIC_iMVNsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
83                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
84                              [1]>,
85  //
86  // No operand cycles
87  InstrItinData<IIC_iALUx   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
88                               InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
89  //
90  // Binary Instructions that produce a result
91  InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
92                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
93                            [1, 1]>,
94  InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
95                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
96                            [1, 1, 1]>,
97  InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
98                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
99                            [2, 1, 1]>,
100  InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
101                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
102                            [2, 1, 1]>,
103  InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
104                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
105                            [2, 1, 1, 1]>,
106  //
107  // Bitwise Instructions that produce a result
108  InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
109                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
110                            [1, 1]>,
111  InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
112                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
113                            [1, 1, 1]>,
114  InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
115                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
116                            [2, 1, 1]>,
117  InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
118                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
119                            [2, 1, 1, 1]>,
120  //
121  // Unary Instructions that produce a result
122
123  // CLZ, RBIT, etc.
124  InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
125                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
126                            [1, 1]>,
127
128  // BFC, BFI, UBFX, SBFX
129  InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
130                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
131                            [2, 1]>,
132
133  //
134  // Zero and sign extension instructions
135  InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
136                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
137                            [1, 1]>,
138  InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
139                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
140                            [1, 1, 1]>,
141  InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
142                             InstrStage<1, [SW_ALU0, SW_ALU1]>],
143                            [1, 1, 1, 1]>,
144  //
145  // Compare instructions
146  InstrItinData<IIC_iCMPi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
147                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
148                              [1]>,
149  InstrItinData<IIC_iCMPr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
150                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
151                              [1, 1]>,
152  InstrItinData<IIC_iCMPsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
153                               InstrStage<2, [SW_ALU0, SW_ALU1]>],
154                              [1, 1]>,
155  InstrItinData<IIC_iCMPsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
156                               InstrStage<2, [SW_ALU0, SW_ALU1]>],
157                              [1, 1, 1]>,
158  //
159  // Test instructions
160  InstrItinData<IIC_iTSTi   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
161                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
162                              [1]>,
163  InstrItinData<IIC_iTSTr   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
164                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
165                              [1, 1]>,
166  InstrItinData<IIC_iTSTsi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
167                               InstrStage<2, [SW_ALU0, SW_ALU1]>],
168                              [1, 1]>,
169  InstrItinData<IIC_iTSTsr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
170                               InstrStage<2, [SW_ALU0, SW_ALU1]>],
171                              [1, 1, 1]>,
172  //
173  // Move instructions, conditional
174  // FIXME: Correctly model the extra input dep on the destination.
175  InstrItinData<IIC_iCMOVi  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
176                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
177                              [1]>,
178  InstrItinData<IIC_iCMOVr  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
179                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
180                              [1, 1]>,
181  InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
182                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
183                              [1, 1]>,
184  InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
185                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
186                              [2, 1, 1]>,
187  InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
188                               InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
189                               InstrStage<1, [SW_ALU0, SW_ALU1]>,
190                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
191                              [2]>,
192
193  // Integer multiply pipeline
194  //
195  InstrItinData<IIC_iMUL16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
196                               InstrStage<1, [SW_ALU0]>],
197                              [3, 1, 1]>,
198  InstrItinData<IIC_iMAC16  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
199                               InstrStage<1, [SW_ALU0]>],
200                              [3, 1, 1, 1]>,
201  InstrItinData<IIC_iMUL32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
202                               InstrStage<1, [SW_ALU0]>],
203                              [4, 1, 1]>,
204  InstrItinData<IIC_iMAC32  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
205                               InstrStage<1, [SW_ALU0]>],
206                              [4, 1, 1, 1]>,
207  InstrItinData<IIC_iMUL64  , [InstrStage<1, [SW_DIS0], 0>,
208                               InstrStage<1, [SW_DIS1], 0>,
209                               InstrStage<1, [SW_DIS2], 0>,
210                               InstrStage<1, [SW_ALU0], 1>,
211                               InstrStage<1, [SW_ALU0], 3>,
212                               InstrStage<1, [SW_ALU0]>],
213                              [5, 5, 1, 1]>,
214  InstrItinData<IIC_iMAC64  , [InstrStage<1, [SW_DIS0], 0>,
215                               InstrStage<1, [SW_DIS1], 0>,
216                               InstrStage<1, [SW_DIS2], 0>,
217                               InstrStage<1, [SW_ALU0], 1>,
218                               InstrStage<1, [SW_ALU0], 1>,
219                               InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
220                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
221                              [5, 6, 1, 1]>,
222  //
223  // Integer divide
224  InstrItinData<IIC_iDIV  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
225                             InstrStage<1, [SW_ALU0], 0>,
226                             InstrStage<14, [SW_IDIV]>],
227                            [14, 1, 1]>,
228
229  // Integer load pipeline
230  // FIXME: The timings are some rough approximations
231  //
232  // Immediate offset
233  InstrItinData<IIC_iLoad_i   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
234                                 InstrStage<1, [SW_LS]>],
235                                [3, 1]>,
236  InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
237                                 InstrStage<1, [SW_LS]>],
238                                [3, 1]>,
239  InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
240                                 InstrStage<1, [SW_DIS1], 0>,
241                                 InstrStage<1, [SW_LS], 1>,
242                                 InstrStage<1, [SW_LS]>],
243                                [3, 4, 1]>,
244  //
245  // Register offset
246  InstrItinData<IIC_iLoad_r   , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
247                                 InstrStage<1, [SW_LS]>],
248                                [3, 1, 1]>,
249  InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
250                                 InstrStage<1, [SW_LS]>],
251                                [3, 1, 1]>,
252  InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
253                                 InstrStage<1, [SW_DIS1], 0>,
254                                 InstrStage<1, [SW_DIS2], 0>,
255                                 InstrStage<1, [SW_LS], 1>,
256                                 InstrStage<1, [SW_LS], 3>,
257                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
258                                [3, 4, 1, 1]>,
259  //
260  // Scaled register offset
261  InstrItinData<IIC_iLoad_si  , [InstrStage<1, [SW_DIS0], 0>,
262                                 InstrStage<1, [SW_DIS1], 0>,
263                                 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
264                                 InstrStage<1, [SW_LS]>],
265                                [5, 1, 1]>,
266  InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
267                                 InstrStage<1, [SW_DIS1], 0>,
268                                 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
269                                 InstrStage<1, [SW_LS]>],
270                                [5, 1, 1]>,
271  //
272  // Immediate offset with update
273  InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [SW_DIS0], 0>,
274                                 InstrStage<1, [SW_DIS1], 0>,
275                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
276                                 InstrStage<1, [SW_LS]>],
277                                [3, 1, 1]>,
278  InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
279                                 InstrStage<1, [SW_DIS1], 0>,
280                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
281                                 InstrStage<1, [SW_LS]>],
282                                [3, 1, 1]>,
283  //
284  // Register offset with update
285  InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [SW_DIS0], 0>,
286                                 InstrStage<1, [SW_DIS1], 0>,
287                                 InstrStage<1, [SW_ALU0], 1>,
288                                 InstrStage<1, [SW_LS]>],
289                                [3, 1, 1, 1]>,
290  InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
291                                 InstrStage<1, [SW_DIS1], 0>,
292                                 InstrStage<1, [SW_ALU0], 1>,
293                                 InstrStage<1, [SW_LS]>],
294                                [3, 1, 1, 1]>,
295  InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
296                                 InstrStage<1, [SW_DIS1], 0>,
297                                 InstrStage<1, [SW_DIS2], 0>,
298                                 InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
299                                 InstrStage<1, [SW_LS], 3>,
300                                 InstrStage<1, [SW_LS], 0>,
301                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
302                                [3, 4, 1, 1]>,
303  //
304  // Scaled register offset with update
305  InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
306                                 InstrStage<1, [SW_DIS1], 0>,
307                                 InstrStage<1, [SW_DIS2], 0>,
308                                 InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
309                                 InstrStage<1, [SW_LS], 3>,
310                                 InstrStage<1, [SW_ALU0, SW_ALU1]>],
311                                [5, 3, 1, 1]>,
312  InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
313                                  InstrStage<1, [SW_DIS1], 0>,
314                                  InstrStage<1, [SW_DIS2], 0>,
315                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
316                                  InstrStage<1, [SW_LS], 0>,
317                                  InstrStage<1, [SW_ALU0, SW_ALU1]>],
318                                [5, 3, 1, 1]>,
319  //
320  // Load multiple, def is the 5th operand.
321  // FIXME: This assumes 3 to 4 registers.
322  InstrItinData<IIC_iLoad_m  , [InstrStage<1, [SW_DIS0], 0>,
323                                InstrStage<1, [SW_DIS1], 0>,
324                                InstrStage<1, [SW_DIS2], 0>,
325                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
326                                InstrStage<1, [SW_LS]>],
327                               [1, 1, 1, 1, 3], [], -1>, // dynamic uops
328
329  //
330  // Load multiple + update, defs are the 1st and 5th operands.
331  InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
332                                InstrStage<1, [SW_DIS1], 0>,
333                                InstrStage<1, [SW_DIS2], 0>,
334                                InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
335                                InstrStage<1, [SW_LS], 3>,
336                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
337                               [2, 1, 1, 1, 3], [], -1>, // dynamic uops
338  //
339  // Load multiple plus branch
340  InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
341                                InstrStage<1, [SW_DIS1], 0>,
342                                InstrStage<1, [SW_DIS2], 0>,
343                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
344                                InstrStage<1, [SW_LS]>],
345                               [1, 1, 1, 1, 3], [], -1>, // dynamic uops
346  //
347  // Pop, def is the 3rd operand.
348  InstrItinData<IIC_iPop  ,    [InstrStage<1, [SW_DIS0], 0>,
349                                InstrStage<1, [SW_DIS1], 0>,
350                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
351                                InstrStage<1, [SW_LS]>],
352                               [1, 1, 3], [], -1>, // dynamic uops
353  //
354  // Pop + branch, def is the 3rd operand.
355  InstrItinData<IIC_iPop_Br,   [InstrStage<1, [SW_DIS0], 0>,
356                                InstrStage<1, [SW_DIS1], 0>,
357                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
358                                InstrStage<1, [SW_LS]>],
359                               [1, 1, 3], [], -1>, // dynamic uops
360
361  //
362  // iLoadi + iALUr for t2LDRpci_pic.
363  InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
364                                InstrStage<1, [SW_LS], 3>,
365                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
366                               [4, 1]>,
367
368  // Integer store pipeline
369  //
370  // Immediate offset
371  InstrItinData<IIC_iStore_i  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
372                                 InstrStage<1, [SW_LS]>],
373                                [1, 1]>,
374  InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
375                                 InstrStage<1, [SW_LS]>],
376                                [1, 1]>,
377  InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
378                                 InstrStage<1, [SW_DIS1], 0>,
379                                 InstrStage<1, [SW_DIS2], 0>,
380                                 InstrStage<1, [SW_LS], 0>,
381                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
382                                 InstrStage<1, [SW_LS]>],
383                                [1, 1]>,
384  //
385  // Register offset
386  InstrItinData<IIC_iStore_r  , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
387                                 InstrStage<1, [SW_LS]>],
388                                [1, 1, 1]>,
389  InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
390                                 InstrStage<1, [SW_LS]>],
391                                [1, 1, 1]>,
392  InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
393                                 InstrStage<1, [SW_DIS1], 0>,
394                                 InstrStage<1, [SW_DIS2], 0>,
395                                 InstrStage<1, [SW_LS], 0>,
396                                 InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
397                                 InstrStage<1, [SW_LS]>],
398                                [1, 1, 1]>,
399  //
400  // Scaled register offset
401  InstrItinData<IIC_iStore_si ,  [InstrStage<1, [SW_DIS0], 0>,
402                                  InstrStage<1, [SW_DIS1], 0>,
403                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
404                                  InstrStage<1, [SW_LS]>],
405                                 [1, 1, 1]>,
406  InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
407                                  InstrStage<1, [SW_DIS1], 0>,
408                                  InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
409                                  InstrStage<1, [SW_LS]>],
410                                 [1, 1, 1]>,
411  //
412  // Immediate offset with update
413  InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [SW_DIS0], 0>,
414                                  InstrStage<1, [SW_DIS1], 0>,
415                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
416                                  InstrStage<1, [SW_LS]>],
417                                 [1, 1, 1]>,
418  InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
419                                  InstrStage<1, [SW_DIS1], 0>,
420                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
421                                  InstrStage<1, [SW_LS]>],
422                                 [1, 1, 1]>,
423  //
424  // Register offset with update
425  InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [SW_DIS0], 0>,
426                                  InstrStage<1, [SW_DIS1], 0>,
427                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
428                                  InstrStage<1, [SW_LS]>],
429                                 [1, 1, 1, 1]>,
430  InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
431                                  InstrStage<1, [SW_DIS1], 0>,
432                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
433                                  InstrStage<1, [SW_LS]>],
434                                 [1, 1, 1, 1]>,
435  InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
436                                  InstrStage<1, [SW_DIS1], 0>,
437                                  InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
438                                  InstrStage<1, [SW_LS]>],
439                                 [1, 1, 1, 1]>,
440  //
441  // Scaled register offset with update
442  InstrItinData<IIC_iStore_siu,    [InstrStage<1, [SW_DIS0], 0>,
443                                    InstrStage<1, [SW_DIS1], 0>,
444                                    InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
445                                    InstrStage<1, [SW_LS], 0>,
446                                    InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
447                                   [3, 1, 1, 1]>,
448  InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
449                                    InstrStage<1, [SW_DIS1], 0>,
450                                    InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
451                                    InstrStage<1, [SW_LS], 0>,
452                                    InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
453                                   [3, 1, 1, 1]>,
454  //
455  // Store multiple
456  InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
457                                InstrStage<1, [SW_DIS1], 0>,
458                                InstrStage<1, [SW_DIS2], 0>,
459                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
460                                InstrStage<1, [SW_LS], 1>,
461                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
462                                InstrStage<1, [SW_LS], 1>,
463                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
464                                InstrStage<1, [SW_LS]>],
465                                [], [], -1>, // dynamic uops
466  //
467  // Store multiple + update
468  InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
469                                InstrStage<1, [SW_DIS1], 0>,
470                                InstrStage<1, [SW_DIS2], 0>,
471                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
472                                InstrStage<1, [SW_LS], 1>,
473                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
474                                InstrStage<1, [SW_LS], 1>,
475                                InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
476                                InstrStage<1, [SW_LS]>],
477                               [2], [], -1>, // dynamic uops
478
479  //
480  // Preload
481  InstrItinData<IIC_Preload,   [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
482
483  // Branch
484  //
485  // No delay slots, so the latency of a branch is unimportant.
486  InstrItinData<IIC_Br       , [InstrStage<1, [SW_DIS0], 0>]>,
487
488  // FP Special Register to Integer Register File Move
489  InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
490                              InstrStage<1, [SW_ALU0, SW_ALU1]>],
491                             [1]>,
492  //
493  // Single-precision FP Unary
494  //
495  // Most floating-point moves get issued on ALU0.
496  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
497                               InstrStage<1, [SW_ALU0]>],
498                              [2, 1]>,
499  //
500  // Double-precision FP Unary
501  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
502                               InstrStage<1, [SW_ALU0]>],
503                              [2, 1]>,
504
505  //
506  // Single-precision FP Compare
507  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
508                               InstrStage<1, [SW_ALU0]>],
509                              [1, 1]>,
510  //
511  // Double-precision FP Compare
512  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
513                               InstrStage<1, [SW_ALU0]>],
514                              [1, 1]>,
515  //
516  // Single to Double FP Convert
517  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
518                               InstrStage<1, [SW_ALU1]>],
519                              [4, 1]>,
520  //
521  // Double to Single FP Convert
522  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
523                               InstrStage<1, [SW_ALU1]>],
524                              [4, 1]>,
525
526  //
527  // Single to Half FP Convert
528  InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
529                               InstrStage<1, [SW_DIS1], 0>,
530                               InstrStage<1, [SW_ALU1], 4>,
531                               InstrStage<1, [SW_ALU1]>],
532                              [6, 1]>,
533  //
534  // Half to Single FP Convert
535  InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
536                               InstrStage<1, [SW_ALU1]>],
537                              [4, 1]>,
538
539  //
540  // Single-Precision FP to Integer Convert
541  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
542                               InstrStage<1, [SW_ALU1]>],
543                              [4, 1]>,
544  //
545  // Double-Precision FP to Integer Convert
546  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
547                               InstrStage<1, [SW_ALU1]>],
548                              [4, 1]>,
549  //
550  // Integer to Single-Precision FP Convert
551  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
552                               InstrStage<1, [SW_ALU1]>],
553                              [4, 1]>,
554  //
555  // Integer to Double-Precision FP Convert
556  InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
557                               InstrStage<1, [SW_ALU1]>],
558                              [4, 1]>,
559  //
560  // Single-precision FP ALU
561  InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
562                               InstrStage<1, [SW_ALU0]>],
563                              [2, 1, 1]>,
564  //
565  // Double-precision FP ALU
566  InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
567                               InstrStage<1, [SW_ALU0]>],
568                              [2, 1, 1]>,
569  //
570  // Single-precision FP Multiply
571  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
572                               InstrStage<1, [SW_ALU1]>],
573                              [4, 1, 1]>,
574  //
575  // Double-precision FP Multiply
576  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
577                               InstrStage<1, [SW_ALU1]>],
578                              [6, 1, 1]>,
579  //
580  // Single-precision FP MAC
581  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
582                               InstrStage<1, [SW_ALU1]>],
583                              [8, 1, 1]>,
584  //
585  // Double-precision FP MAC
586  InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
587                               InstrStage<1, [SW_ALU1]>],
588                              [12, 1, 1]>,
589  //
590  // Single-precision Fused FP MAC
591  InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
592                               InstrStage<1, [SW_ALU1]>],
593                              [8, 1, 1]>,
594  //
595  // Double-precision Fused FP MAC
596  InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
597                               InstrStage<1, [SW_ALU1]>],
598                              [12, 1, 1]>,
599  //
600  // Single-precision FP DIV
601  InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
602                               InstrStage<1, [SW_ALU1], 0>,
603                               InstrStage<15, [SW_FDIV]>],
604                              [17, 1, 1]>,
605  //
606  // Double-precision FP DIV
607  InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
608                               InstrStage<1, [SW_ALU1], 0>,
609                               InstrStage<30, [SW_FDIV]>],
610                              [32, 1, 1]>,
611  //
612  // Single-precision FP SQRT
613  InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
614                               InstrStage<1, [SW_ALU1], 0>,
615                               InstrStage<15, [SW_FDIV]>],
616                              [17, 1]>,
617  //
618  // Double-precision FP SQRT
619  InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
620                               InstrStage<1, [SW_ALU1], 0>,
621                               InstrStage<30, [SW_FDIV]>],
622                              [32, 1, 1]>,
623
624  //
625  // Integer to Single-precision Move
626  InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [SW_DIS0], 0>,
627                               InstrStage<1, [SW_DIS1], 0>,
628                               InstrStage<1, [SW_LS], 4>,
629                               InstrStage<1, [SW_ALU0]>],
630                              [6, 1]>,
631  //
632  // Integer to Double-precision Move
633  InstrItinData<IIC_fpMOVID,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
634                               InstrStage<1, [SW_LS]>],
635                              [4, 1]>,
636  //
637  // Single-precision to Integer Move
638  InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
639                               InstrStage<1, [SW_LS]>],
640                              [3, 1]>,
641  //
642  // Double-precision to Integer Move
643  InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [SW_DIS0], 0>,
644                               InstrStage<1, [SW_DIS1], 0>,
645                               InstrStage<1, [SW_LS], 3>,
646                               InstrStage<1, [SW_LS]>],
647                              [3, 4, 1]>,
648  //
649  // Single-precision FP Load
650  InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
651                               InstrStage<1, [SW_LS]>],
652                              [4, 1]>,
653  //
654  // Double-precision FP Load
655  InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
656                               InstrStage<1, [SW_LS]>],
657                              [4, 1]>,
658  //
659  // FP Load Multiple
660  // FIXME: Assumes a single Q register.
661  InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
662                               InstrStage<1, [SW_LS]>],
663                              [1, 1, 1, 4], [], -1>, // dynamic uops
664  //
665  // FP Load Multiple + update
666  // FIXME: Assumes a single Q register.
667  InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
668                               InstrStage<1, [SW_DIS1], 0>,
669                               InstrStage<1, [SW_LS], 4>,
670                               InstrStage<1, [SW_ALU0, SW_ALU1]>],
671                              [2, 1, 1, 1, 4], [], -1>, // dynamic uops
672  //
673  // Single-precision FP Store
674  InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
675                               InstrStage<1, [SW_LS]>],
676                              [1, 1]>,
677  //
678  // Double-precision FP Store
679  InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
680                               InstrStage<1, [SW_LS]>],
681                              [1, 1]>,
682  //
683  // FP Store Multiple
684  // FIXME: Assumes a single Q register.
685  InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
686                               InstrStage<1, [SW_LS]>],
687                              [1, 1, 1], [], -1>, // dynamic uops
688  //
689  // FP Store Multiple + update
690  // FIXME: Assumes a single Q register.
691  InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
692                                InstrStage<1, [SW_DIS1], 0>,
693                                InstrStage<1, [SW_LS], 4>,
694                                InstrStage<1, [SW_ALU0, SW_ALU1]>],
695                               [2, 1, 1, 1], [], -1>, // dynamic uops
696  // NEON
697  //
698  // Double-register Integer Unary
699  InstrItinData<IIC_VUNAiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
700                               InstrStage<1, [SW_ALU0]>],
701                              [4, 1]>,
702  //
703  // Quad-register Integer Unary
704  InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
705                               InstrStage<1, [SW_ALU0]>],
706                              [4, 1]>,
707  //
708  // Double-register Integer Q-Unary
709  InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
710                               InstrStage<1, [SW_ALU0]>],
711                              [4, 1]>,
712  //
  // Quad-register Integer Q-Unary
714  InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
715                               InstrStage<1, [SW_ALU0]>],
716                              [4, 1]>,
717  //
718  // Double-register Integer Binary
719  InstrItinData<IIC_VBINiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
720                               InstrStage<1, [SW_ALU0]>],
721                              [2, 1, 1]>,
722  //
723  // Quad-register Integer Binary
724  InstrItinData<IIC_VBINiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
725                               InstrStage<1, [SW_ALU0]>],
726                              [2, 1, 1]>,
727  //
728  // Double-register Integer Subtract
729  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
730                               InstrStage<1, [SW_ALU0]>],
731                              [2, 1, 1]>,
732  //
733  // Quad-register Integer Subtract
734  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
735                               InstrStage<1, [SW_ALU0]>],
736                              [2, 1, 1]>,
737  //
738  // Double-register Integer Shift
739  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
740                               InstrStage<1, [SW_ALU0]>],
741                              [2, 1, 1]>,
742  //
743  // Quad-register Integer Shift
744  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
745                               InstrStage<1, [SW_ALU0]>],
746                              [2, 1, 1]>,
747  //
748  // Double-register Integer Shift (4 cycle)
749  InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
750                               InstrStage<1, [SW_ALU0]>],
751                              [4, 1, 1]>,
752  //
753  // Quad-register Integer Shift (4 cycle)
754  InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
755                               InstrStage<1, [SW_ALU0]>],
756                              [4, 1, 1]>,
757  //
758  // Double-register Integer Binary (4 cycle)
759  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
760                               InstrStage<1, [SW_ALU0]>],
761                              [4, 1, 1]>,
762  //
763  // Quad-register Integer Binary (4 cycle)
764  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
765                               InstrStage<1, [SW_ALU0]>],
766                              [4, 1, 1]>,
767  //
768  // Double-register Integer Subtract (4 cycle)
769  InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
770                               InstrStage<1, [SW_ALU0]>],
771                              [4, 1, 1]>,
772  //
773  // Quad-register Integer Subtract (4 cycle)
774  InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
775                               InstrStage<1, [SW_ALU0]>],
776                              [4, 1, 1]>,
777
778  //
779  // Double-register Integer Count
780  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
781                               InstrStage<1, [SW_ALU0]>],
782                              [2, 1, 1]>,
783  //
784  // Quad-register Integer Count
785  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
786                               InstrStage<1, [SW_ALU0]>],
787                              [2, 1, 1]>,
788  //
789  // Double-register Absolute Difference and Accumulate
790  InstrItinData<IIC_VABAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
791                               InstrStage<1, [SW_ALU0]>],
792                              [4, 1, 1, 1]>,
793  //
794  // Quad-register Absolute Difference and Accumulate
795  InstrItinData<IIC_VABAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
796                               InstrStage<1, [SW_ALU0]>],
797                              [4, 1, 1, 1]>,
798  //
799  // Double-register Integer Pair Add Long
800  InstrItinData<IIC_VPALiD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
801                               InstrStage<1, [SW_ALU0]>],
802                              [4, 1, 1]>,
803  //
804  // Quad-register Integer Pair Add Long
805  InstrItinData<IIC_VPALiQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
806                               InstrStage<1, [SW_ALU0]>],
807                              [4, 1, 1]>,
808
809  //
810  // Double-register Integer Multiply (.8, .16)
811  InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
812                               InstrStage<1, [SW_ALU1]>],
813                              [4, 1, 1]>,
814  //
815  // Quad-register Integer Multiply (.8, .16)
816  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
817                               InstrStage<1, [SW_ALU1]>],
818                              [4, 1, 1]>,
819
820  //
821  // Double-register Integer Multiply (.32)
822  InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
823                               InstrStage<1, [SW_ALU1]>],
824                              [4, 1, 1]>,
825  //
826  // Quad-register Integer Multiply (.32)
827  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
828                               InstrStage<1, [SW_ALU1]>],
829                              [4, 1, 1]>,
830  //
831  // Double-register Integer Multiply-Accumulate (.8, .16)
832  InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
833                               InstrStage<1, [SW_ALU1]>],
834                              [4, 1, 1, 1]>,
835  //
836  // Double-register Integer Multiply-Accumulate (.32)
837  InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
838                               InstrStage<1, [SW_ALU1]>],
839                              [4, 1, 1, 1]>,
840  //
841  // Quad-register Integer Multiply-Accumulate (.8, .16)
842  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
843                               InstrStage<1, [SW_ALU1]>],
844                              [4, 1, 1, 1]>,
845  //
846  // Quad-register Integer Multiply-Accumulate (.32)
847  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
848                               InstrStage<1, [SW_ALU1]>],
849                              [4, 1, 1, 1]>,
850
851  //
852  // Move
853  InstrItinData<IIC_VMOV,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
854                               InstrStage<1, [SW_ALU0]>],
855                              [2, 1]>,
856  //
857  // Move Immediate
858  InstrItinData<IIC_VMOVImm,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
859                               InstrStage<1, [SW_ALU0]>],
860                              [2]>,
861  //
862  // Double-register Permute Move
863  InstrItinData<IIC_VMOVD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
864                               InstrStage<1, [SW_ALU1]>],
865                              [2, 1]>,
866  //
867  // Quad-register Permute Move
868  InstrItinData<IIC_VMOVQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
869                               InstrStage<1, [SW_ALU1]>],
870                              [2, 1]>,
871  //
872  // Integer to Single-precision Move
873  InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [SW_DIS0], 0>,
874                               InstrStage<1, [SW_DIS1], 0>,
875                               InstrStage<1, [SW_LS], 4>,
876                               InstrStage<1, [SW_ALU0]>],
877                              [6, 1]>,
878  //
879  // Integer to Double-precision Move
880  InstrItinData<IIC_VMOVID ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
881                               InstrStage<1, [SW_LS]>],
882                              [4, 1, 1]>,
883  //
884  // Single-precision to Integer Move
885  InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
886                               InstrStage<1, [SW_LS]>],
887                              [3, 1]>,
888  //
889  // Double-precision to Integer Move
890  InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [SW_DIS0], 0>,
891                               InstrStage<1, [SW_DIS1], 0>,
892                               InstrStage<1, [SW_LS], 3>,
893                               InstrStage<1, [SW_LS]>],
894                              [3, 4, 1]>,
895  //
896  // Integer to Lane Move
897  // FIXME: I think this is correct, but it is not clear from the tuning guide.
898  InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
899                               InstrStage<1, [SW_DIS1], 0>,
900                               InstrStage<1, [SW_LS], 4>,
901                               InstrStage<1, [SW_ALU0]>],
902                              [6, 1]>,
903
904  //
905  // Vector narrow move
906  InstrItinData<IIC_VMOVN,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
907                               InstrStage<1, [SW_ALU1]>],
908                              [2, 1]>,
909  //
910  // Double-register FP Unary
911  // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
912  //        and they issue on a different pipeline.
913  InstrItinData<IIC_VUNAD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
914                               InstrStage<1, [SW_ALU0]>],
915                              [2, 1]>,
916  //
917  // Quad-register FP Unary
918  // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
919  //        and they issue on a different pipeline.
920  InstrItinData<IIC_VUNAQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
921                               InstrStage<1, [SW_ALU0]>],
922                              [2, 1]>,
923  //
924  // Double-register FP Binary
925  // FIXME: We're using this itin for many instructions.
926  InstrItinData<IIC_VBIND,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
927                               InstrStage<1, [SW_ALU0]>],
928                              [4, 1, 1]>,
929
930  //
931  // VPADD, etc.
932  InstrItinData<IIC_VPBIND,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
933                               InstrStage<1, [SW_ALU0]>],
934                              [4, 1, 1]>,
935  //
936  // Double-register FP VMUL
937  InstrItinData<IIC_VFMULD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
938                               InstrStage<1, [SW_ALU1]>],
939                              [4, 1, 1]>,
940  //
941  // Quad-register FP Binary
942  InstrItinData<IIC_VBINQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
943                               InstrStage<1, [SW_ALU0]>],
944                              [4, 1, 1]>,
945  //
946  // Quad-register FP VMUL
947  InstrItinData<IIC_VFMULQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
948                               InstrStage<1, [SW_ALU1]>],
949                              [4, 1, 1]>,
950  //
  // Double-register FP Multiply-Accumulate
952  InstrItinData<IIC_VMACD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
953                               InstrStage<1, [SW_ALU1]>],
954                              [8, 1, 1]>,
955  //
  // Quad-register FP Multiply-Accumulate
957  InstrItinData<IIC_VMACQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
958                               InstrStage<1, [SW_ALU1]>],
959                              [8, 1, 1]>,
960  //
  // Double-register Fused FP Multiply-Accumulate
962  InstrItinData<IIC_VFMACD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
963                               InstrStage<1, [SW_ALU1]>],
964                              [8, 1, 1]>,
965  //
  // Quad-register Fused FP Multiply-Accumulate
967  InstrItinData<IIC_VFMACQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
968                               InstrStage<1, [SW_ALU1]>],
969                              [8, 1, 1]>,
970  //
  // Double-register Reciprocal Step
972  InstrItinData<IIC_VRECSD,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
973                               InstrStage<1, [SW_ALU1]>],
974                              [8, 1, 1]>,
975  //
  // Quad-register Reciprocal Step
977  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
978                               InstrStage<1, [SW_ALU1]>],
979                              [8, 1, 1]>,
980  //
981  // Double-register Permute
982  // FIXME: The latencies are unclear from the documentation.
983  InstrItinData<IIC_VPERMD,   [InstrStage<1, [SW_DIS0], 0>,
984                               InstrStage<1, [SW_DIS1], 0>,
985                               InstrStage<1, [SW_DIS2], 0>,
986                               InstrStage<1, [SW_ALU1], 2>,
987                               InstrStage<1, [SW_ALU1], 2>,
988                               InstrStage<1, [SW_ALU1]>],
989                              [3, 4, 3, 4]>,
990  //
991  // Quad-register Permute
992  // FIXME: The latencies are unclear from the documentation.
993  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [SW_DIS0], 0>,
994                               InstrStage<1, [SW_DIS1], 0>,
995                               InstrStage<1, [SW_DIS2], 0>,
996                               InstrStage<1, [SW_ALU1], 2>,
997                               InstrStage<1, [SW_ALU1], 2>,
998                               InstrStage<1, [SW_ALU1]>],
999                              [3, 4, 3, 4]>,
1000  //
1001  // Quad-register Permute (3 cycle issue on A9)
1002  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [SW_DIS0], 0>,
1003                               InstrStage<1, [SW_DIS1], 0>,
1004                               InstrStage<1, [SW_DIS2], 0>,
1005                               InstrStage<1, [SW_ALU1], 2>,
1006                               InstrStage<1, [SW_ALU1], 2>,
1007                               InstrStage<1, [SW_ALU1]>],
1008                              [3, 4, 3, 4]>,
1009
1010  //
1011  // Double-register VEXT
1012  InstrItinData<IIC_VEXTD,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1013                               InstrStage<1, [SW_ALU1]>],
1014                              [2, 1, 1]>,
1015  //
1016  // Quad-register VEXT
1017  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1018                               InstrStage<1, [SW_ALU1]>],
1019                              [2, 1, 1]>,
1020  //
1021  // VTB
1022  InstrItinData<IIC_VTB1,     [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1023                               InstrStage<1, [SW_ALU1]>],
1024                              [2, 1, 1]>,
1025  InstrItinData<IIC_VTB2,     [InstrStage<1, [SW_DIS0], 0>,
1026                               InstrStage<1, [SW_DIS1], 0>,
1027                               InstrStage<1, [SW_ALU1], 2>,
1028                               InstrStage<1, [SW_ALU1]>],
1029                              [4, 1, 3, 3]>,
1030  InstrItinData<IIC_VTB3,     [InstrStage<1, [SW_DIS0], 0>,
1031                               InstrStage<1, [SW_DIS1], 0>,
1032                               InstrStage<1, [SW_DIS2], 0>,
1033                               InstrStage<1, [SW_ALU1], 2>,
1034                               InstrStage<1, [SW_ALU1], 2>,
1035                               InstrStage<1, [SW_ALU1]>],
1036                              [6, 1, 3, 5, 5]>,
1037  InstrItinData<IIC_VTB4,     [InstrStage<1, [SW_DIS0], 0>,
1038                               InstrStage<1, [SW_DIS1], 0>,
1039                               InstrStage<1, [SW_DIS2], 0>,
1040                               InstrStage<1, [SW_ALU1], 2>,
1041                               InstrStage<1, [SW_ALU1], 2>,
1042                               InstrStage<1, [SW_ALU1], 2>,
1043                               InstrStage<1, [SW_ALU1]>],
1044                              [8, 1, 3, 5, 7, 7]>,
1045  //
1046  // VTBX
1047  InstrItinData<IIC_VTBX1,    [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
1048                               InstrStage<1, [SW_ALU1]>],
1049                              [2, 1, 1]>,
1050  InstrItinData<IIC_VTBX2,    [InstrStage<1, [SW_DIS0], 0>,
1051                               InstrStage<1, [SW_DIS1], 0>,
1052                               InstrStage<1, [SW_ALU1], 2>,
1053                               InstrStage<1, [SW_ALU1]>],
1054                              [4, 1, 3, 3]>,
1055  InstrItinData<IIC_VTBX3,    [InstrStage<1, [SW_DIS0], 0>,
1056                               InstrStage<1, [SW_DIS1], 0>,
1057                               InstrStage<1, [SW_DIS2], 0>,
1058                               InstrStage<1, [SW_ALU1], 2>,
1059                               InstrStage<1, [SW_ALU1], 2>,
1060                               InstrStage<1, [SW_ALU1]>],
1061                              [6, 1, 3, 5, 5]>,
1062  InstrItinData<IIC_VTBX4,    [InstrStage<1, [SW_DIS0], 0>,
1063                               InstrStage<1, [SW_DIS1], 0>,
1064                               InstrStage<1, [SW_DIS2], 0>,
1065                               InstrStage<1, [SW_ALU1], 2>,
1066                               InstrStage<1, [SW_ALU1], 2>,
1067                               InstrStage<1, [SW_ALU1], 2>,
1068                               InstrStage<1, [SW_ALU1]>],
1069                              [8, 1, 3, 5, 7, 7]>
1070]>;
1071
1072// ===---------------------------------------------------------------------===//
// The following definitions describe the simple machine model which
1074// will replace itineraries.
1075
// Machine model for the Swift processor, used for scheduling and for other
// instruction cost heuristics.
def SwiftModel : SchedMachineModel {
  // Legacy itinerary data defined earlier in this file.
  let Itineraries = SwiftItineraries;

  // 3 micro-ops are dispatched per cycle.
  let IssueWidth = 3;
  // Based on NEON renamed registers.
  let MicroOpBufferSize = 45;
  let LoadLatency = 3;
  // A branch direction mispredict.
  let MispredictPenalty = 14;
}
1085
// Swift predicates.
// Holds when TII->isSwiftFastImmShift(MI) is true for the instruction being
// considered. Used by the write variants below that distinguish the kind of
// immediate shift applied to an operand.
def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>;
1088
1089// Swift resource mapping.
1090let SchedModel = SwiftModel in {
1091  // Processor resources.
1092  def SwiftUnitP01 : ProcResource<2>; // ALU unit.
1093  def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit.
1094  def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit.
1095  def SwiftUnitP2 : ProcResource<1>; // LS unit.
1096  def SwiftUnitDiv : ProcResource<1>;
1097
1098  // Generic resource requirements.
1099  def SwiftWriteP0OneCycle : SchedWriteRes<[SwiftUnitP0]>;
1100  def SwiftWriteP0TwoCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 2; }
1101  def SwiftWriteP0FourCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 4; }
1102  def SwiftWriteP0SixCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 6; }
1103  def SwiftWriteP0P1FourCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
1104    let Latency = 4;
1105  }
1106  def SwiftWriteP0P1SixCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> {
1107    let Latency = 6;
1108  }
1109  def SwiftWriteP01OneCycle : SchedWriteRes<[SwiftUnitP01]>;
1110  def SwiftWriteP1TwoCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 2; }
1111  def SwiftWriteP1FourCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 4; }
1112  def SwiftWriteP1SixCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 6; }
1113  def SwiftWriteP1EightCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 8; }
1114  def SwiftWriteP1TwelveCyc : SchedWriteRes<[SwiftUnitP1]> { let Latency = 12; }
1115  def SwiftWriteP01OneCycle2x : WriteSequence<[SwiftWriteP01OneCycle], 2>;
1116  def SwiftWriteP01OneCycle3x : WriteSequence<[SwiftWriteP01OneCycle], 3>;
1117  def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
1118  def SwiftWriteP01ThreeCycleTwoUops : SchedWriteRes<[SwiftUnitP01,
1119                                                      SwiftUnitP01]> {
1120    let Latency = 3;
1121    let NumMicroOps = 2;
1122  }
1123  def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
1124    let Latency = 3;
1125    let NumMicroOps = 3;
1126    let ResourceCycles = [3];
1127  }
  // Writes that occupy the P2 (LS) unit.
  // Plain load without writeback.
  def SwiftWriteP2ThreeCycle : SchedWriteRes<[SwiftUnitP2]> {
    let Latency = 3;
  }
  def SwiftWriteP2FourCycle : SchedWriteRes<[SwiftUnitP2]> {
    let Latency = 4;
  }
  // A store does not write to a register, hence the zero latency.
  def SwiftWriteP2 : SchedWriteRes<[SwiftUnitP2]> {
    let Latency = 0;
  }
  // Defines SwiftWrite1xP2 through SwiftWrite4xP2: SwiftWriteP2 repeated Num
  // times.
  foreach Num = 1-4 in {
    def SwiftWrite#Num#xP2 : WriteSequence<[SwiftWriteP2], Num>;
  }
  // Two one-cycle P01 writes followed by a three-cycle P2 write.
  def SwiftWriteP01OneCycle2x_load : WriteSequence<[SwiftWriteP01OneCycle,
                                                    SwiftWriteP01OneCycle,
                                                    SwiftWriteP2ThreeCycle]>;
  // 4.2.4 Arithmetic and Logical.
  // ALU operation register shifted by immediate variant.
  // NOTE(review): the fast-shift case selects the two-cycle write while the
  // fallback is plain WriteALU, which is declared below with no explicit
  // Latency. SwiftChooseShiftKindP01OneOrTwoCycle does the opposite and gives
  // fast shifts the shorter write. Confirm this pairing is intended.
  def SwiftWriteALUsi : SchedWriteVariant<[
    // lsl #2, lsl #1, or lsr #1.
    SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
    SchedVar<NoSchedPred,             [WriteALU]>
  ]>;
  // When IsPredicatedPred holds: three cycles, two micro-ops on the P01 group.
  // Otherwise: a two-cycle P01 write.
  def SwiftWriteALUsr : SchedWriteVariant<[
    SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>,
    SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]>
  ]>;
  // When IsPredicatedPred holds: three cycles, three micro-ops on P0.
  // Otherwise: a two-cycle P01 write.
  def SwiftWriteALUSsr : SchedWriteVariant<[
    SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>,
    SchedVar<NoSchedPred,      [SwiftWriteP01TwoCycle]>
  ]>;
  // Read variant paired with the writes above: a read advance of 2 when
  // IsPredicatedPred holds, no advance otherwise.
  def SwiftReadAdvanceALUsr : SchedReadVariant<[
    SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>,
    SchedVar<NoSchedPred,      [NoReadAdvance]>
  ]>;
  // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
  // AND,BIC,EOR,ORN,ORR
  // CLZ,RBIT,REV,REV16,REVSH,PKH
  // Bind the generic ALU write/read types to the Swift-specific definitions.
  def : WriteRes<WriteALU, [SwiftUnitP01]>;
  def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
  def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
  def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
  def : ReadAdvance<ReadALU, 0>;
  def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
1173
1174
  // One-cycle P01 write when IsFastImmShiftSwiftPred holds, two-cycle P01
  // write otherwise.
  def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[
    SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01OneCycle]>,
    SchedVar<NoSchedPred,             [SwiftWriteP01TwoCycle]>
  ]>;

  // 4.2.5 Integer comparison
  // WriteCMP uses the P01 group. WriteCMPsi picks its write by shift kind;
  // WriteCMPsr always takes the two-cycle P01 write.
  def : WriteRes<WriteCMP, [SwiftUnitP01]>;
  def : SchedAlias<WriteCMPsi, SwiftChooseShiftKindP01OneOrTwoCycle>;
  def : SchedAlias<WriteCMPsr, SwiftWriteP01TwoCycle>;
1184
  // 4.2.6 Shift, Move
  // Shift
  //  ASR,LSL,ROR,RRX
  //  MOV(register-shiftedregister)  MVN(register-shiftedregister)
  // Move
  //  MOV,MVN
  //  MOVT
  // Sign/Zero extension
  // Instructions matching these name patterns take a single one-cycle P01
  // write.
  def : InstRW<[SwiftWriteP01OneCycle],
               (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
                          "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH",
                          "t2UXTB16")>;
  // Pseudo instructions.
  // Modelled as sequences of one-cycle P01 writes: two, three, or two followed
  // by a three-cycle P2 write for the "_ldr" forms.
  def : InstRW<[SwiftWriteP01OneCycle2x],
        (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi32imm",
                   "t2MOVi32imm", "t2MOV_ga_dyn")>;
  def : InstRW<[SwiftWriteP01OneCycle3x],
        (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel", "t2MOVi16_ga_pcrel")>;
  def : InstRW<[SwiftWriteP01OneCycle2x_load],
        (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
1205
  // Two back-to-back SwiftWriteP0OneCycle writes.
  // NOTE(review): "Cyle" in the name looks like a typo for "Cycle"; it is left
  // unchanged here because the name is referenced by other records.
  def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;

  // When IsPredicatedPred holds, use the two-write sequence above; otherwise a
  // single one-cycle P0 write.
  def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[
    SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>,
    SchedVar<NoSchedPred,     [ SwiftWriteP0OneCycle ]>
  ]>;

  // 4.2.7 Select
  // SEL
  def : InstRW<[SwiftPredP0OneOrTwoCycle], (instregex "SEL", "t2SEL")>;
1216
1217  // 4.2.8 Bitfield
1218  // BFI,BFC, SBFX,UBFX
1219  def : InstRW< [SwiftWriteP01TwoCycle],
1220        (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
1221        "(t|t2)UBFX", "(t|t2)SBFX")>;
1222
1223  // 4.2.9 Saturating arithmetic
1224  def : InstRW< [SwiftWriteP01TwoCycle],
1225        (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
1226        "USAT16", "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
1227        "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
1228        "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
1229        "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
1230        "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>;
1231
  // 4.2.10 Parallel Arithmetic
  // NOTE(review): the "Not flag setting" / "Flag setting" labels below look
  // swapped relative to the instruction lists (the plain SADD8/UADD8 family
  // versus the halving SHADD8/UHADD8 family) -- confirm against the tuning
  // guide before relying on them.
  // Not flag setting.
  // Uses the SwiftWriteALUsr variant, so the write depends on IsPredicatedPred.
  def : InstRW< [SwiftWriteALUsr],
        (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
        "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
        "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
        "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
  // Flag setting.
  // Always a two-cycle write on the P01 group.
  def : InstRW< [SwiftWriteP01TwoCycle],
       (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
       "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
       "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
       "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
       "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
       "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
1247
  // 4.2.11 Sum of Absolute Difference
  // Both forms take a four-cycle write that occupies P0 and P1. The
  // accumulating form additionally lists three reads, the last one with a read
  // advance of 2.
  // NOTE(review): unlike the neighbouring sections there are no "t2"-prefixed
  // patterns here -- confirm whether the Thumb2 forms should be covered too.
  def : InstRW< [SwiftWriteP0P1FourCycle], (instregex "USAD8") >;
  def : InstRW<[SwiftWriteP0P1FourCycle, ReadALU, ReadALU, SchedReadAdvance<2>],
        (instregex "USADA8")>;
1252
  // 4.2.12 Integer Multiply (32-bit result)
  // Two sources.
  // Four-cycle write on P0.
  // NOTE(review): "SMUSDXi" does not follow the naming of the other patterns
  // (compare "SMUADX" in section 4.2.13) -- confirm an instruction with that
  // name exists.
  def : InstRW< [SwiftWriteP0FourCycle],
        (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
        "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
        "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
        "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
1260
1261  def SwiftWriteP0P01FiveCycleTwoUops :
1262      SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]>  {
1263    let Latency = 5;
1264  }
1265
  // When IsPredicatedPred holds: the five-cycle P0 + P01 write. Otherwise: the
  // four-cycle P0 write.
  def SwiftPredP0P01FourFiveCycle : SchedWriteVariant<[
    SchedVar<IsPredicatedPred, [ SwiftWriteP0P01FiveCycleTwoUops ]>,
    SchedVar<NoSchedPred,      [ SwiftWriteP0FourCycle ]>
  ]>;

  // Read variant: a read advance of 4 when IsPredicatedPred holds, a plain
  // ReadALU otherwise.
  def SwiftReadAdvanceFourCyclesPred : SchedReadVariant<[
     SchedVar<IsPredicatedPred, [SchedReadAdvance<4>]>,
     SchedVar<NoSchedPred,      [ReadALU]>
  ]>;

  // Multiply accumulate, three sources
  // The first two reads are ReadALU; the third uses the predicated read
  // variant above.
  def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
                 SwiftReadAdvanceFourCyclesPred],
        (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
        "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS",
        "t2SMMLSR")>;
1282
1283  // 4.2.13 Integer Multiply (32-bit result, Q flag)
1284  def : InstRW< [SwiftWriteP0FourCycle],
1285        (instregex "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX")>;
1286  def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU,
1287                 SwiftReadAdvanceFourCyclesPred],
1288        (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
1289        "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
1290        "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT")>;
1291  def : InstRW< [SwiftPredP0P01FourFiveCycle],
1292        (instregex "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX")>;
1293
1294  def SwiftP0P0P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
1295    let Latency = 5;
1296    let NumMicroOps = 3;
1297    let ResourceCycles = [2, 1];
1298  }
1299  def SwiftWrite1Cycle : SchedWriteRes<[]> {
1300    let Latency = 1;
1301    let NumMicroOps = 0;
1302  }
1303  def SwiftWrite5Cycle : SchedWriteRes<[]> {
1304    let Latency = 5;
1305    let NumMicroOps = 0;
1306  }
1307  def SwiftWrite6Cycle : SchedWriteRes<[]> {
1308    let Latency = 6;
1309    let NumMicroOps = 0;
1310  }
1311
1312  // 4.2.14 Integer Multiply, Long
1313  def : InstRW< [SwiftP0P0P01FiveCycle, SwiftWrite5Cycle],
1314        (instregex "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$")>;
1315
1316  def Swift2P03P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> {
1317    let Latency = 7;
1318    let NumMicroOps = 5;
1319    let ResourceCycles = [2, 3];
1320  }
1321
1322  // 4.2.15 Integer Multiply Accumulate, Long
1323  // 4.2.16 Integer Multiply Accumulate, Dual
1324  // 4.2.17 Integer Multiply Accumulate Accumulate, Long
1325  // We are being a bit inaccurate here.
1326  def : InstRW< [SwiftWrite5Cycle, Swift2P03P01FiveCycle, ReadALU, ReadALU,
1327                 SchedReadAdvance<4>, SchedReadAdvance<3>],
1328        (instregex "SMLALS", "UMLALS", "SMLAL", "UMLAL", "MLALBB", "SMLALBT",
1329        "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
1330        "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2MLALBB", "t2SMLALBT",
1331        "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX",
1332        "t2UMAAL")>;
1333
1334  def SwiftDiv : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
1335    let NumMicroOps = 1;
1336    let Latency = 14;
1337    let ResourceCycles = [1, 14];
1338  }
1339  // 4.2.18 Integer Divide
1340  def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
1341  def : InstRW <[SwiftDiv],
1342        (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
1343
1344  // 4.2.19 Integer Load Single Element
1345  // 4.2.20 Integer Load Signextended
1346  def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
1347    let Latency = 3;
1348    let NumMicroOps = 2;
1349  }
1350  def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
1351    let Latency = 4;
1352    let NumMicroOps = 2;
1353  }
1354  def SwiftWriteP2P01P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01,
1355                                                   SwiftUnitP01]> {
1356    let Latency = 4;
1357    let NumMicroOps = 3;
1358  }
1359  def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]> {
1360    let Latency = 3;
1361    let NumMicroOps = 2;
1362  }
1363  def SwiftWriteP2P2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2,
1364                                                   SwiftUnitP01]> {
1365    let Latency = 3;
1366    let NumMicroOps = 3;
1367  }
1368  def SwiftWrBackOne : SchedWriteRes<[]> {
1369    let Latency = 1;
1370    let NumMicroOps = 0;
1371  }
1372  def SwiftWriteLdFour : SchedWriteRes<[]> {
1373    let Latency = 4;
1374    let NumMicroOps = 0;
1375  }
1376   // Not accurate.
1377  def : InstRW<[SwiftWriteP2ThreeCycle],
1378        (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
1379        "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "tLDR[BH](r|i|spi|pci|pciASM)",
1380        "tLDR(r|i|spi|pci|pciASM)")>;
1381  def : InstRW<[SwiftWriteP2ThreeCycle],
1382        (instregex "LDRH$",  "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>;
1383  def : InstRW<[SwiftWriteP2P01FourCyle],
1384        (instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
1385        "t2LDRpci_pic", "tLDRS(B|H)")>;
1386  def : InstRW<[SwiftWriteP2P01ThreeCycle,  SwiftWrBackOne],
1387        (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
1388        "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
1389        "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T")>;
1390  def : InstRW<[SwiftWriteP2P01P01FourCycle, SwiftWrBackOne],
1391        (instregex "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
1392        "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T")>;
1393
1394  // 4.2.21 Integer Dual Load
1395  // Not accurate.
1396  def : InstRW<[SwiftWriteP2P2ThreeCycle, SwiftWriteLdFour],
1397        (instregex "t2LDRDi8", "LDRD$")>;
1398  def : InstRW<[SwiftWriteP2P2P01ThreeCycle, SwiftWriteLdFour, SwiftWrBackOne],
1399        (instregex "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
1400
1401  // 4.2.22 Integer Load, Multiple
1402  // NumReg = 1 .. 16
1403  foreach Lat = 3-25 in {
1404    def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> {
1405      let Latency = Lat;
1406    }
1407    def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> {
1408      let Latency = Lat;
1409      let NumMicroOps = 0;
1410    }
1411  }
1412  // Predicate.
1413  foreach NumAddr = 1-16 in {
1414    def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
1415  }
1416  def SwiftWriteLDMAddrNoWB : SchedWriteRes<[SwiftUnitP01]> { let Latency = 0; }
1417  def SwiftWriteLDMAddrWB : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>;
1418  def SwiftWriteLM : SchedWriteVariant<[
1419    SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy]>,
1420    SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1421                                SwiftWriteLM5Cy]>,
1422    SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1423                                SwiftWriteLM5Cy, SwiftWriteLM6Cy]>,
1424    SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1425                                SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1426                                SwiftWriteLM7Cy]>,
1427    SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1428                                SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1429                                SwiftWriteLM7Cy, SwiftWriteLM8Cy]>,
1430    SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1431                                SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1432                                SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1433                                SwiftWriteLM9Cy]>,
1434    SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1435                                SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1436                                SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1437                                SwiftWriteLM9Cy, SwiftWriteLM10Cy]>,
1438    SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1439                                SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1440                                SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1441                                SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1442                                SwiftWriteLM11Cy]>,
1443    SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1444                                SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1445                                SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1446                                SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1447                                SwiftWriteLM11Cy, SwiftWriteLM12Cy]>,
1448    SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1449                                SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1450                                SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1451                                SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1452                                SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1453                                SwiftWriteLM13Cy]>,
1454    SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1455                                SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1456                                SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1457                                SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1458                                SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1459                                SwiftWriteLM13Cy, SwiftWriteLM14Cy]>,
1460    SchedVar<SwiftLMAddr13Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1461                                SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1462                                SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1463                                SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1464                                SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1465                                SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1466                                SwiftWriteLM15Cy]>,
1467    SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1468                                SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1469                                SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1470                                SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1471                                SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1472                                SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1473                                SwiftWriteLM15Cy, SwiftWriteLM16Cy]>,
1474    SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1475                                SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1476                                SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1477                                SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1478                                SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1479                                SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1480                                SwiftWriteLM15Cy, SwiftWriteLM16Cy,
1481                                SwiftWriteLM17Cy]>,
1482    SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1483                                SwiftWriteLM5Cy, SwiftWriteLM6Cy,
1484                                SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1485                                SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1486                                SwiftWriteLM11Cy, SwiftWriteLM12Cy,
1487                                SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1488                                SwiftWriteLM15Cy, SwiftWriteLM16Cy,
1489                                SwiftWriteLM17Cy, SwiftWriteLM18Cy]>,
1490    // Unknow number of registers, just use resources for two registers.
1491    SchedVar<NoSchedPred,      [SwiftWriteLM3Cy, SwiftWriteLM4Cy,
1492                                SwiftWriteLM5CyNo, SwiftWriteLM6CyNo,
1493                                SwiftWriteLM7CyNo, SwiftWriteLM8CyNo,
1494                                SwiftWriteLM9CyNo, SwiftWriteLM10CyNo,
1495                                SwiftWriteLM11CyNo, SwiftWriteLM12CyNo,
1496                                SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
1497                                SwiftWriteLM15CyNo, SwiftWriteLM16CyNo,
1498                                SwiftWriteLM17CyNo, SwiftWriteLM18CyNo]>
1499
1500  ]> { let Variadic=1; }
1501
1502  def : InstRW<[SwiftWriteLM, SwiftWriteLDMAddrNoWB],
1503        (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
1504        "(t|sys)LDM(IA|DA|DB|IB)$")>;
1505  def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM],
1506        (instregex /*"t2LDMIA_RET", "tLDMIA_RET", "LDMIA_RET",*/
1507        "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
1508  def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM, SwiftWriteP1TwoCycle],
1509        (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
1510  // 4.2.23 Integer Store, Single Element
1511  def : InstRW<[SwiftWriteP2],
1512        (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX",
1513        "t2STR(i12|i8|s)$", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
1514
1515  def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2],
1516        (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
1517        "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
1518        "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
1519        "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
1520
1521  // 4.2.24 Integer Store, Dual
1522  def : InstRW<[SwiftWriteP2, SwiftWriteP2, SwiftWriteP01OneCycle],
1523        (instregex "STRD$", "t2STRDi8")>;
1524  def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2, SwiftWriteP2,
1525                SwiftWriteP01OneCycle],
1526        (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
1527
1528  // 4.2.25 Integer Store, Multiple
1529  def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
1530    let Latency = 0;
1531    let NumMicroOps = 2;
1532  }
1533  foreach NumAddr = 1-16 in {
1534     def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>;
1535  }
1536  def SwiftWriteSTM : SchedWriteVariant<[
1537    SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM2]>,
1538    SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM3]>,
1539    SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM4]>,
1540    SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM5]>,
1541    SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM6]>,
1542    SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM7]>,
1543    SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM8]>,
1544    SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM9]>,
1545    SchedVar<SwiftLMAddr10Pred,[SwiftWriteSTM10]>,
1546    SchedVar<SwiftLMAddr11Pred,[SwiftWriteSTM11]>,
1547    SchedVar<SwiftLMAddr12Pred,[SwiftWriteSTM12]>,
1548    SchedVar<SwiftLMAddr13Pred,[SwiftWriteSTM13]>,
1549    SchedVar<SwiftLMAddr14Pred,[SwiftWriteSTM14]>,
1550    SchedVar<SwiftLMAddr15Pred,[SwiftWriteSTM15]>,
1551    SchedVar<SwiftLMAddr16Pred,[SwiftWriteSTM16]>,
1552    // Unknow number of registers, just use resources for two registers.
1553    SchedVar<NoSchedPred,      [SwiftWriteSTM2]>
1554  ]>;
1555  def : InstRW<[SwiftWriteSTM],
1556        (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
1557  def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteSTM],
1558        (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
1559        "PUSH", "tPUSH")>;
1560
1561  // 4.2.26 Branch
1562  def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; }
1563  def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; }
1564  def : WriteRes<WriteBrTbl, [SwiftUnitP1, SwiftUnitP2]> { let Latency = 0; }
1565
1566  // 4.2.27 Not issued
1567  def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
1568  def : InstRW<[WriteNoop], (instregex "t2IT", "IT", "NOP")>;
1569
1570  // 4.2.28 Advanced SIMD, Integer, 2 cycle
1571  def : InstRW<[SwiftWriteP0TwoCycle],
1572        (instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL",
1573                   "VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi",
1574                   "VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
1575                   "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL", "VQSHLU", "VBIF",
1576                   "VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
1577
1578  def : InstRW<[SwiftWriteP1TwoCycle],
1579        (instregex "VEXT", "VREV16", "VREV32", "VREV64")>;
1580
1581  // 4.2.29 Advanced SIMD, Integer, 4 cycle
1582  // 4.2.30 Advanced SIMD, Integer with Accumulate
1583  def : InstRW<[SwiftWriteP0FourCycle],
1584        (instregex "VABA", "VABAL", "VPADAL", "VRSRA", "VSRA", "VACGE", "VACGT",
1585        "VACLE", "VACLT", "VCEQ", "VCGE", "VCGT", "VCLE", "VCLT", "VRSHL",
1586        "VQRSHL", "VRSHR(u|s)", "VABS(f|v)", "VQABS", "VQNEG", "VQADD",
1587        "VQSUB")>;
1588  def : InstRW<[SwiftWriteP1FourCycle],
1589        (instregex "VRECPE", "VRSQRTE")>;
1590
1591  // 4.2.31 Advanced SIMD, Add and Shift with Narrow
1592  def : InstRW<[SwiftWriteP0P1FourCycle],
1593        (instregex "VADDHN", "VSUBHN", "VSHRN")>;
1594  def : InstRW<[SwiftWriteP0P1SixCycle],
1595        (instregex "VRADDHN", "VRSUBHN", "VRSHRN", "VQSHRN", "VQSHRUN",
1596                   "VQRSHRN", "VQRSHRUN")>;
1597
1598  // 4.2.32 Advanced SIMD, Vector Table Lookup
1599  foreach Num = 1-4 in {
1600    def SwiftWrite#Num#xP1TwoCycle : WriteSequence<[SwiftWriteP1TwoCycle], Num>;
1601  }
1602  def : InstRW<[SwiftWrite1xP1TwoCycle],
1603        (instregex "VTB(L|X)1")>;
1604  def : InstRW<[SwiftWrite2xP1TwoCycle],
1605        (instregex "VTB(L|X)2")>;
1606  def : InstRW<[SwiftWrite3xP1TwoCycle],
1607        (instregex "VTB(L|X)3")>;
1608  def : InstRW<[SwiftWrite4xP1TwoCycle],
1609        (instregex "VTB(L|X)4")>;
1610
1611  // 4.2.33 Advanced SIMD, Transpose
1612  def : InstRW<[SwiftWriteP1FourCycle, SwiftWriteP1FourCycle,
1613                SwiftWriteP1TwoCycle/*RsrcOnly*/, SchedReadAdvance<2>],
1614        (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
1615
1616  // 4.2.34 Advanced SIMD and VFP, Floating Point
1617  def : InstRW<[SwiftWriteP0TwoCycle], (instregex "VABS(S|D)$", "VNEG(S|D)$")>;
1618  def : InstRW<[SwiftWriteP0FourCycle],
1619        (instregex "VCMP(D|S|ZD|ZS)$", "VCMPE(D|S|ZD|ZS)")>;
1620  def : InstRW<[SwiftWriteP0FourCycle],
1621        (instregex "VADD(S|f)", "VSUB(S|f)", "VABD", "VPADDf", "VMAX", "VMIN", "VPMAX",
1622                   "VPMIN")>;
1623  def : InstRW<[SwiftWriteP0SixCycle], (instregex "VADDD$", "VSUBD$")>;
1624  def : InstRW<[SwiftWriteP1EightCycle], (instregex "VRECPS", "VRSQRTS")>;
1625
1626  // 4.2.35 Advanced SIMD and VFP, Multiply
1627  def : InstRW<[SwiftWriteP1FourCycle],
1628        (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
1629                   "VMULL", "VQDMULL")>;
1630  def : InstRW<[SwiftWriteP1SixCycle],
1631        (instregex "VMULD", "VNMULD")>;
1632  def : InstRW<[SwiftWriteP1FourCycle],
1633        (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
1634        "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
1635  def : InstRW<[SwiftWriteP1EightCycle], (instregex "VFMAfd", "VFMSfd")>;
1636  def : InstRW<[SwiftWriteP1TwelveCyc], (instregex "VFMAfq", "VFMSfq")>;
1637
1638  // 4.2.36 Advanced SIMD and VFP, Convert
1639  def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
1640  // Fixpoint conversions.
1641  def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }
1642
1643  // 4.2.37 Advanced SIMD and VFP, Move
1644  def : InstRW<[SwiftWriteP0TwoCycle],
1645        (instregex "VMOVv", "VMOV(S|D)$", "VMOV(S|D)cc",
1646                   "VMVNv", "VMVN(d|q)", "VMVN(S|D)cc",
1647                   "FCONST(D|S)")>;
1648  def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VMOVN", "VMOVL")>;
1649  def : InstRW<[WriteSequence<[SwiftWriteP0FourCycle, SwiftWriteP1TwoCycle]>],
1650        (instregex "VQMOVN")>;
1651  def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VDUPLN", "VDUPf")>;
1652  def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>],
1653        (instregex "VDUP(8|16|32)")>;
1654  def : InstRW<[SwiftWriteP2ThreeCycle], (instregex "VMOVRS$")>;
1655  def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP0TwoCycle]>],
1656        (instregex "VMOVSR$", "VSETLN")>;
1657  def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2FourCycle],
1658        (instregex "VMOVRR(D|S)$")>;
1659  def : InstRW<[SwiftWriteP2FourCycle], (instregex "VMOVDRR$")>;
1660  def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>,
1661                WriteSequence<[SwiftWrite1Cycle, SwiftWriteP2FourCycle,
1662                               SwiftWriteP1TwoCycle]>],
1663                (instregex "VMOVSRR$")>;
1664  def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle]>],
1665        (instregex "VGETLN(u|i)")>;
1666  def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle,
1667                               SwiftWriteP01OneCycle]>],
1668        (instregex "VGETLNs")>;
1669
1670  // 4.2.38 Advanced SIMD and VFP, Move FPSCR
1671  // Serializing instructions.
1672  def SwiftWaitP0For15Cy : SchedWriteRes<[SwiftUnitP0]> {
1673    let Latency = 15;
1674    let ResourceCycles = [15];
1675  }
1676  def SwiftWaitP1For15Cy : SchedWriteRes<[SwiftUnitP1]> {
1677    let Latency = 15;
1678    let ResourceCycles = [15];
1679  }
1680  def SwiftWaitP2For15Cy : SchedWriteRes<[SwiftUnitP2]> {
1681    let Latency = 15;
1682    let ResourceCycles = [15];
1683  }
1684  def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
1685        (instregex "VMRS")>;
1686  def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy],
1687        (instregex "VMSR")>;
1688  // Not serializing.
1689  def : InstRW<[SwiftWriteP0TwoCycle], (instregex "FMSTAT")>;
1690
1691  // 4.2.39 Advanced SIMD and VFP, Load Single Element
1692  def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDRD$", "VLDRS$")>;
1693
1694  // 4.2.40 Advanced SIMD and VFP, Store Single Element
1695  def : InstRW<[SwiftWriteLM4Cy], (instregex "VSTRD$", "VSTRS$")>;
1696
1697  // 4.2.41 Advanced SIMD and VFP, Load Multiple
1698  // 4.2.42 Advanced SIMD and VFP, Store Multiple
1699
1700  // Resource requirement for permuting, just reserves the resources.
1701  foreach Num = 1-28 in {
1702    def SwiftVLDMPerm#Num : SchedWriteRes<[SwiftUnitP1]> {
1703      let Latency = 0;
1704      let NumMicroOps = Num;
1705      let ResourceCycles = [Num];
1706    }
1707  }
1708
1709  // Pre RA pseudos - load/store to a Q register as a D register pair.
1710  def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDMQIA$", "VSTMQIA$")>;
1711
1712  // Post RA not modelled accurately. We assume that register use of width 64
1713  // bit maps to a D register, 128 maps to a Q register. Not all different kinds
1714  // are accurately represented.
1715  def SwiftWriteVLDM : SchedWriteVariant<[
1716    // Load of one S register.
1717    SchedVar<SwiftLMAddr1Pred, [SwiftWriteLM4Cy]>,
1718    // Load of one D register.
1719    SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo]>,
1720    // Load of 3 S register.
1721    SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1722                                SwiftWriteLM13CyNo, SwiftWriteP01OneCycle,
1723                                SwiftVLDMPerm3]>,
1724    // Load of a Q register (not necessarily true). We should not be mapping to
1725    // 4 S registers, either.
1726    SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo,
1727                                SwiftWriteLM4CyNo, SwiftWriteLM4CyNo]>,
1728    // Load of 5 S registers.
1729    SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1730                                SwiftWriteLM13CyNo, SwiftWriteLM14CyNo,
1731                                SwiftWriteLM17CyNo,  SwiftWriteP01OneCycle,
1732                                SwiftVLDMPerm5]>,
1733    // Load of 3 D registers. (Must also be able to handle s register list -
1734    // though, not accurate)
1735    SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1736                                SwiftWriteLM10Cy, SwiftWriteLM14CyNo,
1737                                SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
1738                                SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
1739    // Load of 7 S registers.
1740    SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1741                                SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
1742                                SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
1743                                SwiftWriteLM21CyNo, SwiftWriteP01OneCycle,
1744                                SwiftVLDMPerm7]>,
1745    // Load of two Q registers.
1746    SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1747                                SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
1748                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1749                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1750                                SwiftWriteP01OneCycle,  SwiftVLDMPerm2]>,
1751    // Load of 9 S registers.
1752    SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1753                                SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
1754                                SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
1755                                SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1756                                SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
1757                                SwiftVLDMPerm9]>,
1758    // Load of 5 D registers.
1759    SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1760                                SwiftWriteLM10Cy, SwiftWriteLM14Cy,
1761                                SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
1762                                SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
1763                                SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
1764                                SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
1765    // Inaccurate: reuse describtion from 9 S registers.
1766    SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1767                                SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
1768                                SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
1769                                SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1770                                SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1771                                SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
1772                                SwiftVLDMPerm9]>,
1773    // Load of three Q registers.
1774    SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1775                                SwiftWriteLM11Cy, SwiftWriteLM11Cy,
1776                                SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
1777                                SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
1778                                SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
1779                                SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
1780                                SwiftWriteP01OneCycle, SwiftVLDMPerm3]>,
1781    // Inaccurate: reuse describtion from 9 S registers.
1782    SchedVar<SwiftLMAddr13Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1783                                SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
1784                                SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
1785                                SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1786                                SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1787                                SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1788                                SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
1789                                SwiftVLDMPerm9]>,
1790    // Load of 7 D registers inaccurate.
1791    SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1792                                SwiftWriteLM10Cy, SwiftWriteLM14Cy,
1793                                SwiftWriteLM14Cy, SwiftWriteLM14CyNo,
1794                                SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
1795                                SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
1796                                SwiftWriteLM14CyNo,  SwiftWriteLM14CyNo,
1797                                SwiftWriteP01OneCycle, SwiftVLDMPerm7]>,
1798    SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
1799                                SwiftWriteLM13Cy, SwiftWriteLM14Cy,
1800                                SwiftWriteLM17Cy, SwiftWriteLM18CyNo,
1801                                SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1802                                SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1803                                SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1804                                SwiftWriteLM21CyNo, SwiftWriteLM22CyNo,
1805                                SwiftWriteLM25CyNo, SwiftWriteP01OneCycle,
1806                                SwiftVLDMPerm9]>,
1807    // Load of 4 Q registers.
1808    SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM7Cy, SwiftWriteLM10Cy,
1809                                SwiftWriteLM11Cy, SwiftWriteLM14Cy,
1810                                SwiftWriteLM15Cy, SwiftWriteLM18CyNo,
1811                                SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1812                                SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1813                                SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1814                                SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1815                                SwiftWriteLM19CyNo, SwiftWriteLM22CyNo,
1816                                SwiftWriteP01OneCycle, SwiftVLDMPerm4]>,
1817    // Unknow number of registers, just use resources for two registers.
1818    SchedVar<NoSchedPred,      [SwiftWriteLM7Cy, SwiftWriteLM8Cy,
1819                                SwiftWriteLM13Cy, SwiftWriteLM13CyNo,
1820                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1821                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1822                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1823                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1824                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1825                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1826                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1827                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1828                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1829                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1830                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1831                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1832                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1833                                SwiftWriteLM13CyNo, SwiftWriteLM13CyNo,
1834                                SwiftWriteP01OneCycle,  SwiftVLDMPerm2]>
1835  ]> { let Variadic = 1; }
1836
1837  def : InstRW<[SwiftWriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
1838
1839  def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVLDM],
1840        (instregex "VLDM[SD](IA|DB)_UPD$")>;
1841
  // Scheduling write for VSTM. Each SchedVar pairs a SwiftLMAddr<N>Pred
  // predicate with the SwiftWriteSTM<M> write to use; the last entry uses
  // NoSchedPred. The predicates and store writes are defined elsewhere in this
  // file (N presumably tracks the number of words stored -- TODO confirm).
  // The variant is marked Variadic.
  def SwiftWriteVSTM : SchedWriteVariant<[
    // One S register.
    SchedVar<SwiftLMAddr1Pred, [SwiftWriteSTM1]>,
    // One D register.
    SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM1]>,
    // Three S registers.
    SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM4]>,
    // Assume one Q register.
    SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM1]>,
    SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM6]>,
    // Assume three D registers.
    SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM4]>,
    SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM8]>,
    // Assume two Q registers.
    SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM3]>,
    SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM10]>,
    // Assume 5 D registers.
    SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM6]>,
    SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM12]>,
    // Assume three Q registers.
    SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM4]>,
    SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM14]>,
    // Assume 7 D registers.
    SchedVar<SwiftLMAddr14Pred, [SwiftWriteSTM8]>,
    SchedVar<SwiftLMAddr15Pred, [SwiftWriteSTM16]>,
    // Assume four Q registers.
    SchedVar<SwiftLMAddr16Pred, [SwiftWriteSTM5]>,
    // Final entry (NoSchedPred): assume two Q registers.
    SchedVar<NoSchedPred, [SwiftWriteSTM3]>
  ]> { let Variadic = 1; }
1872
1873  def : InstRW<[SwiftWriteVSTM], (instregex "VSTM[SD](IA|DB)$")>;
1874
1875  def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVSTM],
1876        (instregex "VSTM[SD](IA|DB)_UPD")>;
1877
1878  // 4.2.43 Advanced SIMD, Element or Structure Load and Store
1879  def SwiftWrite2xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
1880      let Latency = 4;
1881      let ResourceCycles = [2];
1882  }
1883  def SwiftWrite3xP2FourCy : SchedWriteRes<[SwiftUnitP2]> {
1884      let Latency = 4;
1885      let ResourceCycles = [3];
1886  }
1887  foreach Num = 1-2 in {
1888    def SwiftExt#Num#xP0 : SchedWriteRes<[SwiftUnitP0]> {
1889      let Latency = 0;
1890      let NumMicroOps = Num;
1891      let ResourceCycles = [Num];
1892    }
1893  }
  // VLDx
  // Multiple structures.
  // Single element structure loads (VLD1).
  // We assume aligned accesses.
  // Each writeback (wb) binding below repeats the write of its non-writeback
  // counterpart and appends SwiftWriteP01OneCycle.
  // Single/two register.
  def : InstRW<[SwiftWriteLM4Cy], (instregex "VLD1(d|q)(8|16|32|64)$")>;
  def : InstRW<[SwiftWriteLM4Cy, SwiftWriteP01OneCycle],
        (instregex "VLD1(d|q)(8|16|32|64)wb")>;
  // Three register.
  def : InstRW<[SwiftWrite3xP2FourCy],
        (instregex "VLD1(d|q)(8|16|32|64)T$", "VLD1d64TPseudo")>;
  def : InstRW<[SwiftWrite3xP2FourCy, SwiftWriteP01OneCycle],
        (instregex "VLD1(d|q)(8|16|32|64)Twb")>;
  // Four register.
  // NOTE(review): the four-register forms use SwiftWrite2xP2FourCy while the
  // three-register forms above use SwiftWrite3xP2FourCy -- confirm intended.
  def : InstRW<[SwiftWrite2xP2FourCy],
        (instregex "VLD1(d|q)(8|16|32|64)Q$", "VLD1d64QPseudo")>;
  def : InstRW<[SwiftWrite2xP2FourCy, SwiftWriteP01OneCycle],
        (instregex "VLD1(d|q)(8|16|32|64)Qwb")>;
  // Two element structure loads (VLD2), real opcodes and q-register pseudos.
  // Two/four register.
  // The writeback binding inserts SwiftWriteP01OneCycle directly after the
  // first load write; the other writes are the same as the plain binding.
  def : InstRW<[SwiftWriteLM9Cy, SwiftExt2xP0, SwiftVLDMPerm2],
        (instregex "VLD2(d|q|b)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
  def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
                SwiftVLDMPerm2],
        (instregex "VLD2(d|q|b)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;
  // Three element structure loads (VLD3).
  // Real opcodes list three load writes (SwiftWriteLM9Cy followed by two
  // SwiftWriteLM9CyNo); the Pseudo/oddPseudo forms list a single
  // SwiftWriteLM9Cy (presumably because they define one wide register --
  // TODO confirm against the instruction definitions).
  def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
                SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
        (instregex "VLD3(d|q)(8|16|32)$")>;
  def : InstRW<[SwiftWriteLM9Cy, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
        (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;

  // _UPD forms: same writes, with SwiftWriteP01OneCycle placed right after the
  // load writes.
  def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo,
                SwiftWriteP01OneCycle, SwiftVLDMPerm3, SwiftWrite3xP2FourCy],
        (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
  def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm3,
                SwiftWrite3xP2FourCy],
        (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
  // Four element structure loads (VLD4).
  // Real opcodes list four SwiftWriteLM11Cy load writes; the Pseudo/oddPseudo
  // forms list one. The _UPD bindings add SwiftWriteP01OneCycle right after
  // the load writes and are otherwise identical to the plain bindings.
  def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
                SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4,
                SwiftWrite3xP2FourCy],
        (instregex "VLD4(d|q)(8|16|32)$")>;
  def : InstRW<[SwiftWriteLM11Cy,  SwiftExt2xP0, SwiftVLDMPerm4,
                SwiftWrite3xP2FourCy],
        (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
  def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy,
                SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
                SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
        (instregex "VLD4(d|q)(8|16|32)_UPD")>;
  def : InstRW<[SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0,
                SwiftVLDMPerm4, SwiftWrite3xP2FourCy],
        (instregex  "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
1947
  // Single all/lane loads.
  // One element structure (VLD1LN / VLD1DUP).
  // The writeback binding (wb, _UPD and the q-register lane Pseudo_UPD) adds
  // SwiftWriteP01OneCycle after the load write.
  def : InstRW<[SwiftWriteLM6Cy, SwiftVLDMPerm2],
        (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
  def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm2],
        (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
                  "VLD1LNq(8|16|32)Pseudo_UPD")>;
  // Two element structure (VLD2LN / VLD2DUP).
  // Note the position of SwiftWriteP01OneCycle differs between bindings: it is
  // third for the VLD2LN _UPD opcodes but second for the VLD2DUPd wb opcodes
  // and the VLD2LN Pseudo_UPD forms (presumably following each instruction's
  // def operand order -- TODO confirm).
  def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftExt1xP0, SwiftVLDMPerm2],
        (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
                   "VLD2LN(d|q)(8|16|32)Pseudo$")>;
  def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftWriteP01OneCycle,
                SwiftExt1xP0, SwiftVLDMPerm2],
        (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
  def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
                SwiftExt1xP0, SwiftVLDMPerm2],
        (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb")>;
  def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy,
                SwiftExt1xP0, SwiftVLDMPerm2],
        (instregex "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
  // Three element structure (VLD3LN / VLD3DUP).
  // Load writes are SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy. In the
  // _UPD binding SwiftWriteP01OneCycle follows all three; in the Pseudo_UPD
  // binding it follows the first.
  def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy, SwiftExt1xP0,
                SwiftVLDMPerm3],
        (instregex "VLD3(DUP|LN)(d|q)(8|16|32)$",
                   "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
  def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy,
                SwiftWriteP01OneCycle, SwiftExt1xP0, SwiftVLDMPerm3],
        (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
  def : InstRW<[SwiftWriteLM7Cy, SwiftWriteP01OneCycle, SwiftWriteLM8Cy,
                SwiftWriteLM8Cy, SwiftExt1xP0, SwiftVLDMPerm3],
        (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
  // Four element structure (VLD4LN / VLD4DUP).
  // Load writes are SwiftWriteLM8Cy, SwiftWriteLM9Cy and two
  // SwiftWriteLM10CyNo. In the _UPD binding SwiftWriteP01OneCycle follows all
  // four; in the Pseudo_UPD binding it follows the first.
  def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
                SwiftWriteLM10CyNo, SwiftExt1xP0, SwiftVLDMPerm5],
        (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$",
                   "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
  def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
                SwiftWriteLM10CyNo, SwiftWriteP01OneCycle, SwiftExt1xP0,
                SwiftVLDMPerm5],
        (instregex "VLD4(DUP|LN)(d|q)(8|16|32)_UPD")>;
  def : InstRW<[SwiftWriteLM8Cy, SwiftWriteP01OneCycle, SwiftWriteLM9Cy,
                SwiftWriteLM10CyNo, SwiftWriteLM10CyNo, SwiftExt1xP0,
                SwiftVLDMPerm5],
        (instregex "VLD4(DUP|LN)(d|q)(8|16|32)Pseudo_UPD")>;
  // VSTx
  // Multiple structures.
  // Single element structure store (VST1).
  // The store write is SwiftWrite<N>xP2, with N = 1 (d), 2 (q), 3 (T forms)
  // or 4 (Q forms). Writeback bindings list SwiftWriteP01OneCycle first,
  // followed by the same store write.
  def : InstRW<[SwiftWrite1xP2], (instregex "VST1d(8|16|32|64)$")>;
  def : InstRW<[SwiftWrite2xP2], (instregex "VST1q(8|16|32|64)$")>;
  def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2],
        (instregex "VST1d(8|16|32|64)wb")>;
  def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2],
        (instregex "VST1q(8|16|32|64)wb")>;
  def : InstRW<[SwiftWrite3xP2],
        (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
  def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite3xP2],
        (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
  def : InstRW<[SwiftWrite4xP2],
        (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
  def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2],
        (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
2009  // Two element structure store.
2010  def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
2011        (instregex "VST2(d|b)(8|16|32)$")>;
2012  def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
2013        (instregex "VST2(b|d)(8|16|32)wb")>;
2014  def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
2015        (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
2016  def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
2017        (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
  // Three element structure store (VST3), real opcodes and Pseudo/oddPseudo
  // forms. The _UPD binding lists SwiftWriteP01OneCycle first, then the same
  // writes as the plain binding.
  def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
        (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
  def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
        (instregex "VST3(d|q)(8|16|32)_UPD",
                   "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
  // Four element structure store (VST4), real opcodes and Pseudo/oddPseudo
  // forms. The _UPD binding lists SwiftWriteP01OneCycle first.
  // NOTE(review): the plain binding uses SwiftVLDMPerm2 but the _UPD binding
  // uses SwiftVLDMPerm4 -- confirm which permute write is intended.
  def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
        (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
  def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm4],
        (instregex "VST4(d|q)(8|16|32)_UPD",
                   "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
  // Single/all lane store (VSTxLN).
  // For every element count the _UPD / Pseudo_UPD binding lists
  // SwiftWriteP01OneCycle first, followed by the same writes as the
  // corresponding plain binding.
  // One element structure.
  def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1],
        (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
  def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1],
        (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;
  // Two element structure.
  def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm2],
        (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
  def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm2],
        (instregex "VST2LN(d|q)(8|16|32)_UPD",
                   "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
  // Three element structure.
  def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2],
        (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
  def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2],
        (instregex "VST3LN(d|q)(8|16|32)_UPD",
                   "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
  // Four element structure.
  // NOTE(review): uses SwiftWrite2xP2 although the three element binding
  // above uses SwiftWrite4xP2 -- confirm intended.
  def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2],
        (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
  def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2],
        (instregex "VST4LN(d|q)(8|16|32)_UPD",
                   "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;
2054
2055  // 4.2.44 VFP, Divide and Square Root
2056  def SwiftDiv17 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
2057    let NumMicroOps = 1;
2058    let Latency = 17;
2059    let ResourceCycles = [1, 15];
2060  }
2061  def SwiftDiv32 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> {
2062    let NumMicroOps = 1;
2063    let Latency = 32;
2064    let ResourceCycles = [1, 30];
2065  }
2066  def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
2067  def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;
2068
2069  // Not specified.
2070  def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
2071  // Preload.
2072  def : WriteRes<WritePreLd, [SwiftUnitP2]> { let Latency = 0;
2073    let ResourceCycles = [0];
2074  }
2075
2076}
2077