ARMScheduleA8.td revision ccba3a6a1ddb723aa799044d9b1c3c5392d0b22d
1//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the itinerary class data for the ARM Cortex A8 processors.
11//
12//===----------------------------------------------------------------------===//
13
14//
15// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
16// Functional Units.
17def A8_Issue   : FuncUnit; // issue slot; used by the load/store itineraries to enforce the 1 load/store per cycle limit
18def A8_Pipe0   : FuncUnit; // integer pipeline 0
19def A8_Pipe1   : FuncUnit; // integer pipeline 1
20def A8_LdSt0   : FuncUnit; // pipeline 0 load/store
21def A8_LdSt1   : FuncUnit; // pipeline 1 load/store (declared, but no itinerary stage in this file uses it)
22def A8_NPipe   : FuncUnit; // NEON ALU/MUL pipe
23def A8_NLSPipe : FuncUnit; // NEON LS pipe (also used by the permute/move itineraries in this file)
24//
25// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
26//
// Cortex-A8 itinerary table.
//
// Each InstrItinData entry ties one itinerary class (IIC_*) to:
//   * a list of InstrStage<cycles, [units], ...> records naming the functional
//     units the instruction occupies; a multi-unit list such as
//     [A8_Pipe0, A8_Pipe1] lets the instruction use either integer pipe, and
//   * an optional trailing list of per-operand cycle numbers; entries without
//     that list carry no operand cycles.
// NOTE(review): the optional third InstrStage argument (always 0 where it is
// used here) is presumably the time increment to the next stage as defined by
// LLVM's generic itinerary classes, i.e. 0 lets the following stage start in
// the same cycle -- confirm against that definition.
27def CortexA8Itineraries : ProcessorItineraries<
28  [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [
29  // Two fully-pipelined integer ALU pipelines
30  //
31  // No operand cycles
32  InstrItinData<IIC_iALUx    , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
33  //
34  // Binary Instructions that produce a result
35  InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
36  InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
37  InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
38  InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
39  //
40  // Unary Instructions that produce a result
41  InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
42  InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
43  InstrItinData<IIC_iUNAsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
44  //
45  // Compare instructions
46  InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
47  InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
48  InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
49  InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
50  //
51  // Move instructions, unconditional
52  InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
53  InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
54                             InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
55  InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
56  InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
57  InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
58  //
59  // Move instructions, conditional
60  InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
61  InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
62  InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
63  InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
64
65  // Integer multiply pipeline
66  // Result written in E5, but that is relative to the last cycle of multicycle,
67  // so we use 6 for those cases
68  //
69  InstrItinData<IIC_iMUL16   , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
70  InstrItinData<IIC_iMAC16   , [InstrStage<1, [A8_Pipe1], 0>,
71                                InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
72  InstrItinData<IIC_iMUL32   , [InstrStage<1, [A8_Pipe1], 0>,
73                                InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
74  InstrItinData<IIC_iMAC32   , [InstrStage<1, [A8_Pipe1], 0>,
75                                InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
76  InstrItinData<IIC_iMUL64   , [InstrStage<2, [A8_Pipe1], 0>,
77                                InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
78  InstrItinData<IIC_iMAC64   , [InstrStage<2, [A8_Pipe1], 0>,
79                                InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
80
81  // Integer load pipeline
82  //
83  // loads have an extra cycle of latency, but are fully pipelined
84  // use A8_Issue to enforce the 1 load/store per cycle limit
85  //
86  // Immediate offset
87  InstrItinData<IIC_iLoadi   , [InstrStage<1, [A8_Issue], 0>,
88                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
89                                InstrStage<1, [A8_LdSt0]>], [3, 1]>,
90  //
91  // Register offset
92  InstrItinData<IIC_iLoadr   , [InstrStage<1, [A8_Issue], 0>,
93                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
94                                InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
95  //
96  // Scaled register offset, issues over 2 cycles
97  InstrItinData<IIC_iLoadsi  , [InstrStage<2, [A8_Issue], 0>,
98                                InstrStage<1, [A8_Pipe0], 0>,
99                                InstrStage<1, [A8_Pipe1]>,
100                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
101                                InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
102  //
103  // Immediate offset with update
104  InstrItinData<IIC_iLoadiu  , [InstrStage<1, [A8_Issue], 0>,
105                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
106                                InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
107  //
108  // Register offset with update
109  InstrItinData<IIC_iLoadru  , [InstrStage<1, [A8_Issue], 0>,
110                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
111                                InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
112  //
113  // Scaled register offset with update, issues over 2 cycles
114  InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>,
115                                InstrStage<1, [A8_Pipe0], 0>,
116                                InstrStage<1, [A8_Pipe1]>,
117                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
118                                InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
119  //
120  // Load multiple
121  InstrItinData<IIC_iLoadm   , [InstrStage<2, [A8_Issue], 0>,
122                                InstrStage<2, [A8_Pipe0], 0>,
123                                InstrStage<2, [A8_Pipe1]>,
124                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
125                                InstrStage<1, [A8_LdSt0]>]>,
126
127  //
128  // Load multiple plus branch
129  InstrItinData<IIC_iLoadmBr , [InstrStage<2, [A8_Issue], 0>,
130                                InstrStage<2, [A8_Pipe0], 0>,
131                                InstrStage<2, [A8_Pipe1]>,
132                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
133                                InstrStage<1, [A8_LdSt0]>,
134                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
135
136  //
137  // iLoadi + iALUr for t2LDRpci_pic.
138  InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Issue], 0>,
139                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
140                                InstrStage<1, [A8_LdSt0]>,
141                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
142
143
144  // Integer store pipeline
145  //
146  // use A8_Issue to enforce the 1 load/store per cycle limit
147  //
148  // Immediate offset
149  InstrItinData<IIC_iStorei  , [InstrStage<1, [A8_Issue], 0>,
150                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
151                                InstrStage<1, [A8_LdSt0]>], [3, 1]>,
152  //
153  // Register offset
154  InstrItinData<IIC_iStorer  , [InstrStage<1, [A8_Issue], 0>,
155                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
156                                InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
157  //
158  // Scaled register offset, issues over 2 cycles
159  InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>,
160                                InstrStage<1, [A8_Pipe0], 0>,
161                                InstrStage<1, [A8_Pipe1]>,
162                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
163                                InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
164  //
165  // Immediate offset with update
166  InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>,
167                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
168                                InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
169  //
170  // Register offset with update
171  InstrItinData<IIC_iStoreru  , [InstrStage<1, [A8_Issue], 0>,
172                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
173                                InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
174  //
175  // Scaled register offset with update, issues over 2 cycles
176  InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>,
177                                InstrStage<1, [A8_Pipe0], 0>,
178                                InstrStage<1, [A8_Pipe1]>,
179                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
180                                InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
181  //
182  // Store multiple
183  InstrItinData<IIC_iStorem  , [InstrStage<2, [A8_Issue], 0>,
184                                InstrStage<2, [A8_Pipe0], 0>,
185                                InstrStage<2, [A8_Pipe1]>,
186                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
187                                InstrStage<1, [A8_LdSt0]>]>,
188
189  // Branch
190  //
191  // no delay slots, so the latency of a branch is unimportant
192  InstrItinData<IIC_Br      , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
193
194  // VFP
195  // Issue through integer pipeline, and execute in NEON unit. We assume
196  // RunFast mode so that NFP pipeline is used for single-precision when
197  // possible.
198  //
199  // FP Special Register to Integer Register File Move
200  InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
201                              InstrStage<1, [A8_NLSPipe]>]>,
202  //
203  // Single-precision FP Unary
204  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
205                               InstrStage<1, [A8_NPipe]>], [7, 1]>,
206  //
207  // Double-precision FP Unary
208  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
209                               InstrStage<4, [A8_NPipe], 0>,
210                               InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
211  //
212  // Single-precision FP Compare
213  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
214                               InstrStage<1, [A8_NPipe]>], [1, 1]>,
215  //
216  // Double-precision FP Compare
217  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
218                               InstrStage<4, [A8_NPipe], 0>,
219                               InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
220  //
221  // Single to Double FP Convert
222  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
223                               InstrStage<7, [A8_NPipe], 0>,
224                               InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
225  //
226  // Double to Single FP Convert
227  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
228                               InstrStage<5, [A8_NPipe], 0>,
229                               InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
230  //
231  // Single-Precision FP to Integer Convert
232  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
233                               InstrStage<1, [A8_NPipe]>], [7, 1]>,
234  //
235  // Double-Precision FP to Integer Convert
236  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
237                               InstrStage<8, [A8_NPipe], 0>,
238                               InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
239  //
240  // Integer to Single-Precision FP Convert
241  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
242                               InstrStage<1, [A8_NPipe]>], [7, 1]>,
243  //
244  // Integer to Double-Precision FP Convert
245  InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
246                               InstrStage<8, [A8_NPipe], 0>,
247                               InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
248  //
249  // Single-precision FP ALU
250  InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
251                               InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
252  //
253  // Double-precision FP ALU
254  InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
255                               InstrStage<9, [A8_NPipe], 0>,
256                               InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
257  //
258  // Single-precision FP Multiply
259  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
260                               InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
261  //
262  // Double-precision FP Multiply
263  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
264                               InstrStage<11, [A8_NPipe], 0>,
265                               InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
266  //
267  // Single-precision FP MAC
268  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
269                               InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
270  //
271  // Double-precision FP MAC
272  InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
273                               InstrStage<19, [A8_NPipe], 0>,
274                               InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
275  //
276  // Single-precision FP DIV
277  InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
278                               InstrStage<20, [A8_NPipe], 0>,
279                               InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
280  //
281  // Double-precision FP DIV
282  InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
283                               InstrStage<29, [A8_NPipe], 0>,
284                               InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
285  //
286  // Single-precision FP SQRT
287  InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
288                               InstrStage<19, [A8_NPipe], 0>,
289                               InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
290  //
291  // Double-precision FP SQRT
292  InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
293                               InstrStage<29, [A8_NPipe], 0>,
294                               InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
295  //
296  // Single-precision FP Load
297  // use A8_Issue to enforce the 1 load/store per cycle limit
298  InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
299                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
300                               InstrStage<1, [A8_LdSt0], 0>,
301                               InstrStage<1, [A8_NLSPipe]>]>,
302  //
303  // Double-precision FP Load
304  // use A8_Issue to enforce the 1 load/store per cycle limit
305  InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
306                               InstrStage<1, [A8_Pipe0], 0>,
307                               InstrStage<1, [A8_Pipe1]>,
308                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
309                               InstrStage<1, [A8_LdSt0], 0>,
310                               InstrStage<1, [A8_NLSPipe]>]>,
311  //
312  // FP Load Multiple
313  // use A8_Issue to enforce the 1 load/store per cycle limit
314  InstrItinData<IIC_fpLoadm,  [InstrStage<3, [A8_Issue], 0>,
315                               InstrStage<2, [A8_Pipe0], 0>,
316                               InstrStage<2, [A8_Pipe1]>,
317                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
318                               InstrStage<1, [A8_LdSt0], 0>,
319                               InstrStage<1, [A8_NLSPipe]>]>,
320  //
321  // Single-precision FP Store
322  // use A8_Issue to enforce the 1 load/store per cycle limit
323  InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
324                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
325                               InstrStage<1, [A8_LdSt0], 0>,
326                               InstrStage<1, [A8_NLSPipe]>]>,
327  //
328  // Double-precision FP Store
329  // use A8_Issue to enforce the 1 load/store per cycle limit
330  InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
331                               InstrStage<1, [A8_Pipe0], 0>,
332                               InstrStage<1, [A8_Pipe1]>,
333                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
334                               InstrStage<1, [A8_LdSt0], 0>,
335                               InstrStage<1, [A8_NLSPipe]>]>,
336  //
337  // FP Store Multiple
338  // use A8_Issue to enforce the 1 load/store per cycle limit
339  InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
340                               InstrStage<2, [A8_Pipe0], 0>,
341                               InstrStage<2, [A8_Pipe1]>,
342                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
343                               InstrStage<1, [A8_LdSt0], 0>,
344                               InstrStage<1, [A8_NLSPipe]>]>,
345
346  // NEON
347  // Issue through integer pipeline, and execute in NEON unit.
348  //
349  // VLD1
350  // FIXME: We don't model this instruction properly
351  InstrItinData<IIC_VLD1,     [InstrStage<1, [A8_Issue], 0>,
352                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
353                               InstrStage<1, [A8_LdSt0], 0>,
354                               InstrStage<1, [A8_NLSPipe]>]>,
355  //
356  // VLD2
357  // FIXME: We don't model this instruction properly
358  InstrItinData<IIC_VLD2,     [InstrStage<1, [A8_Issue], 0>,
359                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
360                               InstrStage<1, [A8_LdSt0], 0>,
361                               InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>,
362  //
363  // VLD3
364  // FIXME: We don't model this instruction properly
365  InstrItinData<IIC_VLD3,     [InstrStage<1, [A8_Issue], 0>,
366                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
367                               InstrStage<1, [A8_LdSt0], 0>,
368                               InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>,
369  //
370  // VLD4
371  // FIXME: We don't model this instruction properly
372  InstrItinData<IIC_VLD4,     [InstrStage<1, [A8_Issue], 0>,
373                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
374                               InstrStage<1, [A8_LdSt0], 0>,
375                               InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>,
376  //
377  // VST
378  // FIXME: We don't model this instruction properly
379  InstrItinData<IIC_VST,      [InstrStage<1, [A8_Issue], 0>,
380                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
381                               InstrStage<1, [A8_LdSt0], 0>,
382                               InstrStage<1, [A8_NLSPipe]>]>,
383  //
384  // Double-register FP Unary
385  InstrItinData<IIC_VUNAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
386                               InstrStage<1, [A8_NPipe]>], [5, 2]>,
387  //
388  // Quad-register FP Unary
389  // Result written in N5, but that is relative to the last cycle of multicycle,
390  // so we use 6 for those cases
391  InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
392                               InstrStage<2, [A8_NPipe]>], [6, 2]>,
393  //
394  // Double-register FP Binary
395  InstrItinData<IIC_VBIND,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
396                               InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
397  //
398  // Quad-register FP Binary
399  // Result written in N5, but that is relative to the last cycle of multicycle,
400  // so we use 6 for those cases
401  InstrItinData<IIC_VBINQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
402                               InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
403  //
404  // Move Immediate
405  InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
406                               InstrStage<1, [A8_NPipe]>], [3]>,
407  //
408  // Double-register Permute Move
409  InstrItinData<IIC_VMOVD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
410                               InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
411  //
412  // Quad-register Permute Move
413  // Result written in N2, but that is relative to the last cycle of multicycle,
414  // so we use 3 for those cases
415  InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
416                               InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
417  //
418  // Integer to Single-precision Move
419  InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
420                               InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
421  //
422  // Integer to Double-precision Move
423  InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
424                               InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
425  //
426  // Single-precision to Integer Move
427  InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
428                               InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
429  //
430  // Double-precision to Integer Move
431  InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
432                               InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
433  //
434  // Integer to Lane Move
435  InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
436                               InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
437  //
438  // Double-register Permute
439  InstrItinData<IIC_VPERMD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
440                               InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
441  //
442  // Quad-register Permute
443  // Result written in N2, but that is relative to the last cycle of multicycle,
444  // so we use 3 for those cases
445  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
446                               InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
447  //
448  // Quad-register Permute (3 cycle issue)
449  // Result written in N2, but that is relative to the last cycle of multicycle,
450  // so we use 4 for those cases
451  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
452                               InstrStage<1, [A8_NLSPipe]>,
453                               InstrStage<1, [A8_NPipe], 0>,
454                               InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
455  //
456  // Double-register FP Multiply-Accumulate
457  InstrItinData<IIC_VMACD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
458                               InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
459  //
460  // Quad-register FP Multiply-Accumulate
461  // Result written in N9, but that is relative to the last cycle of multicycle,
462  // so we use 10 for those cases
463  InstrItinData<IIC_VMACQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
464                               InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
465  //
466  // Double-register Reciprocal Step
467  InstrItinData<IIC_VRECSD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
468                               InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
469  //
470  // Quad-register Reciprocal Step
471  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
472                               InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
473  //
474  // Double-register Integer Count
475  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
476                               InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
477  //
478  // Quad-register Integer Count
479  // Result written in N3, but that is relative to the last cycle of multicycle,
480  // so we use 4 for those cases
481  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
482                               InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
483  //
484  // Double-register Integer Unary
485  InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
486                               InstrStage<1, [A8_NPipe]>], [4, 2]>,
487  //
488  // Quad-register Integer Unary
489  InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
490                               InstrStage<1, [A8_NPipe]>], [4, 2]>,
491  //
492  // Double-register Integer Q-Unary
493  InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
494                               InstrStage<1, [A8_NPipe]>], [4, 1]>,
495  //
496  // Quad-register Integer Q-Unary
497  InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
498                               InstrStage<1, [A8_NPipe]>], [4, 1]>,
499  //
500  // Double-register Integer Binary
501  InstrItinData<IIC_VBINiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
502                               InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
503  //
504  // Quad-register Integer Binary
505  InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
506                               InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
507  //
508  // Double-register Integer Binary (4 cycle)
509  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
510                               InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
511  //
512  // Quad-register Integer Binary (4 cycle)
513  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
514                               InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
515
516  //
517  // Double-register Integer Subtract
518  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
519                               InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
520  //
521  // Quad-register Integer Subtract
522  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
523                               InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
524  //
525  // Double-register Integer Subtract (4 cycle)
526  InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
527                               InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
528  //
529  // Quad-register Integer Subtract (4 cycle)
530  InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
531                               InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
532  //
533  // Double-register Integer Shift
534  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
535                               InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
536  //
537  // Quad-register Integer Shift
538  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
539                               InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
540  //
541  // Double-register Integer Shift (4 cycle)
542  InstrItinData<IIC_VSHLi4D,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
543                               InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
544  //
545  // Quad-register Integer Shift (4 cycle)
546  InstrItinData<IIC_VSHLi4Q,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
547                               InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
548  //
549  // Double-register Integer Pair Add Long
550  InstrItinData<IIC_VPALiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
551                               InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
552  //
553  // Quad-register Integer Pair Add Long
554  InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
555                               InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
556  //
557  // Double-register Absolute Difference and Accumulate
558  InstrItinData<IIC_VABAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
559                               InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
560  //
561  // Quad-register Absolute Difference and Accumulate
562  InstrItinData<IIC_VABAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
563                               InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
564
565  //
566  // Double-register Integer Multiply (.8, .16)
567  InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
568                               InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
569  //
570  // Double-register Integer Multiply (.32)
571  InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
572                               InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
573  //
574  // Quad-register Integer Multiply (.8, .16)
575  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
576                               InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
577  //
578  // Quad-register Integer Multiply (.32)
579  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
580                               InstrStage<1, [A8_NPipe]>,
581                               InstrStage<2, [A8_NLSPipe], 0>,
582                               InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
583  //
584  // Double-register Integer Multiply-Accumulate (.8, .16)
585  InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
586                               InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
587  //
588  // Double-register Integer Multiply-Accumulate (.32)
589  InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
590                               InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
591  //
592  // Quad-register Integer Multiply-Accumulate (.8, .16)
593  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
594                               InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
595  //
596  // Quad-register Integer Multiply-Accumulate (.32)
597  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
598                               InstrStage<1, [A8_NPipe]>,
599                               InstrStage<2, [A8_NLSPipe], 0>,
600                               InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
601  //
602  // Double-register VEXT
603  InstrItinData<IIC_VEXTD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
604                               InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
605  //
606  // Quad-register VEXT
607  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
608                               InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
609  //
610  // VTB
611  InstrItinData<IIC_VTB1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
612                               InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
613  InstrItinData<IIC_VTB2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
614                               InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
615  InstrItinData<IIC_VTB3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
616                               InstrStage<1, [A8_NLSPipe]>,
617                               InstrStage<1, [A8_NPipe], 0>,
618                               InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
619  InstrItinData<IIC_VTB4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
620                               InstrStage<1, [A8_NLSPipe]>,
621                               InstrStage<1, [A8_NPipe], 0>,
622                               InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
623  //
624  // VTBX
625  InstrItinData<IIC_VTBX1,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
626                               InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
627  InstrItinData<IIC_VTBX2,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
628                               InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
629  InstrItinData<IIC_VTBX3,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
630                               InstrStage<1, [A8_NLSPipe]>,
631                               InstrStage<1, [A8_NPipe], 0>,
632                               InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
633  InstrItinData<IIC_VTBX4,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
634                               InstrStage<1, [A8_NLSPipe]>,
635                               InstrStage<1, [A8_NPipe], 0>,
636                            InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
637]>;
638