ARMScheduleA9.td revision 7602acbf3b90af995606e199d68510b856c8e7e7
1//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the itinerary class data for the ARM Cortex A9 processors.
11//
12//===----------------------------------------------------------------------===//
13
14//
15// Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16// Technical Reference Manual".
17//
18// Functional units
19def A9_Pipe0   : FuncUnit; // integer issue pipeline 0 (dual issue = A9_Pipe0 | A9_Pipe1)
20def A9_Pipe1   : FuncUnit; // integer issue pipeline 1; load/store, VFP and NEON itineraries issue here
21def A9_LSPipe  : FuncUnit; // load/store pipe, used by the integer, VFP and NEON memory itineraries
22def A9_NPipe   : FuncUnit; // NEON ALU/MUL pipe; the VFP itineraries below also execute on it
23def A9_DRegsVFP: FuncUnit; // artificial unit: shared FP register file, VFP side (Required by VFP, Reserved by NEON)
24def A9_DRegsN  : FuncUnit; // artificial unit: shared FP register file, NEON side (Required by NEON, Reserved by VFP)
25
26// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
27//
28def CortexA9Itineraries : ProcessorItineraries<
29  [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [
30  // Two fully-pipelined integer ALU pipelines
31  // FIXME: There are no operand latencies for these instructions at all!
32  //
33  // Move instructions, unconditional
34  InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
35  InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
36  InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
37  InstrItinData<IIC_iMOVsr  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
38  //
39  // No operand cycles
40  InstrItinData<IIC_iALUx    , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
41  //
42  // Binary Instructions that produce a result
43  InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
44  InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
45  InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
46  InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
47  //
48  // Unary Instructions that produce a result
49  InstrItinData<IIC_iUNAr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
50  InstrItinData<IIC_iUNAsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
51  InstrItinData<IIC_iUNAsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
52  //
53  // Compare instructions
54  InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
55  InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
56  InstrItinData<IIC_iCMPsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
57  InstrItinData<IIC_iCMPsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
58  //
59  // Move instructions, conditional
60  InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
61  InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
62  InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
63  InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
64
65  // Integer multiply pipeline
66  //
67  InstrItinData<IIC_iMUL16   , [InstrStage<1, [A9_Pipe1], 0>,
68                                InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
69  InstrItinData<IIC_iMAC16   , [InstrStage<1, [A9_Pipe1], 0>,
70                                InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
71  InstrItinData<IIC_iMUL32   , [InstrStage<1, [A9_Pipe1], 0>,
72                                InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
73  InstrItinData<IIC_iMAC32   , [InstrStage<1, [A9_Pipe1], 0>,
74                                InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
75  InstrItinData<IIC_iMUL64   , [InstrStage<2, [A9_Pipe1], 0>,
76                                InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
77  InstrItinData<IIC_iMAC64   , [InstrStage<2, [A9_Pipe1], 0>,
78                                InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
79  // Integer load pipeline
80  // FIXME: The timings are some rough approximations
81  //
82  // Immediate offset
83  InstrItinData<IIC_iLoadi   , [InstrStage<1, [A9_Pipe1]>,
84                                InstrStage<1, [A9_LSPipe]>], [3, 1]>,
85  //
86  // Register offset
87  InstrItinData<IIC_iLoadr   , [InstrStage<1, [A9_Pipe1]>,
88                                InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
89  //
90  // Scaled register offset
91  InstrItinData<IIC_iLoadsi  , [InstrStage<1, [A9_Pipe1]>,
92                                InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
93  //
94  // Immediate offset with update
95  InstrItinData<IIC_iLoadiu  , [InstrStage<1, [A9_Pipe1]>,
96                                InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
97  //
98  // Register offset with update
99  InstrItinData<IIC_iLoadru  , [InstrStage<1, [A9_Pipe1]>,
100                                InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
101  //
102  // Scaled register offset with update
103  InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
104                                InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
105  //
106  // Load multiple
107  InstrItinData<IIC_iLoadm   , [InstrStage<1, [A9_Pipe1]>,
108                                InstrStage<1, [A9_LSPipe]>]>,
109
110  //
111  // Load multiple plus branch
112  InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Pipe1]>,
113                                InstrStage<1, [A9_LSPipe]>,
114                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
115
116  // Integer store pipeline
117  //
118  // Immediate offset
119  InstrItinData<IIC_iStorei  , [InstrStage<1, [A9_Pipe1]>,
120                                InstrStage<1, [A9_LSPipe]>], [3, 1]>,
121  //
122  // Register offset
123  InstrItinData<IIC_iStorer  , [InstrStage<1, [ A9_Pipe1]>,
124                                InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
125  //
126  // Scaled register offset
127  InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
128                                InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
129  //
130  // Immediate offset with update
131  InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
132                                InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
133  //
134  // Register offset with update
135  InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
136                                InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
137  //
138  // Scaled register offset with update
139  InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
140                                InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
141  //
142  // Store multiple
143  InstrItinData<IIC_iStorem  , [InstrStage<1, [A9_Pipe1]>,
144                                InstrStage<1, [A9_LSPipe]>]>,
145  // Branch
146  //
147  // no delay slots, so the latency of a branch is unimportant
148  InstrItinData<IIC_Br       , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
149
150  // VFP and NEON share the same register file. This means that every VFP
151  // instruction should wait for full completion of the preceding NEON
152  // instruction and vice-versa. We model this behavior with two artificial FUs:
153  // DRegsVFP and DRegsN.
154  //
155  // Every VFP instruction:
156  //  - Acquires DRegsVFP resource for 1 cycle
157  //  - Reserves DRegsN resource for the whole duration (including time to
158  //    register file writeback!).
159  // Every NEON instruction does the same but with FUs swapped.
160  //
161  // Since a reserved FU cannot be acquired, this precisely models
162  // "cross-domain" stalls.
163
164  // VFP
165  // Issue through integer pipeline, and execute in NEON unit.
166
167  // FP Special Register to Integer Register File Move
168  InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
169                              InstrStage<2, [A9_DRegsN],   0, Reserved>,
170                              InstrStage<1, [A9_Pipe1]>,
171                              InstrStage<1, [A9_NPipe]>]>,
172  //
173  // Single-precision FP Unary
174  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
175                               // Extra latency cycles since wbck is 2 cycles
176                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
177                               InstrStage<1, [A9_Pipe1]>,
178                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
179  //
180  // Double-precision FP Unary
181  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
182                               // Extra latency cycles since wbck is 2 cycles
183                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
184                               InstrStage<1, [A9_Pipe1]>,
185                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
186
187  //
188  // Single-precision FP Compare
189  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
190                               // Extra latency cycles since wbck is 4 cycles
191                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
192                               InstrStage<1, [A9_Pipe1]>,
193                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
194  //
195  // Double-precision FP Compare
196  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
197                               // Extra latency cycles since wbck is 4 cycles
198                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
199                               InstrStage<1, [A9_Pipe1]>,
200                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
201  //
202  // Single to Double FP Convert
203  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
204                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
205                               InstrStage<1, [A9_Pipe1]>,
206                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
207  //
208  // Double to Single FP Convert
209  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
210                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
211                               InstrStage<1, [A9_Pipe1]>,
212                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
213
214  //
215  // Single to Half FP Convert
216  InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
217                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
218                               InstrStage<1, [A9_Pipe1]>,
219                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
220  //
221  // Half to Single FP Convert
222  InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
223                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
224                               InstrStage<1, [A9_Pipe1]>,
225                               InstrStage<1, [A9_NPipe]>], [2, 1]>,
226
227  //
228  // Single-Precision FP to Integer Convert
229  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
230                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
231                               InstrStage<1, [A9_Pipe1]>,
232                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
233  //
234  // Double-Precision FP to Integer Convert
235  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
236                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
237                               InstrStage<1, [A9_Pipe1]>,
238                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
239  //
240  // Integer to Single-Precision FP Convert
241  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
242                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
243                               InstrStage<1, [A9_Pipe1]>,
244                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
245  //
246  // Integer to Double-Precision FP Convert
247  InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
248                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
249                               InstrStage<1, [A9_Pipe1]>,
250                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
251  //
252  // Single-precision FP ALU
253  InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
254                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
255                               InstrStage<1, [A9_Pipe1]>,
256                               InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
257  //
258  // Double-precision FP ALU
259  InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
260                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
261                               InstrStage<1, [A9_Pipe1]>,
262                               InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
263  //
264  // Single-precision FP Multiply
265  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
266                               InstrStage<6, [A9_DRegsN],   0, Reserved>,
267                               InstrStage<1, [A9_Pipe1]>,
268                               InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
269  //
270  // Double-precision FP Multiply
271  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
272                               InstrStage<7, [A9_DRegsN],   0, Reserved>,
273                               InstrStage<1, [A9_Pipe1]>,
274                               InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
275  //
276  // Single-precision FP MAC
277  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
278                               InstrStage<9, [A9_DRegsN],   0, Reserved>,
279                               InstrStage<1, [A9_Pipe1]>,
280                               InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
281  //
282  // Double-precision FP MAC
283  InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
284                               InstrStage<10, [A9_DRegsN],  0, Reserved>,
285                               InstrStage<1,  [A9_Pipe1]>,
286                               InstrStage<2,  [A9_NPipe]>], [9, 0, 1, 1]>,
287  //
288  // Single-precision FP DIV
289  InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
290                               InstrStage<16, [A9_DRegsN],  0, Reserved>,
291                               InstrStage<1,  [A9_Pipe1]>,
292                               InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
293  //
294  // Double-precision FP DIV
295  InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
296                               InstrStage<26, [A9_DRegsN],  0, Reserved>,
297                               InstrStage<1,  [A9_Pipe1]>,
298                               InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
299  //
300  // Single-precision FP SQRT
301  InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
302                               InstrStage<18, [A9_DRegsN],   0, Reserved>,
303                               InstrStage<1,  [A9_Pipe1]>,
304                               InstrStage<13, [A9_NPipe]>], [17, 1]>,
305  //
306  // Double-precision FP SQRT
307  InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
308                               InstrStage<33, [A9_DRegsN],   0, Reserved>,
309                               InstrStage<1,  [A9_Pipe1]>,
310                               InstrStage<28, [A9_NPipe]>], [32, 1]>,
311
312  //
313  // Integer to Single-precision Move
314  InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
315                               // Extra 1 latency cycle since wbck is 2 cycles
316                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
317                               InstrStage<1, [A9_Pipe1]>,
318                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
319  //
320  // Integer to Double-precision Move
321  InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
322                               // Extra 1 latency cycle since wbck is 2 cycles
323                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
324                               InstrStage<1, [A9_Pipe1]>,
325                               InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
326  //
327  // Single-precision to Integer Move
328  InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
329                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
330                               InstrStage<1, [A9_Pipe1]>,
331                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
332  //
333  // Double-precision to Integer Move
334  InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
335                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
336                               InstrStage<1, [A9_Pipe1]>,
337                               InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
338  //
339  // Single-precision FP Load
340  InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
341                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
342                               InstrStage<1, [A9_Pipe1], 0>,
343                               InstrStage<1, [A9_LSPipe]>,
344                               InstrStage<1, [A9_NPipe]>]>,
345  //
346  // Double-precision FP Load
347  InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
348                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
349                               InstrStage<1, [A9_Pipe1], 0>,
350                               InstrStage<1, [A9_LSPipe]>,
351                               InstrStage<1, [A9_NPipe]>]>,
352  //
353  // FP Load Multiple
354  InstrItinData<IIC_fpLoadm,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
355                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
356                               InstrStage<1, [A9_Pipe1], 0>,
357                               InstrStage<1, [A9_LSPipe]>,
358                               InstrStage<1, [A9_NPipe]>]>,
359  //
360  // Single-precision FP Store
361  InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
362                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
363                               InstrStage<1, [A9_Pipe1], 0>,
364                               InstrStage<1, [A9_LSPipe]>,
365                               InstrStage<1, [A9_NPipe]>]>,
366  //
367  // Double-precision FP Store
368  InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
369                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
370                               InstrStage<1, [A9_Pipe1], 0>,
371                               InstrStage<1, [A9_LSPipe]>,
372                               InstrStage<1, [A9_NPipe]>]>,
373  //
374  // FP Store Multiple
375  InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
376                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
377                               InstrStage<1, [A9_Pipe1], 0>,
378                               InstrStage<1, [A9_LSPipe]>,
379                               InstrStage<1, [A9_NPipe]>]>,
380  // NEON
381  // Issue through integer pipeline, and execute in NEON unit.
382  // FIXME: Neon pipeline and LdSt unit are multiplexed.
383  //        Add some syntactic sugar to model this!
384  // VLD1
385  // FIXME: We don't model this instruction properly
386  InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
387                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
388                               InstrStage<1, [A9_Pipe1], 0>,
389                               InstrStage<1, [A9_LSPipe]>,
390                               InstrStage<1, [A9_NPipe]>]>,
391  //
392  // VLD2
393  // FIXME: We don't model this instruction properly
394  InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_DRegsN],   0, Required>,
395                               // Extra latency cycles since wbck is 6 cycles
396                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
397                               InstrStage<1, [A9_Pipe1], 0>,
398                               InstrStage<1, [A9_LSPipe]>,
399                               InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
400  //
401  // VLD3
402  // FIXME: We don't model this instruction properly
403  InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_DRegsN],   0, Required>,
404                               // Extra latency cycles since wbck is 6 cycles
405                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
406                               InstrStage<1, [A9_Pipe1], 0>,
407                               InstrStage<1, [A9_LSPipe]>,
408                               InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
409  //
410  // VLD4
411  // FIXME: We don't model this instruction properly
412  InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
413                               // Extra latency cycles since wbck is 6 cycles
414                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
415                               InstrStage<1, [A9_Pipe1], 0>,
416                               InstrStage<1, [A9_LSPipe]>,
417                               InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
418  //
419  // VST
420  // FIXME: We don't model this instruction properly
421  InstrItinData<IIC_VST,      [InstrStage<1, [A9_DRegsN],   0, Required>,
422                               // Extra latency cycles since wbck is 6 cycles
423                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
424                               InstrStage<1, [A9_Pipe1], 0>,
425                               InstrStage<1, [A9_LSPipe]>,
426                               InstrStage<1, [A9_NPipe]>]>,
427  //
428  // Double-register Integer Unary
429  InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
430                               // Extra latency cycles since wbck is 6 cycles
431                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
432                               InstrStage<1, [A9_Pipe1]>,
433                               InstrStage<1, [A9_NPipe]>], [4, 2]>,
434  //
435  // Quad-register Integer Unary
436  InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
437                               // Extra latency cycles since wbck is 6 cycles
438                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
439                               InstrStage<1, [A9_Pipe1]>,
440                               InstrStage<1, [A9_NPipe]>], [4, 2]>,
441  //
442  // Double-register Integer Q-Unary
443  InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_DRegsN],   0, Required>,
444                               // Extra latency cycles since wbck is 6 cycles
445                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
446                               InstrStage<1, [A9_Pipe1]>,
447                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
448  //
449  // Quad-register Integer Q-Unary
450  InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
451                               // Extra latency cycles since wbck is 6 cycles
452                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
453                               InstrStage<1, [A9_Pipe1]>,
454                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
455  //
456  // Double-register Integer Binary
457  InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
458                               // Extra latency cycles since wbck is 6 cycles
459                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
460                               InstrStage<1, [A9_Pipe1]>,
461                               InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
462  //
463  // Quad-register Integer Binary
464  InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
465                               // Extra latency cycles since wbck is 6 cycles
466                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
467                               InstrStage<1, [A9_Pipe1]>,
468                               InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
469  //
470  // Double-register Integer Subtract
471  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
472                               // Extra latency cycles since wbck is 6 cycles
473                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
474                               InstrStage<1, [A9_Pipe1]>,
475                               InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
476  //
477  // Quad-register Integer Subtract
478  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
479                               // Extra latency cycles since wbck is 6 cycles
480                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
481                               InstrStage<1, [A9_Pipe1]>,
482                               InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
483  //
484  // Double-register Integer Shift
485  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
486                               // Extra latency cycles since wbck is 6 cycles
487                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
488                               InstrStage<1, [A9_Pipe1]>,
489                               InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
490  //
491  // Quad-register Integer Shift
492  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
493                               // Extra latency cycles since wbck is 6 cycles
494                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
495                               InstrStage<1, [A9_Pipe1]>,
496                               InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
497  //
498  // Double-register Integer Shift (4 cycle)
499  InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
500                               // Extra latency cycles since wbck is 6 cycles
501                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
502                               InstrStage<1, [A9_Pipe1]>,
503                               InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
504  //
505  // Quad-register Integer Shift (4 cycle)
506  InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
507                               // Extra latency cycles since wbck is 6 cycles
508                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
509                               InstrStage<1, [A9_Pipe1]>,
510                               InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
511  //
512  // Double-register Integer Binary (4 cycle)
513  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
514                               // Extra latency cycles since wbck is 6 cycles
515                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
516                               InstrStage<1, [A9_Pipe1]>,
517                               InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
518  //
519  // Quad-register Integer Binary (4 cycle)
520  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
521                               // Extra latency cycles since wbck is 6 cycles
522                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
523                               InstrStage<1, [A9_Pipe1]>,
524                               InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
525  //
526  // Double-register Integer Subtract (4 cycle); distinct class from IIC_VSUBiD
527  InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
528                               // Extra latency cycles since wbck is 6 cycles
529                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
530                               InstrStage<1, [A9_Pipe1]>,
531                               InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
532  //
533  // Quad-register Integer Subtract (4 cycle); distinct class from IIC_VSUBiQ
534  InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
535                               // Extra latency cycles since wbck is 6 cycles
536                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
537                               InstrStage<1, [A9_Pipe1]>,
538                               InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
539
540  //
541  // Double-register Integer Count
542  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
543                               // Extra latency cycles since wbck is 6 cycles
544                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
545                               InstrStage<1, [A9_Pipe1]>,
546                               InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
547  //
548  // Quad-register Integer Count
549  // Result written in N3, but that is relative to the last cycle of multicycle,
550  // so we use 4 for those cases
551  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
552                               // Extra latency cycles since wbck is 7 cycles
553                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
554                               InstrStage<1, [A9_Pipe1]>,
555                               InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
556  //
557  // Double-register Absolute Difference and Accumulate
558  InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
559                               // Extra latency cycles since wbck is 6 cycles
560                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
561                               InstrStage<1, [A9_Pipe1]>,
562                               InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
563  //
564  // Quad-register Absolute Difference and Accumulate
565  InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
566                               // Extra latency cycles since wbck is 6 cycles
567                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
568                               InstrStage<1, [A9_Pipe1]>,
569                               InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
570  //
571  // Double-register Integer Pair Add Long
572  InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
573                               // Extra latency cycles since wbck is 6 cycles
574                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
575                               InstrStage<1, [A9_Pipe1]>,
576                               InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
577  //
578  // Quad-register Integer Pair Add Long
579  InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
580                               // Extra latency cycles since wbck is 6 cycles
581                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
582                               InstrStage<1, [A9_Pipe1]>,
583                               InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
584
585  //
586  // Double-register Integer Multiply (.8, .16)
587  InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
588                               // Extra latency cycles since wbck is 6 cycles
589                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
590                               InstrStage<1, [A9_Pipe1]>,
591                               InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
592  //
593  // Quad-register Integer Multiply (.8, .16)
594  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
595                               // Extra latency cycles since wbck is 7 cycles
596                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
597                               InstrStage<1, [A9_Pipe1]>,
598                               InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
599
600  //
601  // Double-register Integer Multiply (.32)
602  InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
603                               // Extra latency cycles since wbck is 7 cycles
604                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
605                               InstrStage<1, [A9_Pipe1]>,
606                               InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
607  //
608  // Quad-register Integer Multiply (.32)
609  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
610                               // Extra latency cycles since wbck is 9 cycles
611                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
612                               InstrStage<1, [A9_Pipe1]>,
613                               InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
614  //
615  // Double-register Integer Multiply-Accumulate (.8, .16)
616  InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
617                               // Extra latency cycles since wbck is 6 cycles
618                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
619                               InstrStage<1, [A9_Pipe1]>,
620                               InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
621  //
622  // Double-register Integer Multiply-Accumulate (.32)
623  InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
624                               // Extra latency cycles since wbck is 7 cycles
625                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
626                               InstrStage<1, [A9_Pipe1]>,
627                               InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
628  //
629  // Quad-register Integer Multiply-Accumulate (.8, .16)
630  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
631                               // Extra latency cycles since wbck is 7 cycles
632                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
633                               InstrStage<1, [A9_Pipe1]>,
634                               InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
635  //
636  // Quad-register Integer Multiply-Accumulate (.32)
637  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
638                               // Extra latency cycles since wbck is 9 cycles
639                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
640                               InstrStage<1, [A9_Pipe1]>,
641                               InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
642  //
643  // Move Immediate
644  InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_DRegsN],   0, Required>,
645                               // Extra latency cycles since wbck is 6 cycles
646                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
647                               InstrStage<1, [A9_Pipe1]>,
648                               InstrStage<1, [A9_NPipe]>], [3]>,
649  //
650  // Double-register Permute Move
651  InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
652  // FIXME: all latencies are arbitrary, no information is available
653                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
654                               InstrStage<1, [A9_Pipe1]>,
655                               InstrStage<1, [A9_LSPipe]>], [2, 1]>,
656  //
657  // Quad-register Permute Move
658  // Result written in N2, but that is relative to the last cycle of multicycle,
659  // so we use 3 for those cases
660  InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
661  // FIXME: all latencies are arbitrary, no information is available
662                               InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
663                               InstrStage<1, [A9_Pipe1]>,
664                               InstrStage<2, [A9_NPipe]>], [3, 1]>,
665  //
666  // Integer to Single-precision Move
667  InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
668  // FIXME: all latencies are arbitrary, no information is available
669                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
670                               InstrStage<1, [A9_Pipe1]>,
671                               InstrStage<1, [A9_NPipe]>], [2, 1]>,
672  //
673  // Integer to Double-precision Move
674  InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
675  // FIXME: all latencies are arbitrary, no information is available
676                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
677                               InstrStage<1, [A9_Pipe1]>,
678                               InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
679  //
680  // Single-precision to Integer Move
681  InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
682  // FIXME: all latencies are arbitrary, no information is available
683                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
684                               InstrStage<1, [A9_Pipe1]>,
685                               InstrStage<1, [A9_NPipe]>], [2, 1]>,
686  //
687  // Double-precision to Integer Move
688  InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
689  // FIXME: all latencies are arbitrary, no information is available
690                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
691                               InstrStage<1, [A9_Pipe1]>,
692                               InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
693  //
694  // Integer to Lane Move
695  InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN],   0, Required>,
696  // FIXME: all latencies are arbitrary, no information is available
697                               InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
698                               InstrStage<1, [A9_Pipe1]>,
699                               InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
700
701  //
702  // Double-register FP Unary
703  InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
704                               // Extra latency cycles since wbck is 6 cycles
705                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
706                               InstrStage<1, [A9_Pipe1]>,
707                               InstrStage<1, [A9_NPipe]>], [5, 2]>,
708  //
709  // Quad-register FP Unary
710  // Result written in N5, but that is relative to the last cycle of multicycle,
711  // so we use 6 for those cases
712  InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
713                               // Extra latency cycles since wbck is 7 cycles
714                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
715                               InstrStage<1, [A9_Pipe1]>,
716                               InstrStage<2, [A9_NPipe]>], [6, 2]>,
717  //
718  // Double-register FP Binary
719  // FIXME: We're using this itin for many instructions and [2, 2] here is too
720  // optimistic.
721  InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_DRegsN],   0, Required>,
722                               // Extra latency cycles since wbck is 6 cycles
723                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
724                               InstrStage<1, [A9_Pipe1]>,
725                               InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
726  //
727  // Quad-register FP Binary
728  // Result written in N5, but that is relative to the last cycle of multicycle,
729  // so we use 6 for those cases
730  // FIXME: We're using this itin for many instructions and [2, 2] here is too
731  // optimistic.
732  InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
733                               // Extra latency cycles since wbck is 7 cycles
734                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
735                               InstrStage<1, [A9_Pipe1]>,
736                               InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
737  //
738  // Double-register FP Multiply-Accumulate
739  InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
740                               // Extra latency cycles since wbck is 7 cycles
741                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
742                               InstrStage<1, [A9_Pipe1]>,
743                               InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
744  //
745  // Quad-register FP Multiply-Accumulate
746  // Result written in N9, but that is relative to the last cycle of multicycle,
747  // so we use 10 for those cases
748  InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
749                               // Extra latency cycles since wbck is 9 cycles
750                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
751                               InstrStage<1, [A9_Pipe1]>,
752                               InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
753  //
754  // Double-register Reciprocal Step
755  InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
756                               // Extra latency cycles since wbck is 7 cycles
757                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
758                               InstrStage<1, [A9_Pipe1]>,
759                               InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
760  //
761  // Quad-register Reciprocal Step
762  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
763                               // Extra latency cycles since wbck is 9 cycles
764                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
765                               InstrStage<1, [A9_Pipe1]>,
766                               InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
767  //
768  // Double-register Permute
769  InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
770                               // Extra latency cycles since wbck is 6 cycles
771                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
772                               InstrStage<1, [A9_Pipe1]>,
773                               InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
774  //
775  // Quad-register Permute
776  // Result written in N2, but that is relative to the last cycle of multicycle,
777  // so we use 3 for those cases
778  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
779                               // Extra latency cycles since wbck is 7 cycles
780                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
781                               InstrStage<1, [A9_Pipe1]>,
782                               InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
783  //
784  // Quad-register Permute (3 cycle issue)
785  // Result written in N2, but that is relative to the last cycle of multicycle,
786  // so we use 4 for those cases
787  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_DRegsN],   0, Required>,
788                               // Extra latency cycles since wbck is 8 cycles
789                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
790                               InstrStage<1, [A9_Pipe1]>,
791                               InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
792
793  //
794  // Double-register VEXT
795  InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
796                               // Extra latency cycles since wbck is 6 cycles
797                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
798                               InstrStage<1, [A9_Pipe1]>,
799                               InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
800  //
801  // Quad-register VEXT
802  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
803                               // Extra latency cycles since wbck is 7 cycles
804                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
805                               InstrStage<1, [A9_Pipe1]>,
806                               InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
807  //
808  // VTB
809  InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
810                               // Extra latency cycles since wbck is 7 cycles
811                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
812                               InstrStage<1, [A9_Pipe1]>,
813                               InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
814  InstrItinData<IIC_VTB2,     [InstrStage<2, [A9_DRegsN],   0, Required>,
815                               // Extra latency cycles since wbck is 7 cycles
816                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
817                               InstrStage<1, [A9_Pipe1]>,
818                               InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
819  InstrItinData<IIC_VTB3,     [InstrStage<2, [A9_DRegsN],   0, Required>,
820                               // Extra latency cycles since wbck is 8 cycles
821                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
822                               InstrStage<1, [A9_Pipe1]>,
823                               InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
824  InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
825                               // Extra latency cycles since wbck is 8 cycles
826                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
827                               InstrStage<1, [A9_Pipe1]>,
828                               InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
829  //
830  // VTBX
831  InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_DRegsN],   0, Required>,
832                               // Extra latency cycles since wbck is 7 cycles
833                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
834                               InstrStage<1, [A9_Pipe1]>,
835                               InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
836  InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_DRegsN],   0, Required>,
837                               // Extra latency cycles since wbck is 7 cycles
838                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
839                               InstrStage<1, [A9_Pipe1]>,
840                               InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
841  InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_DRegsN],   0, Required>,
842                               // Extra latency cycles since wbck is 8 cycles
843                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
844                               InstrStage<1, [A9_Pipe1]>,
845                               InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
846  InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_DRegsN],   0, Required>,
847                               // Extra latency cycles since wbck is 8 cycles
848                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
849                               InstrStage<1, [A9_Pipe1]>,
850                              InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
851]>;
852