X86ScheduleSLM.td revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
1//=- X86ScheduleSLM.td - X86 Silvermont Scheduling -----------*- tablegen -*-=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the machine model for Intel Silvermont to support
11// instruction scheduling and other instruction cost heuristics.
12//
13//===----------------------------------------------------------------------===//
14
def SLMModel : SchedMachineModel {
  // Each x86 instruction is modeled as one micro-op. SLM decodes two
  // instructions per cycle, which gives the issue width.
  let IssueWidth = 2;

  // Sized after the reorder buffer.
  let MicroOpBufferSize = 32;

  // Small loops are expanded by a small factor to hide the backedge cost.
  let LoopMicroOpBufferSize = 10;

  let LoadLatency = 3;
  let MispredictPenalty = 10;

  // FIXME: SSE4 is unimplemented. Marking the model as incomplete allows the
  // scheduler to assign a default model to unrecognized opcodes.
  let CompleteModel = 0;
}
30
31let SchedModel = SLMModel in {
32
// Silvermont has 5 reservation stations for micro-ops. In this model the IEC
// stations serve the scalar integer writes, the FPC stations serve the
// floating point and vector writes, and MEC_RSV serves loads and stores.
def IEC_RSV0 : ProcResource<1>;
def IEC_RSV1 : ProcResource<1>;

// Both FPC stations are declared with a buffer size of 1.
let BufferSize = 1 in {
  def FPC_RSV0 : ProcResource<1>;
  def FPC_RSV1 : ProcResource<1>;
}

def MEC_RSV : ProcResource<1>;

// Many micro-ops can issue on either port of a pair; these groups express
// "any one of the two".
def IEC_RSV01 : ProcResGroup<[IEC_RSV0, IEC_RSV1]>;
def FPC_RSV01 : ProcResGroup<[FPC_RSV0, FPC_RSV1]>;

// Dedicated units that the long-running divide and FP multiply writes
// reserve in addition to their issue port.
def SMDivider      : ProcResource<1>;
def SMFPMultiplier : ProcResource<1>;
def SMFPDivider    : ProcResource<1>;
48
// A load takes 3 cycles, so a ReadAfterLd register operand does not have to
// be available until 3 cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
52
// Most SchedWrites come in pairs: a register form and a form with a folded
// load. Instructions with folded loads are usually micro-fused, so they only
// appear as two micro-ops when queued in the reservation station.
//
// SMWriteResPair emits the resource usage for both forms of SchedRW:
//   SchedRW  - the foldable SchedWrite being described.
//   ExePort  - the port (or port group) that executes the operation.
//   Lat      - latency of the register form, in cycles.
multiclass SMWriteResPair<X86FoldableSchedWrite SchedRW,
                          ProcResourceKind ExePort,
                          int Lat> {
  // Register form: a single cycle on ExePort.
  let Latency = Lat in
  def : WriteRes<SchedRW, [ExePort]>;

  // Folded-load form: additionally spends a cycle on MEC_RSV and is 3 cycles
  // slower than the register form.
  let Latency = !add(Lat, 3) in
  def : WriteRes<SchedRW.Folded, [MEC_RSV, ExePort]>;
}
70
// A folded store spends one cycle on MEC_RSV for the store data; no extra
// port cycle is needed to recompute the address.
def : WriteRes<WriteRMW, [MEC_RSV]>;

// Stand-alone stores, loads and register moves.
def : WriteRes<WriteStore, [IEC_RSV01, MEC_RSV]>;
let Latency = 3 in
def : WriteRes<WriteLoad, [MEC_RSV]>;
def : WriteRes<WriteMove, [IEC_RSV01]>;

// WriteZero is given no resources at all.
def : WriteRes<WriteZero, []>;
79
// Scalar integer operations.
// Columns: SchedWrite, execution port, latency of the register form.
defm : SMWriteResPair<WriteALU,   IEC_RSV01, 1>;
defm : SMWriteResPair<WriteIMul,  IEC_RSV1,  3>;
defm : SMWriteResPair<WriteShift, IEC_RSV0,  1>;
defm : SMWriteResPair<WriteJump,  IEC_RSV1,  1>;
84
// Models only the simple LEAs, i.e. those with one or two input operands.
// Complex LEAs can only execute on port 1 and need two cycles on that port to
// read all of their inputs; that is not modeled here.
def : WriteRes<WriteLEA, [IEC_RSV1]>;
89
// Integer division. This is quite rough: the latency depends on the dividend.
// The divider unit is reserved for 25 cycles; the issue port for one.
let Latency = 25, ResourceCycles = [1, 25] in
def : WriteRes<WriteIDiv, [IEC_RSV01, SMDivider]>;

// Folded-load form: one more cycle on MEC_RSV.
// NOTE(review): 29 is 4 cycles above the register form, while the other
// folded-load entries in this file add 3 - confirm this is intended.
let Latency = 29, ResourceCycles = [1, 1, 25] in
def : WriteRes<WriteIDivLd, [MEC_RSV, IEC_RSV01, SMDivider]>;
99
// Scalar and vector floating point.
// Columns: SchedWrite, execution port, latency of the register form.
defm : SMWriteResPair<WriteFAdd,     FPC_RSV1,  3>;
defm : SMWriteResPair<WriteFRcp,     FPC_RSV0,  5>;
defm : SMWriteResPair<WriteFSqrt,    FPC_RSV0,  15>;
defm : SMWriteResPair<WriteCvtF2I,   FPC_RSV01, 4>;
defm : SMWriteResPair<WriteCvtI2F,   FPC_RSV01, 4>;
defm : SMWriteResPair<WriteCvtF2F,   FPC_RSV01, 4>;
defm : SMWriteResPair<WriteFShuffle, FPC_RSV0,  1>;
defm : SMWriteResPair<WriteFBlend,   FPC_RSV0,  1>;
109
// FP multiply. This is quite rough: the latency depends on precision.
// The multiplier unit is reserved for 2 cycles; the issue port for one.
let Latency = 5, ResourceCycles = [1, 2] in
def : WriteRes<WriteFMul, [FPC_RSV0, SMFPMultiplier]>;

// Folded-load form: one cycle on MEC_RSV and 3 more cycles of latency.
let Latency = 8, ResourceCycles = [1, 1, 2] in
def : WriteRes<WriteFMulLd, [MEC_RSV, FPC_RSV0, SMFPMultiplier]>;

// FP divide. The divider unit is reserved for the full 34-cycle latency.
let Latency = 34, ResourceCycles = [1, 34] in
def : WriteRes<WriteFDiv, [FPC_RSV0, SMFPDivider]>;

// Folded-load form: one cycle on MEC_RSV and 3 more cycles of latency.
let Latency = 37, ResourceCycles = [1, 1, 34] in
def : WriteRes<WriteFDivLd, [MEC_RSV, FPC_RSV0, SMFPDivider]>;
128
// Vector integer operations.
// Columns: SchedWrite, execution port, latency of the register form.
defm : SMWriteResPair<WriteVecShift, FPC_RSV0,  1>;
defm : SMWriteResPair<WriteVecLogic, FPC_RSV01, 1>;
defm : SMWriteResPair<WriteVecALU,   FPC_RSV01, 1>;
defm : SMWriteResPair<WriteVecIMul,  FPC_RSV0,  4>;
defm : SMWriteResPair<WriteShuffle,  FPC_RSV0,  1>;
defm : SMWriteResPair<WriteBlend,    FPC_RSV0,  1>;
defm : SMWriteResPair<WriteMPSAD,    FPC_RSV0,  7>;
137
// String instructions.
//
// Each register form occupies FPC_RSV0 for its whole latency. Each folded-load
// form additionally spends one cycle on MEC_RSV and, like every other
// folded-load entry in this model, is 3 cycles (the load latency) slower than
// its register form.

// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [FPC_RSV0]> {
  let Latency = 13;
  let ResourceCycles = [13];
}
def : WriteRes<WritePCmpIStrMLd, [FPC_RSV0, MEC_RSV]> {
  let Latency = 16; // 13 + 3-cycle load.
  let ResourceCycles = [13, 1];
}

// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [FPC_RSV0]> {
  let Latency = 17;
  let ResourceCycles = [17];
}
def : WriteRes<WritePCmpEStrMLd, [FPC_RSV0, MEC_RSV]> {
  let Latency = 20; // 17 + 3-cycle load.
  let ResourceCycles = [17, 1];
}

// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [FPC_RSV0]> {
  let Latency = 17;
  let ResourceCycles = [17];
}
def : WriteRes<WritePCmpIStrILd, [FPC_RSV0, MEC_RSV]> {
  let Latency = 20; // 17 + 3-cycle load.
  let ResourceCycles = [17, 1];
}

// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [FPC_RSV0]> {
  let Latency = 21;
  let ResourceCycles = [21];
}
def : WriteRes<WritePCmpEStrILd, [FPC_RSV0, MEC_RSV]> {
  let Latency = 24; // 21 + 3-cycle load.
  let ResourceCycles = [21, 1];
}
178
// AES Instructions.
//
// Each register form has a latency of 8 cycles and occupies FPC_RSV0 for 5 of
// them. Each folded-load form additionally spends one cycle on MEC_RSV and,
// like every other folded-load entry in this model, is 3 cycles (the load
// latency) slower than its register form.
def : WriteRes<WriteAESDecEnc, [FPC_RSV0]> {
  let Latency = 8;
  let ResourceCycles = [5];
}
def : WriteRes<WriteAESDecEncLd, [FPC_RSV0, MEC_RSV]> {
  let Latency = 11; // 8 + 3-cycle load.
  let ResourceCycles = [5, 1];
}

def : WriteRes<WriteAESIMC, [FPC_RSV0]> {
  let Latency = 8;
  let ResourceCycles = [5];
}
def : WriteRes<WriteAESIMCLd, [FPC_RSV0, MEC_RSV]> {
  let Latency = 11; // 8 + 3-cycle load.
  let ResourceCycles = [5, 1];
}

def : WriteRes<WriteAESKeyGen, [FPC_RSV0]> {
  let Latency = 8;
  let ResourceCycles = [5];
}
def : WriteRes<WriteAESKeyGenLd, [FPC_RSV0, MEC_RSV]> {
  let Latency = 11; // 8 + 3-cycle load.
  let ResourceCycles = [5, 1];
}
206
// Carry-less multiplication instructions.
//
// The register form occupies FPC_RSV0 for its whole 10-cycle latency. The
// folded-load form additionally spends one cycle on MEC_RSV and, like every
// other folded-load entry in this model, is 3 cycles (the load latency)
// slower than the register form.
def : WriteRes<WriteCLMul, [FPC_RSV0]> {
  let Latency = 10;
  let ResourceCycles = [10];
}
def : WriteRes<WriteCLMulLd, [FPC_RSV0, MEC_RSV]> {
  let Latency = 13; // 10 + 3-cycle load.
  let ResourceCycles = [10, 1];
}
216
217
// System and microcoded instructions are both modeled on FPC_RSV0 with a
// latency of 100 cycles.
let Latency = 100 in {
  def : WriteRes<WriteSystem,     [FPC_RSV0]>;
  def : WriteRes<WriteMicrocoded, [FPC_RSV0]>;
}

// Fences spend a cycle on the memory station; nops use no resources.
def : WriteRes<WriteFence, [MEC_RSV]>;
def : WriteRes<WriteNop, []>;
222
// AVX is not supported on this architecture, but the basic scheduling
// resources are still defined so that every SchedWrite has an entry.
// NOTE(review): WriteIMulH is placed on an FPC port here, unlike WriteIMul
// which uses IEC_RSV1 - confirm this placeholder mapping is intended.
def  : WriteRes<WriteIMulH, [FPC_RSV0]>;
defm : SMWriteResPair<WriteVarBlend,    FPC_RSV0, 1>;
defm : SMWriteResPair<WriteFVarBlend,   FPC_RSV0, 1>;
defm : SMWriteResPair<WriteFShuffle256, FPC_RSV0, 1>;
defm : SMWriteResPair<WriteShuffle256,  FPC_RSV0, 1>;
defm : SMWriteResPair<WriteVarVecShift, FPC_RSV0, 1>;
231} // SchedModel
232