//=- X86ScheduleSLM.td - X86 Silvermont Scheduling -----------*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Intel Silvermont to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// Top-level machine description for Silvermont (SLM).
def SLMModel : SchedMachineModel {
  // Every x86 instruction is modeled as a single micro-op, and SLM can decode
  // two instructions per cycle.
  let IssueWidth = 2;
  // Sized after the reorder buffer.
  let MicroOpBufferSize = 32;
  let LoadLatency = 3;
  let MispredictPenalty = 10;
  let PostRAScheduler = 1;

  // Small loops are expanded by a small factor to hide the backedge cost.
  let LoopMicroOpBufferSize = 10;

  // FIXME: SSE4 is unimplemented. The model is marked incomplete so that the
  // scheduler may assign a default model to unrecognized opcodes.
  let CompleteModel = 0;
}

let SchedModel = SLMModel in {

//===----------------------------------------------------------------------===//
// Processor resources.
//===----------------------------------------------------------------------===//

// Silvermont has 5 reservation stations for micro-ops.
def IEC_RSV0 : ProcResource<1>;
def IEC_RSV1 : ProcResource<1>;
let BufferSize = 1 in {
  def FPC_RSV0 : ProcResource<1>;
  def FPC_RSV1 : ProcResource<1>;
}
def MEC_RSV : ProcResource<1>;

// Many micro-ops can issue on more than one port; these groups express that.
def IEC_RSV01 : ProcResGroup<[IEC_RSV0, IEC_RSV1]>;
def FPC_RSV01 : ProcResGroup<[FPC_RSV0, FPC_RSV1]>;

// Extra resources consumed by the integer divide, FP multiply and FP divide
// entries further down in this file.
def SMDivider      : ProcResource<1>;
def SMFPMultiplier : ProcResource<1>;
def SMFPDivider    : ProcResource<1>;

// Loads take 3 cycles, so a ReadAfterLd register operand does not have to be
// available until 3 cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;

//===----------------------------------------------------------------------===//
// Helper for register / folded-load SchedWrite pairs.
//===----------------------------------------------------------------------===//

// Many SchedWrites come in pairs: one without and one with a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
//
//   SchedRW - the foldable SchedWrite; SchedRW.Folded names its load variant.
//   ExePort - the resource (or group) the operation executes on.
//   Lat     - latency of the register variant.
multiclass SMWriteResPair<X86FoldableSchedWrite SchedRW,
                          ProcResourceKind ExePort,
                          int Lat> {
  // Register form: a single cycle on ExePort.
  let Latency = Lat in
  def : WriteRes<SchedRW, [ExePort]>;

  // Memory form: additionally occupies MEC_RSV for a cycle and is 3 cycles
  // slower than the register form.
  let Latency = !add(Lat, 3) in
  def : WriteRes<SchedRW.Folded, [MEC_RSV, ExePort]>;
}

//===----------------------------------------------------------------------===//
// Memory, moves and scalar integer.
//===----------------------------------------------------------------------===//

// A folded store needs a cycle on MEC_RSV for the store data, but it does not
// need an extra port cycle to recompute the address.
def : WriteRes<WriteRMW, [MEC_RSV]>;

def : WriteRes<WriteStore, [IEC_RSV01, MEC_RSV]>;
let Latency = 3 in
def : WriteRes<WriteLoad, [MEC_RSV]>;
def : WriteRes<WriteMove, [IEC_RSV01]>;
def : WriteRes<WriteZero, []>;

defm : SMWriteResPair<WriteALU,   IEC_RSV01, 1>;
defm : SMWriteResPair<WriteIMul,  IEC_RSV1,  3>;
defm : SMWriteResPair<WriteShift, IEC_RSV0,  1>;
defm : SMWriteResPair<WriteJump,  IEC_RSV1,  1>;

// Covers simple LEAs with one or two input operands. Complex LEAs can only
// execute on port 1 and need two cycles on that port to read all inputs; that
// is not modeled here.
def : WriteRes<WriteLEA, [IEC_RSV1]>;

// Integer divide. This is quite rough: the latency depends on the dividend.
let Latency = 25, ResourceCycles = [1, 25] in
def : WriteRes<WriteIDiv, [IEC_RSV01, SMDivider]>;
// NOTE(review): 29 is 25 + 4, whereas the other folded-load entries in this
// file add 3 cycles for the load. Confirm whether this is intentional.
let Latency = 29, ResourceCycles = [1, 1, 25] in
def : WriteRes<WriteIDivLd, [MEC_RSV, IEC_RSV01, SMDivider]>;

//===----------------------------------------------------------------------===//
// Scalar and vector floating point.
//===----------------------------------------------------------------------===//

defm : SMWriteResPair<WriteFAdd,     FPC_RSV1,  3>;
defm : SMWriteResPair<WriteFRcp,     FPC_RSV0,  5>;
defm : SMWriteResPair<WriteFRsqrt,   FPC_RSV0,  5>;
defm : SMWriteResPair<WriteFSqrt,    FPC_RSV0,  15>;
defm : SMWriteResPair<WriteCvtF2I,   FPC_RSV01, 4>;
defm : SMWriteResPair<WriteCvtI2F,   FPC_RSV01, 4>;
defm : SMWriteResPair<WriteCvtF2F,   FPC_RSV01, 4>;
defm : SMWriteResPair<WriteFShuffle, FPC_RSV0,  1>;
defm : SMWriteResPair<WriteFBlend,   FPC_RSV0,  1>;

// FP multiply. This is quite rough: the latency depends on precision.
let Latency = 5, ResourceCycles = [1, 2] in
def : WriteRes<WriteFMul, [FPC_RSV0, SMFPMultiplier]>;
let Latency = 8, ResourceCycles = [1, 1, 2] in
def : WriteRes<WriteFMulLd, [MEC_RSV, FPC_RSV0, SMFPMultiplier]>;

// FP divide.
let Latency = 34, ResourceCycles = [1, 34] in
def : WriteRes<WriteFDiv, [FPC_RSV0, SMFPDivider]>;
let Latency = 37, ResourceCycles = [1, 1, 34] in
def : WriteRes<WriteFDivLd, [MEC_RSV, FPC_RSV0, SMFPDivider]>;

//===----------------------------------------------------------------------===//
// Vector integer operations.
//===----------------------------------------------------------------------===//

defm : SMWriteResPair<WriteVecShift, FPC_RSV0,  1>;
defm : SMWriteResPair<WriteVecLogic, FPC_RSV01, 1>;
defm : SMWriteResPair<WriteVecALU,   FPC_RSV01, 1>;
defm : SMWriteResPair<WriteVecIMul,  FPC_RSV0,  4>;
defm : SMWriteResPair<WriteShuffle,  FPC_RSV0,  1>;
defm : SMWriteResPair<WriteBlend,    FPC_RSV0,  1>;
defm : SMWriteResPair<WriteMPSAD,    FPC_RSV0,  7>;

//===----------------------------------------------------------------------===//
// String instructions.
//===----------------------------------------------------------------------===//

// Packed Compare Implicit Length Strings, Return Mask
let Latency = 13 in {
  def : WriteRes<WritePCmpIStrM, [FPC_RSV0]> {
    let ResourceCycles = [13];
  }
  def : WriteRes<WritePCmpIStrMLd, [FPC_RSV0, MEC_RSV]> {
    let ResourceCycles = [13, 1];
  }
}

// Packed Compare Explicit Length Strings, Return Mask
let Latency = 17 in {
  def : WriteRes<WritePCmpEStrM, [FPC_RSV0]> {
    let ResourceCycles = [17];
  }
  def : WriteRes<WritePCmpEStrMLd, [FPC_RSV0, MEC_RSV]> {
    let ResourceCycles = [17, 1];
  }
}

// Packed Compare Implicit Length Strings, Return Index
let Latency = 17 in {
  def : WriteRes<WritePCmpIStrI, [FPC_RSV0]> {
    let ResourceCycles = [17];
  }
  def : WriteRes<WritePCmpIStrILd, [FPC_RSV0, MEC_RSV]> {
    let ResourceCycles = [17, 1];
  }
}

// Packed Compare Explicit Length Strings, Return Index
let Latency = 21 in {
  def : WriteRes<WritePCmpEStrI, [FPC_RSV0]> {
    let ResourceCycles = [21];
  }
  def : WriteRes<WritePCmpEStrILd, [FPC_RSV0, MEC_RSV]> {
    let ResourceCycles = [21, 1];
  }
}

//===----------------------------------------------------------------------===//
// AES instructions.
//===----------------------------------------------------------------------===//

// All AES entries share one latency; register forms hold FPC_RSV0 for 5
// cycles and the folded-load forms add one cycle on MEC_RSV.
let Latency = 8 in {
  let ResourceCycles = [5] in
  def : WriteRes<WriteAESDecEnc, [FPC_RSV0]>;
  let ResourceCycles = [5, 1] in
  def : WriteRes<WriteAESDecEncLd, [FPC_RSV0, MEC_RSV]>;

  let ResourceCycles = [5] in
  def : WriteRes<WriteAESIMC, [FPC_RSV0]>;
  let ResourceCycles = [5, 1] in
  def : WriteRes<WriteAESIMCLd, [FPC_RSV0, MEC_RSV]>;

  let ResourceCycles = [5] in
  def : WriteRes<WriteAESKeyGen, [FPC_RSV0]>;
  let ResourceCycles = [5, 1] in
  def : WriteRes<WriteAESKeyGenLd, [FPC_RSV0, MEC_RSV]>;
}

//===----------------------------------------------------------------------===//
// Carry-less multiplication instructions.
//===----------------------------------------------------------------------===//

let Latency = 10 in {
  def : WriteRes<WriteCLMul, [FPC_RSV0]> {
    let ResourceCycles = [10];
  }
  def : WriteRes<WriteCLMulLd, [FPC_RSV0, MEC_RSV]> {
    let ResourceCycles = [10, 1];
  }
}

//===----------------------------------------------------------------------===//
// System, microcoded and miscellaneous.
//===----------------------------------------------------------------------===//

let Latency = 100 in {
  def : WriteRes<WriteSystem, [FPC_RSV0]>;
  def : WriteRes<WriteMicrocoded, [FPC_RSV0]>;
}
def : WriteRes<WriteFence, [MEC_RSV]>;
def : WriteRes<WriteNop, []>;

// AVX is not supported on this architecture, but the basic scheduling
// resources are defined anyway.
def : WriteRes<WriteIMulH, [FPC_RSV0]>;
defm : SMWriteResPair<WriteVarBlend,    FPC_RSV0, 1>;
defm : SMWriteResPair<WriteFVarBlend,   FPC_RSV0, 1>;
defm : SMWriteResPair<WriteFShuffle256, FPC_RSV0, 1>;
defm : SMWriteResPair<WriteShuffle256,  FPC_RSV0, 1>;
defm : SMWriteResPair<WriteVarVecShift, FPC_RSV0, 1>;
} // SchedModel