//=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Sandy Bridge to support instruction
// scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// Provenance: X86SchedSandyBridge.td, revision
// 9b5575d55add0bb2c8769f76db250ff0f4efe8dc.

def SandyBridgeModel : SchedMachineModel {
  // All x86 instructions are modeled as a single micro-op, and SB can decode 4
  // instructions per cycle.
  // FIXME: Identify instructions that aren't a single fused micro-op.
  let IssueWidth = 4;
  let MicroOpBufferSize = 168; // Based on the reorder buffer.
  let LoadLatency = 4;
  let MispredictPenalty = 16;
}

let SchedModel = SandyBridgeModel in {

// Sandy Bridge can issue micro-ops to 6 different ports in one cycle.

// Ports 0, 1, and 5 handle all computation.
def SBPort0 : ProcResource<1>;
def SBPort1 : ProcResource<1>;
def SBPort5 : ProcResource<1>;

// Ports 2 and 3 are identical. They handle loads and the address half of
// stores.
def SBPort23 : ProcResource<2>;

// Port 4 gets the data half of stores. Store data can be available later than
// the store address, but since we don't model the latency of stores, we can
// ignore that.
def SBPort4 : ProcResource<1>;

// Many micro-ops are capable of issuing on multiple ports.
def SBPort05  : ProcResGroup<[SBPort0, SBPort5]>;
def SBPort15  : ProcResGroup<[SBPort1, SBPort5]>;
def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;

// 54 Entry Unified Scheduler
def SBPortAny : ProcResGroup<[SBPort0, SBPort1, SBPort23, SBPort4, SBPort5]> {
  let BufferSize=54;
}

// Integer division issued on port 0.
def SBDivider : ProcResource<1>;

// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 4>;

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
//
// Parameters:
//   SchedRW - the foldable SchedWrite; SchedRW.Folded names its load variant.
//   ExePort - the port (or port group) the operation executes on.
//   Lat     - latency of the register variant, in cycles.
multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
                          ProcResourceKind ExePort,
                          int Lat> {
  // Register variant is using a single cycle on ExePort.
  def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }

  // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
  // latency.
  def : WriteRes<SchedRW.Folded, [SBPort23, ExePort]> {
    let Latency = !add(Lat, 4);
  }
}

// A folded store needs a cycle on port 4 for the store data, but it does not
// need an extra port 2/3 cycle to recompute the address.
def : WriteRes<WriteRMW, [SBPort4]>;

def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
def : WriteRes<WriteLoad,  [SBPort23]> { let Latency = 4; }
def : WriteRes<WriteMove,  [SBPort015]>;
// WriteZero is assigned an empty resource list.
def : WriteRes<WriteZero,  []>;

defm : SBWriteResPair<WriteALU,   SBPort015, 1>;
defm : SBWriteResPair<WriteIMul,  SBPort1,   3>;
// WriteIMulH is given a latency of 3 but an empty resource list.
def  : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : SBWriteResPair<WriteShift, SBPort05,  1>;
defm : SBWriteResPair<WriteJump,  SBPort5,   1>;

// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
// the port to read all inputs. We don't model that.
def : WriteRes<WriteLEA, [SBPort15]>;

// This is quite rough, latency depends on the dividend.
// One cycle on port 0, ten cycles on the divider.
def : WriteRes<WriteIDiv, [SBPort0, SBDivider]> {
  let Latency = 25;
  let ResourceCycles = [1, 10];
}
// Folded-load form: one additional cycle on port 2/3 and 4 more cycles of
// latency than the register form.
def : WriteRes<WriteIDivLd, [SBPort23, SBPort0, SBDivider]> {
  let Latency = 29;
  let ResourceCycles = [1, 1, 10];
}

// Scalar and vector floating point.
defm : SBWriteResPair<WriteFAdd,   SBPort1, 3>;
defm : SBWriteResPair<WriteFMul,   SBPort0, 5>;
defm : SBWriteResPair<WriteFDiv,   SBPort0, 12>; // 10-14 cycles.
defm : SBWriteResPair<WriteFRcp,   SBPort0, 5>;
defm : SBWriteResPair<WriteFSqrt,  SBPort0, 15>;
defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>;

// Vector integer operations.
defm : SBWriteResPair<WriteVecShift, SBPort05,  1>;
defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>;
defm : SBWriteResPair<WriteVecALU,   SBPort15,  1>;
defm : SBWriteResPair<WriteVecIMul,  SBPort0,   5>;
defm : SBWriteResPair<WriteShuffle,  SBPort15,  1>;

// System and microcoded instructions: one cycle on any of ports 0, 1, or 5,
// with a flat latency of 100 cycles.
def : WriteRes<WriteSystem,     [SBPort015]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
} // SchedModel