X86SchedSandyBridge.td revision a5ce5f36d3a1e312304e8312ca64a1342f5f55a6
//=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Sandy Bridge to support instruction
// scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// Top-level machine description for Sandy Bridge.
//
// Every x86 instruction is currently treated as one micro-op, and the issue
// width reflects the 4 instructions per cycle that SB can decode.
// FIXME: Identify instructions that aren't a single fused micro-op.
def SandyBridgeModel : SchedMachineModel {
  let IssueWidth = 4;
  // Sized after the reorder buffer.
  let MicroOpBufferSize = 168;
  let LoadLatency = 4;
  let MispredictPenalty = 16;
}

// Everything below is attached to the Sandy Bridge model.
let SchedModel = SandyBridgeModel in {

//===----------------------------------------------------------------------===//
// Execution resources.
//
// Micro-ops can be issued to 6 different ports in a single cycle.
//===----------------------------------------------------------------------===//

// Computation ports: all computation goes through ports 0, 1 and 5.
def SBPort0 : ProcResource<1>;
def SBPort1 : ProcResource<1>;
def SBPort5 : ProcResource<1>;

// Ports 2 and 3 are interchangeable, so they are modeled as one resource with
// two units. They serve loads and the address half of stores.
def SBPort23 : ProcResource<2>;

// Port 4 receives the data half of stores. The store data may become
// available later than the store address; because store latency is not
// modeled, that difference is ignored.
def SBPort4 : ProcResource<1>;

// Groups for micro-ops that may issue on more than one port.
def SBPort05  : ProcResGroup<[SBPort0, SBPort5]>;
def SBPort15  : ProcResGroup<[SBPort1, SBPort5]>;
def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;

// Divider unit; integer division is issued on port 0.
def SBDivider : ProcResource<1>;

//===----------------------------------------------------------------------===//
// Operand read timing.
//===----------------------------------------------------------------------===//

// A load takes 4 cycles, so a ReadAfterLd register operand is not needed until
// 4 cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 4>;

//===----------------------------------------------------------------------===//
// Helper for register / folded-load SchedWrite pairs.
//===----------------------------------------------------------------------===//

// Many SchedWrites come in two flavors: a register form and a form with a
// folded load. Folded-load instructions are usually micro-fused, so they only
// show up as two micro-ops once queued in the reservation station.
//
// SBWriteResPair emits the resource usage for both flavors:
//   SchedRW - the foldable SchedWrite being described.
//   ExePort - the port (or port group) that executes the operation.
//   Lat     - latency of the register form.
multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
                          ProcResourceKind ExePort,
                          int Lat> {
  // Register form: one cycle on ExePort.
  def : WriteRes<SchedRW, [ExePort]> {
    let Latency = Lat;
  }

  // Folded-load form: an additional cycle on port 2/3, and 4 more cycles of
  // latency on top of the register form.
  def : WriteRes<SchedRW.Folded, [SBPort23, ExePort]> {
    let Latency = !add(Lat, 4);
  }
}

//===----------------------------------------------------------------------===//
// Memory and move operations.
//===----------------------------------------------------------------------===//

// The store half of a read-modify-write takes a port 4 cycle for the data but
// does not need another port 2/3 cycle to recompute the address.
def : WriteRes<WriteRMW, [SBPort4]>;

def : WriteRes<WriteStore, [SBPort23, SBPort4]>;

let Latency = 4 in
def : WriteRes<WriteLoad, [SBPort23]>;

def : WriteRes<WriteMove, [SBPort015]>;
def : WriteRes<WriteZero, []>;

//===----------------------------------------------------------------------===//
// Integer arithmetic.
//===----------------------------------------------------------------------===//

defm : SBWriteResPair<WriteALU,   SBPort015, 1>;
defm : SBWriteResPair<WriteIMul,  SBPort1,   3>;
defm : SBWriteResPair<WriteShift, SBPort05,  1>;
defm : SBWriteResPair<WriteJump,  SBPort5,   1>;

// Covers simple LEAs with one or two input operands. Complex LEAs can only
// execute on port 1 and need two cycles on that port to read all inputs;
// that case is not modeled.
def : WriteRes<WriteLEA, [SBPort15]>;

// Integer division. This is a rough estimate: the real latency depends on the
// dividend.
def : WriteRes<WriteIDiv, [SBPort0, SBDivider]> {
  let ResourceCycles = [1, 10];
  let Latency = 25;
}
def : WriteRes<WriteIDivLd, [SBPort23, SBPort0, SBDivider]> {
  let ResourceCycles = [1, 1, 10];
  let Latency = 29;
}

//===----------------------------------------------------------------------===//
// Scalar and vector floating point.
//===----------------------------------------------------------------------===//

defm : SBWriteResPair<WriteFAdd,   SBPort1, 3>;
defm : SBWriteResPair<WriteFMul,   SBPort0, 5>;
defm : SBWriteResPair<WriteFDiv,   SBPort0, 12>; // 10-14 cycles.
defm : SBWriteResPair<WriteFRcp,   SBPort0, 5>;
defm : SBWriteResPair<WriteFSqrt,  SBPort0, 15>;
defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>;

//===----------------------------------------------------------------------===//
// Vector integer operations.
//===----------------------------------------------------------------------===//

defm : SBWriteResPair<WriteVecShift, SBPort05,  1>;
defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>;
defm : SBWriteResPair<WriteVecALU,   SBPort15,  1>;
defm : SBWriteResPair<WriteVecIMul,  SBPort0,   5>;
defm : SBWriteResPair<WriteShuffle,  SBPort15,  1>;

//===----------------------------------------------------------------------===//
// System and microcoded instructions.
//===----------------------------------------------------------------------===//

// Both classes are given the same latency of 100.
let Latency = 100 in {
  def : WriteRes<WriteSystem,     [SBPort015]>;
  def : WriteRes<WriteMicrocoded, [SBPort015]>;
}

} // SchedModel