TargetSchedule.cpp revision 6a22dba4854a8b3d7427f8493f663c1b52df4477
1//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements a wrapper around MCSchedModel that allows the interface 11// to benefit from information currently only available in TargetInstrInfo. 12// 13//===----------------------------------------------------------------------===// 14 15#include "llvm/CodeGen/TargetSchedule.h" 16#include "llvm/Support/CommandLine.h" 17#include "llvm/Support/raw_ostream.h" 18#include "llvm/Target/TargetInstrInfo.h" 19#include "llvm/Target/TargetMachine.h" 20#include "llvm/Target/TargetRegisterInfo.h" 21#include "llvm/Target/TargetSubtargetInfo.h" 22 23using namespace llvm; 24 25static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true), 26 cl::desc("Use TargetSchedModel for latency lookup")); 27 28static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true), 29 cl::desc("Use InstrItineraryData for latency lookup")); 30 31bool TargetSchedModel::hasInstrSchedModel() const { 32 return EnableSchedModel && SchedModel.hasInstrSchedModel(); 33} 34 35bool TargetSchedModel::hasInstrItineraries() const { 36 return EnableSchedItins && !InstrItins.isEmpty(); 37} 38 39static unsigned gcd(unsigned Dividend, unsigned Divisor) { 40 // Dividend and Divisor will be naturally swapped as needed. 41 while(Divisor) { 42 unsigned Rem = Dividend % Divisor; 43 Dividend = Divisor; 44 Divisor = Rem; 45 }; 46 return Dividend; 47} 48static unsigned lcm(unsigned A, unsigned B) { 49 unsigned LCM = (uint64_t(A) * B) / gcd(A, B); 50 assert((LCM >= A && LCM >= B) && "LCM overflow"); 51 return LCM; 52} 53 54void TargetSchedModel::init(const MCSchedModel &sm, 55 const TargetSubtargetInfo *sti, 56 const TargetInstrInfo *tii) { 57 SchedModel = sm; 58 STI = sti; 59 TII = tii; 60 STI->initInstrItins(InstrItins); 61 62 unsigned NumRes = SchedModel.getNumProcResourceKinds(); 63 ResourceFactors.resize(NumRes); 64 ResourceLCM = SchedModel.IssueWidth; 65 for (unsigned Idx = 0; Idx < NumRes; ++Idx) { 66 unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; 67 if (NumUnits > 0) 68 ResourceLCM = lcm(ResourceLCM, NumUnits); 69 } 70 MicroOpFactor = ResourceLCM / SchedModel.IssueWidth; 71 for (unsigned Idx = 0; Idx < NumRes; ++Idx) { 72 unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; 73 ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0; 74 } 75} 76 77unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI, 78 const MCSchedClassDesc *SC) const { 79 if (hasInstrItineraries()) { 80 int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass()); 81 return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI); 82 } 83 if (hasInstrSchedModel()) { 84 if (!SC) 85 SC = resolveSchedClass(MI); 86 if (SC->isValid()) 87 return SC->NumMicroOps; 88 } 89 return MI->isTransient() ? 0 : 1; 90} 91 92// The machine model may explicitly specify an invalid latency, which 93// effectively means infinite latency. Since users of the TargetSchedule API 94// don't know how to handle this, we convert it to a very large latency that is 95// easy to distinguish when debugging the DAG but won't induce overflow. 96static unsigned convertLatency(int Cycles) { 97 return Cycles >= 0 ? Cycles : 1000; 98} 99 100/// If we can determine the operand latency from the def only, without machine 101/// model or itinerary lookup, do so. Otherwise return -1. 102int TargetSchedModel::getDefLatency(const MachineInstr *DefMI, 103 bool FindMin) const { 104 105 // Return a latency based on the itinerary properties and defining instruction 106 // if possible. Some common subtargets don't require per-operand latency, 107 // especially for minimum latencies. 108 if (FindMin) { 109 // If MinLatency is invalid, then use the itinerary for MinLatency. If no 110 // itinerary exists either, then use single cycle latency. 111 if (SchedModel.MinLatency < 0 && !hasInstrItineraries()) { 112 return 1; 113 } 114 return SchedModel.MinLatency; 115 } 116 else if (!hasInstrSchedModel() && !hasInstrItineraries()) { 117 return TII->defaultDefLatency(&SchedModel, DefMI); 118 } 119 // ...operand lookup required 120 return -1; 121} 122 123/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require 124/// evaluation of predicates that depend on instruction operands or flags. 125const MCSchedClassDesc *TargetSchedModel:: 126resolveSchedClass(const MachineInstr *MI) const { 127 128 // Get the definition's scheduling class descriptor from this machine model. 129 unsigned SchedClass = MI->getDesc().getSchedClass(); 130 const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); 131 if (!SCDesc->isValid()) 132 return SCDesc; 133 134#ifndef NDEBUG 135 unsigned NIter = 0; 136#endif 137 while (SCDesc->isVariant()) { 138 assert(++NIter < 6 && "Variants are nested deeper than the magic number"); 139 140 SchedClass = STI->resolveSchedClass(SchedClass, MI, this); 141 SCDesc = SchedModel.getSchedClassDesc(SchedClass); 142 } 143 return SCDesc; 144} 145 146/// Find the def index of this operand. This index maps to the machine model and 147/// is independent of use operands. Def operands may be reordered with uses or 148/// merged with uses without affecting the def index (e.g. before/after 149/// regalloc). However, an instruction's def operands must never be reordered 150/// with respect to each other. 151static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) { 152 unsigned DefIdx = 0; 153 for (unsigned i = 0; i != DefOperIdx; ++i) { 154 const MachineOperand &MO = MI->getOperand(i); 155 if (MO.isReg() && MO.isDef()) 156 ++DefIdx; 157 } 158 return DefIdx; 159} 160 161/// Find the use index of this operand. This is independent of the instruction's 162/// def operands. 163/// 164/// Note that uses are not determined by the operand's isUse property, which 165/// is simply the inverse of isDef. Here we consider any readsReg operand to be 166/// a "use". The machine model allows an operand to be both a Def and Use. 167static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { 168 unsigned UseIdx = 0; 169 for (unsigned i = 0; i != UseOperIdx; ++i) { 170 const MachineOperand &MO = MI->getOperand(i); 171 if (MO.isReg() && MO.readsReg()) 172 ++UseIdx; 173 } 174 return UseIdx; 175} 176 177// Top-level API for clients that know the operand indices. 178unsigned TargetSchedModel::computeOperandLatency( 179 const MachineInstr *DefMI, unsigned DefOperIdx, 180 const MachineInstr *UseMI, unsigned UseOperIdx, 181 bool FindMin) const { 182 183 int DefLatency = getDefLatency(DefMI, FindMin); 184 if (DefLatency >= 0) 185 return DefLatency; 186 187 if (hasInstrItineraries()) { 188 int OperLatency = 0; 189 if (UseMI) { 190 OperLatency = 191 TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx); 192 } 193 else { 194 unsigned DefClass = DefMI->getDesc().getSchedClass(); 195 OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx); 196 } 197 if (OperLatency >= 0) 198 return OperLatency; 199 200 // No operand latency was found. 201 unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI); 202 203 // Expected latency is the max of the stage latency and itinerary props. 204 // Rather than directly querying InstrItins stage latency, we call a TII 205 // hook to allow subtargets to specialize latency. This hook is only 206 // applicable to the InstrItins model. InstrSchedModel should model all 207 // special cases without TII hooks. 208 if (!FindMin) 209 InstrLatency = std::max(InstrLatency, 210 TII->defaultDefLatency(&SchedModel, DefMI)); 211 return InstrLatency; 212 } 213 assert(!FindMin && hasInstrSchedModel() && 214 "Expected a SchedModel for this cpu"); 215 const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); 216 unsigned DefIdx = findDefIdx(DefMI, DefOperIdx); 217 if (DefIdx < SCDesc->NumWriteLatencyEntries) { 218 // Lookup the definition's write latency in SubtargetInfo. 219 const MCWriteLatencyEntry *WLEntry = 220 STI->getWriteLatencyEntry(SCDesc, DefIdx); 221 unsigned WriteID = WLEntry->WriteResourceID; 222 unsigned Latency = convertLatency(WLEntry->Cycles); 223 if (!UseMI) 224 return Latency; 225 226 // Lookup the use's latency adjustment in SubtargetInfo. 227 const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI); 228 if (UseDesc->NumReadAdvanceEntries == 0) 229 return Latency; 230 unsigned UseIdx = findUseIdx(UseMI, UseOperIdx); 231 return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); 232 } 233 // If DefIdx does not exist in the model (e.g. implicit defs), then return 234 // unit latency (defaultDefLatency may be too conservative). 235#ifndef NDEBUG 236 if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() 237 && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) { 238 std::string Err; 239 raw_string_ostream ss(Err); 240 ss << "DefIdx " << DefIdx << " exceeds machine model writes for " 241 << *DefMI; 242 report_fatal_error(ss.str()); 243 } 244#endif 245 // FIXME: Automatically giving all implicit defs defaultDefLatency is 246 // undesirable. We should only do it for defs that are known to the MC 247 // desc like flags. Truly implicit defs should get 1 cycle latency. 248 return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI); 249} 250 251unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { 252 // For the itinerary model, fall back to the old subtarget hook. 253 // Allow subtargets to compute Bundle latencies outside the machine model. 254 if (hasInstrItineraries() || MI->isBundle()) 255 return TII->getInstrLatency(&InstrItins, MI); 256 257 if (hasInstrSchedModel()) { 258 const MCSchedClassDesc *SCDesc = resolveSchedClass(MI); 259 if (SCDesc->isValid()) { 260 unsigned Latency = 0; 261 for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; 262 DefIdx != DefEnd; ++DefIdx) { 263 // Lookup the definition's write latency in SubtargetInfo. 264 const MCWriteLatencyEntry *WLEntry = 265 STI->getWriteLatencyEntry(SCDesc, DefIdx); 266 Latency = std::max(Latency, convertLatency(WLEntry->Cycles)); 267 } 268 return Latency; 269 } 270 } 271 return TII->defaultDefLatency(&SchedModel, MI); 272} 273 274unsigned TargetSchedModel:: 275computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, 276 const MachineInstr *DepMI) const { 277 // MinLatency == -1 is for in-order processors that always have unit 278 // MinLatency. MinLatency > 0 is for in-order processors with varying min 279 // latencies, but since this is not a RAW dep, we always use unit latency. 280 if (SchedModel.MinLatency != 0) 281 return 1; 282 283 // MinLatency == 0 indicates an out-of-order processor that can dispatch 284 // WAW dependencies in the same cycle. 285 286 // Treat predication as a data dependency for out-of-order cpus. In-order 287 // cpus do not need to treat predicated writes specially. 288 // 289 // TODO: The following hack exists because predication passes do not 290 // correctly append imp-use operands, and readsReg() strangely returns false 291 // for predicated defs. 292 unsigned Reg = DefMI->getOperand(DefOperIdx).getReg(); 293 const MachineFunction &MF = *DefMI->getParent()->getParent(); 294 const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); 295 if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI)) 296 return computeInstrLatency(DefMI); 297 298 // If we have a per operand scheduling model, check if this def is writing 299 // an unbuffered resource. If so, it treated like an in-order cpu. 300 if (hasInstrSchedModel()) { 301 const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); 302 if (SCDesc->isValid()) { 303 for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc), 304 *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) { 305 if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered) 306 return 1; 307 } 308 } 309 } 310 return 0; 311} 312