1//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interface for the MachineTraceMetrics analysis pass
11// that estimates CPU resource usage and critical data dependency paths through
12// preferred traces. This is useful for super-scalar CPUs where execution speed
13// can be limited both by data dependencies and by limited execution resources.
14//
15// Out-of-order CPUs will often be executing instructions from multiple basic
16// blocks at the same time. This makes it difficult to estimate the resource
17// usage accurately in a single basic block. Resources can be estimated better
18// by looking at a trace through the current basic block.
19//
20// For every block, the MachineTraceMetrics pass will pick a preferred trace
21// that passes through the block. The trace is chosen based on loop structure,
22// branch probabilities, and resource usage. The intention is to pick likely
23// traces that would be the most affected by code transformations.
24//
25// It is expensive to compute a full arbitrary trace for every block, so to
26// save some computations, traces are chosen to be convergent. This means that
27// if the traces through basic blocks A and B ever cross when moving away from
28// A and B, they never diverge again. This applies in both directions - If the
29// traces meet above A and B, they won't diverge when going further back.
30//
31// Traces tend to align with loops. The trace through a block in an inner loop
32// will begin at the loop entry block and end at a back edge. If there are
33// nested loops, the trace may begin and end at those instead.
34//
35// For each trace, we compute the critical path length, which is the number of
36// cycles required to execute the trace when execution is limited by data
37// dependencies only. We also compute the resource height, which is the number
38// of cycles required to execute all instructions in the trace when ignoring
39// data dependencies.
40//
41// Every instruction in the current block has a slack - the number of cycles
42// execution of the instruction can be delayed without extending the critical
43// path.
44//
45//===----------------------------------------------------------------------===//
46
47#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
48#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
49
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
53
54namespace llvm {
55
56class InstrItineraryData;
57class MachineBasicBlock;
58class MachineInstr;
59class MachineLoop;
60class MachineLoopInfo;
61class MachineRegisterInfo;
62class TargetInstrInfo;
63class TargetRegisterInfo;
64class raw_ostream;
65
66class MachineTraceMetrics : public MachineFunctionPass {
67  const MachineFunction *MF;
68  const TargetInstrInfo *TII;
69  const TargetRegisterInfo *TRI;
70  const InstrItineraryData *ItinData;
71  const MachineRegisterInfo *MRI;
72  const MachineLoopInfo *Loops;
73
74public:
75  class Ensemble;
76  class Trace;
77  static char ID;
78  MachineTraceMetrics();
79  void getAnalysisUsage(AnalysisUsage&) const;
80  bool runOnMachineFunction(MachineFunction&);
81  void releaseMemory();
82  void verifyAnalysis() const;
83
84  friend class Ensemble;
85  friend class Trace;
86
87  /// Per-basic block information that doesn't depend on the trace through the
88  /// block.
89  struct FixedBlockInfo {
90    /// The number of non-trivial instructions in the block.
91    /// Doesn't count PHI and COPY instructions that are likely to be removed.
92    unsigned InstrCount;
93
94    /// True when the block contains calls.
95    bool HasCalls;
96
97    FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {}
98
99    /// Returns true when resource information for this block has been computed.
100    bool hasResources() const { return InstrCount != ~0u; }
101
102    /// Invalidate resource information.
103    void invalidate() { InstrCount = ~0u; }
104  };
105
106  /// Get the fixed resource information about MBB. Compute it on demand.
107  const FixedBlockInfo *getResources(const MachineBasicBlock*);
108
109  /// A virtual register or regunit required by a basic block or its trace
110  /// successors.
111  struct LiveInReg {
112    /// The virtual register required, or a register unit.
113    unsigned Reg;
114
115    /// For virtual registers: Minimum height of the defining instruction.
116    /// For regunits: Height of the highest user in the trace.
117    unsigned Height;
118
119    LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
120  };
121
122  /// Per-basic block information that relates to a specific trace through the
123  /// block. Convergent traces means that only one of these is required per
124  /// block in a trace ensemble.
125  struct TraceBlockInfo {
126    /// Trace predecessor, or NULL for the first block in the trace.
127    /// Valid when hasValidDepth().
128    const MachineBasicBlock *Pred;
129
130    /// Trace successor, or NULL for the last block in the trace.
131    /// Valid when hasValidHeight().
132    const MachineBasicBlock *Succ;
133
134    /// The block number of the head of the trace. (When hasValidDepth()).
135    unsigned Head;
136
137    /// The block number of the tail of the trace. (When hasValidHeight()).
138    unsigned Tail;
139
140    /// Accumulated number of instructions in the trace above this block.
141    /// Does not include instructions in this block.
142    unsigned InstrDepth;
143
144    /// Accumulated number of instructions in the trace below this block.
145    /// Includes instructions in this block.
146    unsigned InstrHeight;
147
148    TraceBlockInfo() :
149      Pred(0), Succ(0),
150      InstrDepth(~0u), InstrHeight(~0u),
151      HasValidInstrDepths(false), HasValidInstrHeights(false) {}
152
153    /// Returns true if the depth resources have been computed from the trace
154    /// above this block.
155    bool hasValidDepth() const { return InstrDepth != ~0u; }
156
157    /// Returns true if the height resources have been computed from the trace
158    /// below this block.
159    bool hasValidHeight() const { return InstrHeight != ~0u; }
160
161    /// Invalidate depth resources when some block above this one has changed.
162    void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
163
164    /// Invalidate height resources when a block below this one has changed.
165    void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
166
167    // Data-dependency-related information. Per-instruction depth and height
168    // are computed from data dependencies in the current trace, using
169    // itinerary data.
170
171    /// Instruction depths have been computed. This implies hasValidDepth().
172    bool HasValidInstrDepths;
173
174    /// Instruction heights have been computed. This implies hasValidHeight().
175    bool HasValidInstrHeights;
176
177    /// Critical path length. This is the number of cycles in the longest data
178    /// dependency chain through the trace. This is only valid when both
179    /// HasValidInstrDepths and HasValidInstrHeights are set.
180    unsigned CriticalPath;
181
182    /// Live-in registers. These registers are defined above the current block
183    /// and used by this block or a block below it.
184    /// This does not include PHI uses in the current block, but it does
185    /// include PHI uses in deeper blocks.
186    SmallVector<LiveInReg, 4> LiveIns;
187
188    void print(raw_ostream&) const;
189  };
190
191  /// InstrCycles represents the cycle height and depth of an instruction in a
192  /// trace.
193  struct InstrCycles {
194    /// Earliest issue cycle as determined by data dependencies and instruction
195    /// latencies from the beginning of the trace. Data dependencies from
196    /// before the trace are not included.
197    unsigned Depth;
198
199    /// Minimum number of cycles from this instruction is issued to the of the
200    /// trace, as determined by data dependencies and instruction latencies.
201    unsigned Height;
202  };
203
204  /// A trace represents a plausible sequence of executed basic blocks that
205  /// passes through the current basic block one. The Trace class serves as a
206  /// handle to internal cached data structures.
207  class Trace {
208    Ensemble &TE;
209    TraceBlockInfo &TBI;
210
211    unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
212
213  public:
214    explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
215    void print(raw_ostream&) const;
216
217    /// Compute the total number of instructions in the trace.
218    unsigned getInstrCount() const {
219      return TBI.InstrDepth + TBI.InstrHeight;
220    }
221
222    /// Return the resource depth of the top/bottom of the trace center block.
223    /// This is the number of cycles required to execute all instructions from
224    /// the trace head to the trace center block. The resource depth only
225    /// considers execution resources, it ignores data dependencies.
226    /// When Bottom is set, instructions in the trace center block are included.
227    unsigned getResourceDepth(bool Bottom) const;
228
229    /// Return the resource length of the trace. This is the number of cycles
230    /// required to execute the instructions in the trace if they were all
231    /// independent, exposing the maximum instruction-level parallelism.
232    ///
233    /// Any blocks in Extrablocks are included as if they were part of the
234    /// trace.
235    unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks =
236                               ArrayRef<const MachineBasicBlock*>()) const;
237
238    /// Return the length of the (data dependency) critical path through the
239    /// trace.
240    unsigned getCriticalPath() const { return TBI.CriticalPath; }
241
242    /// Return the depth and height of MI. The depth is only valid for
243    /// instructions in or above the trace center block. The height is only
244    /// valid for instructions in or below the trace center block.
245    InstrCycles getInstrCycles(const MachineInstr *MI) const {
246      return TE.Cycles.lookup(MI);
247    }
248
249    /// Return the slack of MI. This is the number of cycles MI can be delayed
250    /// before the critical path becomes longer.
251    /// MI must be an instruction in the trace center block.
252    unsigned getInstrSlack(const MachineInstr *MI) const;
253
254    /// Return the Depth of a PHI instruction in a trace center block successor.
255    /// The PHI does not have to be part of the trace.
256    unsigned getPHIDepth(const MachineInstr *PHI) const;
257  };
258
259  /// A trace ensemble is a collection of traces selected using the same
260  /// strategy, for example 'minimum resource height'. There is one trace for
261  /// every block in the function.
262  class Ensemble {
263    SmallVector<TraceBlockInfo, 4> BlockInfo;
264    DenseMap<const MachineInstr*, InstrCycles> Cycles;
265    friend class Trace;
266
267    void computeTrace(const MachineBasicBlock*);
268    void computeDepthResources(const MachineBasicBlock*);
269    void computeHeightResources(const MachineBasicBlock*);
270    unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
271    void computeInstrDepths(const MachineBasicBlock*);
272    void computeInstrHeights(const MachineBasicBlock*);
273    void addLiveIns(const MachineInstr *DefMI,
274                    ArrayRef<const MachineBasicBlock*> Trace);
275
276  protected:
277    MachineTraceMetrics &MTM;
278    virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
279    virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
280    explicit Ensemble(MachineTraceMetrics*);
281    const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
282    const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
283    const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
284
285  public:
286    virtual ~Ensemble();
287    virtual const char *getName() const =0;
288    void print(raw_ostream&) const;
289    void invalidate(const MachineBasicBlock *MBB);
290    void verify() const;
291
292    /// Get the trace that passes through MBB.
293    /// The trace is computed on demand.
294    Trace getTrace(const MachineBasicBlock *MBB);
295  };
296
297  /// Strategies for selecting traces.
298  enum Strategy {
299    /// Select the trace through a block that has the fewest instructions.
300    TS_MinInstrCount,
301
302    TS_NumStrategies
303  };
304
305  /// Get the trace ensemble representing the given trace selection strategy.
306  /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
307  /// and valid for the lifetime of the analysis pass.
308  Ensemble *getEnsemble(Strategy);
309
310  /// Invalidate cached information about MBB. This must be called *before* MBB
311  /// is erased, or the CFG is otherwise changed.
312  ///
313  /// This invalidates per-block information about resource usage for MBB only,
314  /// and it invalidates per-trace information for any trace that passes
315  /// through MBB.
316  ///
317  /// Call Ensemble::getTrace() again to update any trace handles.
318  void invalidate(const MachineBasicBlock *MBB);
319
320private:
321  // One entry per basic block, indexed by block number.
322  SmallVector<FixedBlockInfo, 4> BlockInfo;
323
324  // One ensemble per strategy.
325  Ensemble* Ensembles[TS_NumStrategies];
326};
327
328inline raw_ostream &operator<<(raw_ostream &OS,
329                               const MachineTraceMetrics::Trace &Tr) {
330  Tr.print(OS);
331  return OS;
332}
333
334inline raw_ostream &operator<<(raw_ostream &OS,
335                               const MachineTraceMetrics::Ensemble &En) {
336  En.print(OS);
337  return OS;
338}
339} // end namespace llvm
340
341#endif
342