ScheduleDAGSDNodes.cpp revision c589e03865bb31da70e0037d5c32fdaaa5f79f24
//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGSDNodes class, which is a base class used by
// SelectionDAG-based scheduling implementation classes.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "InstrEmitter.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

STATISTIC(LoadsClustered, "Number of loads clustered together");

static cl::opt<bool>
ClusterLoads("cluster-loads", cl::Hidden,
           cl::desc("Schedule nearby loads together and in order"));

ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
  : ScheduleDAG(mf) {
}

/// Run - perform scheduling.
///
void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
                             MachineBasicBlock::iterator insertPos) {
  DAG = dag;
  ScheduleDAG::Run(bb, insertPos);
}

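/// Clone - Create a new SUnit for the same node as Old, copying over its
/// scheduling-relevant properties, and mark the original as cloned so the
/// emitter knows multiple copies of the node will be emitted.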
SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
  SUnit *SU = NewSUnit(Old->getNode());
  SU->OrigNode = Old->OrigNode;
  SU->Latency = Old->Latency;
  SU->isTwoAddress = Old->isTwoAddress;
  SU->isCommutable = Old->isCommutable;
  SU->hasPhysRegDefs = Old->hasPhysRegDefs;
  SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
  Old->isCloned = true;
  return SU;
}

/// CheckForPhysRegDependency - Check if the dependency between def and use of
/// a specified operand is a physical register dependency. If so, returns the
/// register and the cost of copying the register.
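///
/// For example (schematically; the exact pattern is target dependent), a
/// machine node whose extra result maps to an implicit def, with that result
/// copied into the physical register:
///   t0: i32,i32 = ADD32rr x, y        // result 1 is the implicit EFLAGS def
///   t1: ch = CopyToReg Chain, Reg:EFLAGS, t0:1
/// Here operand 2 of the CopyToReg is a physical register dependency on
/// EFLAGS, and Cost is the copy cost of EFLAGS's register class.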
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
                                      const TargetRegisterInfo *TRI,
                                      const TargetInstrInfo *TII,
                                      unsigned &PhysReg, int &Cost) {
  if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
    return;

  unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
  if (TargetRegisterInfo::isVirtualRegister(Reg))
    return;

  unsigned ResNo = User->getOperand(2).getResNo();
  if (Def->isMachineOpcode()) {
    const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
    if (ResNo >= II.getNumDefs() &&
        II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
      PhysReg = Reg;
      const TargetRegisterClass *RC =
        TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
      Cost = RC->getCopyCost();
    }
  }
}

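/// AddFlags - Rebuild N's result and operand lists in place, appending an
/// MVT::Flag result if AddFlag is set and a trailing flag operand if Flag is
/// non-null. Schematically, this turns
///   t1: i32,ch = LOAD ...
/// into
///   t1: i32,ch,flag = LOAD ..., InFlag
/// welding it to its flag neighbors for scheduling purposes.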
static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
                     SelectionDAG *DAG) {
  SmallVector<EVT, 4> VTs;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
    VTs.push_back(N->getValueType(i));
  if (AddFlag)
    VTs.push_back(MVT::Flag);
  SmallVector<SDValue, 4> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));
  if (Flag.getNode())
    Ops.push_back(Flag);
  SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
  DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
}

/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
/// This function finds loads of the same base and different offsets. If the
/// offsets are not far apart (target specific), it adds MVT::Flag inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
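///
/// For example (schematically), two loads off the same base register
///   t1: i32,ch = MOV32rm %base, ..., 0
///   t2: i32,ch = MOV32rm %base, ..., 4
/// are morphed into
///   t1: i32,ch,flag = MOV32rm %base, ..., 0
///   t2: i32,ch      = MOV32rm %base, ..., 4, t1:2
/// so the scheduler must keep t2 immediately after t1.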
void ScheduleDAGSDNodes::ClusterNeighboringLoads() {
  SmallPtrSet<SDNode*, 16> Visited;
  SmallVector<int64_t, 4> Offsets;
  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    SDNode *Node = &*NI;
    if (!Node || !Node->isMachineOpcode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    if (!TID.mayLoad())
      continue;

    SDNode *Chain = 0;
    unsigned NumOps = Node->getNumOperands();
    if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
      Chain = Node->getOperand(NumOps-1).getNode();
    if (!Chain)
      continue;

    // Look for other loads of the same chain. Find loads that are loading from
    // the same base pointer and different offsets.
    Visited.clear();
    Offsets.clear();
    O2SMap.clear();
    bool Cluster = false;
    SDNode *Base = Node;
    for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
         I != E; ++I) {
      SDNode *User = *I;
      if (User == Node || !Visited.insert(User))
        continue;
      int64_t Offset1, Offset2;
      if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
          Offset1 == Offset2)
        // FIXME: Should be OK if the addresses are identical. But earlier
        // optimizations really should have eliminated one of the loads.
        continue;
      if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
        Offsets.push_back(Offset1);
      O2SMap.insert(std::make_pair(Offset2, User));
      Offsets.push_back(Offset2);
      if (Offset2 < Offset1)
        Base = User;
      Cluster = true;
    }

    if (!Cluster)
      continue;

    // Sort them in increasing order.
    std::sort(Offsets.begin(), Offsets.end());

    // Check if the loads are close enough.
    SmallVector<SDNode*, 4> Loads;
    unsigned NumLoads = 0;
    int64_t BaseOff = Offsets[0];
    SDNode *BaseLoad = O2SMap[BaseOff];
    Loads.push_back(BaseLoad);
    for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
      int64_t Offset = Offsets[i];
      SDNode *Load = O2SMap[Offset];
      if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
                                        NumLoads))
        break; // Stop right here. Ignore loads that are further away.
      Loads.push_back(Load);
      ++NumLoads;
    }

    if (NumLoads == 0)
      continue;

    // Cluster loads by adding MVT::Flag outputs and inputs. This also
    // ensures they are scheduled in order of increasing addresses.
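    // The result is a flag chain through the clustered loads, e.g.
    //   Loads[0] --flag--> Loads[1] --flag--> ... --flag--> Loads.back()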
    SDNode *Lead = Loads[0];
    AddFlags(Lead, SDValue(0,0), true, DAG);
    SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1);
    for (unsigned i = 1, e = Loads.size(); i != e; ++i) {
      bool OutFlag = i < e-1;
      SDNode *Load = Loads[i];
      AddFlags(Load, InFlag, OutFlag, DAG);
      if (OutFlag)
        InFlag = SDValue(Load, Load->getNumValues()-1);
      ++LoadsClustered;
    }
  }
}

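/// BuildSchedUnits - Create an SUnit for each scheduling-relevant node in the
/// DAG, folding each run of flag-connected nodes into a single SUnit.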
void ScheduleDAGSDNodes::BuildSchedUnits() {
  // During scheduling, the NodeId field of SDNode is used to map SDNodes
  // to their associated SUnits by holding SUnits table indices. A value
  // of -1 means the SDNode does not yet have an associated SUnit.
  unsigned NumNodes = 0;
  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    NI->setNodeId(-1);
    ++NumNodes;
  }

  // Reserve entries in the vector for each of the SUnits we are creating. This
  // ensures that the vector won't be reallocated, so SUnit*'s won't get
  // invalidated.
  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
  // This is a temporary workaround.
  SUnits.reserve(NumNodes * 2);

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = ForceUnitLatencies();

  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
       E = DAG->allnodes_end(); NI != E; ++NI) {
    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetConstant.
      continue;

    // If this node has already been processed, stop now.
    if (NI->getNodeId() != -1) continue;

    SUnit *NodeSUnit = NewSUnit(NI);

    // See if anything is flagged to this node; if so, add it to the list of
    // flagged nodes. Nodes can have at most one flag input and one flag
    // output. Flags are required to be the last operand and result of a node.

    // Scan up to find flagged preds.
    SDNode *N = NI;
    while (N->getNumOperands() &&
           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
      N = N->getOperand(N->getNumOperands()-1).getNode();
      assert(N->getNodeId() == -1 && "Node already inserted!");
      N->setNodeId(NodeSUnit->NodeNum);
    }

    // Scan down to find any flagged succs.
    N = NI;
    while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
      SDValue FlagVal(N, N->getNumValues()-1);

      // There are either zero or one users of the Flag result.
      bool HasFlagUse = false;
      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
           UI != E; ++UI)
        if (FlagVal.isOperandOf(*UI)) {
          HasFlagUse = true;
          assert(N->getNodeId() == -1 && "Node already inserted!");
          N->setNodeId(NodeSUnit->NodeNum);
          N = *UI;
          break;
        }
      if (!HasFlagUse) break;
    }

    // If there are flag operands involved, N is now the bottom-most node
    // of the sequence of nodes that are flagged together.
    // Update the SUnit.
    NodeSUnit->setNode(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NodeSUnit->NodeNum);

    // Assign the Latency field of NodeSUnit using target-provided information.
    if (UnitLatencies)
      NodeSUnit->Latency = 1;
    else
      ComputeLatency(NodeSUnit);
  }
}

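/// AddSchedEdges - For each SUnit, walk its group of flagged nodes and add a
/// data, chain, or physical-register SDep for every operand that is produced
/// by a different SUnit.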
void ScheduleDAGSDNodes::AddSchedEdges() {
  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = ForceUnitLatencies();

  // Pass 2: add the preds, succs, etc.
  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
    SUnit *SU = &SUnits[su];
    SDNode *MainNode = SU->getNode();

    if (MainNode->isMachineOpcode()) {
      unsigned Opc = MainNode->getMachineOpcode();
      const TargetInstrDesc &TID = TII->get(Opc);
      for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
        if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
          SU->isTwoAddress = true;
          break;
        }
      }
      if (TID.isCommutable())
        SU->isCommutable = true;
    }

    // Find all predecessors and successors of the group.
    for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
      if (N->isMachineOpcode() &&
          TII->get(N->getMachineOpcode()).getImplicitDefs()) {
        SU->hasPhysRegClobbers = true;
        unsigned NumUsed = InstrEmitter::CountResults(N);
        while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
          --NumUsed;    // Skip over unused values at the end.
        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
          SU->hasPhysRegDefs = true;
      }

      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        SDNode *OpN = N->getOperand(i).getNode();
        if (isPassiveNode(OpN)) continue;   // Not scheduled.
        SUnit *OpSU = &SUnits[OpN->getNodeId()];
        assert(OpSU && "Node has no SUnit!");
        if (OpSU == SU) continue;           // In the same group.

        EVT OpVT = N->getOperand(i).getValueType();
        assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
        bool isChain = OpVT == MVT::Other;

        unsigned PhysReg = 0;
        int Cost = 1;
        // Determine if this is a physical register dependency.
        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
        assert((PhysReg == 0 || !isChain) &&
               "Chain dependence via physreg data?");
        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, the
        // scheduler emits a copy from the physical register to a virtual
        // register unless it requires a cross-class copy (cost < 0). That
        // means we only treat "expensive to copy" register dependencies as
        // physical register dependencies. This may change in the future.
        if (Cost >= 0)
          PhysReg = 0;

        SDep dep(OpSU, isChain ? SDep::Order : SDep::Data,
                 OpSU->Latency, PhysReg);
        if (!isChain && !UnitLatencies) {
          ComputeOperandLatency(OpSU, SU, dep);
          ST.adjustSchedDependency(OpSU, SU, dep);
        }

        SU->addPred(dep);
      }
    }
  }
}

/// BuildSchedGraph - Build the SUnit graph from the SelectionDAG we are given
/// as input.  This SUnit graph is similar to the SelectionDAG, but excludes
/// nodes that aren't interesting to scheduling, and represents nodes that are
/// flagged together with a single SUnit.
void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
  // Cluster loads from "near" addresses into combined SUnits.
  if (ClusterLoads)
    ClusterNeighboringLoads();
  // Populate the SUnits array.
  BuildSchedUnits();
  // Compute all the scheduling dependencies between nodes.
  AddSchedEdges();
}

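/// ComputeLatency - Set the SUnit's latency to the sum of the itinerary stage
/// latencies of all machine nodes flagged into it, since the whole flagged
/// group issues as a single scheduling unit.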
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();

  // Compute the latency for the node.  We use the sum of the latencies for
  // all nodes flagged together into this SUnit.
  SU->Latency = 0;
  for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
    if (N->isMachineOpcode()) {
      SU->Latency += InstrItins.
        getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
    }
}

void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
  if (!SU->getNode()) {
    dbgs() << "PHYS REG COPY\n";
    return;
  }

  SU->getNode()->dump(DAG);
  dbgs() << "\n";
  SmallVector<SDNode *, 4> FlaggedNodes;
  for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
    FlaggedNodes.push_back(N);
  while (!FlaggedNodes.empty()) {
    dbgs() << "    ";
    FlaggedNodes.back()->dump(DAG);
    dbgs() << "\n";
    FlaggedNodes.pop_back();
  }
}

/// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGSDNodes::
EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
  InstrEmitter Emitter(BB, InsertPos);
  DenseMap<SDValue, unsigned> VRBaseMap;
  DenseMap<SUnit*, unsigned> CopyVRBaseMap;
  for (unsigned i = 0, e = Sequence.size(); i != e; ++i) {
    SUnit *SU = Sequence[i];
    if (!SU) {
      // Null SUnit* is a noop.
      EmitNoop();
      continue;
    }

    // For pre-regalloc scheduling, create instructions corresponding to the
    // SDNode and any flagged SDNodes and append them to the block.
    if (!SU->getNode()) {
      // Emit a copy.
      EmitPhysRegCopy(SU, CopyVRBaseMap);
      continue;
    }

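    // getFlaggedNode walks flag operands, so FlaggedNodes is ordered from the
    // node nearest SU->getNode() up to the top of the flag chain. Emitting
    // from the back therefore emits the group top-down, with the main node
    // last.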
    SmallVector<SDNode *, 4> FlaggedNodes;
    for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
         N = N->getFlaggedNode())
      FlaggedNodes.push_back(N);
    while (!FlaggedNodes.empty()) {
      Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
                       VRBaseMap, EM);
      FlaggedNodes.pop_back();
    }
    Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
                     VRBaseMap, EM);
  }

  BB = Emitter.getBlock();
  InsertPos = Emitter.getInsertPos();
  return BB;
}
442