ScheduleDAGRRList.cpp revision 1cc3984148be113c6e5e470f23c9ddbd37679c5f
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);
static RegisterScheduler
  hybridListDAGScheduler("hybrid",
                         "Bottom-up rr list scheduling which avoids stalls for "
                         "long latency instructions",
                         createHybridListDAGScheduler);
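
// The schedulers above are registered with LLVM's pre-RA scheduler registry
// and selected by name. As an illustrative example (the exact flag plumbing
// lives in SelectionDAGISel's command-line handling, not here), an
// invocation such as
//   llc -pre-RA-sched=source foo.bc
// would pick the source-order variant registered above.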

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf,
                    bool isbottomup, bool needlatency,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
      AvailableQueue(availqueue), Topo(SUnits) {
  }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void CapturePred(SDep *PredEdge);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool ForceUnitLatencies() const {
    return !NeedLatency;
  }
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs() << "********** List Scheduling **********\n");

  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!ForceUnitLatencies()) {
    // Update the predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all of the predecessor's successors are scheduled, it is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;
    AvailableQueue->push(PredSU);
  }
}

void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (!LiveRegDefs[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegDefs[I->getReg()] = I->getSUnit();
        LiveRegCycles[I->getReg()] = CurCycle;
      }
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
#endif

  // FIXME: Handle noop hazard.
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleasePredecessors(SU, CurCycle);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[I->getReg()] == SU &&
               "Physical register dependency violated?");
        --NumLiveRegs;
        LiveRegDefs[I->getReg()] = NULL;
        LiveRegCycles[I->getReg()] = 0;
      }
    }
  }

  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule. Update it and
/// its predecessors' state to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  AvailableQueue->UnscheduledNode(SU);

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegCycles[I->getReg()] = 0;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()]) {
        LiveRegDefs[I->getReg()] = SU;
        ++NumLiveRegs;
      }
      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
    }
  }

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
                                          unsigned &CurCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getFlaggedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  if (SU->getNode()->getFlaggedNode())
    return NULL;

  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Flag)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Flag)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location that produces the same type of value
    // but has different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
      ComputeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                               const TargetRegisterClass *DestRC,
                                               const TargetRegisterClass *SrcRC,
                                               SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
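/// Illustrative (hypothetical) example: for a machine node with two explicit
/// defs whose implicit-def list is {EFLAGS, EAX}, querying EAX skips past
/// EFLAGS and returns N->getValueType(3).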
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  bool Added = false;
  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
    if (RegAdded.insert(Reg)) {
      LRegs.push_back(Reg);
      Added = true;
    }
  }
  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
      if (RegAdded.insert(*Alias)) {
        LRegs.push_back(*Alias);
        Added = true;
      }
    }
  return Added;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specified node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs){
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep())
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
        --NumOps;  // Ignore the flag operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }
  return !LRegs.empty();
}


/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;

  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU, CurCycle);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    LRegsMap.clear();
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      Delayed = true;
      LRegsMap.insert(std::make_pair(CurSU, LRegs));

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
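    // (Illustrative scenario, e.g. on x86: every available node would
    // clobber a physical register such as EFLAGS that is still "live" for
    // an unscheduled use, so we backtrack to the conflicting def, clone it,
    // or copy the value into another register class to make progress.)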
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU))  {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false, /*isArtificial=*/true));
          // If one or more successors has been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. If it's too expensive to copy the value, then try
        // to duplicate the nodes that produce these "too expensive to copy"
        // values to break the dependency. In case even that doesn't work,
        // insert cross class copies.
        // If it's not too expensive, i.e. cost != -1, issue copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
        const TargetRegisterClass *RC =
          TRI->getPhysicalRegisterRegClass(Reg, VT);
        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

        // If the cross-copy register class is null, then it must be possible
        // to copy the value directly. Do not try to duplicate the def.
        SUnit *NewDef = 0;
        if (DestRC)
          NewDef = CopyAndMoveSuccessors(LRDef);
        else
          DestRC = RC;
        if (!NewDef) {
          // Issue copies, these can be expensive cross register class copies.
          SmallVector<SUnit*, 2> Copies;
          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
                       << " to SU #" << Copies.front()->NodeNum << "\n");
          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false,
                              /*isArtificial=*/true));
          NewDef = Copies.back();
        }

        DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
                     << " to SU #" << TrySU->NodeNum << "\n");
        LiveRegDefs[Reg] = NewDef;
        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                             /*Reg=*/0, /*isNormalMemory=*/false,
                             /*isMustAlias=*/false,
                             /*isArtificial=*/true));
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      assert(CurSU && "Unable to resolve live physical register dependencies!");
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (CurSU)
      ScheduleNodeBottomUp(CurSU, CurCycle);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

  // Reverse the order since we scheduled the DAG bottom-up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  AvailableQueue->setCurCycle(CurCycle);

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU, CurCycle);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}


//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi-Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
      : SPQ(spq) {}
    src_ls_rr_sort(const src_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
      : SPQ(spq) {}
    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number for a node.
/// A smaller number means higher scheduling priority.
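/// As a rough illustration of the recurrence below: a leaf gets the number
/// 1; a node whose data predecessors carry numbers {2, 1} takes the max, 2;
/// a node whose predecessors tie at {1, 1} gets 1 + 1 = 2, since each extra
/// predecessor that matches the max needs one more register to keep an
/// intermediate value live.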
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

namespace {
  template<class SF>
  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
    PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
    unsigned CurQueueId;

  protected:
    // SUnits - The SUnits for the current graph.
    std::vector<SUnit> *SUnits;

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ScheduleDAGRRList *scheduleDAG;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

  public:
    RegReductionPriorityQueue(const TargetInstrInfo *tii,
                              const TargetRegisterInfo *tri)
      : Queue(SF(this)), CurQueueId(0),
        TII(tii), TRI(tri), scheduleDAG(NULL) {}

    void initNodes(std::vector<SUnit> &sunits) {
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Reroute edges to nodes with multiple uses.
      PrescheduleNodesWithMultipleUses();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      unsigned SUSize = SethiUllmanNumbers.size();
      if (SUnits->size() > SUSize)
        SethiUllmanNumbers.resize(SUSize*2, 0);
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
          Opc == TargetOpcode::SUBREG_TO_REG ||
          Opc == TargetOpcode::INSERT_SUBREG)
        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
        // close to their uses to facilitate coalescing.
        return 0;
1091        // If SU does not have a register use, i.e. it doesn't produce a value
1092        // that would be consumed (e.g. store), then it terminates a chain of
1093        // computation.  Give it a large SethiUllman number so it will be
1094        // scheduled right before its predecessors that it doesn't lengthen
1095        // their live ranges.
1096        return 0xffff;
1097      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1098        // If SU does not have a register def, schedule it close to its uses
1099        // because it does not lengthen any live ranges.
1100        return 0;
1101      return SethiUllmanNumbers[SU->NodeNum];
1102    }

    unsigned getNodeOrdering(const SUnit *SU) const {
      return scheduleDAG->DAG->GetOrdering(SU->getNode());
    }

    unsigned size() const { return Queue.size(); }

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      assert(!U->NodeQueueId && "Node in the queue already");
      U->NodeQueueId = ++CurQueueId;
      Queue.push(U);
    }

    void push_all(const std::vector<SUnit *> &Nodes) {
      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
        push(Nodes[i]);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      SUnit *V = Queue.top();
      Queue.pop();
      V->NodeQueueId = 0;
      return V;
    }

    void remove(SUnit *SU) {
      assert(!Queue.empty() && "Queue is empty!");
      assert(SU->NodeQueueId != 0 && "Not in queue!");
      Queue.erase_one(SU);
      SU->NodeQueueId = 0;
    }

    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
      scheduleDAG = scheduleDag;
    }

  protected:
    bool canClobber(const SUnit *SU, const SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void PrescheduleNodesWithMultipleUses();
    void CalculateSethiUllmanNumbers();
  };

  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
    BURegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<td_ls_rr_sort>
    TDRegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<src_ls_rr_sort>
    SrcRegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
    HybridBURRPriorityQueue;
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

template <typename RRSort>
static bool BURRSort(const SUnit *left, const SUnit *right,
                     const RegReductionPriorityQueue<RRSort> *SPQ) {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority != RPriority)
    return LPriority > RPriority;

  // Try to schedule def + use closer together when the Sethi-Ullman numbers
  // are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  if (left->getHeight() != right->getHeight())
    return left->getHeight() > right->getHeight();

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  return BURRSort(left, right, SPQ);
}

// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer the node with the lower non-zero source order number; an order
  // of zero means no source order was recorded for the node.
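  // e.g. with source orders (2, 5) the order-5 node is popped first, so
  // that after the bottom-up sequence is reversed, the order-2 node lands
  // earlier in the final schedule (an illustration of the intent).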
  if ((LOrder || ROrder) && LOrder != ROrder)
    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);

  return BURRSort(left, right, SPQ);
}

bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
  bool LStall = left->SchedulingPref == Sched::Latency &&
    SPQ->getCurCycle() < left->getHeight();
  bool RStall = right->SchedulingPref == Sched::Latency &&
    SPQ->getCurCycle() < right->getHeight();
  // If scheduling one of the nodes will cause a pipeline stall, delay it.
  // If scheduling both nodes will cause pipeline stalls, sort them
  // according to their height.
  // If neither will cause a pipeline stall, try to reduce register pressure.
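  // e.g. at CurCycle 3, a Sched::Latency node of height 5 would stall
  // (3 < 5); if the other candidate is stall-free, the stall-free one is
  // popped first (an illustration, assuming the usual priority-queue order).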
  if (LStall) {
    if (!RStall)
      return true;
    if (left->getHeight() != right->getHeight())
      return left->getHeight() > right->getHeight();
  } else if (RStall)
    return false;
  return BURRSort(left, right, SPQ);
}

template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}

/// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
static bool hasCopyToRegUse(const SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
      return true;
  }
  return false;
}

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getFlaggedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    if (!SUImpDefs)
      return false;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Flag || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}

/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///  |
/// ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
template<class SF>
void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
         EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires additional
    // support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

1420
1421    // Perform checks on the successors of PredSU.
1422    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
1423         EE = PredSU->Succs.end(); II != EE; ++II) {
1424      SUnit *PredSuccSU = II->getSUnit();
1425      if (PredSuccSU == SU) continue;
1426      // If PredSU has another successor with no data successors, for
1427      // now don't attempt to choose either over the other.
1428      if (PredSuccSU->NumSuccs == 0)
1429        goto outer_loop_continue;
1430      // Don't break physical register dependencies.
1431      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
1432        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
1433          goto outer_loop_continue;
1434      // Don't introduce graph cycles.
1435      if (scheduleDAG->IsReachable(SU, PredSuccSU))
1436        goto outer_loop_continue;
1437    }
1438
1439    // Ok, the transformation is safe and the heuristics suggest it is
1440    // profitable. Update the graph.
1441    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
1442                 << " next to PredSU #" << PredSU->NodeNum
1443                 << " to guide scheduling in the presence of multiple uses\n");
1444    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
1445      SDep Edge = PredSU->Succs[i];
1446      assert(!Edge.isAssignedRegDep());
1447      SUnit *SuccSU = Edge.getSUnit();
1448      if (SuccSU != SU) {
1449        Edge.setSUnit(PredSU);
1450        scheduleDAG->RemovePred(SuccSU, Edge);
1451        scheduleDAG->AddPred(SU, Edge);
1452        Edge.setSUnit(SU);
1453        scheduleDAG->AddPred(SuccSU, Edge);
1454        --i;
1455      }
1456    }
1457  outer_loop_continue:;
1458  }
1459}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
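/// (Illustrative example: given a two-address "t2 = ADD t1, x" where t2 is
/// tied to t1, and another node that also reads t1, the pseudo edge makes
/// the other reader execute before the ADD, so t1 need not be copied.)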
template<class SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        if ((!canClobber(SuccSU, DUSU) ||
             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}
1542
1543/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1544/// scheduling units.
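///
/// Roughly speaking, a node's Sethi-Ullman number approximates the number of
/// registers needed to evaluate the subtree rooted at it. In the classic
/// scheme, a binary node whose operands have numbers l and r is labeled
/// max(l, r) when l != r and l + 1 when l == r; evaluating the
/// higher-numbered subtree first minimizes register pressure.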
template<class SF>
void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}

/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
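///
/// It is called below with a limit of 1: since an unscheduled SU is itself
/// counted as a predecessor of each of its successors, a result of exactly 1
/// means SU has a single successor and is that successor's only unscheduled
/// predecessor, so scheduling SU immediately makes the successor available.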
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                    unsigned Limit) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    const SUnit *SuccSU = I->getSUnit();
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->getSUnit();
      if (!PredSU->isScheduled)
        if (++Sum > Limit)
          return Sum;
    }
  }
  return Sum;
}

// Top down
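// Note: like the other sort functors in this file, this is a "less than"
// comparison for the priority queue; returning true means 'right' is
// preferred over 'left', i.e. 'right' is popped from the queue first.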
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus != RPriority+RBonus)
    return LPriority+LBonus < RPriority+RBonus;

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  if (left->NumSuccsLeft != right->NumSuccsLeft)
    return left->NumSuccsLeft > right->NumSuccsLeft;

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//
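
// Each of the factories below corresponds to one of the RegisterScheduler
// entries registered at the top of this file, so e.g. the bottom-up register
// reduction scheduler can be requested on the llc command line with
// -pre-RA-sched=list-burr.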

llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(*IS->MF, /*isBottomUp=*/true,
                          /*NeedLatency=*/false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(*IS->MF, /*isBottomUp=*/false,
                          /*NeedLatency=*/false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(*IS->MF, /*isBottomUp=*/true,
                          /*NeedLatency=*/false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(*IS->MF, /*isBottomUp=*/true,
                          /*NeedLatency=*/true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}